mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Xtn.Dpl: Change dynamicPageList to always get categories from cache [#556]
This commit is contained in:
parent
42842f0bcc
commit
2598dee844
@ -49,27 +49,30 @@ public class Xoctg_catpage_mgr implements Gfo_invk {
|
|||||||
wiki.App().Cfg().Bind_many_wiki(this, wiki, Cfg__missing_class);
|
wiki.App().Cfg().Bind_many_wiki(this, wiki, Cfg__missing_class);
|
||||||
}
|
}
|
||||||
public void Free_mem_all() {cache.Clear();}
|
public void Free_mem_all() {cache.Clear();}
|
||||||
public Xoctg_catpage_ctg Get_or_load_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) {
|
public Xoctg_catpage_ctg Get_by_cache_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) {
|
||||||
// load categories from cat dbs; exit if not found
|
// DynamicPageList categories only (b/c of many members); for regular catpages, always retrieve on demand
|
||||||
Xoctg_catpage_ctg ctg = (Xoctg_catpage_ctg)cache.Get_by(cat_ttl.Full_db());
|
Xoctg_catpage_ctg ctg = (Xoctg_catpage_ctg)cache.Get_by(cat_ttl.Full_db());
|
||||||
if (ctg == null) {
|
if (ctg == null) {
|
||||||
if (gplx.core.envs.Env_.Mode_testing()) return null; // needed for dpl test
|
if (gplx.core.envs.Env_.Mode_testing()) return null; // needed for dpl test
|
||||||
synchronized (thread_lock) { // LOCK:used by multiple wrks; DATE:2016-09-12
|
ctg = Get_by_db_or_null(page_ttl, catpage_url, cat_ttl, limit);
|
||||||
ctg = loader.Load_ctg_or_null(wiki, page_ttl, this, catpage_url, cat_ttl, limit);
|
|
||||||
}
|
|
||||||
if (ctg == null) return null; // not in cache or db; exit
|
if (ctg == null) return null; // not in cache or db; exit
|
||||||
if (limit == Int_.Max_value) // only add to cache if Max_val (DynamicPageList); for regular catpages, always retrieve on demand
|
|
||||||
cache.Add(cat_ttl.Full_db(), ctg);
|
cache.Add(cat_ttl.Full_db(), ctg);
|
||||||
}
|
}
|
||||||
return ctg;
|
return ctg;
|
||||||
}
|
}
|
||||||
|
public Xoctg_catpage_ctg Get_by_db_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) {
|
||||||
|
// load categories from cat dbs; exit if not found
|
||||||
|
synchronized (thread_lock) { // LOCK:used by multiple wrks; DATE:2016-09-12
|
||||||
|
return loader.Load_ctg_or_null(wiki, page_ttl, this, catpage_url, cat_ttl, limit);
|
||||||
|
}
|
||||||
|
}
|
||||||
public void Write_catpage(Bry_bfr bfr, Xoa_page page) {
|
public void Write_catpage(Bry_bfr bfr, Xoa_page page) {
|
||||||
try {
|
try {
|
||||||
// get catpage_url
|
// get catpage_url
|
||||||
Xoctg_catpage_url catpage_url = Xoctg_catpage_url_parser.Parse(page.Url());
|
Xoctg_catpage_url catpage_url = Xoctg_catpage_url_parser.Parse(page.Url());
|
||||||
|
|
||||||
// load categories from cat dbs; exit if not found
|
// load categories from cat dbs; exit if not found
|
||||||
Xoctg_catpage_ctg ctg = Get_or_load_or_null(page.Ttl().Page_db(), catpage_url, page.Ttl(), grp_max);
|
Xoctg_catpage_ctg ctg = Get_by_db_or_null(page.Ttl().Page_db(), catpage_url, page.Ttl(), grp_max);
|
||||||
if (ctg == null) return;
|
if (ctg == null) return;
|
||||||
|
|
||||||
// write html
|
// write html
|
||||||
|
@ -45,8 +45,7 @@ class Xoctg_catlink_loader {
|
|||||||
|
|
||||||
// sort and reduce list to 200 total
|
// sort and reduce list to 200 total
|
||||||
catlink_list.Sort_by(new Xoctg_catlink_sorter(url_is_from));
|
catlink_list.Sort_by(new Xoctg_catlink_sorter(url_is_from));
|
||||||
Xoctg_page_loader catlink_loader = new Xoctg_page_loader(wiki);
|
Ordered_hash catlink_hash = Ordered_hash_.New();
|
||||||
Ordered_hash catlink_hash = catlink_loader.Hash();
|
|
||||||
int catlink_list_len = catlink_list.Len();
|
int catlink_list_len = catlink_list.Len();
|
||||||
int max = catlink_list_len < limit ? catlink_list_len : limit;
|
int max = catlink_list_len < limit ? catlink_list_len : limit;
|
||||||
for (int i = 0; i < max; i++) {
|
for (int i = 0; i < max; i++) {
|
||||||
@ -55,7 +54,6 @@ class Xoctg_catlink_loader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// load ns / ttl for each catlink
|
// load ns / ttl for each catlink
|
||||||
page_tbl.Select_in__id(catlink_loader);
|
|
||||||
Xoctg_catpage_grp grp = rv.Grp_by_tid(grp_tid);
|
Xoctg_catpage_grp grp = rv.Grp_by_tid(grp_tid);
|
||||||
grp.Itms_((Xoctg_catpage_itm[])catlink_hash.To_ary_and_clear(Xoctg_catpage_itm.class));
|
grp.Itms_((Xoctg_catpage_itm[])catlink_hash.To_ary_and_clear(Xoctg_catpage_itm.class));
|
||||||
|
|
||||||
@ -93,7 +91,10 @@ class Xoctg_catlink_loader {
|
|||||||
, ", cl_type_id"
|
, ", cl_type_id"
|
||||||
, ", {0} AS cl_sortkey"
|
, ", {0} AS cl_sortkey"
|
||||||
, ", {1} AS cl_sortkey_prefix"
|
, ", {1} AS cl_sortkey_prefix"
|
||||||
|
, ", p.page_namespace"
|
||||||
|
, ", p.page_title"
|
||||||
, "FROM <link_db_{3}>cat_link cl{2}"
|
, "FROM <link_db_{3}>cat_link cl{2}"
|
||||||
|
, " LEFT JOIN <page_db>page p ON p.page_id = cl{2}.cl_from"
|
||||||
), sortkey_col, sortkey_prefix_fld, sortkey_join, link_db_id);
|
), sortkey_col, sortkey_prefix_fld, sortkey_join, link_db_id);
|
||||||
bfr.Add_str_u8_fmt(String_.Concat_lines_nl
|
bfr.Add_str_u8_fmt(String_.Concat_lines_nl
|
||||||
( "WHERE cl_to_id = {0}"
|
( "WHERE cl_to_id = {0}"
|
||||||
@ -108,12 +109,15 @@ class Xoctg_catlink_loader {
|
|||||||
}
|
}
|
||||||
private void Load_catlinks(List_adp catlink_list, String sql) {
|
private void Load_catlinks(List_adp catlink_list, String sql) {
|
||||||
Db_rdr rdr = Db_rdr_.Empty;
|
Db_rdr rdr = Db_rdr_.Empty;
|
||||||
|
int count = 0;
|
||||||
try {
|
try {
|
||||||
attach_mgr.Attach();
|
attach_mgr.Attach();
|
||||||
rdr = attach_mgr.Conn_main().Stmt_sql(sql).Exec_select__rls_auto();
|
rdr = attach_mgr.Conn_main().Stmt_sql(sql).Exec_select__rls_auto();
|
||||||
while (rdr.Move_next()) {
|
while (rdr.Move_next()) {
|
||||||
Xoctg_catpage_itm itm = Xoctg_catpage_itm.New_by_rdr(rdr, version);
|
Xoctg_catpage_itm itm = Xoctg_catpage_itm.New_by_rdr(wiki, rdr, version);
|
||||||
catlink_list.Add(itm);
|
catlink_list.Add(itm);
|
||||||
|
if (count >= 1000 && (count % 1000) == 0) Gfo_usr_dlg_.Instance.Prog_many("", "", "loading cat_links: count=~{0}", count);
|
||||||
|
count++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
@ -197,6 +201,10 @@ class Xoctg_catlink_loader {
|
|||||||
version = 3;
|
version = 3;
|
||||||
db_1st = cat_core_conn;
|
db_1st = cat_core_conn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// add page_db
|
||||||
|
db_list.Add(new Db_attach_itm("page_db", page_tbl.Conn()));
|
||||||
|
|
||||||
Db_attach_mgr attach_mgr = new Db_attach_mgr(db_1st, (Db_attach_itm[])db_list.To_ary_and_clear(Db_attach_itm.class));
|
Db_attach_mgr attach_mgr = new Db_attach_mgr(db_1st, (Db_attach_itm[])db_list.To_ary_and_clear(Db_attach_itm.class));
|
||||||
return new Xoctg_catlink_loader(wiki, catpage_mgr, page_tbl, version, link_dbs_len, attach_mgr);
|
return new Xoctg_catlink_loader(wiki, catpage_mgr, page_tbl, version, link_dbs_len, attach_mgr);
|
||||||
}
|
}
|
||||||
|
@ -1,40 +0,0 @@
|
|||||||
/*
|
|
||||||
XOWA: the XOWA Offline Wiki Application
|
|
||||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
|
||||||
|
|
||||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
|
||||||
or alternatively under the terms of the Apache License Version 2.0.
|
|
||||||
|
|
||||||
You may use XOWA according to either of these licenses as is most appropriate
|
|
||||||
for your project on a case-by-case basis.
|
|
||||||
|
|
||||||
The terms of each license can be found in the source code repository:
|
|
||||||
|
|
||||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
|
||||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
|
||||||
*/
|
|
||||||
package gplx.xowa.addons.wikis.ctgs.htmls.catpages.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.ctgs.*; import gplx.xowa.addons.wikis.ctgs.htmls.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.*;
|
|
||||||
import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*;
|
|
||||||
import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*;
|
|
||||||
public class Xoctg_page_loader implements Select_in_cbk {
|
|
||||||
private final Xow_wiki wiki;
|
|
||||||
private final Ordered_hash hash = Ordered_hash_.New();
|
|
||||||
public Xoctg_page_loader(Xow_wiki wiki) {this.wiki = wiki;}
|
|
||||||
public Ordered_hash Hash() {return hash;}
|
|
||||||
public int Hash_max() {return hash.Len();}
|
|
||||||
public void Write_sql(Bry_bfr bfr, int idx) {
|
|
||||||
Xoctg_catpage_itm itm = (Xoctg_catpage_itm)hash.Get_at(idx);
|
|
||||||
bfr.Add_int_variable(itm.Page_id());
|
|
||||||
}
|
|
||||||
public void Read_data(Db_rdr rdr) {
|
|
||||||
// read values from page_tbl
|
|
||||||
int page_id = rdr.Read_int("page_id");
|
|
||||||
int page_ns = rdr.Read_int("page_namespace");
|
|
||||||
byte[] page_ttl = rdr.Read_bry_by_str("page_title");
|
|
||||||
|
|
||||||
// get itm and set data
|
|
||||||
Xoctg_catpage_itm itm = (Xoctg_catpage_itm)hash.Get_by(page_id);
|
|
||||||
if (itm == null) return; // NOTE: itms can exist in cat_links_tbl, but not in page_tbl; EX:User:Any_page
|
|
||||||
itm.Page_ttl_(wiki.Ttl_parse(page_ns, page_ttl));
|
|
||||||
}
|
|
||||||
}
|
|
@ -76,7 +76,7 @@ public class Xoctg_catpage_itm {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static final Xoctg_catpage_itm[] Ary_empty = new Xoctg_catpage_itm[0];
|
public static final Xoctg_catpage_itm[] Ary_empty = new Xoctg_catpage_itm[0];
|
||||||
public static Xoctg_catpage_itm New_by_rdr(Db_rdr rdr, byte version) {
|
public static Xoctg_catpage_itm New_by_rdr(Xow_wiki wiki, Db_rdr rdr, byte version) {
|
||||||
byte[] sortkey_binary = Bry_.Empty;
|
byte[] sortkey_binary = Bry_.Empty;
|
||||||
byte[] sortkey_prefix = Bry_.Empty;
|
byte[] sortkey_prefix = Bry_.Empty;
|
||||||
if (version == Version__4) {
|
if (version == Version__4) {
|
||||||
@ -87,7 +87,15 @@ public class Xoctg_catpage_itm {
|
|||||||
sortkey_binary = Bry_.Empty;
|
sortkey_binary = Bry_.Empty;
|
||||||
sortkey_prefix = rdr.Read_bry_by_str("cl_sortkey");
|
sortkey_prefix = rdr.Read_bry_by_str("cl_sortkey");
|
||||||
}
|
}
|
||||||
return new Xoctg_catpage_itm(version, rdr.Read_byte("cl_type_id"), rdr.Read_int("cl_from"), sortkey_prefix, sortkey_binary);
|
Xoctg_catpage_itm rv = new Xoctg_catpage_itm(version, rdr.Read_byte("cl_type_id"), rdr.Read_int("cl_from"), sortkey_prefix, sortkey_binary);
|
||||||
|
|
||||||
|
if (version == Version__4) {
|
||||||
|
String ttl_str = rdr.Read_str("page_title");
|
||||||
|
if (ttl_str != null) {// NOTE: ttl_str will be NULL if LEFT JOIN fails on page_db.page
|
||||||
|
rv.Page_ttl_(wiki.Ttl_parse(rdr.Read_int("page_namespace"), Bry_.new_u8(ttl_str)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
}
|
}
|
||||||
public static Xoctg_catpage_itm New_by_ttl(byte grp_tid, int page_id, Xoa_ttl ttl) { // TEST
|
public static Xoctg_catpage_itm New_by_ttl(byte grp_tid, int page_id, Xoa_ttl ttl) { // TEST
|
||||||
Xoctg_catpage_itm rv = new Xoctg_catpage_itm(Version__4, grp_tid, page_id, ttl.Page_txt(), Bry_.Empty);
|
Xoctg_catpage_itm rv = new Xoctg_catpage_itm(Version__4, grp_tid, page_id, ttl.Page_txt(), Bry_.Empty);
|
||||||
|
@ -40,7 +40,7 @@ class Dpl_itm {
|
|||||||
public byte Quality_pages() {return quality_pages;} private byte quality_pages;
|
public byte Quality_pages() {return quality_pages;} private byte quality_pages;
|
||||||
public byte Stable_pages() {return stable_pages;} private byte stable_pages;
|
public byte Stable_pages() {return stable_pages;} private byte stable_pages;
|
||||||
private Xop_ctx sub_ctx; private Xop_tkn_mkr sub_tkn_mkr; private Xop_root_tkn sub_root;
|
private Xop_ctx sub_ctx; private Xop_tkn_mkr sub_tkn_mkr; private Xop_root_tkn sub_root;
|
||||||
public void Parse(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) { // parse kvps in xnde; EX:<dpl>category=abc\nredirects=y\n</dpl>
|
private void Parse_src(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) { // parse kvps in xnde; EX:<dpl>category=abc\nredirects=y\n</dpl>
|
||||||
this.page_ttl = page_ttl;
|
this.page_ttl = page_ttl;
|
||||||
sub_ctx = Xop_ctx.New__sub__reuse_page(ctx);
|
sub_ctx = Xop_ctx.New__sub__reuse_page(ctx);
|
||||||
sub_tkn_mkr = sub_ctx.Tkn_mkr();
|
sub_tkn_mkr = sub_ctx.Tkn_mkr();
|
||||||
@ -175,6 +175,11 @@ class Dpl_itm {
|
|||||||
// boolean ctg_date = false, ctg_date_strip = false;
|
// boolean ctg_date = false, ctg_date_strip = false;
|
||||||
// byte[] ns_include = null;
|
// byte[] ns_include = null;
|
||||||
// byte[] ctg_date_fmt;
|
// byte[] ctg_date_fmt;
|
||||||
|
public static Dpl_itm Parse(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) {
|
||||||
|
Dpl_itm rv = new Dpl_itm();
|
||||||
|
rv.Parse_src(wiki, ctx, page_ttl, src, xnde);
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
class Dpl_stable_tid {
|
class Dpl_stable_tid {
|
||||||
public static final byte Tid_null = 0, Tid_include = 1, Tid_only = 2, Tid_exclude = 3;
|
public static final byte Tid_null = 0, Tid_include = 1, Tid_only = 2, Tid_exclude = 3;
|
||||||
|
144
400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_page_finder.java
Normal file
144
400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_page_finder.java
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||||
|
or alternatively under the terms of the Apache License Version 2.0.
|
||||||
|
|
||||||
|
You may use XOWA according to either of these licenses as is most appropriate
|
||||||
|
for your project on a case-by-case basis.
|
||||||
|
|
||||||
|
The terms of each license can be found in the source code repository:
|
||||||
|
|
||||||
|
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||||
|
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||||
|
*/
|
||||||
|
package gplx.xowa.xtns.dynamicPageList; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
|
||||||
|
import gplx.core.primitives.*; import gplx.core.lists.*;
|
||||||
|
import gplx.xowa.wikis.dbs.*;
|
||||||
|
import gplx.xowa.wikis.data.tbls.*;
|
||||||
|
import gplx.xowa.addons.wikis.ctgs.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.urls.*;
|
||||||
|
class Dpl_page_finder {
|
||||||
|
private final Dpl_itm itm;
|
||||||
|
private final Xowe_wiki wiki;
|
||||||
|
|
||||||
|
public Dpl_page_finder(Dpl_itm itm, Xowe_wiki wiki) {
|
||||||
|
this.itm = itm;
|
||||||
|
this.wiki = wiki;
|
||||||
|
}
|
||||||
|
public Ordered_hash Find() {
|
||||||
|
// get include_ttls
|
||||||
|
List_adp include_ttls = itm.Ctg_includes();
|
||||||
|
if (include_ttls == null) return Ordered_hash_.New(); // exit early if none exists
|
||||||
|
|
||||||
|
// get exclude_pages
|
||||||
|
Ordered_hash exclude_pages = Get_exclude_pages(itm.Ctg_excludes());
|
||||||
|
|
||||||
|
// init vars for loop below
|
||||||
|
int itm_ns_filter = itm.Ns_filter();
|
||||||
|
List_adp remove_list = List_adp_.New();
|
||||||
|
Int_obj_ref tmp_id = Int_obj_ref.New_zero();
|
||||||
|
|
||||||
|
// get include_pags; note that this is a UNION of all member pages; EX: include_ttls=Ctg_A,Ctg_B,Ctg_C will only return pages in Ctg_A AND Ctg_B AND Ctg_C
|
||||||
|
Ordered_hash rv = Ordered_hash_.New();
|
||||||
|
int len = include_ttls.Len();
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
// get ttl
|
||||||
|
Xoa_ttl ttl = Get_ctg_ttl_or_null(include_ttls, i);
|
||||||
|
if (ttl == null) continue;
|
||||||
|
|
||||||
|
// get pages
|
||||||
|
Ordered_hash cur_pages = Ordered_hash_.New();
|
||||||
|
Find_pages_in_ctg(cur_pages, itm.Page_ttl(), ttl);
|
||||||
|
|
||||||
|
// identify pages (a) not in previous list; (b) excluded; (c) ns_filter
|
||||||
|
remove_list.Clear();
|
||||||
|
int cur_len = cur_pages.Len();
|
||||||
|
for (int j = 0; j < cur_len; j++) {
|
||||||
|
// get item and init tmp
|
||||||
|
Xowd_page_itm page_itm = (Xowd_page_itm)cur_pages.Get_at(j);
|
||||||
|
tmp_id.Val_(page_itm.Id());
|
||||||
|
|
||||||
|
// check if should be removed
|
||||||
|
if ( (i != 0 && !rv.Has(tmp_id)) // item doesn't exist in previous set; note this doesn't apply to the 0th set
|
||||||
|
|| exclude_pages.Has(tmp_id) // item is marked as excluded
|
||||||
|
|| itm_ns_filter != Dpl_itm.Ns_filter_null && itm_ns_filter != page_itm.Ns().Id() // item does not match specified filter
|
||||||
|
) {
|
||||||
|
remove_list.Add(page_itm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// remove pages
|
||||||
|
int remove_len = remove_list.Len();
|
||||||
|
for (int j = 0; j < remove_len; j++) {
|
||||||
|
Xowd_page_itm page_itm = (Xowd_page_itm)remove_list.Get_at(j);
|
||||||
|
cur_pages.Del(tmp_id.Val_(page_itm.Id()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// set cur_pages as main list
|
||||||
|
rv = cur_pages;
|
||||||
|
}
|
||||||
|
|
||||||
|
// sorting
|
||||||
|
rv.Sort_by
|
||||||
|
( itm.Sort_ascending() == Bool_.__byte
|
||||||
|
? (ComparerAble)Xowd_page_itm_sorter.IdAsc // sort not specified; use default;
|
||||||
|
: (ComparerAble)new Dpl_page_sorter(itm)); // sort specified
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
private Ordered_hash Get_exclude_pages(List_adp ttls) {
|
||||||
|
Ordered_hash rv = Ordered_hash_.New();
|
||||||
|
|
||||||
|
// return empty hash if no ttls
|
||||||
|
if (ttls == null)
|
||||||
|
return rv;
|
||||||
|
|
||||||
|
// loop exclude ttls
|
||||||
|
int len = ttls.Count();
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
Xoa_ttl ttl = Get_ctg_ttl_or_null(ttls, i);
|
||||||
|
if (ttl == null) continue;
|
||||||
|
Find_pages_in_ctg(rv, itm.Page_ttl(), ttl);
|
||||||
|
}
|
||||||
|
return rv;
|
||||||
|
}
|
||||||
|
private void Find_pages_in_ctg(Ordered_hash rv, byte[] page_ttl, Xoa_ttl cat_ttl) {
|
||||||
|
// get ctg
|
||||||
|
Xoctg_catpage_ctg ctg = wiki.Ctg__catpage_mgr().Get_by_cache_or_null(page_ttl, Xoctg_catpage_url.New__blank(), cat_ttl, Int_.Max_value);
|
||||||
|
if (ctg == null) return;
|
||||||
|
|
||||||
|
// loop grps to get each page
|
||||||
|
Int_obj_ref tmp_id = Int_obj_ref.New_zero();
|
||||||
|
for (byte tid = 0; tid < Xoa_ctg_mgr.Tid___max; tid++) {
|
||||||
|
// get grp; EX: subc; page; file
|
||||||
|
Xoctg_catpage_grp grp = ctg.Grp_by_tid(tid);
|
||||||
|
|
||||||
|
// loop itms in grp and add to hash
|
||||||
|
int len = grp.Itms__len();
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
Xoctg_catpage_itm itm = grp.Itms__get_at(i);
|
||||||
|
int itm_page_id = itm.Page_id();
|
||||||
|
|
||||||
|
if (rv.Has(tmp_id.Val_(itm_page_id))) continue; // check to make sure not already added
|
||||||
|
|
||||||
|
Xowd_page_itm page = new Xowd_page_itm();
|
||||||
|
if (itm.Page_ttl() == null) continue; // cat_link can exist without entry in page_db.page
|
||||||
|
page.Id_(itm_page_id);
|
||||||
|
page.Ttl_(itm.Page_ttl());
|
||||||
|
rv.Add(Int_obj_ref.New(itm_page_id), page);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Xoa_ttl Get_ctg_ttl_or_null(List_adp list, int i) {// helper method to extract ttl from list
|
||||||
|
// get ttl
|
||||||
|
byte[] ttl_bry = (byte[])list.Get_at(i);
|
||||||
|
Xoa_ttl ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, ttl_bry);
|
||||||
|
|
||||||
|
// log if invalid; NOTE: pages in en.n will pass "{{{2}}}" as category title; PAGE:en.b:Category:Egypt DATE:2016-10-18
|
||||||
|
if (ttl == null) {
|
||||||
|
Gfo_usr_dlg_.Instance.Log_many("", "", "category title is invalid; wiki=~{0} page=~{1} ttl=~{2}", wiki.Domain_str(), itm.Page_ttl(), ttl_bry);
|
||||||
|
}
|
||||||
|
return ttl;
|
||||||
|
}
|
||||||
|
}
|
@ -20,13 +20,11 @@ import gplx.xowa.wikis.dbs.*; import gplx.xowa.addons.wikis.ctgs.*; import gplx.
|
|||||||
import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.amps.*;
|
import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.amps.*;
|
||||||
import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.urls.*;
|
import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.urls.*;
|
||||||
public class Dpl_xnde implements Xox_xnde {
|
public class Dpl_xnde implements Xox_xnde {
|
||||||
private Dpl_itm itm = new Dpl_itm(); private List_adp pages = List_adp_.New();
|
private Dpl_itm itm; private Ordered_hash pages;
|
||||||
public void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj) {} // NOTE: <dynamicPageList> has no attributes
|
public void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj) {} // NOTE: <dynamicPageList> has no attributes
|
||||||
public void Xtn_parse(Xowe_wiki wiki, Xop_ctx ctx, Xop_root_tkn root, byte[] src, Xop_xnde_tkn xnde) {
|
public void Xtn_parse(Xowe_wiki wiki, Xop_ctx ctx, Xop_root_tkn root, byte[] src, Xop_xnde_tkn xnde) {
|
||||||
itm.Parse(wiki, ctx, ctx.Page().Ttl().Full_txt_w_ttl_case(), src, xnde);
|
itm = Dpl_itm.Parse(wiki, ctx, ctx.Page().Ttl().Full_txt_w_ttl_case(), src, xnde);
|
||||||
Dpl_page_finder.Find_pages(pages, wiki, itm);
|
pages = new Dpl_page_finder(itm, wiki).Find();
|
||||||
if (itm.Sort_ascending() != Bool_.__byte)
|
|
||||||
pages.Sort_by(new Dpl_page_sorter(itm));
|
|
||||||
}
|
}
|
||||||
public void Xtn_write(Bry_bfr bfr, Xoae_app app, Xop_ctx ctx, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xoae_page wpg, Xop_xnde_tkn xnde, byte[] src) {
|
public void Xtn_write(Bry_bfr bfr, Xoae_app app, Xop_ctx ctx, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xoae_page wpg, Xop_xnde_tkn xnde, byte[] src) {
|
||||||
Xowe_wiki wiki = ctx.Wiki();
|
Xowe_wiki wiki = ctx.Wiki();
|
||||||
@ -71,109 +69,11 @@ public class Dpl_xnde implements Xox_xnde {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bfr.Add(html_mode.Grp_end()).Add_byte_nl();
|
bfr.Add(html_mode.Grp_end()).Add_byte_nl();
|
||||||
} finally {tmp_bfr.Mkr_rls();}
|
}
|
||||||
|
finally {
|
||||||
|
tmp_bfr.Mkr_rls();
|
||||||
|
pages.Clear(); // clear pages else out-of-memory error when Next 200 3 times on en.wiktionary.org/wiki/Category:English_lemmas; DATE:2019-08-25
|
||||||
|
}
|
||||||
}
|
}
|
||||||
private static byte[] Bry_nofollow = Bry_.new_a7(" rel=\"nofollow\"");
|
private static byte[] Bry_nofollow = Bry_.new_a7(" rel=\"nofollow\"");
|
||||||
}
|
}
|
||||||
class Dpl_page_finder {
|
|
||||||
public static void Find_pages(List_adp rv, Xowe_wiki wiki, Dpl_itm itm) {
|
|
||||||
rv.Clear();
|
|
||||||
List_adp includes = itm.Ctg_includes(); if (includes == null) return;
|
|
||||||
int includes_len = includes.Count();
|
|
||||||
Ordered_hash old_regy = Ordered_hash_.New(), new_regy = Ordered_hash_.New(), cur_regy = Ordered_hash_.New();
|
|
||||||
Xodb_load_mgr load_mgr = wiki.Db_mgr().Load_mgr();
|
|
||||||
Xowd_page_itm tmp_page = new Xowd_page_itm();
|
|
||||||
Int_obj_ref tmp_id = Int_obj_ref.New_zero();
|
|
||||||
List_adp del_list = List_adp_.New();
|
|
||||||
int ns_filter = itm.Ns_filter();
|
|
||||||
Ordered_hash exclude_pages = Ordered_hash_.New();
|
|
||||||
Find_excludes(exclude_pages, wiki, load_mgr, itm.Page_ttl(), tmp_page, tmp_id, itm.Ctg_excludes());
|
|
||||||
|
|
||||||
for (int i = 0; i < includes_len; i++) { // loop over includes
|
|
||||||
byte[] include = (byte[])includes.Get_at(i);
|
|
||||||
Xoa_ttl include_ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, include);
|
|
||||||
|
|
||||||
// pages in en.n will pass "{{{2}}}" as category title; PAGE:en.b:Category:Egypt DATE:2016-10-18
|
|
||||||
if (include_ttl == null) {
|
|
||||||
Gfo_usr_dlg_.Instance.Log_many("", "", "category title is invalid; wiki=~{0} page=~{1} ttl=~{2}", wiki.Domain_str(), itm.Page_ttl(), include);
|
|
||||||
continue; // NOTE: must ignore invalid args; EX: "{{{2}}}" is ignored but "missing_category" is not
|
|
||||||
}
|
|
||||||
|
|
||||||
cur_regy.Clear(); del_list.Clear();
|
|
||||||
Find_pages_in_ctg(cur_regy, wiki, load_mgr, itm.Page_ttl(), tmp_page, tmp_id, include_ttl);
|
|
||||||
Del_old_pages_not_in_cur(i, tmp_id, old_regy, cur_regy, del_list);
|
|
||||||
Add_cur_pages_also_in_old(i, tmp_id, old_regy, cur_regy, new_regy, exclude_pages, ns_filter);
|
|
||||||
old_regy = new_regy;
|
|
||||||
new_regy = Ordered_hash_.New();
|
|
||||||
}
|
|
||||||
int pages_len = old_regy.Count();
|
|
||||||
for (int i = 0; i < pages_len; i++) { // loop over old and create pages
|
|
||||||
Int_obj_ref old_id = (Int_obj_ref)old_regy.Get_at(i);
|
|
||||||
rv.Add(new Xowd_page_itm().Id_(old_id.Val()));
|
|
||||||
}
|
|
||||||
wiki.Db_mgr().Load_mgr().Load_by_ids(Cancelable_.Never, rv, 0, pages_len);
|
|
||||||
rv.Sort_by(Xowd_page_itm_sorter.IdAsc);
|
|
||||||
}
|
|
||||||
private static void Find_excludes(Ordered_hash exclude_pages, Xowe_wiki wiki, Xodb_load_mgr load_mgr, byte[] page_ttl, Xowd_page_itm tmp_page, Int_obj_ref tmp_id, List_adp exclude_ctgs) {
|
|
||||||
if (exclude_ctgs == null) return;
|
|
||||||
int exclude_ctgs_len = exclude_ctgs.Count();
|
|
||||||
for (int i = 0; i < exclude_ctgs_len; i++) {
|
|
||||||
byte[] exclude_ctg = (byte[])exclude_ctgs.Get_at(i);
|
|
||||||
Xoa_ttl exclude_ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, exclude_ctg);
|
|
||||||
if (exclude_ttl != null)
|
|
||||||
Find_pages_in_ctg(exclude_pages, wiki, load_mgr, page_ttl, tmp_page, tmp_id, exclude_ttl);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
private static void Find_pages_in_ctg(Ordered_hash rv, Xowe_wiki wiki, Xodb_load_mgr load_mgr, byte[] page_ttl, Xowd_page_itm tmp_page, Int_obj_ref tmp_id, Xoa_ttl cat_ttl) {
|
|
||||||
Xoctg_catpage_ctg ctg = wiki.Ctg__catpage_mgr().Get_or_load_or_null(page_ttl, Xoctg_catpage_url.New__blank(), cat_ttl, Int_.Max_value);
|
|
||||||
if (ctg == null) return;
|
|
||||||
|
|
||||||
// loop grps to get grp
|
|
||||||
for (byte ctg_tid = 0; ctg_tid < Xoa_ctg_mgr.Tid___max; ++ctg_tid) {
|
|
||||||
Xoctg_catpage_grp ctg_grp = ctg.Grp_by_tid(ctg_tid);
|
|
||||||
int itms_len = ctg_grp.Itms__len();
|
|
||||||
|
|
||||||
// loop itms in grp and add to hash
|
|
||||||
for (int i = 0; i < itms_len; ++i) {
|
|
||||||
Xoctg_catpage_itm ctg_itm = ctg_grp.Itms__get_at(i);
|
|
||||||
int itm_page_id = ctg_itm.Page_id();
|
|
||||||
if (rv.Has(tmp_id.Val_(itm_page_id))) continue;
|
|
||||||
rv.Add(Int_obj_ref.New(itm_page_id), ctg_itm);
|
|
||||||
|
|
||||||
// DELETE: recurse subcategories; PAGE:en.b:XML DATE:2016-09-18
|
|
||||||
// if (ctg_tid == Xoa_ctg_mgr.Tid__subc) {
|
|
||||||
// load_mgr.Load_by_id(tmp_page, itm_page_id);
|
|
||||||
// Find_pages_in_ctg(rv, wiki, load_mgr, tmp_page, tmp_id, tmp_page.Ttl_page_db());
|
|
||||||
// }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
private static void Del_old_pages_not_in_cur(int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, List_adp del_list) {
|
|
||||||
if (i == 0) return; // skip logic for first ctg (which doesn't have a predecessor)
|
|
||||||
int old_len = old_regy.Count();
|
|
||||||
for (int j = 0; j < old_len; j++) { // if cur is not in new, del it
|
|
||||||
Int_obj_ref old_id = (Int_obj_ref)old_regy.Get_at(j);
|
|
||||||
if (!cur_regy.Has(tmp_id.Val_(old_id.Val()))) // old_itm does not exist in cur_regy
|
|
||||||
del_list.Add(old_id); // remove; EX: (A,B) in old; B only in cur; old should now be (A) only
|
|
||||||
}
|
|
||||||
int del_len = del_list.Count();
|
|
||||||
for (int j = 0; j < del_len; j++) {
|
|
||||||
Int_obj_ref old_itm = (Int_obj_ref)del_list.Get_at(j);
|
|
||||||
old_regy.Del(tmp_id.Val_(old_itm.Val()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
private static void Add_cur_pages_also_in_old(int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, Ordered_hash new_regy, Ordered_hash exclude_pages, int ns_filter) {
|
|
||||||
int found_len = cur_regy.Count();
|
|
||||||
for (int j = 0; j < found_len; j++) { // if new_page is in cur, add it
|
|
||||||
Xoctg_catpage_itm cur_itm = (Xoctg_catpage_itm)cur_regy.Get_at(j);
|
|
||||||
Xoa_ttl cur_ttl = cur_itm.Page_ttl(); if (cur_ttl == null) continue;
|
|
||||||
if (ns_filter != Dpl_itm.Ns_filter_null && ns_filter != cur_ttl.Ns().Id()) continue;
|
|
||||||
tmp_id.Val_(cur_itm.Page_id()); // set tmp_id, since it will be used at least once
|
|
||||||
if (exclude_pages.Has(tmp_id)) continue; // ignore excluded pages
|
|
||||||
if (i != 0) { // skip logic for first ctg (which doesn't have a predecessor)
|
|
||||||
if (!old_regy.Has(tmp_id)) continue; // cur_itm not in old_regy; ignore
|
|
||||||
}
|
|
||||||
new_regy.Add_as_key_and_val(Int_obj_ref.New(cur_itm.Page_id()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user