Xtn.Dpl: Change dynamicPageList to always get categories from cache [#556]

pull/620/head
gnosygnu 5 years ago
parent 42842f0bcc
commit 2598dee844

@ -49,27 +49,30 @@ public class Xoctg_catpage_mgr implements Gfo_invk {
wiki.App().Cfg().Bind_many_wiki(this, wiki, Cfg__missing_class);
}
public void Free_mem_all() {cache.Clear();}
public Xoctg_catpage_ctg Get_or_load_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) {
// load categories from cat dbs; exit if not found
public Xoctg_catpage_ctg Get_by_cache_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) {
// DynamicPageList categories only (b/c of many members); for regular catpages, always retrieve on demand
Xoctg_catpage_ctg ctg = (Xoctg_catpage_ctg)cache.Get_by(cat_ttl.Full_db());
if (ctg == null) {
if (gplx.core.envs.Env_.Mode_testing()) return null; // needed for dpl test
synchronized (thread_lock) { // LOCK:used by multiple wrks; DATE:2016-09-12
ctg = loader.Load_ctg_or_null(wiki, page_ttl, this, catpage_url, cat_ttl, limit);
}
ctg = Get_by_db_or_null(page_ttl, catpage_url, cat_ttl, limit);
if (ctg == null) return null; // not in cache or db; exit
if (limit == Int_.Max_value) // only add to cache if Max_val (DynamicPageList); for regular catpages, always retrieve on demand
cache.Add(cat_ttl.Full_db(), ctg);
cache.Add(cat_ttl.Full_db(), ctg);
}
return ctg;
}
/**
 * Loads a category straight from the category databases via the loader; this method does not consult or update the cache.
 * Returns the loader's result unchanged (the loader method is an "or_null" variant, so callers must handle null).
 */
public Xoctg_catpage_ctg Get_by_db_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) {
	Xoctg_catpage_ctg loaded;
	synchronized (thread_lock) {	// LOCK:used by multiple wrks; DATE:2016-09-12
		loaded = loader.Load_ctg_or_null(wiki, page_ttl, this, catpage_url, cat_ttl, limit);
	}
	return loaded;
}
public void Write_catpage(Bry_bfr bfr, Xoa_page page) {
try {
// get catpage_url
Xoctg_catpage_url catpage_url = Xoctg_catpage_url_parser.Parse(page.Url());
// load categories from cat dbs; exit if not found
Xoctg_catpage_ctg ctg = Get_or_load_or_null(page.Ttl().Page_db(), catpage_url, page.Ttl(), grp_max);
Xoctg_catpage_ctg ctg = Get_by_db_or_null(page.Ttl().Page_db(), catpage_url, page.Ttl(), grp_max);
if (ctg == null) return;
// write html

@ -45,8 +45,7 @@ class Xoctg_catlink_loader {
// sort and reduce list to 200 total
catlink_list.Sort_by(new Xoctg_catlink_sorter(url_is_from));
Xoctg_page_loader catlink_loader = new Xoctg_page_loader(wiki);
Ordered_hash catlink_hash = catlink_loader.Hash();
Ordered_hash catlink_hash = Ordered_hash_.New();
int catlink_list_len = catlink_list.Len();
int max = catlink_list_len < limit ? catlink_list_len : limit;
for (int i = 0; i < max; i++) {
@ -55,7 +54,6 @@ class Xoctg_catlink_loader {
}
// load ns / ttl for each catlink
page_tbl.Select_in__id(catlink_loader);
Xoctg_catpage_grp grp = rv.Grp_by_tid(grp_tid);
grp.Itms_((Xoctg_catpage_itm[])catlink_hash.To_ary_and_clear(Xoctg_catpage_itm.class));
@ -93,7 +91,10 @@ class Xoctg_catlink_loader {
, ", cl_type_id"
, ", {0} AS cl_sortkey"
, ", {1} AS cl_sortkey_prefix"
, ", p.page_namespace"
, ", p.page_title"
, "FROM <link_db_{3}>cat_link cl{2}"
, " LEFT JOIN <page_db>page p ON p.page_id = cl{2}.cl_from"
), sortkey_col, sortkey_prefix_fld, sortkey_join, link_db_id);
bfr.Add_str_u8_fmt(String_.Concat_lines_nl
( "WHERE cl_to_id = {0}"
@ -108,12 +109,15 @@ class Xoctg_catlink_loader {
}
private void Load_catlinks(List_adp catlink_list, String sql) {
Db_rdr rdr = Db_rdr_.Empty;
int count = 0;
try {
attach_mgr.Attach();
rdr = attach_mgr.Conn_main().Stmt_sql(sql).Exec_select__rls_auto();
while (rdr.Move_next()) {
Xoctg_catpage_itm itm = Xoctg_catpage_itm.New_by_rdr(rdr, version);
Xoctg_catpage_itm itm = Xoctg_catpage_itm.New_by_rdr(wiki, rdr, version);
catlink_list.Add(itm);
if (count >= 1000 && (count % 1000) == 0) Gfo_usr_dlg_.Instance.Prog_many("", "", "loading cat_links: count=~{0}", count);
count++;
}
}
finally {
@ -197,6 +201,10 @@ class Xoctg_catlink_loader {
version = 3;
db_1st = cat_core_conn;
}
// add page_db
db_list.Add(new Db_attach_itm("page_db", page_tbl.Conn()));
Db_attach_mgr attach_mgr = new Db_attach_mgr(db_1st, (Db_attach_itm[])db_list.To_ary_and_clear(Db_attach_itm.class));
return new Xoctg_catlink_loader(wiki, catpage_mgr, page_tbl, version, link_dbs_len, attach_mgr);
}

@ -1,40 +0,0 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.addons.wikis.ctgs.htmls.catpages.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.ctgs.*; import gplx.xowa.addons.wikis.ctgs.htmls.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.*;
import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*;
/**
 * Select_in_cbk implementation that fills in the title of each Xoctg_catpage_itm held in its hash,
 * using rows read from the page table (page_id, page_namespace, page_title).
 */
public class Xoctg_page_loader implements Select_in_cbk {
	private final Xow_wiki wiki;
	private final Ordered_hash hash = Ordered_hash_.New();
	public Xoctg_page_loader(Xow_wiki wiki) {
		this.wiki = wiki;
	}
	/** Hash of Xoctg_catpage_itm; Read_data looks items up by page_id, Write_sql reads them by index. */
	public Ordered_hash Hash() {
		return hash;
	}
	/** Number of items currently in the hash. */
	public int Hash_max() {
		return hash.Len();
	}
	/** Writes the page_id of the item at position idx into the buffer. */
	public void Write_sql(Bry_bfr bfr, int idx) {
		int id = ((Xoctg_catpage_itm)hash.Get_at(idx)).Page_id();
		bfr.Add_int_variable(id);
	}
	/** Reads one row and, if an item with that page_id is in the hash, sets its parsed title. */
	public void Read_data(Db_rdr rdr) {
		// pull the three columns from the current row
		int row_id = rdr.Read_int("page_id");
		int row_ns = rdr.Read_int("page_namespace");
		byte[] row_ttl = rdr.Read_bry_by_str("page_title");

		// NOTE: itms can exist in cat_links_tbl, but not in page_tbl; EX:User:Any_page
		Xoctg_catpage_itm found = (Xoctg_catpage_itm)hash.Get_by(row_id);
		if (found != null) {
			found.Page_ttl_(wiki.Ttl_parse(row_ns, row_ttl));
		}
	}
}

@ -76,7 +76,7 @@ public class Xoctg_catpage_itm {
}
public static final Xoctg_catpage_itm[] Ary_empty = new Xoctg_catpage_itm[0];
public static Xoctg_catpage_itm New_by_rdr(Db_rdr rdr, byte version) {
public static Xoctg_catpage_itm New_by_rdr(Xow_wiki wiki, Db_rdr rdr, byte version) {
byte[] sortkey_binary = Bry_.Empty;
byte[] sortkey_prefix = Bry_.Empty;
if (version == Version__4) {
@ -87,7 +87,15 @@ public class Xoctg_catpage_itm {
sortkey_binary = Bry_.Empty;
sortkey_prefix = rdr.Read_bry_by_str("cl_sortkey");
}
return new Xoctg_catpage_itm(version, rdr.Read_byte("cl_type_id"), rdr.Read_int("cl_from"), sortkey_prefix, sortkey_binary);
Xoctg_catpage_itm rv = new Xoctg_catpage_itm(version, rdr.Read_byte("cl_type_id"), rdr.Read_int("cl_from"), sortkey_prefix, sortkey_binary);
if (version == Version__4) {
String ttl_str = rdr.Read_str("page_title");
if (ttl_str != null) {// NOTE: ttl_str will be NULL if LEFT JOIN fails on page_db.page
rv.Page_ttl_(wiki.Ttl_parse(rdr.Read_int("page_namespace"), Bry_.new_u8(ttl_str)));
}
}
return rv;
}
public static Xoctg_catpage_itm New_by_ttl(byte grp_tid, int page_id, Xoa_ttl ttl) { // TEST
Xoctg_catpage_itm rv = new Xoctg_catpage_itm(Version__4, grp_tid, page_id, ttl.Page_txt(), Bry_.Empty);

@ -40,7 +40,7 @@ class Dpl_itm {
public byte Quality_pages() {return quality_pages;} private byte quality_pages;
public byte Stable_pages() {return stable_pages;} private byte stable_pages;
private Xop_ctx sub_ctx; private Xop_tkn_mkr sub_tkn_mkr; private Xop_root_tkn sub_root;
public void Parse(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) { // parse kvps in xnde; EX:<dpl>category=abc\nredirects=y\n</dpl>
private void Parse_src(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) { // parse kvps in xnde; EX:<dpl>category=abc\nredirects=y\n</dpl>
this.page_ttl = page_ttl;
sub_ctx = Xop_ctx.New__sub__reuse_page(ctx);
sub_tkn_mkr = sub_ctx.Tkn_mkr();
@ -175,6 +175,11 @@ class Dpl_itm {
// boolean ctg_date = false, ctg_date_strip = false;
// byte[] ns_include = null;
// byte[] ctg_date_fmt;
/** Factory method: creates a fresh Dpl_itm, populates it through Parse_src with the given arguments, and returns it. */
public static Dpl_itm Parse(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) {
	Dpl_itm parsed = new Dpl_itm();
	parsed.Parse_src(wiki, ctx, page_ttl, src, xnde);
	return parsed;
}
}
class Dpl_stable_tid {
public static final byte Tid_null = 0, Tid_include = 1, Tid_only = 2, Tid_exclude = 3;

@ -0,0 +1,144 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.dynamicPageList; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import gplx.core.primitives.*; import gplx.core.lists.*;
import gplx.xowa.wikis.dbs.*;
import gplx.xowa.wikis.data.tbls.*;
import gplx.xowa.addons.wikis.ctgs.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.urls.*;
/**
 * Finds the pages selected by a parsed DynamicPageList item.
 *
 * The result is the INTERSECTION of the members of every valid include category,
 * minus any page that is a member of an exclude category, minus any page that does
 * not match the item's namespace filter (when one is set). The result is sorted
 * before it is returned.
 */
class Dpl_page_finder {
	private final Dpl_itm itm;
	private final Xowe_wiki wiki;
	public Dpl_page_finder(Dpl_itm itm, Xowe_wiki wiki) {
		this.itm = itm;
		this.wiki = wiki;
	}
	/**
	 * Runs the search described by the Dpl_itm.
	 *
	 * @return an Ordered_hash keyed by Int_obj_ref (page id) with Xowd_page_itm values;
	 *         an empty, unsorted hash if the item has no include categories.
	 */
	public Ordered_hash Find() {
		// get include_ttls
		List_adp include_ttls = itm.Ctg_includes();
		if (include_ttls == null) return Ordered_hash_.New(); // exit early if none exists

		// get exclude_pages
		Ordered_hash exclude_pages = Get_exclude_pages(itm.Ctg_excludes());

		// init vars for loop below
		int itm_ns_filter = itm.Ns_filter();
		List_adp remove_list = List_adp_.New();
		Int_obj_ref tmp_id = Int_obj_ref.New_zero();

		// get include_pages; note that this is an INTERSECTION of all member pages;
		// EX: include_ttls=Ctg_A,Ctg_B,Ctg_C will only return pages in Ctg_A AND Ctg_B AND Ctg_C
		Ordered_hash rv = Ordered_hash_.New();
		// tracks whether a previous VALID category has been processed; the loop index cannot be used for this
		// b/c invalid titles are skipped, and a skipped 0th title would otherwise intersect the next category with an empty set
		boolean has_prv_set = false;
		int len = include_ttls.Len();
		for (int i = 0; i < len; i++) {
			// get ttl; invalid titles are logged by the helper and ignored here
			Xoa_ttl ttl = Get_ctg_ttl_or_null(include_ttls, i);
			if (ttl == null) continue;

			// get pages
			Ordered_hash cur_pages = Ordered_hash_.New();
			Find_pages_in_ctg(cur_pages, itm.Page_ttl(), ttl);

			// identify pages (a) not in previous list; (b) excluded; (c) rejected by ns_filter
			remove_list.Clear();
			int cur_len = cur_pages.Len();
			for (int j = 0; j < cur_len; j++) {
				// get item and init tmp
				Xowd_page_itm page_itm = (Xowd_page_itm)cur_pages.Get_at(j);
				tmp_id.Val_(page_itm.Id());

				// check if should be removed
				boolean missing_from_prv = has_prv_set && !rv.Has(tmp_id);                                                      // item doesn't exist in previous set; doesn't apply to the first valid set
				boolean excluded         = exclude_pages.Has(tmp_id);                                                           // item is marked as excluded
				boolean wrong_ns         = itm_ns_filter != Dpl_itm.Ns_filter_null && itm_ns_filter != page_itm.Ns().Id();      // item does not match specified filter
				if (missing_from_prv || excluded || wrong_ns) {
					remove_list.Add(page_itm);
				}
			}

			// remove pages; done in a separate pass so cur_pages is not modified while it is being indexed
			int remove_len = remove_list.Len();
			for (int j = 0; j < remove_len; j++) {
				Xowd_page_itm page_itm = (Xowd_page_itm)remove_list.Get_at(j);
				cur_pages.Del(tmp_id.Val_(page_itm.Id()));
			}

			// set cur_pages as main list
			rv = cur_pages;
			has_prv_set = true;
		}

		// sorting
		rv.Sort_by
			( itm.Sort_ascending() == Bool_.__byte
			? (ComparerAble)Xowd_page_itm_sorter.IdAsc     // sort not specified; use default;
			: (ComparerAble)new Dpl_page_sorter(itm));     // sort specified
		return rv;
	}
	/** Collects the members of every valid exclude category into one hash; returns an empty hash if ttls is null. */
	private Ordered_hash Get_exclude_pages(List_adp ttls) {
		Ordered_hash rv = Ordered_hash_.New();

		// return empty hash if no ttls
		if (ttls == null)
			return rv;

		// loop exclude ttls
		int len = ttls.Len();
		for (int i = 0; i < len; i++) {
			Xoa_ttl ttl = Get_ctg_ttl_or_null(ttls, i);
			if (ttl == null) continue;
			Find_pages_in_ctg(rv, itm.Page_ttl(), ttl);
		}
		return rv;
	}
	/**
	 * Adds one Xowd_page_itm per member of cat_ttl to rv, keyed by page id.
	 * Members already present in rv, and members without a title, are skipped.
	 * Does nothing if the category cannot be retrieved.
	 */
	private void Find_pages_in_ctg(Ordered_hash rv, byte[] page_ttl, Xoa_ttl cat_ttl) {
		// get ctg; Int_.Max_value is passed as the limit
		Xoctg_catpage_ctg ctg = wiki.Ctg__catpage_mgr().Get_by_cache_or_null(page_ttl, Xoctg_catpage_url.New__blank(), cat_ttl, Int_.Max_value);
		if (ctg == null) return;

		// loop grps to get each page
		Int_obj_ref tmp_id = Int_obj_ref.New_zero();
		for (byte tid = 0; tid < Xoa_ctg_mgr.Tid___max; tid++) {
			// get grp; EX: subc; page; file
			Xoctg_catpage_grp grp = ctg.Grp_by_tid(tid);

			// loop itms in grp and add to hash
			int len = grp.Itms__len();
			for (int i = 0; i < len; i++) {
				Xoctg_catpage_itm ctg_itm = grp.Itms__get_at(i);	// NOTE: named ctg_itm to avoid shadowing the Dpl_itm field
				int itm_page_id = ctg_itm.Page_id();
				if (rv.Has(tmp_id.Val_(itm_page_id))) continue;	// check to make sure not already added
				if (ctg_itm.Page_ttl() == null) continue;		// cat_link can exist without entry in page_db.page

				// only allocate once the item is known to be kept
				Xowd_page_itm page = new Xowd_page_itm();
				page.Id_(itm_page_id);
				page.Ttl_(ctg_itm.Page_ttl());
				rv.Add(Int_obj_ref.New(itm_page_id), page);
			}
		}
	}
	/** Parses the i-th entry of list as a title in the Category namespace; logs and returns null if the title is invalid. */
	private Xoa_ttl Get_ctg_ttl_or_null(List_adp list, int i) {
		// get ttl
		byte[] ttl_bry = (byte[])list.Get_at(i);
		Xoa_ttl ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, ttl_bry);

		// log if invalid; NOTE: pages in en.n will pass "{{{2}}}" as category title; PAGE:en.b:Category:Egypt DATE:2016-10-18
		if (ttl == null) {
			Gfo_usr_dlg_.Instance.Log_many("", "", "category title is invalid; wiki=~{0} page=~{1} ttl=~{2}", wiki.Domain_str(), itm.Page_ttl(), ttl_bry);
		}
		return ttl;
	}
}

@ -20,13 +20,11 @@ import gplx.xowa.wikis.dbs.*; import gplx.xowa.addons.wikis.ctgs.*; import gplx.
import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.amps.*;
import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.urls.*;
public class Dpl_xnde implements Xox_xnde {
private Dpl_itm itm = new Dpl_itm(); private List_adp pages = List_adp_.New();
private Dpl_itm itm; private Ordered_hash pages;
public void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj) {} // NOTE: <dynamicPageList> has no attributes
public void Xtn_parse(Xowe_wiki wiki, Xop_ctx ctx, Xop_root_tkn root, byte[] src, Xop_xnde_tkn xnde) {
itm.Parse(wiki, ctx, ctx.Page().Ttl().Full_txt_w_ttl_case(), src, xnde);
Dpl_page_finder.Find_pages(pages, wiki, itm);
if (itm.Sort_ascending() != Bool_.__byte)
pages.Sort_by(new Dpl_page_sorter(itm));
itm = Dpl_itm.Parse(wiki, ctx, ctx.Page().Ttl().Full_txt_w_ttl_case(), src, xnde);
pages = new Dpl_page_finder(itm, wiki).Find();
}
public void Xtn_write(Bry_bfr bfr, Xoae_app app, Xop_ctx ctx, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xoae_page wpg, Xop_xnde_tkn xnde, byte[] src) {
Xowe_wiki wiki = ctx.Wiki();
@ -71,109 +69,11 @@ public class Dpl_xnde implements Xox_xnde {
}
}
bfr.Add(html_mode.Grp_end()).Add_byte_nl();
} finally {tmp_bfr.Mkr_rls();}
}
private static byte[] Bry_nofollow = Bry_.new_a7(" rel=\"nofollow\"");
}
class Dpl_page_finder {
/**
 * Fills rv with Xowd_page_itm for every page that is a member of ALL valid include categories of itm,
 * skipping excluded pages and pages rejected by the namespace filter, then loads page data by id and sorts by id.
 * rv is cleared first; it stays empty if itm has no include categories.
 */
public static void Find_pages(List_adp rv, Xowe_wiki wiki, Dpl_itm itm) {
	rv.Clear();
	List_adp includes = itm.Ctg_includes(); if (includes == null) return;
	int includes_len = includes.Count();
	Ordered_hash old_regy = Ordered_hash_.New(), new_regy = Ordered_hash_.New(), cur_regy = Ordered_hash_.New();
	Xodb_load_mgr load_mgr = wiki.Db_mgr().Load_mgr();
	Xowd_page_itm tmp_page = new Xowd_page_itm();
	Int_obj_ref tmp_id = Int_obj_ref.New_zero();
	List_adp del_list = List_adp_.New();
	int ns_filter = itm.Ns_filter();
	Ordered_hash exclude_pages = Ordered_hash_.New();
	Find_excludes(exclude_pages, wiki, load_mgr, itm.Page_ttl(), tmp_page, tmp_id, itm.Ctg_excludes());

	// index of the current VALID include; the helpers treat 0 as "no predecessor set".
	// the raw loop index must not be used, b/c a skipped invalid 0th title would make the next category intersect with an empty set
	int valid_idx = 0;
	for (int i = 0; i < includes_len; i++) {	// loop over includes
		byte[] include = (byte[])includes.Get_at(i);
		Xoa_ttl include_ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, include);
		// pages in en.n will pass "{{{2}}}" as category title; PAGE:en.b:Category:Egypt DATE:2016-10-18
		if (include_ttl == null) {
			Gfo_usr_dlg_.Instance.Log_many("", "", "category title is invalid; wiki=~{0} page=~{1} ttl=~{2}", wiki.Domain_str(), itm.Page_ttl(), include);
			continue;	// NOTE: must ignore invalid args; EX: "{{{2}}}" is ignored but "missing_category" is not
		}
		cur_regy.Clear(); del_list.Clear();
		Find_pages_in_ctg(cur_regy, wiki, load_mgr, itm.Page_ttl(), tmp_page, tmp_id, include_ttl);
		Del_old_pages_not_in_cur(valid_idx, tmp_id, old_regy, cur_regy, del_list);
		Add_cur_pages_also_in_old(valid_idx, tmp_id, old_regy, cur_regy, new_regy, exclude_pages, ns_filter);
		old_regy = new_regy;
		new_regy = Ordered_hash_.New();
		valid_idx++;
	}

	// old_regy now holds the ids that survived every include; create a page itm for each
	int pages_len = old_regy.Count();
	for (int i = 0; i < pages_len; i++) {	// loop over old and create pages
		Int_obj_ref old_id = (Int_obj_ref)old_regy.Get_at(i);
		rv.Add(new Xowd_page_itm().Id_(old_id.Val()));
	}
	wiki.Db_mgr().Load_mgr().Load_by_ids(Cancelable_.Never, rv, 0, pages_len);
	rv.Sort_by(Xowd_page_itm_sorter.IdAsc);
}
/**
 * Adds the members of every exclude category to exclude_pages.
 * A null list is a no-op; entries that do not parse as a Category title are silently skipped.
 */
private static void Find_excludes(Ordered_hash exclude_pages, Xowe_wiki wiki, Xodb_load_mgr load_mgr, byte[] page_ttl, Xowd_page_itm tmp_page, Int_obj_ref tmp_id, List_adp exclude_ctgs) {
	if (exclude_ctgs == null) return;
	int len = exclude_ctgs.Count();
	for (int idx = 0; idx < len; idx++) {
		byte[] ctg_bry = (byte[])exclude_ctgs.Get_at(idx);
		Xoa_ttl ctg_ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, ctg_bry);
		if (ctg_ttl == null) continue;	// invalid title; nothing to exclude
		Find_pages_in_ctg(exclude_pages, wiki, load_mgr, page_ttl, tmp_page, tmp_id, ctg_ttl);
	}
}
/**
 * Adds every member of cat_ttl (across all groups of the category) to rv, keyed by page id;
 * members whose id is already in rv are left alone. Does nothing if the category cannot be retrieved.
 * NOTE: subcategories are not recursed into; the earlier recursion was removed; PAGE:en.b:XML DATE:2016-09-18
 */
private static void Find_pages_in_ctg(Ordered_hash rv, Xowe_wiki wiki, Xodb_load_mgr load_mgr, byte[] page_ttl, Xowd_page_itm tmp_page, Int_obj_ref tmp_id, Xoa_ttl cat_ttl) {
	Xoctg_catpage_ctg ctg = wiki.Ctg__catpage_mgr().Get_or_load_or_null(page_ttl, Xoctg_catpage_url.New__blank(), cat_ttl, Int_.Max_value);
	if (ctg == null) return;

	// visit each grp in turn
	for (byte grp_tid = 0; grp_tid < Xoa_ctg_mgr.Tid___max; ++grp_tid) {
		Xoctg_catpage_grp grp = ctg.Grp_by_tid(grp_tid);
		int grp_len = grp.Itms__len();

		// add each member not yet registered
		for (int idx = 0; idx < grp_len; ++idx) {
			Xoctg_catpage_itm member = grp.Itms__get_at(idx);
			int member_id = member.Page_id();
			if (!rv.Has(tmp_id.Val_(member_id))) {
				rv.Add(Int_obj_ref.New(member_id), member);
			}
		}
	}
}
/**
 * Removes from old_regy every id that is absent from cur_regy.
 * Ids to remove are first appended to del_list, then every entry of del_list is deleted from old_regy.
 * No-op when i is 0 (the first ctg has no predecessor).
 */
private static void Del_old_pages_not_in_cur(int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, List_adp del_list) {
	if (i != 0) {
		// pass 1: collect ids in old that cur does not have; EX: (A,B) in old; B only in cur; A is collected
		int old_count = old_regy.Count();
		for (int idx = 0; idx < old_count; idx++) {
			Int_obj_ref candidate = (Int_obj_ref)old_regy.Get_at(idx);
			boolean in_cur = cur_regy.Has(tmp_id.Val_(candidate.Val()));
			if (!in_cur) {
				del_list.Add(candidate);
			}
		}

		// pass 2: delete the collected ids; kept separate so old_regy is not changed while being indexed
		int del_count = del_list.Count();
		for (int idx = 0; idx < del_count; idx++) {
			Int_obj_ref doomed = (Int_obj_ref)del_list.Get_at(idx);
			old_regy.Del(tmp_id.Val_(doomed.Val()));
		}
	}
}
private static void Add_cur_pages_also_in_old(int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, Ordered_hash new_regy, Ordered_hash exclude_pages, int ns_filter) {
int found_len = cur_regy.Count();
for (int j = 0; j < found_len; j++) { // if new_page is in cur, add it
Xoctg_catpage_itm cur_itm = (Xoctg_catpage_itm)cur_regy.Get_at(j);
Xoa_ttl cur_ttl = cur_itm.Page_ttl(); if (cur_ttl == null) continue;
if (ns_filter != Dpl_itm.Ns_filter_null && ns_filter != cur_ttl.Ns().Id()) continue;
tmp_id.Val_(cur_itm.Page_id()); // set tmp_id, since it will be used at least once
if (exclude_pages.Has(tmp_id)) continue; // ignore excluded pages
if (i != 0) { // skip logic for first ctg (which doesn't have a predecessor)
if (!old_regy.Has(tmp_id)) continue; // cur_itm not in old_regy; ignore
}
new_regy.Add_as_key_and_val(Int_obj_ref.New(cur_itm.Page_id()));
finally {
tmp_bfr.Mkr_rls();
pages.Clear(); // clear pages else out-of-memory error when Next 200 3 times on en.wiktionary.org/wiki/Category:English_lemmas; DATE:2019-08-25
}
}
private static byte[] Bry_nofollow = Bry_.new_a7(" rel=\"nofollow\"");
}

Loading…
Cancel
Save