diff --git a/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/Xoctg_catpage_mgr.java b/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/Xoctg_catpage_mgr.java index 6a46be854..c3f28375f 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/Xoctg_catpage_mgr.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/Xoctg_catpage_mgr.java @@ -49,27 +49,30 @@ public class Xoctg_catpage_mgr implements Gfo_invk { wiki.App().Cfg().Bind_many_wiki(this, wiki, Cfg__missing_class); } public void Free_mem_all() {cache.Clear();} - public Xoctg_catpage_ctg Get_or_load_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) { - // load categories from cat dbs; exit if not found + public Xoctg_catpage_ctg Get_by_cache_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) { + // DynamicPageList categories only (b/c of many members); for regular catpages, always retrieve on demand Xoctg_catpage_ctg ctg = (Xoctg_catpage_ctg)cache.Get_by(cat_ttl.Full_db()); if (ctg == null) { if (gplx.core.envs.Env_.Mode_testing()) return null; // needed for dpl test - synchronized (thread_lock) { // LOCK:used by multiple wrks; DATE:2016-09-12 - ctg = loader.Load_ctg_or_null(wiki, page_ttl, this, catpage_url, cat_ttl, limit); - } + ctg = Get_by_db_or_null(page_ttl, catpage_url, cat_ttl, limit); if (ctg == null) return null; // not in cache or db; exit - if (limit == Int_.Max_value) // only add to cache if Max_val (DynamicPageList); for regular catpages, always retrieve on demand - cache.Add(cat_ttl.Full_db(), ctg); + cache.Add(cat_ttl.Full_db(), ctg); } return ctg; } + public Xoctg_catpage_ctg Get_by_db_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) { + // load categories from cat dbs; exit if not found + synchronized (thread_lock) { // LOCK:used by multiple wrks; DATE:2016-09-12 + return loader.Load_ctg_or_null(wiki, page_ttl, this, catpage_url, cat_ttl, limit); + } + } public void Write_catpage(Bry_bfr bfr, Xoa_page page) { try { // get catpage_url Xoctg_catpage_url catpage_url = Xoctg_catpage_url_parser.Parse(page.Url()); // load categories from cat dbs; exit if not found - Xoctg_catpage_ctg ctg = Get_or_load_or_null(page.Ttl().Page_db(), catpage_url, page.Ttl(), grp_max); + Xoctg_catpage_ctg ctg = Get_by_db_or_null(page.Ttl().Page_db(), catpage_url, page.Ttl(), grp_max); if (ctg == null) return; // write html diff --git a/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/dbs/Xoctg_catlink_loader.java b/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/dbs/Xoctg_catlink_loader.java index 8715fe30d..f4567d959 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/dbs/Xoctg_catlink_loader.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/dbs/Xoctg_catlink_loader.java @@ -45,8 +45,7 @@ class Xoctg_catlink_loader { // sort and reduce list to 200 total catlink_list.Sort_by(new Xoctg_catlink_sorter(url_is_from)); - Xoctg_page_loader catlink_loader = new Xoctg_page_loader(wiki); - Ordered_hash catlink_hash = catlink_loader.Hash(); + Ordered_hash catlink_hash = Ordered_hash_.New(); int catlink_list_len = catlink_list.Len(); int max = catlink_list_len < limit ? catlink_list_len : limit; for (int i = 0; i < max; i++) { @@ -55,7 +54,6 @@ class Xoctg_catlink_loader { } // load ns / ttl for each catlink - page_tbl.Select_in__id(catlink_loader); Xoctg_catpage_grp grp = rv.Grp_by_tid(grp_tid); grp.Itms_((Xoctg_catpage_itm[])catlink_hash.To_ary_and_clear(Xoctg_catpage_itm.class)); @@ -93,7 +91,10 @@ class Xoctg_catlink_loader { , ", cl_type_id" , ", {0} AS cl_sortkey" , ", {1} AS cl_sortkey_prefix" + , ", p.page_namespace" + , ", p.page_title" , "FROM cat_link cl{2}" + , " LEFT JOIN page p ON p.page_id = cl{2}.cl_from" ), sortkey_col, sortkey_prefix_fld, sortkey_join, link_db_id); bfr.Add_str_u8_fmt(String_.Concat_lines_nl ( "WHERE cl_to_id = {0}" @@ -108,12 +109,15 @@ class Xoctg_catlink_loader { } private void Load_catlinks(List_adp catlink_list, String sql) { Db_rdr rdr = Db_rdr_.Empty; + int count = 0; try { attach_mgr.Attach(); rdr = attach_mgr.Conn_main().Stmt_sql(sql).Exec_select__rls_auto(); while (rdr.Move_next()) { - Xoctg_catpage_itm itm = Xoctg_catpage_itm.New_by_rdr(rdr, version); + Xoctg_catpage_itm itm = Xoctg_catpage_itm.New_by_rdr(wiki, rdr, version); catlink_list.Add(itm); + if (count >= 1000 && (count % 1000) == 0) Gfo_usr_dlg_.Instance.Prog_many("", "", "loading cat_links: count=~{0}", count); + count++; } } finally { @@ -197,6 +201,10 @@ class Xoctg_catlink_loader { version = 3; db_1st = cat_core_conn; } + + // add page_db + db_list.Add(new Db_attach_itm("page_db", page_tbl.Conn())); + Db_attach_mgr attach_mgr = new Db_attach_mgr(db_1st, (Db_attach_itm[])db_list.To_ary_and_clear(Db_attach_itm.class)); return new Xoctg_catlink_loader(wiki, catpage_mgr, page_tbl, version, link_dbs_len, attach_mgr); } diff --git a/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/dbs/Xoctg_page_loader.java b/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/dbs/Xoctg_page_loader.java deleted file mode 100644 index 63b1514d9..000000000 --- a/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/dbs/Xoctg_page_loader.java +++ /dev/null @@ -1,40 +0,0 @@ -/* -XOWA: the XOWA Offline Wiki Application -Copyright (C) 2012-2017 gnosygnu@gmail.com - -XOWA is licensed under the terms of the General Public License (GPL) Version 3, -or alternatively under the terms of the Apache License Version 2.0. - -You may use XOWA according to either of these licenses as is most appropriate -for your project on a case-by-case basis. - -The terms of each license can be found in the source code repository: - -GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt -Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt -*/ -package gplx.xowa.addons.wikis.ctgs.htmls.catpages.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.ctgs.*; import gplx.xowa.addons.wikis.ctgs.htmls.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.*; -import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*; -import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*; -public class Xoctg_page_loader implements Select_in_cbk { - private final Xow_wiki wiki; - private final Ordered_hash hash = Ordered_hash_.New(); - public Xoctg_page_loader(Xow_wiki wiki) {this.wiki = wiki;} - public Ordered_hash Hash() {return hash;} - public int Hash_max() {return hash.Len();} - public void Write_sql(Bry_bfr bfr, int idx) { - Xoctg_catpage_itm itm = (Xoctg_catpage_itm)hash.Get_at(idx); - bfr.Add_int_variable(itm.Page_id()); - } - public void Read_data(Db_rdr rdr) { - // read values from page_tbl - int page_id = rdr.Read_int("page_id"); - int page_ns = rdr.Read_int("page_namespace"); - byte[] page_ttl = rdr.Read_bry_by_str("page_title"); - - // get itm and set data - Xoctg_catpage_itm itm = (Xoctg_catpage_itm)hash.Get_by(page_id); - if (itm == null) return; // NOTE: itms can exist in cat_links_tbl, but not in page_tbl; EX:User:Any_page - itm.Page_ttl_(wiki.Ttl_parse(page_ns, page_ttl)); - } -} diff --git a/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/doms/Xoctg_catpage_itm.java b/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/doms/Xoctg_catpage_itm.java index 5c335ee81..2693f8a5d 100644 --- a/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/doms/Xoctg_catpage_itm.java +++ b/400_xowa/src/gplx/xowa/addons/wikis/ctgs/htmls/catpages/doms/Xoctg_catpage_itm.java @@ -76,7 +76,7 @@ public class Xoctg_catpage_itm { } public static final Xoctg_catpage_itm[] Ary_empty = new Xoctg_catpage_itm[0]; - public static Xoctg_catpage_itm New_by_rdr(Db_rdr rdr, byte version) { + public static Xoctg_catpage_itm New_by_rdr(Xow_wiki wiki, Db_rdr rdr, byte version) { byte[] sortkey_binary = Bry_.Empty; byte[] sortkey_prefix = Bry_.Empty; if (version == Version__4) { @@ -87,7 +87,15 @@ public class Xoctg_catpage_itm { sortkey_binary = Bry_.Empty; sortkey_prefix = rdr.Read_bry_by_str("cl_sortkey"); } - return new Xoctg_catpage_itm(version, rdr.Read_byte("cl_type_id"), rdr.Read_int("cl_from"), sortkey_prefix, sortkey_binary); + Xoctg_catpage_itm rv = new Xoctg_catpage_itm(version, rdr.Read_byte("cl_type_id"), rdr.Read_int("cl_from"), sortkey_prefix, sortkey_binary); + + if (version == Version__4) { + String ttl_str = rdr.Read_str("page_title"); + if (ttl_str != null) {// NOTE: ttl_str will be NULL if LEFT JOIN fails on page_db.page + rv.Page_ttl_(wiki.Ttl_parse(rdr.Read_int("page_namespace"), Bry_.new_u8(ttl_str))); + } + } + return rv; } public static Xoctg_catpage_itm New_by_ttl(byte grp_tid, int page_id, Xoa_ttl ttl) { // TEST Xoctg_catpage_itm rv = new Xoctg_catpage_itm(Version__4, grp_tid, page_id, ttl.Page_txt(), Bry_.Empty); diff --git a/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_itm.java b/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_itm.java index ccbef7ef1..4a62bac21 100644 --- a/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_itm.java +++ b/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_itm.java @@ -40,7 +40,7 @@ class Dpl_itm { public byte Quality_pages() {return quality_pages;} private byte quality_pages; public byte Stable_pages() {return stable_pages;} private byte stable_pages; private Xop_ctx sub_ctx; private Xop_tkn_mkr sub_tkn_mkr; private Xop_root_tkn sub_root; - public void Parse(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) { // parse kvps in xnde; EX:category=abc\nredirects=y\n + private void Parse_src(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) { // parse kvps in xnde; EX:category=abc\nredirects=y\n this.page_ttl = page_ttl; sub_ctx = Xop_ctx.New__sub__reuse_page(ctx); sub_tkn_mkr = sub_ctx.Tkn_mkr(); @@ -175,6 +175,11 @@ class Dpl_itm { // boolean ctg_date = false, ctg_date_strip = false; // byte[] ns_include = null; // byte[] ctg_date_fmt; + public static Dpl_itm Parse(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) { + Dpl_itm rv = new Dpl_itm(); + rv.Parse_src(wiki, ctx, page_ttl, src, xnde); + return rv; + } } class Dpl_stable_tid { public static final byte Tid_null = 0, Tid_include = 1, Tid_only = 2, Tid_exclude = 3; diff --git a/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_page_finder.java b/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_page_finder.java new file mode 100644 index 000000000..41a2d02da --- /dev/null +++ b/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_page_finder.java @@ -0,0 +1,144 @@ +/* +XOWA: the XOWA Offline Wiki Application +Copyright (C) 2012-2017 gnosygnu@gmail.com + +XOWA is licensed under the terms of the General Public License (GPL) Version 3, +or alternatively under the terms of the Apache License Version 2.0. + +You may use XOWA according to either of these licenses as is most appropriate +for your project on a case-by-case basis. + +The terms of each license can be found in the source code repository: + +GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt +Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt +*/ +package gplx.xowa.xtns.dynamicPageList; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*; +import gplx.core.primitives.*; import gplx.core.lists.*; +import gplx.xowa.wikis.dbs.*; +import gplx.xowa.wikis.data.tbls.*; +import gplx.xowa.addons.wikis.ctgs.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.urls.*; +class Dpl_page_finder { + private final Dpl_itm itm; + private final Xowe_wiki wiki; + + public Dpl_page_finder(Dpl_itm itm, Xowe_wiki wiki) { + this.itm = itm; + this.wiki = wiki; + } + public Ordered_hash Find() { + // get include_ttls + List_adp include_ttls = itm.Ctg_includes(); + if (include_ttls == null) return Ordered_hash_.New(); // exit early if none exists + + // get exclude_pages + Ordered_hash exclude_pages = Get_exclude_pages(itm.Ctg_excludes()); + + // init vars for loop below + int itm_ns_filter = itm.Ns_filter(); + List_adp remove_list = List_adp_.New(); + Int_obj_ref tmp_id = Int_obj_ref.New_zero(); + + // get include_pags; note that this is a UNION of all member pages; EX: include_ttls=Ctg_A,Ctg_B,Ctg_C will only return pages in Ctg_A AND Ctg_B AND Ctg_C + Ordered_hash rv = Ordered_hash_.New(); + int len = include_ttls.Len(); + for (int i = 0; i < len; i++) { + // get ttl + Xoa_ttl ttl = Get_ctg_ttl_or_null(include_ttls, i); + if (ttl == null) continue; + + // get pages + Ordered_hash cur_pages = Ordered_hash_.New(); + Find_pages_in_ctg(cur_pages, itm.Page_ttl(), ttl); + + // identify pages (a) not in previous list; (b) excluded; (c) ns_filter + remove_list.Clear(); + int cur_len = cur_pages.Len(); + for (int j = 0; j < cur_len; j++) { + // get item and init tmp + Xowd_page_itm page_itm = (Xowd_page_itm)cur_pages.Get_at(j); + tmp_id.Val_(page_itm.Id()); + + // check if should be removed + if ( (i != 0 && !rv.Has(tmp_id)) // item doesn't exist in previous set; note this doesn't apply to the 0th set + || exclude_pages.Has(tmp_id) // item is marked as excluded + || itm_ns_filter != Dpl_itm.Ns_filter_null && itm_ns_filter != page_itm.Ns().Id() // item does not match specified filter + ) { + remove_list.Add(page_itm); + } + } + + // remove pages + int remove_len = remove_list.Len(); + for (int j = 0; j < remove_len; j++) { + Xowd_page_itm page_itm = (Xowd_page_itm)remove_list.Get_at(j); + cur_pages.Del(tmp_id.Val_(page_itm.Id())); + } + + // set cur_pages as main list + rv = cur_pages; + } + + // sorting + rv.Sort_by + ( itm.Sort_ascending() == Bool_.__byte + ? (ComparerAble)Xowd_page_itm_sorter.IdAsc // sort not specified; use default; + : (ComparerAble)new Dpl_page_sorter(itm)); // sort specified + return rv; + } + private Ordered_hash Get_exclude_pages(List_adp ttls) { + Ordered_hash rv = Ordered_hash_.New(); + + // return empty hash if no ttls + if (ttls == null) + return rv; + + // loop exclude ttls + int len = ttls.Count(); + for (int i = 0; i < len; i++) { + Xoa_ttl ttl = Get_ctg_ttl_or_null(ttls, i); + if (ttl == null) continue; + Find_pages_in_ctg(rv, itm.Page_ttl(), ttl); + } + return rv; + } + private void Find_pages_in_ctg(Ordered_hash rv, byte[] page_ttl, Xoa_ttl cat_ttl) { + // get ctg + Xoctg_catpage_ctg ctg = wiki.Ctg__catpage_mgr().Get_by_cache_or_null(page_ttl, Xoctg_catpage_url.New__blank(), cat_ttl, Int_.Max_value); + if (ctg == null) return; + + // loop grps to get each page + Int_obj_ref tmp_id = Int_obj_ref.New_zero(); + for (byte tid = 0; tid < Xoa_ctg_mgr.Tid___max; tid++) { + // get grp; EX: subc; page; file + Xoctg_catpage_grp grp = ctg.Grp_by_tid(tid); + + // loop itms in grp and add to hash + int len = grp.Itms__len(); + for (int i = 0; i < len; i++) { + Xoctg_catpage_itm itm = grp.Itms__get_at(i); + int itm_page_id = itm.Page_id(); + + if (rv.Has(tmp_id.Val_(itm_page_id))) continue; // check to make sure not already added + + Xowd_page_itm page = new Xowd_page_itm(); + if (itm.Page_ttl() == null) continue; // cat_link can exist without entry in page_db.page + page.Id_(itm_page_id); + page.Ttl_(itm.Page_ttl()); + rv.Add(Int_obj_ref.New(itm_page_id), page); + } + } + } + + private Xoa_ttl Get_ctg_ttl_or_null(List_adp list, int i) {// helper method to extract ttl from list + // get ttl + byte[] ttl_bry = (byte[])list.Get_at(i); + Xoa_ttl ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, ttl_bry); + + // log if invalid; NOTE: pages in en.n will pass "{{{2}}}" as category title; PAGE:en.b:Category:Egypt DATE:2016-10-18 + if (ttl == null) { + Gfo_usr_dlg_.Instance.Log_many("", "", "category title is invalid; wiki=~{0} page=~{1} ttl=~{2}", wiki.Domain_str(), itm.Page_ttl(), ttl_bry); + } + return ttl; + } +} diff --git a/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_xnde.java b/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_xnde.java index 88c87b0bd..46fc98409 100644 --- a/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_xnde.java +++ b/400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_xnde.java @@ -20,13 +20,11 @@ import gplx.xowa.wikis.dbs.*; import gplx.xowa.addons.wikis.ctgs.*; import gplx. import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.amps.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.urls.*; public class Dpl_xnde implements Xox_xnde { - private Dpl_itm itm = new Dpl_itm(); private List_adp pages = List_adp_.New(); + private Dpl_itm itm; private Ordered_hash pages; public void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj) {} // NOTE: has no attributes public void Xtn_parse(Xowe_wiki wiki, Xop_ctx ctx, Xop_root_tkn root, byte[] src, Xop_xnde_tkn xnde) { - itm.Parse(wiki, ctx, ctx.Page().Ttl().Full_txt_w_ttl_case(), src, xnde); - Dpl_page_finder.Find_pages(pages, wiki, itm); - if (itm.Sort_ascending() != Bool_.__byte) - pages.Sort_by(new Dpl_page_sorter(itm)); + itm = Dpl_itm.Parse(wiki, ctx, ctx.Page().Ttl().Full_txt_w_ttl_case(), src, xnde); + pages = new Dpl_page_finder(itm, wiki).Find(); } public void Xtn_write(Bry_bfr bfr, Xoae_app app, Xop_ctx ctx, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xoae_page wpg, Xop_xnde_tkn xnde, byte[] src) { Xowe_wiki wiki = ctx.Wiki(); @@ -71,109 +69,11 @@ public class Dpl_xnde implements Xox_xnde { } } bfr.Add(html_mode.Grp_end()).Add_byte_nl(); - } finally {tmp_bfr.Mkr_rls();} - } - private static byte[] Bry_nofollow = Bry_.new_a7(" rel=\"nofollow\""); -} -class Dpl_page_finder { - public static void Find_pages(List_adp rv, Xowe_wiki wiki, Dpl_itm itm) { - rv.Clear(); - List_adp includes = itm.Ctg_includes(); if (includes == null) return; - int includes_len = includes.Count(); - Ordered_hash old_regy = Ordered_hash_.New(), new_regy = Ordered_hash_.New(), cur_regy = Ordered_hash_.New(); - Xodb_load_mgr load_mgr = wiki.Db_mgr().Load_mgr(); - Xowd_page_itm tmp_page = new Xowd_page_itm(); - Int_obj_ref tmp_id = Int_obj_ref.New_zero(); - List_adp del_list = List_adp_.New(); - int ns_filter = itm.Ns_filter(); - Ordered_hash exclude_pages = Ordered_hash_.New(); - Find_excludes(exclude_pages, wiki, load_mgr, itm.Page_ttl(), tmp_page, tmp_id, itm.Ctg_excludes()); - - for (int i = 0; i < includes_len; i++) { // loop over includes - byte[] include = (byte[])includes.Get_at(i); - Xoa_ttl include_ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, include); - - // pages in en.n will pass "{{{2}}}" as category title; PAGE:en.b:Category:Egypt DATE:2016-10-18 - if (include_ttl == null) { - Gfo_usr_dlg_.Instance.Log_many("", "", "category title is invalid; wiki=~{0} page=~{1} ttl=~{2}", wiki.Domain_str(), itm.Page_ttl(), include); - continue; // NOTE: must ignore invalid args; EX: "{{{2}}}" is ignored but "missing_category" is not - } - - cur_regy.Clear(); del_list.Clear(); - Find_pages_in_ctg(cur_regy, wiki, load_mgr, itm.Page_ttl(), tmp_page, tmp_id, include_ttl); - Del_old_pages_not_in_cur(i, tmp_id, old_regy, cur_regy, del_list); - Add_cur_pages_also_in_old(i, tmp_id, old_regy, cur_regy, new_regy, exclude_pages, ns_filter); - old_regy = new_regy; - new_regy = Ordered_hash_.New(); - } - int pages_len = old_regy.Count(); - for (int i = 0; i < pages_len; i++) { // loop over old and create pages - Int_obj_ref old_id = (Int_obj_ref)old_regy.Get_at(i); - rv.Add(new Xowd_page_itm().Id_(old_id.Val())); - } - wiki.Db_mgr().Load_mgr().Load_by_ids(Cancelable_.Never, rv, 0, pages_len); - rv.Sort_by(Xowd_page_itm_sorter.IdAsc); - } - private static void Find_excludes(Ordered_hash exclude_pages, Xowe_wiki wiki, Xodb_load_mgr load_mgr, byte[] page_ttl, Xowd_page_itm tmp_page, Int_obj_ref tmp_id, List_adp exclude_ctgs) { - if (exclude_ctgs == null) return; - int exclude_ctgs_len = exclude_ctgs.Count(); - for (int i = 0; i < exclude_ctgs_len; i++) { - byte[] exclude_ctg = (byte[])exclude_ctgs.Get_at(i); - Xoa_ttl exclude_ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, exclude_ctg); - if (exclude_ttl != null) - Find_pages_in_ctg(exclude_pages, wiki, load_mgr, page_ttl, tmp_page, tmp_id, exclude_ttl); - } - } - private static void Find_pages_in_ctg(Ordered_hash rv, Xowe_wiki wiki, Xodb_load_mgr load_mgr, byte[] page_ttl, Xowd_page_itm tmp_page, Int_obj_ref tmp_id, Xoa_ttl cat_ttl) { - Xoctg_catpage_ctg ctg = wiki.Ctg__catpage_mgr().Get_or_load_or_null(page_ttl, Xoctg_catpage_url.New__blank(), cat_ttl, Int_.Max_value); - if (ctg == null) return; - - // loop grps to get grp - for (byte ctg_tid = 0; ctg_tid < Xoa_ctg_mgr.Tid___max; ++ctg_tid) { - Xoctg_catpage_grp ctg_grp = ctg.Grp_by_tid(ctg_tid); - int itms_len = ctg_grp.Itms__len(); - - // loop itms in grp and add to hash - for (int i = 0; i < itms_len; ++i) { - Xoctg_catpage_itm ctg_itm = ctg_grp.Itms__get_at(i); - int itm_page_id = ctg_itm.Page_id(); - if (rv.Has(tmp_id.Val_(itm_page_id))) continue; - rv.Add(Int_obj_ref.New(itm_page_id), ctg_itm); - - // DELETE: recurse subcategories; PAGE:en.b:XML DATE:2016-09-18 - // if (ctg_tid == Xoa_ctg_mgr.Tid__subc) { - // load_mgr.Load_by_id(tmp_page, itm_page_id); - // Find_pages_in_ctg(rv, wiki, load_mgr, tmp_page, tmp_id, tmp_page.Ttl_page_db()); - // } - } - } - } - private static void Del_old_pages_not_in_cur(int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, List_adp del_list) { - if (i == 0) return; // skip logic for first ctg (which doesn't have a predecessor) - int old_len = old_regy.Count(); - for (int j = 0; j < old_len; j++) { // if cur is not in new, del it - Int_obj_ref old_id = (Int_obj_ref)old_regy.Get_at(j); - if (!cur_regy.Has(tmp_id.Val_(old_id.Val()))) // old_itm does not exist in cur_regy - del_list.Add(old_id); // remove; EX: (A,B) in old; B only in cur; old should now be (A) only - } - int del_len = del_list.Count(); - for (int j = 0; j < del_len; j++) { - Int_obj_ref old_itm = (Int_obj_ref)del_list.Get_at(j); - old_regy.Del(tmp_id.Val_(old_itm.Val())); - } - } - private static void Add_cur_pages_also_in_old(int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, Ordered_hash new_regy, Ordered_hash exclude_pages, int ns_filter) { - int found_len = cur_regy.Count(); - for (int j = 0; j < found_len; j++) { // if new_page is in cur, add it - Xoctg_catpage_itm cur_itm = (Xoctg_catpage_itm)cur_regy.Get_at(j); - Xoa_ttl cur_ttl = cur_itm.Page_ttl(); if (cur_ttl == null) continue; - if (ns_filter != Dpl_itm.Ns_filter_null && ns_filter != cur_ttl.Ns().Id()) continue; - tmp_id.Val_(cur_itm.Page_id()); // set tmp_id, since it will be used at least once - if (exclude_pages.Has(tmp_id)) continue; // ignore excluded pages - if (i != 0) { // skip logic for first ctg (which doesn't have a predecessor) - if (!old_regy.Has(tmp_id)) continue; // cur_itm not in old_regy; ignore - } - new_regy.Add_as_key_and_val(Int_obj_ref.New(cur_itm.Page_id())); - } + } + finally { + tmp_bfr.Mkr_rls(); + pages.Clear(); // clear pages else out-of-memory error when Next 200 3 times on en.wiktionary.org/wiki/Category:English_lemmas; DATE:2019-08-25 + } } + private static byte[] Bry_nofollow = Bry_.new_a7(" rel=\"nofollow\""); }