mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Xtn.Dpl: Change dynamicPageList to always get categories from cache [#556]
This commit is contained in:
parent
42842f0bcc
commit
2598dee844
@ -49,27 +49,30 @@ public class Xoctg_catpage_mgr implements Gfo_invk {
|
||||
wiki.App().Cfg().Bind_many_wiki(this, wiki, Cfg__missing_class);
|
||||
}
|
||||
// Releases memory held by this manager by emptying the catpage cache.
public void Free_mem_all() {
	cache.Clear();
}
|
||||
public Xoctg_catpage_ctg Get_or_load_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) {
|
||||
// load categories from cat dbs; exit if not found
|
||||
public Xoctg_catpage_ctg Get_by_cache_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) {
|
||||
// DynamicPageList categories only (b/c of many members); for regular catpages, always retrieve on demand
|
||||
Xoctg_catpage_ctg ctg = (Xoctg_catpage_ctg)cache.Get_by(cat_ttl.Full_db());
|
||||
if (ctg == null) {
|
||||
if (gplx.core.envs.Env_.Mode_testing()) return null; // needed for dpl test
|
||||
synchronized (thread_lock) { // LOCK:used by multiple wrks; DATE:2016-09-12
|
||||
ctg = loader.Load_ctg_or_null(wiki, page_ttl, this, catpage_url, cat_ttl, limit);
|
||||
}
|
||||
ctg = Get_by_db_or_null(page_ttl, catpage_url, cat_ttl, limit);
|
||||
if (ctg == null) return null; // not in cache or db; exit
|
||||
if (limit == Int_.Max_value) // only add to cache if Max_val (DynamicPageList); for regular catpages, always retrieve on demand
|
||||
cache.Add(cat_ttl.Full_db(), ctg);
|
||||
}
|
||||
return ctg;
|
||||
}
|
||||
public Xoctg_catpage_ctg Get_by_db_or_null(byte[] page_ttl, Xoctg_catpage_url catpage_url, Xoa_ttl cat_ttl, int limit) {
|
||||
// load categories from cat dbs; exit if not found
|
||||
synchronized (thread_lock) { // LOCK:used by multiple wrks; DATE:2016-09-12
|
||||
return loader.Load_ctg_or_null(wiki, page_ttl, this, catpage_url, cat_ttl, limit);
|
||||
}
|
||||
}
|
||||
public void Write_catpage(Bry_bfr bfr, Xoa_page page) {
|
||||
try {
|
||||
// get catpage_url
|
||||
Xoctg_catpage_url catpage_url = Xoctg_catpage_url_parser.Parse(page.Url());
|
||||
|
||||
// load categories from cat dbs; exit if not found
|
||||
Xoctg_catpage_ctg ctg = Get_or_load_or_null(page.Ttl().Page_db(), catpage_url, page.Ttl(), grp_max);
|
||||
Xoctg_catpage_ctg ctg = Get_by_db_or_null(page.Ttl().Page_db(), catpage_url, page.Ttl(), grp_max);
|
||||
if (ctg == null) return;
|
||||
|
||||
// write html
|
||||
|
@ -45,8 +45,7 @@ class Xoctg_catlink_loader {
|
||||
|
||||
// sort and reduce list to 200 total
|
||||
catlink_list.Sort_by(new Xoctg_catlink_sorter(url_is_from));
|
||||
Xoctg_page_loader catlink_loader = new Xoctg_page_loader(wiki);
|
||||
Ordered_hash catlink_hash = catlink_loader.Hash();
|
||||
Ordered_hash catlink_hash = Ordered_hash_.New();
|
||||
int catlink_list_len = catlink_list.Len();
|
||||
int max = catlink_list_len < limit ? catlink_list_len : limit;
|
||||
for (int i = 0; i < max; i++) {
|
||||
@ -55,7 +54,6 @@ class Xoctg_catlink_loader {
|
||||
}
|
||||
|
||||
// load ns / ttl for each catlink
|
||||
page_tbl.Select_in__id(catlink_loader);
|
||||
Xoctg_catpage_grp grp = rv.Grp_by_tid(grp_tid);
|
||||
grp.Itms_((Xoctg_catpage_itm[])catlink_hash.To_ary_and_clear(Xoctg_catpage_itm.class));
|
||||
|
||||
@ -93,7 +91,10 @@ class Xoctg_catlink_loader {
|
||||
, ", cl_type_id"
|
||||
, ", {0} AS cl_sortkey"
|
||||
, ", {1} AS cl_sortkey_prefix"
|
||||
, ", p.page_namespace"
|
||||
, ", p.page_title"
|
||||
, "FROM <link_db_{3}>cat_link cl{2}"
|
||||
, " LEFT JOIN <page_db>page p ON p.page_id = cl{2}.cl_from"
|
||||
), sortkey_col, sortkey_prefix_fld, sortkey_join, link_db_id);
|
||||
bfr.Add_str_u8_fmt(String_.Concat_lines_nl
|
||||
( "WHERE cl_to_id = {0}"
|
||||
@ -108,12 +109,15 @@ class Xoctg_catlink_loader {
|
||||
}
|
||||
private void Load_catlinks(List_adp catlink_list, String sql) {
|
||||
Db_rdr rdr = Db_rdr_.Empty;
|
||||
int count = 0;
|
||||
try {
|
||||
attach_mgr.Attach();
|
||||
rdr = attach_mgr.Conn_main().Stmt_sql(sql).Exec_select__rls_auto();
|
||||
while (rdr.Move_next()) {
|
||||
Xoctg_catpage_itm itm = Xoctg_catpage_itm.New_by_rdr(rdr, version);
|
||||
Xoctg_catpage_itm itm = Xoctg_catpage_itm.New_by_rdr(wiki, rdr, version);
|
||||
catlink_list.Add(itm);
|
||||
if (count >= 1000 && (count % 1000) == 0) Gfo_usr_dlg_.Instance.Prog_many("", "", "loading cat_links: count=~{0}", count);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
finally {
|
||||
@ -197,6 +201,10 @@ class Xoctg_catlink_loader {
|
||||
version = 3;
|
||||
db_1st = cat_core_conn;
|
||||
}
|
||||
|
||||
// add page_db
|
||||
db_list.Add(new Db_attach_itm("page_db", page_tbl.Conn()));
|
||||
|
||||
Db_attach_mgr attach_mgr = new Db_attach_mgr(db_1st, (Db_attach_itm[])db_list.To_ary_and_clear(Db_attach_itm.class));
|
||||
return new Xoctg_catlink_loader(wiki, catpage_mgr, page_tbl, version, link_dbs_len, attach_mgr);
|
||||
}
|
||||
|
@ -1,40 +0,0 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.addons.wikis.ctgs.htmls.catpages.dbs; import gplx.*; import gplx.xowa.*; import gplx.xowa.addons.*; import gplx.xowa.addons.wikis.*; import gplx.xowa.addons.wikis.ctgs.*; import gplx.xowa.addons.wikis.ctgs.htmls.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.*;
|
||||
import gplx.dbs.*; import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*;
|
||||
public class Xoctg_page_loader implements Select_in_cbk {
|
||||
private final Xow_wiki wiki;
|
||||
private final Ordered_hash hash = Ordered_hash_.New();
|
||||
public Xoctg_page_loader(Xow_wiki wiki) {this.wiki = wiki;}
|
||||
public Ordered_hash Hash() {return hash;}
|
||||
public int Hash_max() {return hash.Len();}
|
||||
public void Write_sql(Bry_bfr bfr, int idx) {
|
||||
Xoctg_catpage_itm itm = (Xoctg_catpage_itm)hash.Get_at(idx);
|
||||
bfr.Add_int_variable(itm.Page_id());
|
||||
}
|
||||
public void Read_data(Db_rdr rdr) {
|
||||
// read values from page_tbl
|
||||
int page_id = rdr.Read_int("page_id");
|
||||
int page_ns = rdr.Read_int("page_namespace");
|
||||
byte[] page_ttl = rdr.Read_bry_by_str("page_title");
|
||||
|
||||
// get itm and set data
|
||||
Xoctg_catpage_itm itm = (Xoctg_catpage_itm)hash.Get_by(page_id);
|
||||
if (itm == null) return; // NOTE: itms can exist in cat_links_tbl, but not in page_tbl; EX:User:Any_page
|
||||
itm.Page_ttl_(wiki.Ttl_parse(page_ns, page_ttl));
|
||||
}
|
||||
}
|
@ -76,7 +76,7 @@ public class Xoctg_catpage_itm {
|
||||
}
|
||||
|
||||
public static final Xoctg_catpage_itm[] Ary_empty = new Xoctg_catpage_itm[0];
|
||||
public static Xoctg_catpage_itm New_by_rdr(Db_rdr rdr, byte version) {
|
||||
public static Xoctg_catpage_itm New_by_rdr(Xow_wiki wiki, Db_rdr rdr, byte version) {
|
||||
byte[] sortkey_binary = Bry_.Empty;
|
||||
byte[] sortkey_prefix = Bry_.Empty;
|
||||
if (version == Version__4) {
|
||||
@ -87,7 +87,15 @@ public class Xoctg_catpage_itm {
|
||||
sortkey_binary = Bry_.Empty;
|
||||
sortkey_prefix = rdr.Read_bry_by_str("cl_sortkey");
|
||||
}
|
||||
return new Xoctg_catpage_itm(version, rdr.Read_byte("cl_type_id"), rdr.Read_int("cl_from"), sortkey_prefix, sortkey_binary);
|
||||
Xoctg_catpage_itm rv = new Xoctg_catpage_itm(version, rdr.Read_byte("cl_type_id"), rdr.Read_int("cl_from"), sortkey_prefix, sortkey_binary);
|
||||
|
||||
if (version == Version__4) {
|
||||
String ttl_str = rdr.Read_str("page_title");
|
||||
if (ttl_str != null) {// NOTE: ttl_str will be NULL if LEFT JOIN fails on page_db.page
|
||||
rv.Page_ttl_(wiki.Ttl_parse(rdr.Read_int("page_namespace"), Bry_.new_u8(ttl_str)));
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
public static Xoctg_catpage_itm New_by_ttl(byte grp_tid, int page_id, Xoa_ttl ttl) { // TEST
|
||||
Xoctg_catpage_itm rv = new Xoctg_catpage_itm(Version__4, grp_tid, page_id, ttl.Page_txt(), Bry_.Empty);
|
||||
|
@ -40,7 +40,7 @@ class Dpl_itm {
|
||||
public byte Quality_pages() {return quality_pages;} private byte quality_pages;
|
||||
public byte Stable_pages() {return stable_pages;} private byte stable_pages;
|
||||
private Xop_ctx sub_ctx; private Xop_tkn_mkr sub_tkn_mkr; private Xop_root_tkn sub_root;
|
||||
public void Parse(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) { // parse kvps in xnde; EX:<dpl>category=abc\nredirects=y\n</dpl>
|
||||
private void Parse_src(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) { // parse kvps in xnde; EX:<dpl>category=abc\nredirects=y\n</dpl>
|
||||
this.page_ttl = page_ttl;
|
||||
sub_ctx = Xop_ctx.New__sub__reuse_page(ctx);
|
||||
sub_tkn_mkr = sub_ctx.Tkn_mkr();
|
||||
@ -175,6 +175,11 @@ class Dpl_itm {
|
||||
// boolean ctg_date = false, ctg_date_strip = false;
|
||||
// byte[] ns_include = null;
|
||||
// byte[] ctg_date_fmt;
|
||||
public static Dpl_itm Parse(Xowe_wiki wiki, Xop_ctx ctx, byte[] page_ttl, byte[] src, Xop_xnde_tkn xnde) {
|
||||
Dpl_itm rv = new Dpl_itm();
|
||||
rv.Parse_src(wiki, ctx, page_ttl, src, xnde);
|
||||
return rv;
|
||||
}
|
||||
}
|
||||
class Dpl_stable_tid {
|
||||
public static final byte Tid_null = 0, Tid_include = 1, Tid_only = 2, Tid_exclude = 3;
|
||||
|
144
400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_page_finder.java
Normal file
144
400_xowa/src/gplx/xowa/xtns/dynamicPageList/Dpl_page_finder.java
Normal file
@ -0,0 +1,144 @@
|
||||
/*
|
||||
XOWA: the XOWA Offline Wiki Application
|
||||
Copyright (C) 2012-2017 gnosygnu@gmail.com
|
||||
|
||||
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
|
||||
or alternatively under the terms of the Apache License Version 2.0.
|
||||
|
||||
You may use XOWA according to either of these licenses as is most appropriate
|
||||
for your project on a case-by-case basis.
|
||||
|
||||
The terms of each license can be found in the source code repository:
|
||||
|
||||
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
|
||||
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
|
||||
*/
|
||||
package gplx.xowa.xtns.dynamicPageList; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
|
||||
import gplx.core.primitives.*; import gplx.core.lists.*;
|
||||
import gplx.xowa.wikis.dbs.*;
|
||||
import gplx.xowa.wikis.data.tbls.*;
|
||||
import gplx.xowa.addons.wikis.ctgs.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.urls.*;
|
||||
class Dpl_page_finder {
|
||||
private final Dpl_itm itm;
|
||||
private final Xowe_wiki wiki;
|
||||
|
||||
public Dpl_page_finder(Dpl_itm itm, Xowe_wiki wiki) {
|
||||
this.itm = itm;
|
||||
this.wiki = wiki;
|
||||
}
|
||||
public Ordered_hash Find() {
|
||||
// get include_ttls
|
||||
List_adp include_ttls = itm.Ctg_includes();
|
||||
if (include_ttls == null) return Ordered_hash_.New(); // exit early if none exists
|
||||
|
||||
// get exclude_pages
|
||||
Ordered_hash exclude_pages = Get_exclude_pages(itm.Ctg_excludes());
|
||||
|
||||
// init vars for loop below
|
||||
int itm_ns_filter = itm.Ns_filter();
|
||||
List_adp remove_list = List_adp_.New();
|
||||
Int_obj_ref tmp_id = Int_obj_ref.New_zero();
|
||||
|
||||
// get include_pags; note that this is a UNION of all member pages; EX: include_ttls=Ctg_A,Ctg_B,Ctg_C will only return pages in Ctg_A AND Ctg_B AND Ctg_C
|
||||
Ordered_hash rv = Ordered_hash_.New();
|
||||
int len = include_ttls.Len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
// get ttl
|
||||
Xoa_ttl ttl = Get_ctg_ttl_or_null(include_ttls, i);
|
||||
if (ttl == null) continue;
|
||||
|
||||
// get pages
|
||||
Ordered_hash cur_pages = Ordered_hash_.New();
|
||||
Find_pages_in_ctg(cur_pages, itm.Page_ttl(), ttl);
|
||||
|
||||
// identify pages (a) not in previous list; (b) excluded; (c) ns_filter
|
||||
remove_list.Clear();
|
||||
int cur_len = cur_pages.Len();
|
||||
for (int j = 0; j < cur_len; j++) {
|
||||
// get item and init tmp
|
||||
Xowd_page_itm page_itm = (Xowd_page_itm)cur_pages.Get_at(j);
|
||||
tmp_id.Val_(page_itm.Id());
|
||||
|
||||
// check if should be removed
|
||||
if ( (i != 0 && !rv.Has(tmp_id)) // item doesn't exist in previous set; note this doesn't apply to the 0th set
|
||||
|| exclude_pages.Has(tmp_id) // item is marked as excluded
|
||||
|| itm_ns_filter != Dpl_itm.Ns_filter_null && itm_ns_filter != page_itm.Ns().Id() // item does not match specified filter
|
||||
) {
|
||||
remove_list.Add(page_itm);
|
||||
}
|
||||
}
|
||||
|
||||
// remove pages
|
||||
int remove_len = remove_list.Len();
|
||||
for (int j = 0; j < remove_len; j++) {
|
||||
Xowd_page_itm page_itm = (Xowd_page_itm)remove_list.Get_at(j);
|
||||
cur_pages.Del(tmp_id.Val_(page_itm.Id()));
|
||||
}
|
||||
|
||||
// set cur_pages as main list
|
||||
rv = cur_pages;
|
||||
}
|
||||
|
||||
// sorting
|
||||
rv.Sort_by
|
||||
( itm.Sort_ascending() == Bool_.__byte
|
||||
? (ComparerAble)Xowd_page_itm_sorter.IdAsc // sort not specified; use default;
|
||||
: (ComparerAble)new Dpl_page_sorter(itm)); // sort specified
|
||||
return rv;
|
||||
}
|
||||
private Ordered_hash Get_exclude_pages(List_adp ttls) {
|
||||
Ordered_hash rv = Ordered_hash_.New();
|
||||
|
||||
// return empty hash if no ttls
|
||||
if (ttls == null)
|
||||
return rv;
|
||||
|
||||
// loop exclude ttls
|
||||
int len = ttls.Count();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xoa_ttl ttl = Get_ctg_ttl_or_null(ttls, i);
|
||||
if (ttl == null) continue;
|
||||
Find_pages_in_ctg(rv, itm.Page_ttl(), ttl);
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
private void Find_pages_in_ctg(Ordered_hash rv, byte[] page_ttl, Xoa_ttl cat_ttl) {
|
||||
// get ctg
|
||||
Xoctg_catpage_ctg ctg = wiki.Ctg__catpage_mgr().Get_by_cache_or_null(page_ttl, Xoctg_catpage_url.New__blank(), cat_ttl, Int_.Max_value);
|
||||
if (ctg == null) return;
|
||||
|
||||
// loop grps to get each page
|
||||
Int_obj_ref tmp_id = Int_obj_ref.New_zero();
|
||||
for (byte tid = 0; tid < Xoa_ctg_mgr.Tid___max; tid++) {
|
||||
// get grp; EX: subc; page; file
|
||||
Xoctg_catpage_grp grp = ctg.Grp_by_tid(tid);
|
||||
|
||||
// loop itms in grp and add to hash
|
||||
int len = grp.Itms__len();
|
||||
for (int i = 0; i < len; i++) {
|
||||
Xoctg_catpage_itm itm = grp.Itms__get_at(i);
|
||||
int itm_page_id = itm.Page_id();
|
||||
|
||||
if (rv.Has(tmp_id.Val_(itm_page_id))) continue; // check to make sure not already added
|
||||
|
||||
Xowd_page_itm page = new Xowd_page_itm();
|
||||
if (itm.Page_ttl() == null) continue; // cat_link can exist without entry in page_db.page
|
||||
page.Id_(itm_page_id);
|
||||
page.Ttl_(itm.Page_ttl());
|
||||
rv.Add(Int_obj_ref.New(itm_page_id), page);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private Xoa_ttl Get_ctg_ttl_or_null(List_adp list, int i) {// helper method to extract ttl from list
|
||||
// get ttl
|
||||
byte[] ttl_bry = (byte[])list.Get_at(i);
|
||||
Xoa_ttl ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, ttl_bry);
|
||||
|
||||
// log if invalid; NOTE: pages in en.n will pass "{{{2}}}" as category title; PAGE:en.b:Category:Egypt DATE:2016-10-18
|
||||
if (ttl == null) {
|
||||
Gfo_usr_dlg_.Instance.Log_many("", "", "category title is invalid; wiki=~{0} page=~{1} ttl=~{2}", wiki.Domain_str(), itm.Page_ttl(), ttl_bry);
|
||||
}
|
||||
return ttl;
|
||||
}
|
||||
}
|
@ -20,13 +20,11 @@ import gplx.xowa.wikis.dbs.*; import gplx.xowa.addons.wikis.ctgs.*; import gplx.
|
||||
import gplx.xowa.parsers.*; import gplx.xowa.parsers.xndes.*; import gplx.xowa.parsers.htmls.*; import gplx.xowa.parsers.amps.*;
|
||||
import gplx.xowa.addons.wikis.ctgs.htmls.catpages.doms.*; import gplx.xowa.addons.wikis.ctgs.htmls.catpages.urls.*;
|
||||
public class Dpl_xnde implements Xox_xnde {
|
||||
private Dpl_itm itm = new Dpl_itm(); private List_adp pages = List_adp_.New();
|
||||
private Dpl_itm itm; private Ordered_hash pages;
|
||||
public void Xatr__set(Xowe_wiki wiki, byte[] src, Mwh_atr_itm xatr, Object xatr_id_obj) {} // NOTE: <dynamicPageList> has no attributes
|
||||
public void Xtn_parse(Xowe_wiki wiki, Xop_ctx ctx, Xop_root_tkn root, byte[] src, Xop_xnde_tkn xnde) {
|
||||
itm.Parse(wiki, ctx, ctx.Page().Ttl().Full_txt_w_ttl_case(), src, xnde);
|
||||
Dpl_page_finder.Find_pages(pages, wiki, itm);
|
||||
if (itm.Sort_ascending() != Bool_.__byte)
|
||||
pages.Sort_by(new Dpl_page_sorter(itm));
|
||||
itm = Dpl_itm.Parse(wiki, ctx, ctx.Page().Ttl().Full_txt_w_ttl_case(), src, xnde);
|
||||
pages = new Dpl_page_finder(itm, wiki).Find();
|
||||
}
|
||||
public void Xtn_write(Bry_bfr bfr, Xoae_app app, Xop_ctx ctx, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xoae_page wpg, Xop_xnde_tkn xnde, byte[] src) {
|
||||
Xowe_wiki wiki = ctx.Wiki();
|
||||
@ -71,109 +69,11 @@ public class Dpl_xnde implements Xox_xnde {
|
||||
}
|
||||
}
|
||||
bfr.Add(html_mode.Grp_end()).Add_byte_nl();
|
||||
} finally {tmp_bfr.Mkr_rls();}
|
||||
}
|
||||
finally {
|
||||
tmp_bfr.Mkr_rls();
|
||||
pages.Clear(); // clear pages else out-of-memory error when Next 200 3 times on en.wiktionary.org/wiki/Category:English_lemmas; DATE:2019-08-25
|
||||
}
|
||||
}
|
||||
private static byte[] Bry_nofollow = Bry_.new_a7(" rel=\"nofollow\"");
|
||||
}
|
||||
class Dpl_page_finder {
|
||||
public static void Find_pages(List_adp rv, Xowe_wiki wiki, Dpl_itm itm) {
|
||||
rv.Clear();
|
||||
List_adp includes = itm.Ctg_includes(); if (includes == null) return;
|
||||
int includes_len = includes.Count();
|
||||
Ordered_hash old_regy = Ordered_hash_.New(), new_regy = Ordered_hash_.New(), cur_regy = Ordered_hash_.New();
|
||||
Xodb_load_mgr load_mgr = wiki.Db_mgr().Load_mgr();
|
||||
Xowd_page_itm tmp_page = new Xowd_page_itm();
|
||||
Int_obj_ref tmp_id = Int_obj_ref.New_zero();
|
||||
List_adp del_list = List_adp_.New();
|
||||
int ns_filter = itm.Ns_filter();
|
||||
Ordered_hash exclude_pages = Ordered_hash_.New();
|
||||
Find_excludes(exclude_pages, wiki, load_mgr, itm.Page_ttl(), tmp_page, tmp_id, itm.Ctg_excludes());
|
||||
|
||||
for (int i = 0; i < includes_len; i++) { // loop over includes
|
||||
byte[] include = (byte[])includes.Get_at(i);
|
||||
Xoa_ttl include_ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, include);
|
||||
|
||||
// pages in en.n will pass "{{{2}}}" as category title; PAGE:en.b:Category:Egypt DATE:2016-10-18
|
||||
if (include_ttl == null) {
|
||||
Gfo_usr_dlg_.Instance.Log_many("", "", "category title is invalid; wiki=~{0} page=~{1} ttl=~{2}", wiki.Domain_str(), itm.Page_ttl(), include);
|
||||
continue; // NOTE: must ignore invalid args; EX: "{{{2}}}" is ignored but "missing_category" is not
|
||||
}
|
||||
|
||||
cur_regy.Clear(); del_list.Clear();
|
||||
Find_pages_in_ctg(cur_regy, wiki, load_mgr, itm.Page_ttl(), tmp_page, tmp_id, include_ttl);
|
||||
Del_old_pages_not_in_cur(i, tmp_id, old_regy, cur_regy, del_list);
|
||||
Add_cur_pages_also_in_old(i, tmp_id, old_regy, cur_regy, new_regy, exclude_pages, ns_filter);
|
||||
old_regy = new_regy;
|
||||
new_regy = Ordered_hash_.New();
|
||||
}
|
||||
int pages_len = old_regy.Count();
|
||||
for (int i = 0; i < pages_len; i++) { // loop over old and create pages
|
||||
Int_obj_ref old_id = (Int_obj_ref)old_regy.Get_at(i);
|
||||
rv.Add(new Xowd_page_itm().Id_(old_id.Val()));
|
||||
}
|
||||
wiki.Db_mgr().Load_mgr().Load_by_ids(Cancelable_.Never, rv, 0, pages_len);
|
||||
rv.Sort_by(Xowd_page_itm_sorter.IdAsc);
|
||||
}
|
||||
private static void Find_excludes(Ordered_hash exclude_pages, Xowe_wiki wiki, Xodb_load_mgr load_mgr, byte[] page_ttl, Xowd_page_itm tmp_page, Int_obj_ref tmp_id, List_adp exclude_ctgs) {
|
||||
if (exclude_ctgs == null) return;
|
||||
int exclude_ctgs_len = exclude_ctgs.Count();
|
||||
for (int i = 0; i < exclude_ctgs_len; i++) {
|
||||
byte[] exclude_ctg = (byte[])exclude_ctgs.Get_at(i);
|
||||
Xoa_ttl exclude_ttl = wiki.Ttl_parse(gplx.xowa.wikis.nss.Xow_ns_.Tid__category, exclude_ctg);
|
||||
if (exclude_ttl != null)
|
||||
Find_pages_in_ctg(exclude_pages, wiki, load_mgr, page_ttl, tmp_page, tmp_id, exclude_ttl);
|
||||
}
|
||||
}
|
||||
private static void Find_pages_in_ctg(Ordered_hash rv, Xowe_wiki wiki, Xodb_load_mgr load_mgr, byte[] page_ttl, Xowd_page_itm tmp_page, Int_obj_ref tmp_id, Xoa_ttl cat_ttl) {
|
||||
Xoctg_catpage_ctg ctg = wiki.Ctg__catpage_mgr().Get_or_load_or_null(page_ttl, Xoctg_catpage_url.New__blank(), cat_ttl, Int_.Max_value);
|
||||
if (ctg == null) return;
|
||||
|
||||
// loop grps to get grp
|
||||
for (byte ctg_tid = 0; ctg_tid < Xoa_ctg_mgr.Tid___max; ++ctg_tid) {
|
||||
Xoctg_catpage_grp ctg_grp = ctg.Grp_by_tid(ctg_tid);
|
||||
int itms_len = ctg_grp.Itms__len();
|
||||
|
||||
// loop itms in grp and add to hash
|
||||
for (int i = 0; i < itms_len; ++i) {
|
||||
Xoctg_catpage_itm ctg_itm = ctg_grp.Itms__get_at(i);
|
||||
int itm_page_id = ctg_itm.Page_id();
|
||||
if (rv.Has(tmp_id.Val_(itm_page_id))) continue;
|
||||
rv.Add(Int_obj_ref.New(itm_page_id), ctg_itm);
|
||||
|
||||
// DELETE: recurse subcategories; PAGE:en.b:XML DATE:2016-09-18
|
||||
// if (ctg_tid == Xoa_ctg_mgr.Tid__subc) {
|
||||
// load_mgr.Load_by_id(tmp_page, itm_page_id);
|
||||
// Find_pages_in_ctg(rv, wiki, load_mgr, tmp_page, tmp_id, tmp_page.Ttl_page_db());
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
private static void Del_old_pages_not_in_cur(int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, List_adp del_list) {
|
||||
if (i == 0) return; // skip logic for first ctg (which doesn't have a predecessor)
|
||||
int old_len = old_regy.Count();
|
||||
for (int j = 0; j < old_len; j++) { // if cur is not in new, del it
|
||||
Int_obj_ref old_id = (Int_obj_ref)old_regy.Get_at(j);
|
||||
if (!cur_regy.Has(tmp_id.Val_(old_id.Val()))) // old_itm does not exist in cur_regy
|
||||
del_list.Add(old_id); // remove; EX: (A,B) in old; B only in cur; old should now be (A) only
|
||||
}
|
||||
int del_len = del_list.Count();
|
||||
for (int j = 0; j < del_len; j++) {
|
||||
Int_obj_ref old_itm = (Int_obj_ref)del_list.Get_at(j);
|
||||
old_regy.Del(tmp_id.Val_(old_itm.Val()));
|
||||
}
|
||||
}
|
||||
private static void Add_cur_pages_also_in_old(int i, Int_obj_ref tmp_id, Ordered_hash old_regy, Ordered_hash cur_regy, Ordered_hash new_regy, Ordered_hash exclude_pages, int ns_filter) {
|
||||
int found_len = cur_regy.Count();
|
||||
for (int j = 0; j < found_len; j++) { // if new_page is in cur, add it
|
||||
Xoctg_catpage_itm cur_itm = (Xoctg_catpage_itm)cur_regy.Get_at(j);
|
||||
Xoa_ttl cur_ttl = cur_itm.Page_ttl(); if (cur_ttl == null) continue;
|
||||
if (ns_filter != Dpl_itm.Ns_filter_null && ns_filter != cur_ttl.Ns().Id()) continue;
|
||||
tmp_id.Val_(cur_itm.Page_id()); // set tmp_id, since it will be used at least once
|
||||
if (exclude_pages.Has(tmp_id)) continue; // ignore excluded pages
|
||||
if (i != 0) { // skip logic for first ctg (which doesn't have a predecessor)
|
||||
if (!old_regy.Has(tmp_id)) continue; // cur_itm not in old_regy; ignore
|
||||
}
|
||||
new_regy.Add_as_key_and_val(Int_obj_ref.New(cur_itm.Page_id()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user