Mass_parse: Embed score in full-text search index

pull/620/head
gnosygnu 7 years ago
parent 53c3400366
commit eaa83db644

@ -56,6 +56,7 @@ public class Xomp_load_wkr implements Gfo_invk {
, ", pp.page_namespace"
, ", pp.page_title"
, ", pp.page_text_db_id"
, ", pp.page_score"
, "FROM xomp_page mp"
, " JOIN <page_db>page pp ON mp.page_id = pp.page_id"
, "WHERE mp.page_id > {0}"
@ -80,6 +81,7 @@ public class Xomp_load_wkr implements Gfo_invk {
( rdr.Read_int("page_namespace")
, rdr.Read_bry_by_str("page_title")
, text_db_id
, rdr.Read_int("page_score")
);
list.Add(ppg);
text_db_loader.Add(text_db_id, ppg);

@ -19,14 +19,16 @@ public class Xomp_page_itm implements Xowd_text_bry_owner {
// Creates an item for the given page id; the id is kept in a final field.
public Xomp_page_itm(int id) {this.id = id;}
// Returns the id supplied to the constructor.
public int Id() {return id;} private final int id;
// Returns the namespace id assigned by Init_by_page.
public int Ns_id() {return ns_id;} private int ns_id;
// Returns the page score assigned by Init_by_page.
public int Page_score() {return page_score;} private int page_score;
// Returns the title bytes assigned by Init_by_page.
public byte[] Ttl_bry() {return ttl_bry;} private byte[] ttl_bry;
// Returns the text-db id assigned by Init_by_page.
public int Text_db_id() {return text_db_id;} private int text_db_id;
// Returns the page text bytes; the code that assigns this field is not visible in this excerpt.
public byte[] Text() {return text;} private byte[] text;
public void Init_by_page(int ns_id, byte[] ttl_bry, int text_db_id) {
// Fills in the page-level fields of this item.
//   ns_id      : namespace id; the visible loaders pass the page_namespace column
//   ttl_bry    : title bytes; the visible loaders pass the page_title column
//   text_db_id : text-db id stored for later retrieval via Text_db_id()
//   page_score : score; the visible loaders pass the page_score column
// Only these four fields are assigned; id and text are left untouched.
public void Init_by_page(int ns_id, byte[] ttl_bry, int text_db_id, int page_score) {
this.ns_id = ns_id;
this.ttl_bry = ttl_bry;
this.text_db_id = text_db_id;
this.page_score = page_score;
}
// Returns the same id field as Id(). NOTE(review): presumably required by Xowd_text_bry_owner; confirm against the interface.
public int Page_id() {return id;}

@ -62,6 +62,7 @@ public class Xomp_page_pool_loader {
, ", pp.page_namespace"
, ", pp.page_title"
, ", pp.page_text_db_id"
, ", pp.page_score"
, "FROM xomp_page mp"
, " JOIN <page_db>page pp ON mp.page_id = pp.page_id"
, "WHERE mp.xomp_uid > {0}"
@ -85,6 +86,7 @@ public class Xomp_page_pool_loader {
( rdr.Read_int("page_namespace")
, rdr.Read_bry_by_str("page_title")
, text_db_id
, rdr.Read_int("page_score")
);
list.Add(ppg);
text_db_loader.Add(text_db_id, ppg);

@ -122,7 +122,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
Xoae_page wpg = Xoae_page.New(wiki, ttl);
wpg.Bldr__ns_ord_(ns_ord_mgr.Get_ord_by_ns_id(cur_ns)); // NOTE: must set ns_id for tier_id in lnki_temp; DATE:2016-09-19
wpg.Db().Text().Text_bry_(ppg.Text());
wpg.Db().Page().Id_(ppg.Id());
wpg.Db().Page().Init_by_mp(ppg.Id(), ppg.Page_score());
// parse page
Xop_ctx pctx = parser_mgr.Ctx();

@ -34,6 +34,7 @@ public class Xopg_db_page {
// Marks the page as not existing; shorthand for Exists_(Bool_.N).
public void Exists_n_() {this.Exists_(Bool_.N);}
// Sets the exists flag to the given value.
public void Exists_(boolean v) {this.exists = v;}
// Chainable setters: each one assigns a single field and returns this.
public Xopg_db_page Id_(int v) {this.id = v; return this;}
public Xopg_db_page Score_(int v) {this.score = v; return this;}
public Xopg_db_page Modified_on_(DateAdp v) {this.modified_on = v; return this;}
public Xopg_db_page Html_db_id_(int v) {this.html_db_id = v; return this;}
@ -53,6 +54,10 @@ public class Xopg_db_page {
this.ttl = wiki.Ttl_parse(ns_id, ttl_bry);
return this;
}
// Sets the page id and score in one call; no other field is modified.
// The call site visible in Xomp_parse_wkr passes the Id() and Page_score() of the loaded Xomp_page_itm.
public void Init_by_mp(int id, int score) {
this.id = id;
this.score = score;
}
public void Clear() {
this.exists = true;
this.modified_on = DateAdp_.MinValue; // NOTE: must set to MinValue else some tests will fail

@ -347,7 +347,7 @@ class Scrib_lib_ustring_gsub_mgr {
}
}
Keyval[] rslts = core.Interpreter().CallFunction(repl_func.Id(), luacbk_args);
if (rslts.length == 0)
if (rslts.length == 0) // will be 0 when gsub_proc returns nil; PAGE:en.d:tracer; DATE:2017-04-22
return false;
else { // ArrayIndex check
Object rslt_obj = rslts[0].Val(); // 0th idx has result

@ -64,14 +64,12 @@ public class Gflucene_searcher_mgr {
IndexSearcher searcher = new IndexSearcher(reader);
// Query query = new QueryParser("body", analyzer).parse(data.query);
Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
// Query body_query = new QueryParser("body", analyzer).parse(data.query);
// Query title_query = new QueryParser("title", analyzer).parse(data.query);
// creates query that boosts by page_score; not sure if this is needed, but 1st release of fts uses this
Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
FunctionQuery boost_query = new FunctionQuery(new LongFieldSource("page_score"));
CustomScoreQuery query = new CustomScoreQuery(multi_query, boost_query);
// TopDocs docs = searcher.search(query, reader.maxDoc());
TopDocs docs = searcher.search(query, data.match_max);
ScoreDoc[] hits = docs.scoreDocs;

Loading…
Cancel
Save