mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Mass_parse: Embed score in full-text search index
This commit is contained in:
parent
53c3400366
commit
eaa83db644
@@ -56,6 +56,7 @@ public class Xomp_load_wkr implements Gfo_invk {
|
||||
, ", pp.page_namespace"
|
||||
, ", pp.page_title"
|
||||
, ", pp.page_text_db_id"
|
||||
, ", pp.page_score"
|
||||
, "FROM xomp_page mp"
|
||||
, " JOIN <page_db>page pp ON mp.page_id = pp.page_id"
|
||||
, "WHERE mp.page_id > {0}"
|
||||
@@ -80,6 +81,7 @@ public class Xomp_load_wkr implements Gfo_invk {
|
||||
( rdr.Read_int("page_namespace")
|
||||
, rdr.Read_bry_by_str("page_title")
|
||||
, text_db_id
|
||||
, rdr.Read_int("page_score")
|
||||
);
|
||||
list.Add(ppg);
|
||||
text_db_loader.Add(text_db_id, ppg);
|
||||
|
@@ -19,14 +19,16 @@ public class Xomp_page_itm implements Xowd_text_bry_owner {
|
||||
public Xomp_page_itm(int id) {this.id = id;}
|
||||
public int Id() {return id;} private final int id;
|
||||
public int Ns_id() {return ns_id;} private int ns_id;
|
||||
public int Page_score() {return page_score;} private int page_score;
|
||||
public byte[] Ttl_bry() {return ttl_bry;} private byte[] ttl_bry;
|
||||
public int Text_db_id() {return text_db_id;} private int text_db_id;
|
||||
public byte[] Text() {return text;} private byte[] text;
|
||||
|
||||
public void Init_by_page(int ns_id, byte[] ttl_bry, int text_db_id) {
|
||||
public void Init_by_page(int ns_id, byte[] ttl_bry, int text_db_id, int page_score) {
|
||||
this.ns_id = ns_id;
|
||||
this.ttl_bry = ttl_bry;
|
||||
this.text_db_id = text_db_id;
|
||||
this.page_score = page_score;
|
||||
}
|
||||
|
||||
public int Page_id() {return id;}
|
||||
|
@@ -62,6 +62,7 @@ public class Xomp_page_pool_loader {
|
||||
, ", pp.page_namespace"
|
||||
, ", pp.page_title"
|
||||
, ", pp.page_text_db_id"
|
||||
, ", pp.page_score"
|
||||
, "FROM xomp_page mp"
|
||||
, " JOIN <page_db>page pp ON mp.page_id = pp.page_id"
|
||||
, "WHERE mp.xomp_uid > {0}"
|
||||
@@ -85,6 +86,7 @@ public class Xomp_page_pool_loader {
|
||||
( rdr.Read_int("page_namespace")
|
||||
, rdr.Read_bry_by_str("page_title")
|
||||
, text_db_id
|
||||
, rdr.Read_int("page_score")
|
||||
);
|
||||
list.Add(ppg);
|
||||
text_db_loader.Add(text_db_id, ppg);
|
||||
|
@@ -122,7 +122,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
||||
Xoae_page wpg = Xoae_page.New(wiki, ttl);
|
||||
wpg.Bldr__ns_ord_(ns_ord_mgr.Get_ord_by_ns_id(cur_ns)); // NOTE: must set ns_id for tier_id in lnki_temp; DATE:2016-09-19
|
||||
wpg.Db().Text().Text_bry_(ppg.Text());
|
||||
wpg.Db().Page().Id_(ppg.Id());
|
||||
wpg.Db().Page().Init_by_mp(ppg.Id(), ppg.Page_score());
|
||||
|
||||
// parse page
|
||||
Xop_ctx pctx = parser_mgr.Ctx();
|
||||
|
@@ -34,6 +34,7 @@ public class Xopg_db_page {
|
||||
public void Exists_n_() {this.Exists_(Bool_.N);}
|
||||
public void Exists_(boolean v) {this.exists = v;}
|
||||
public Xopg_db_page Id_(int v) {this.id = v; return this;}
|
||||
public Xopg_db_page Score_(int v) {this.score = v; return this;}
|
||||
public Xopg_db_page Modified_on_(DateAdp v) {this.modified_on = v; return this;}
|
||||
public Xopg_db_page Html_db_id_(int v) {this.html_db_id = v; return this;}
|
||||
|
||||
@@ -53,6 +54,10 @@ public class Xopg_db_page {
|
||||
this.ttl = wiki.Ttl_parse(ns_id, ttl_bry);
|
||||
return this;
|
||||
}
|
||||
public void Init_by_mp(int id, int score) {
|
||||
this.id = id;
|
||||
this.score = score;
|
||||
}
|
||||
public void Clear() {
|
||||
this.exists = true;
|
||||
this.modified_on = DateAdp_.MinValue; // NOTE: must set to MinValue else some tests will fail
|
||||
|
@@ -347,7 +347,7 @@ class Scrib_lib_ustring_gsub_mgr {
|
||||
}
|
||||
}
|
||||
Keyval[] rslts = core.Interpreter().CallFunction(repl_func.Id(), luacbk_args);
|
||||
if (rslts.length == 0)
|
||||
if (rslts.length == 0) // will be 0 when gsub_proc returns nil; PAGE:en.d:tracer; DATE:2017-04-22
|
||||
return false;
|
||||
else { // ArrayIndex check
|
||||
Object rslt_obj = rslts[0].Val(); // 0th idx has result
|
||||
|
@@ -64,14 +64,12 @@ public class Gflucene_searcher_mgr {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
|
||||
// Query query = new QueryParser("body", analyzer).parse(data.query);
|
||||
Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
|
||||
|
||||
// Query body_query = new QueryParser("body", analyzer).parse(data.query);
|
||||
// Query title_query = new QueryParser("title", analyzer).parse(data.query);
|
||||
// creates query that boosts by page_score; not sure if this is needed, but 1st release of fts uses this
|
||||
Query multi_query = MultiFieldQueryParser.parse(data.query, new String[] {"body"}, new BooleanClause.Occur []{BooleanClause.Occur.SHOULD}, analyzer);
|
||||
FunctionQuery boost_query = new FunctionQuery(new LongFieldSource("page_score"));
|
||||
CustomScoreQuery query = new CustomScoreQuery(multi_query, boost_query);
|
||||
|
||||
// TopDocs docs = searcher.search(query, reader.maxDoc());
|
||||
TopDocs docs = searcher.search(query, data.match_max);
|
||||
ScoreDoc[] hits = docs.scoreDocs;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user