1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Source: Restore broken commit

This commit is contained in:
gnosygnu
2017-02-06 22:14:55 -05:00
parent 938beac9f9
commit 3bfeb94b43
4380 changed files with 328018 additions and 0 deletions

View File

@@ -0,0 +1,74 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.dbs.percentiles; import gplx.*; import gplx.dbs.*;
/**
 * Builds a pipe-delimited, fixed-width text table in an internal buffer.
 *
 * Columns are declared with Add_str / Add_int; each call to Log appends one row
 * (fields separated by a pipe, terminated by a newline). To_str_and_clear returns
 * the accumulated text and empties the buffer.
 */
class Log_tbl_fmtr {
	private final Bry_bfr bfr = Bry_bfr_.New();
	private final List_adp itms = List_adp_.New();
	private Log_fld_itm[] ary;	// cached snapshot of itms; reset to null whenever a column is added
	/** Declares a byte[] column that is right-padded with spaces to len. Returns this for chaining. */
	public Log_tbl_fmtr Add_str(String key, int len) {ary = null; itms.Add(new Log_fld_itm__bry(Type_adp_.Tid__bry, key, len)); return this;}
	/** Declares an int column whose width is the digit-count of end. Returns this for chaining. */
	public Log_tbl_fmtr Add_int(String key, int bgn, int end) {ary = null; itms.Add(new Log_fld_itm__int(Type_adp_.Tid__int, key, bgn, end)); return this;}
	/**
	 * Appends one row to the buffer.
	 *
	 * @param vals one value per declared column, in declaration order; extra values are ignored
	 * @throws ArrayIndexOutOfBoundsException if fewer values than columns are passed; nothing is written in that case
	 */
	public void Log(Object... vals) {
		if (ary == null) {
			ary = (Log_fld_itm[])itms.To_ary_and_clear(Log_fld_itm.class);
			// To_ary_and_clear empties itms; put the fields back so that a later Add_* call
			// (which resets ary) rebuilds the full column layout instead of only the new column
			for (Log_fld_itm itm : ary)
				itms.Add(itm);
		}
		int len = ary.length;
		// validate before writing so a short argument list cannot leave a partial row in the buffer
		if (vals.length < len)
			throw new ArrayIndexOutOfBoundsException("not enough values for log row; expd=" + len + " actl=" + vals.length);
		for (int i = 0; i < len; ++i) {
			Log_fld_itm itm = ary[i];
			Object val = vals[i];
			if (i != 0) bfr.Add_byte_pipe();
			itm.Fmt(bfr, val);
		}
		bfr.Add_byte_nl();
	}
	/** Returns all rows logged so far and clears the buffer. */
	public String To_str_and_clear() {return bfr.To_str_and_clear();}
}
/** One column of a Log_tbl_fmtr row: knows how to write a single value into the row buffer. */
interface Log_fld_itm {
/**
 * Writes val into bfr using this column's formatting.
 * @param bfr buffer receiving the formatted value
 * @param val value for this column; the expected runtime type depends on the implementation
 */
void Fmt(Bry_bfr bfr, Object val);
}
/**
 * Shared state for log columns: a type id, a key and a width.
 * Subclasses supply the actual formatting in Fmt.
 */
abstract class Log_fld_itm__base implements Log_fld_itm {
	private final int tid;
	private final String key;
	protected int len;	// not final: subclasses may recompute the width after calling super(...)

	public Log_fld_itm__base(int tid, String key, int len) {
		this.tid = tid;
		this.key = key;
		this.len = len;
	}

	/** Type id passed to the constructor. */
	public int Tid() {return tid;}
	/** Column key passed to the constructor. */
	public String Key() {return key;}
	/** Column width used for padding. */
	public int Len() {return len;}

	/** Writes val into bfr using this column's formatting. */
	public abstract void Fmt(Bry_bfr bfr, Object val);
}
/** byte[] column: writes the bytes as-is, then pads on the right with spaces up to the column width. */
class Log_fld_itm__bry extends Log_fld_itm__base {
	public Log_fld_itm__bry(int tid, String key, int len) {
		super(tid, key, len);
	}
	/**
	 * Appends val (cast to byte[]) followed by any trailing spaces needed to reach the column width.
	 * Values at or beyond the column width are written without padding.
	 */
	@Override public void Fmt(Bry_bfr bfr, Object val) {
		byte[] bry = Bry_.cast(val);
		int spaces_needed = this.len - bry.length;	// computed before writing, as in the original
		bfr.Add(bry);
		if (spaces_needed >= 1) {
			bfr.Add_byte_repeat(Byte_ascii.Space, spaces_needed);
		}
	}
}
/** int column: writes the number left-padded with spaces to the digit-count of the declared upper bound. */
class Log_fld_itm__int extends Log_fld_itm__base {
	private final int bgn;
	private final int end;

	public Log_fld_itm__int(int tid, String key, int bgn, int end) {
		super(tid, key, 0);	// width is not known yet; set below from end
		this.bgn = bgn;
		this.end = end;
		this.len = Int_.DigitCount(end);
	}

	/** Lower bound passed to the constructor. */
	public int Bgn() {return bgn;}
	/** Upper bound passed to the constructor; its digit-count is the column width. */
	public int End() {return end;}

	/** Appends val (cast to int) as text, padded at the start with spaces to the column width. */
	@Override public void Fmt(Bry_bfr bfr, Object val) {
		String digits = Int_.To_str(Int_.cast(val));
		bfr.Add_str_u8(String_.PadBgn(digits, this.Len(), " "));
	}
}

View File

@@ -0,0 +1,87 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.dbs.percentiles; import gplx.*; import gplx.dbs.*;
/**
 * Tracks a sliding score window [score_bgn, score_end) that starts at the top score
 * and moves downwards, growing or keeping its length depending on how many rows each
 * pass found relative to how many are still needed.
 *
 * Usage as shown by this class: Init once, Select_init per search, then Update after each pass.
 */
public class Percentile_rng {
	private long total_max; private int total_needed;
	private int score_max, score_len_max;
	private long prv_time;
	/** Lower bound of the current window; never negative. */
	public int Score_bgn() {return score_bgn;} private int score_bgn;
	/** Upper bound of the current window. */
	public int Score_end() {return score_end;} private int score_end;
	/** Requested length of the current window. */
	public int Score_len() {return score_len;} private int score_len;
	/** Row count passed to the most recent Update. */
	public int Found_rdr() {return found_rdr;} private int found_rdr;
	/** Sum of all row counts passed to Update since the last Select_init. */
	public int Found_all() {return found_all;} private int found_all;
	/** Tick difference between the two most recent Select_init / Update calls. */
	public int Elapsed() {return elapsed;} private int elapsed;
	/**
	 * Sets the totals used to size the window.
	 * @param total_max total number of rows available
	 * @param score_max highest possible score
	 * @return this, for chaining
	 */
	public Percentile_rng Init(long total_max, int score_max) {
		this.total_max = total_max;
		this.score_max = score_max;
		this.score_len_max = score_max / 20; // limit to 5%
		if (score_len_max < 1) score_len_max = 1; // score_max < 20 would give 0; a 0-length window never moves
		return this;
	}
	/**
	 * Starts a new selection.
	 * @param total_needed number of rows wanted
	 * @param prv_score_bgn Score_null to start from score_max; otherwise the window start to resume from
	 * @param prv_score_len window length to resume with; only used when prv_score_bgn is not Score_null
	 * @param score_len_adj extra multiples of the computed score unit to add to the first window
	 */
	public void Select_init(int total_needed, int prv_score_bgn, int prv_score_len, int score_len_adj) {
		this.total_needed = total_needed;
		this.found_all = 0;
		this.prv_time = gplx.core.envs.System_.Ticks();
		int score_unit = Calc_score_unit(total_needed, total_max, score_max);
		if (prv_score_bgn == Score_null) {
			score_len = score_unit + (score_unit * score_len_adj);
			score_bgn = score_max;
			Rng_len_(Bool_.Y);
		}
		else {
			score_len = prv_score_len;
			score_bgn = prv_score_bgn;
			score_end = score_bgn + score_len;
		}
	}
	/**
	 * Records the result of one pass and moves the window down to the next range.
	 * @param found_rdr number of rows found in the pass that just finished
	 */
	public void Update(int found_rdr) {
		this.found_rdr = found_rdr;
		this.found_all += found_rdr;
		// calc rng_multiplier based on found_rdr and total_needed; EX: 100=total_needed; 10=found_rdr; 40=found_all -> 6=rng_multiplier; ((100 - 40) / 10)
		int rng_multiplier = 1;
		if (found_rdr == 0) {
			rng_multiplier = 4;
		} else {
			int total_remaining = total_needed - found_all;
			// divide as double so that the ceil actually rounds up; int division truncated before the ceil was applied
			rng_multiplier = total_remaining == 0 ? 1 : Math_.Ceil_as_int(Math_.Div_safe_as_double(total_remaining, found_rdr));
		}
		// calc new score_len; use long so that the product cannot overflow int before it is clamped
		long new_score_len = (long)score_len * rng_multiplier;
		if (new_score_len < 1) new_score_len = score_len;	// multiplier was 0 or negative (found as many as or more than needed); keep current length
		else if (new_score_len > score_len_max) new_score_len = score_len_max;
		score_len = (int)new_score_len;
		Rng_len_(Bool_.N);
		// update times
		long new_time = gplx.core.envs.System_.Ticks();
		this.elapsed = Int_.Subtract_long(new_time, prv_time);
		prv_time = new_time;
	}
	/** Moves the window down: the old score_bgn becomes the new score_end, and score_bgn is score_len below it. */
	private void Rng_len_(boolean first) {
		score_end = score_bgn + (first ? 1 : 0); // + 1 to include rows with scores at max; EX: > 999,998 AND < 1,000,001
		score_bgn = score_end - score_len;
		if (score_bgn < 0) score_bgn = 0; // make sure score is not negative
	}
	/**
	 * Calculates how many score points must be read to expect total_needed rows,
	 * assuming rows are spread evenly over the score range.
	 * @return ceil(total_needed / (total_max / score_max)), capped at score_max
	 */
	@gplx.Internal protected static int Calc_score_unit(int total_needed, long total_max, int score_max) {// TEST:
		int rv = (int)Math_.Ceil(Math_.Div_safe_as_double(total_needed, Math_.Div_safe_as_double(total_max, score_max))); // EX: 100 needed / (16 M / 1 M) -> 7 units to fill 100
		if (rv > score_max) rv = score_max; // never allow score_unit to be > score_max; occurs when total_needed > total_max; EX: 50 needed; 10 available
		return rv;
	}
	/** Sentinel for Select_init meaning "no previous window". */
	public static final int Score_null = -1;
}

View File

@@ -0,0 +1,43 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.dbs.percentiles; import gplx.*; import gplx.dbs.*;
/**
 * Collects one fixed-width log row per reader pass of a percentile search.
 * Column order matches the argument order that Log passes to the formatter.
 */
public class Percentile_rng_log {
	private final Log_tbl_fmtr fmtr = new Log_tbl_fmtr();
	private byte[] search; private int rslts_needed;
	private int rdr_idx;
	/**
	 * Declares the log columns.
	 * @param score_max highest possible score; its digit-count sets the width of the score_bgn / score_end columns
	 */
	public Percentile_rng_log(int score_max) {
		fmtr.Add_str("search"			, 50)
			.Add_int("rslts_needed"		, 1, 999)
			.Add_int("rdr_idx"			, 0, 100)		// warn if more than 100 sql queries
			.Add_int("score_bgn"		, 0, score_max)
			.Add_int("score_end"		, 0, score_max)
			.Add_int("score_len"		, 1, 100000)
			.Add_int("rdr_found"		, 0, 9999)
			.Add_int("total_found"		, 0, 999)
			// last column receives pass_time from Log; it was previously declared as "total_needed" with a 3-digit width
			.Add_int("pass_time"		, 0, 9999)		// warn if more than 9.999 seconds; NOTE(review): assumes pass_time is in milliseconds - confirm
			;
	}
	/**
	 * Starts logging for a new search; resets the pass counter so the first logged pass has rdr_idx 0.
	 * @param search search text written in the first column of every row
	 * @param rslts_needed requested result count written in the second column of every row
	 */
	public void Init(byte[] search, int rslts_needed) {
		this.search = search; this.rslts_needed = rslts_needed;
		rdr_idx = -1;
	}
	/** Appends one row for a finished pass; score_len is derived as score_end - score_bgn. */
	public void Log(int score_bgn, int score_end, int rdr_found, int total_found, int pass_time) {
		fmtr.Log(search, rslts_needed, ++rdr_idx, score_bgn, score_end, score_end - score_bgn, rdr_found, total_found, pass_time);
	}
	/** Returns all rows logged so far and clears the underlying buffer. */
	public String To_str_and_clear() {return fmtr.To_str_and_clear();}
}

View File

@@ -0,0 +1,63 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.dbs.percentiles; import gplx.*; import gplx.dbs.*;
import org.junit.*;
/** Unit tests for Percentile_rng: window movement after each Update, and score-unit sizing. */
public class Percentile_rng_tst {
	private final Percentile_rng_fxt fxt = new Percentile_rng_fxt();

	@Before public void init() {
		fxt.Clear();
	}

	/** Nothing found in a pass: each Update multiplies the window length by 4. */
	@Test public void Found__000() {
		fxt.Test__rng(999994, 1000001);

		fxt.Exec__update(0);
		fxt.Test__rng(999966, 999994);

		fxt.Exec__update(0);
		fxt.Test__rng(999854, 999966);
	}

	/** 25 of 100 found per pass: window grows while rows are still needed, then keeps its length. */
	@Test public void Found__025() {
		fxt.Test__rng(999994, 1000001);

		fxt.Exec__update(25);
		fxt.Test__rng(999973, 999994);

		fxt.Exec__update(25);
		fxt.Test__rng(999931, 999973);

		fxt.Exec__update(25);
		fxt.Test__rng(999889, 999931);

		fxt.Exec__update(25);
		fxt.Test__rng(999847, 999889);
	}

	@Test public void Calc_score_unit() {
		// to fill 50 -> 16 pages per point -> read every 4 points to get 64 pages
		fxt.Test__calc_score_unit(50, 16000000, 1000000, 4);
		// to fill 50 -> 1000 points per page -> read every 50k points to get 50 pages
		fxt.Test__calc_score_unit(50, 1000, 1000000, 50000);
		// range bounds check; to fill 50, always read full amount
		fxt.Test__calc_score_unit(50, 25, 1000000, 1000000);
	}
}
/** Test fixture wrapping a single Percentile_rng; Exec__* methods drive it, Test__* methods assert on it. */
class Percentile_rng_fxt {
	private final Percentile_rng rng = new Percentile_rng();

	/** Re-initializes the range: 16,000,000 pages, max score 1,000,000, 100 rows requested, no length adjustment. */
	public void Clear() {
		Exec__init_for_wiki(16000000, 1000000);
		Exec__init_for_search(100, 0);
	}

	public Percentile_rng_fxt Exec__init_for_wiki (int pages_max, int score_max) {
		rng.Init(pages_max, score_max);
		return this;
	}

	/** Starts a fresh selection (no previous window) for request_count rows. */
	public Percentile_rng_fxt Exec__init_for_search(int request_count, int score_len_adj) {
		int no_prv = Percentile_rng.Score_null;
		rng.Select_init(request_count, no_prv, no_prv, score_len_adj);
		return this;
	}

	public Percentile_rng_fxt Exec__update(int rdr_found) {
		rng.Update(rdr_found);
		return this;
	}

	/** Asserts the current window; the end is checked before the begin. */
	public void Test__rng(int expd_bgn, int expd_end) {
		int actl_end = rng.Score_end();
		Tfds.Eq(expd_end, actl_end, "rng_end");
		int actl_bgn = rng.Score_bgn();
		Tfds.Eq(expd_bgn, actl_bgn, "rng_bgn");
	}

	public void Test__calc_score_unit(int request_count, long pages_max, int score_max, int expd) {
		int actl = Percentile_rng.Calc_score_unit(request_count, pages_max, score_max);
		Tfds.Eq(expd, actl);
	}
}

View File

@@ -0,0 +1,67 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.dbs.percentiles; import gplx.*; import gplx.dbs.*;
/**
 * Template for reading rows in successive score windows until enough rows are found
 * or the window reaches score 0.
 *
 * Subclasses must supply Rdr__init and are expected to override the virtual hooks;
 * cxl, rng and rng_log must be assigned before Select is called.
 */
public abstract class Percentile_select_base { // SELECT * FROM x ORDER BY y LIMIT 10;
	protected Cancelable cxl;
	protected Percentile_rng rng;
	protected Percentile_rng_log rng_log;
	/**
	 * Runs the read loop: opens a reader for the current window, counts accepted rows,
	 * and when the reader is exhausted logs the pass, moves the window and decides whether to stop.
	 * Returns early whenever cxl reports cancellation. Exceptions are logged as a warning, not rethrown;
	 * the open reader (if any) is always released.
	 */
	protected void Select() {
		Db_rdr rdr = null;
		try {
			int rdr_found = 0;
			while (true) {
				if (cxl.Canceled()) return;
				if (rdr == null) {
					rdr = Rdr__init();	// EXPENSIVE
					rdr_found = 0;
					if (cxl.Canceled()) return;
				}
				if (!Row__read(rdr)) {	// EXPENSIVE
					if (cxl.Canceled()) return;
					// remember the window that was just read; Rng__update moves it to the next one
					int pass_score_bgn = rng.Score_bgn();
					int pass_score_end = rng.Score_end();
					rdr = Rdr__term(rdr);
					Rng__update(rdr_found);
					// log after the update so that found / elapsed values belong to the same pass as the logged window;
					// logging before the update paired each window with the previous pass's counts
					rng_log.Log(pass_score_bgn, pass_score_end, rng.Found_rdr(), rng.Found_all(), rng.Elapsed());
					boolean found_enough = Found_enough();
					boolean none_left = rng.Score_bgn() == 0;
					Rdr__done(found_enough, none_left);
					if (found_enough || none_left)
						break;
					else
						continue;		// resume from top; will create new rdr
				}
				if (Row__eval()) ++rdr_found;
			}
		}
		catch (Exception exc) {
			Gfo_usr_dlg_.Instance.Warn_many("", "", "error during percentile; err=~{0}", Err_.Message_gplx_log(exc));
		}
		finally {
			rdr = Rdr__term(rdr);
		}
	}
	/** Opens a reader for the current score window. */
	protected abstract Db_rdr Rdr__init();
	/** Hook called after each exhausted reader, once the window has been updated; does nothing by default. */
	@gplx.Virtual protected void Rdr__done(boolean found_enough, boolean none_left) {}
	/** Releases rdr if it is not null; always returns null so callers can reset their reference. */
	@gplx.Virtual protected Db_rdr Rdr__term(Db_rdr rdr) {
		if (rdr != null) rdr.Rls();
		return null;
	}
	/** Reports the row count of the finished pass to rng. */
	@gplx.Virtual protected void Rng__update(int rdr_found) {rng.Update(rdr_found);}
	/** Advances the reader; returning false ends the current pass. The default always returns true, so subclasses need to override it for a pass to end. */
	@gplx.Virtual protected boolean Row__read(Db_rdr rdr) {return true;}
	/** Decides whether the current row counts as found. */
	@gplx.Virtual protected boolean Row__eval() {return true;}	// NOTE: return true by default; DEPENDENCY: Srch_word_count_wkr
	/** Whether enough rows have been collected to stop; the default never stops early. */
	@gplx.Virtual protected boolean Found_enough() {return false;}
}