1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
This commit is contained in:
gnosygnu
2015-07-12 21:10:02 -04:00
commit 794b5a232f
3099 changed files with 238212 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
/**
 * Comparer for byte arrays that ignores a fixed-size prefix: both operands are
 * compared from the same start offset through the end of each array.
 */
class Bry_comparer_bgn_eos implements gplx.lists.ComparerAble {
  private int bgn;	// offset of the first compared byte; applied to both operands

  /** @param bgn offset at which comparison starts in either array */
  public Bry_comparer_bgn_eos(int bgn) {
    this.bgn = bgn;
  }

  /**
   * Casts both arguments to byte[] and delegates to Bry_.Compare over [bgn, length) of each.
   * @return whatever Bry_.Compare returns for the two ranges
   */
  public int compare(Object lhsObj, Object rhsObj) {
    byte[] lhsBry = (byte[])lhsObj;
    byte[] rhsBry = (byte[])rhsObj;
    int lhsEnd = lhsBry.length;
    int rhsEnd = rhsBry.length;
    return Bry_.Compare(lhsBry, bgn, lhsEnd, rhsBry, bgn, rhsEnd);
  }
}

View File

@@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.ios.*;
/** Key generator that uses the entire item (including whatever terminates it) as the key. */
public class Io_line_rdr_key_gen_all implements Io_line_rdr_key_gen {
  /** Shared instance; the constructor is package-private. */
  public static final Io_line_rdr_key_gen_all _ = new Io_line_rdr_key_gen_all();

  Io_line_rdr_key_gen_all() {}

  /** Sets the reader's key range to be identical to its current item range. */
  public void Gen(Io_line_rdr bfr) {
    int key_bgn = bfr.Itm_pos_bgn();
    bfr.Key_pos_bgn_(key_bgn).Key_pos_end_(bfr.Itm_pos_end());
  }
}

View File

@@ -0,0 +1,63 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.ios.*;
/**
 * Sort command that writes each distinct key exactly once.
 *
 * Rows are expected to arrive already sorted by key: a row is appended to the output buffer only
 * when its key differs from the key that was appended last. The buffer is written to the next url
 * from the url generator whenever adding another key would exceed the configured maximum size.
 */
class Io_sort_cmd_img implements Io_sort_cmd {
  Bry_bfr fil_bfr = Bry_bfr.new_();
  // range inside fil_bfr of the most recently appended key; prv_itm_end == 0 means "no previous key"
  int prv_itm_bgn, prv_itm_end;
  byte line_dlm = Byte_ascii.Nil;
  private int make_fil_max = 65 * Io_mgr.Len_kb;
  Io_url_gen make_url_gen;

  /** Max number of buffered bytes before the buffer is flushed to a file. */
  public int Make_fil_max() {return make_fil_max;}
  public Io_sort_cmd_img Make_fil_max_(int v) {make_fil_max = v; return this;}

  /** Generator that supplies the url for each flushed file. */
  public Io_url_gen Make_url_gen() {return make_url_gen;}
  public Io_sort_cmd_img Make_url_gen_(Io_url_gen v) {make_url_gen = v; return this;}

  /** Resets the "previous key" marker. */
  public void Sort_bgn() {
    prv_itm_bgn = prv_itm_end = 0;
  }

  /**
   * Appends the reader's current key (plus a newline) unless it equals the previously appended key.
   * @param rdr reader positioned on a row; only its key range is read
   */
  public void Sort_do(Io_line_rdr rdr) {
    if (line_dlm == Byte_ascii.Nil) line_dlm = rdr.Line_dlm();
    int rdr_key_bgn = rdr.Key_pos_bgn(), rdr_key_end = rdr.Key_pos_end();
    boolean key_is_new = prv_itm_end == 0
      || !Bry_.Match(rdr.Bfr(), rdr_key_bgn, rdr_key_end, fil_bfr.Bfr(), prv_itm_bgn, prv_itm_end);
    if (!key_is_new) return;
    int rdr_key_len = rdr_key_end - rdr_key_bgn;
    if (fil_bfr.Len() + rdr_key_len > make_fil_max) Flush();
    // FIX: read the buffer length only AFTER the possible flush. Flush() clears fil_bfr, so the
    // key is appended at the new length; using the pre-flush length left prv_itm_bgn / prv_itm_end
    // pointing at stale offsets and broke duplicate detection for the first keys of every new file.
    prv_itm_bgn = fil_bfr.Len();
    prv_itm_end = prv_itm_bgn + rdr_key_len;
    fil_bfr.Add_mid(rdr.Bfr(), rdr_key_bgn, rdr_key_end).Add_byte_nl();
  }

  /** Writes whatever is still buffered. */
  public void Sort_end() {
    Flush();
    //fil_wtr.Rls(); itm_bfr.Rls(); fil_wtr.Rls(); reg_bfr.Rls(); key_bfr_0.Rls(); key_bfr_n.Rls();
  }

  /** Saves the buffered bytes to the next generated url and clears the buffer. */
  private void Flush() {
    Io_mgr.I.SaveFilBry(make_url_gen.Nxt_url(), fil_bfr.Bfr(), fil_bfr.Len());
    fil_bfr.Clear();
  }
}
/**
 * Key generator for pipe-terminated rows: the key is the row minus its last field.
 * EX: "A.png|0|220|120|.8|Page 1|\n" has the key "A.png|0|220|120|.8|"
 */
class Io_line_rdr_key_gen_img implements Io_line_rdr_key_gen {
  public void Gen(Io_line_rdr rdr) {
    int row_bgn = rdr.Itm_pos_bgn();
    int row_end = rdr.Itm_pos_end();
    rdr.Key_pos_bgn_(row_bgn);
    // NOTE: -2 to skip terminating |\n, so the backward search lands on the pipe before the last field
    int search_bgn = row_end - 2;
    int last_fld_pipe = Bry_finder.Find_bwd(rdr.Bfr(), Byte_ascii.Pipe, search_bgn, row_bgn);
    // NOTE: +1 to include terminating |; enforces every field terminating with |; EX: A.png|0|220|120|.8|\n
    rdr.Key_pos_end_(last_fld_pipe + 1);
  }
}
/** Key generator that uses the whole item as the key, minus its final byte (the closing newline). */
class Io_line_rdr_key_gen_all_wo_nl implements Io_line_rdr_key_gen {
  /** Shared instance; the constructor is package-private. */
  public static final Io_line_rdr_key_gen_all_wo_nl _ = new Io_line_rdr_key_gen_all_wo_nl();

  Io_line_rdr_key_gen_all_wo_nl() {}

  /** Sets the key range to [item begin, item end - 1). */
  public void Gen(Io_line_rdr bfr) {
    int key_bgn = bfr.Itm_pos_bgn();
    bfr.Key_pos_bgn_(key_bgn).Key_pos_end_(bfr.Itm_pos_end() - 1); // subtract closing nl
  }
}

View File

@@ -0,0 +1,46 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
import gplx.ios.*;
/** Tests Io_sort_cmd_img together with Io_line_rdr_key_gen_img using in-memory files. */
public class Io_sort_cmd_img_tst {
  /** Rows that differ only in their last field collapse to a single key row. */
  @Test public void Basic() {
    String raw = String_.Concat_lines_nl
    ( "Abc.png|0|220|180|-1|Page 1|"
    , "Abc.png|0|220|180|-1|Page 2|"
    , "Abc.png|1|-1|-1|-1|Page 2|"
    );
    String expd = String_.Concat_lines_nl
    ( "Abc.png|0|220|180|-1|"
    , "Abc.png|1|-1|-1|-1|"
    );
    tst_cmd(raw, expd);
  }

  /** Saves raw to a source file, runs the sort command over every row, and compares the target file to expd. */
  private void tst_cmd(String raw, String expd) {
    Io_url src_url = Io_url_.mem_fil_("mem/src.csv");
    Io_url trg_url = Io_url_.mem_fil_("mem/trg.csv");
    Io_mgr.I.SaveFilStr(src_url, raw);

    Io_sort_cmd_img sort_cmd = new Io_sort_cmd_img().Make_url_gen_(Io_url_gen_.fil_(trg_url));
    Io_line_rdr line_rdr = new Io_line_rdr(Gfo_usr_dlg_.Test(), src_url).Key_gen_(new Io_line_rdr_key_gen_img());

    sort_cmd.Sort_bgn();
    while (line_rdr.Read_next())
      sort_cmd.Sort_do(line_rdr);
    sort_cmd.Sort_end();

    String actl = Io_mgr.I.LoadFilStr(trg_url);
    Tfds.Eq_str_lines(expd, actl);
  }
}

View File

@@ -0,0 +1,60 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.ios.*; import gplx.xowa.tdbs.*;
/**
 * Make command that copies every row into xdat files under a directory and keeps a registry
 * with one line per file: file index | first key | last key | item count.
 */
public class Io_sort_cmd_ns implements Io_make_cmd {
  Xob_xdat_file_wtr fil_wtr;
  Bry_bfr reg_bfr = Bry_bfr.new_(), key_bfr_0 = Bry_bfr.new_(512), key_bfr_n = Bry_bfr.new_(512);
  int fil_count = 0, itm_count = 0;
  Gfo_usr_dlg usr_dlg;
  private int trg_fil_max = 65 * Io_mgr.Len_kb;
  Io_url reg_url;
  Io_url make_dir;

  public Io_sort_cmd_ns(Gfo_usr_dlg usr_dlg) {
    this.usr_dlg = usr_dlg;
  }

  /** Max size passed to the xdat file writer. */
  public int Trg_fil_max() {return trg_fil_max;}
  public Io_sort_cmd_ns Trg_fil_max_(int v) {
    trg_fil_max = v;
    return this;
  }

  /** Directory that receives the xdat files and the registry file. */
  public Io_sort_cmd Make_dir_(Io_url v) {
    make_dir = v;
    return this;
  }

  /** Resets the counters and creates the file writer and registry url for the current make_dir. */
  public void Sort_bgn() {
    fil_count = itm_count = 0;
    fil_wtr = Xob_xdat_file_wtr.new_file_(trg_fil_max, make_dir);
    reg_url = make_dir.GenSubFil(Xotdb_dir_info_.Name_reg_fil);
  }

  /** Appends the reader's current item to the current file, flushing first if the writer asks for it. */
  public void Sort_do(Io_line_rdr rdr) {
    int row_bgn = rdr.Itm_pos_bgn();
    int row_end = rdr.Itm_pos_end();
    int key_bgn = rdr.Key_pos_bgn();
    int key_end = rdr.Key_pos_end();
    if (fil_wtr.FlushNeeded(row_end - row_bgn))
      Flush();
    byte[] src = rdr.Bfr();
    // first key of the file is only captured while key_bfr_0 is empty (it is cleared on flush)
    if (key_bfr_0.Len() == 0)
      key_bfr_0.Add_mid(src, key_bgn, key_end);
    // last key of the file is overwritten by every row
    key_bfr_n.Clear().Add_mid(src, key_bgn, key_end);
    fil_wtr.Bfr().Add_mid(rdr.Bfr(), row_bgn, row_end);
    fil_wtr.Add_idx(Byte_ascii.Nil);
    itm_count++;
  }

  /** Flushes the last file and appends the accumulated registry lines to the registry file. */
  public void Sort_end() {
    Flush();
    Io_mgr.I.AppendFilBfr(reg_url, reg_bfr);
    //fil_wtr.Rls(); reg_bfr.Rls(); key_bfr_0.Rls(); key_bfr_n.Rls();
  }

  /** Adds a registry line for the current file, resets the per-file state, and flushes the writer. */
  private void Flush() {
    reg_bfr
      .Add_int_variable(fil_count++)
      .Add_byte(Byte_ascii.Pipe)
      .Add_bfr_and_preserve(key_bfr_0)
      .Add_byte(Byte_ascii.Pipe)
      .Add_bfr_and_preserve(key_bfr_n)
      .Add_byte(Byte_ascii.Pipe)
      .Add_int_variable(itm_count)
      .Add_byte(Byte_ascii.Nl);
    itm_count = 0;
    key_bfr_0.Clear();
    boolean report_prog = fil_wtr.Fil_idx() % 10 == 0;	// only report every 10th file
    if (report_prog)
      usr_dlg.Prog_many("cmd_ns", "prog", "saving: ~{0} ~{1}", reg_url.OwnerDir().OwnerDir().NameOnly(), fil_wtr.Fil_url().NameOnly());
    fil_wtr.Flush(usr_dlg);
  }
}

View File

@@ -0,0 +1,113 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.xowa.apps.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.xmls.*; import gplx.xowa.bldrs.cfgs.*; import gplx.xowa.bldrs.langs.*;
/**
 * Builder: owns a list of commands and runs them in phases (init, bgn/run/end, term).
 * Also holds the size settings (sort memory, dump file, make file) and is scriptable through Invk.
 */
public class Xob_bldr implements GfoInvkAble {
  private final Xoae_app app;
  private final Xob_cmd_mgr cmd_mgr;
  private Xob_import_marker import_marker;
  private Xob_wiki_cfg_bldr wiki_cfg_bldr;
  private Xob_xml_parser dump_parser;	// created on first call to Dump_parser()
  private boolean pause_at_end = false;
  private long prv_prog_time;	// tick count of the last progress message that was actually printed
  private int sort_mem_len = 16 * Io_mgr.Len_mb;
  private int dump_fil_len = 1 * Io_mgr.Len_mb;
  private int make_fil_len = 64 * Io_mgr.Len_kb;

  public Xob_bldr(Xoae_app app) {
    this.app = app;
    this.cmd_mgr = new Xob_cmd_mgr(this);
    this.import_marker = new Xob_import_marker();
    this.wiki_cfg_bldr = new Xob_wiki_cfg_bldr(this);
  }

  public Xoae_app App() {return app;}
  public Xob_cmd_mgr Cmd_mgr() {return cmd_mgr;}
  public Gfo_usr_dlg Usr_dlg() {return app.Usr_dlg();}
  public int Sort_mem_len() {return sort_mem_len;}
  public Xob_bldr Sort_mem_len_(int v) {sort_mem_len = v; return this;}
  public int Dump_fil_len() {return dump_fil_len;}
  public Xob_bldr Dump_fil_len_(int v) {dump_fil_len = v; return this;}
  public int Make_fil_len() {return make_fil_len;}
  public Xob_bldr Make_fil_len_(int v) {make_fil_len = v; return this;}
  public Xob_import_marker Import_marker() {return import_marker;}
  public Xob_wiki_cfg_bldr Wiki_cfg_bldr() {return wiki_cfg_bldr;}
  public void Pause_at_end_(boolean v) {this.pause_at_end = v;}

  /** Returns the xml parser, creating it on first use. */
  public Xob_xml_parser Dump_parser() {
    if (dump_parser == null)
      this.dump_parser = new Xob_xml_parser();
    return dump_parser;
  }

  /**
   * Prints a progress message, but at most once per 100 ticks.
   * @param pct_idx index in ary that is overwritten with the percentage of cur / end; ignored when negative
   */
  public void Print_prog_msg(long cur, long end, int pct_idx, String fmt, Object... ary) {
    long now = Env_.TickCount();
    boolean too_soon = now - prv_prog_time < 100;
    if (too_soon) return;
    this.prv_prog_time = now;
    if (pct_idx >= 0)
      ary[pct_idx] = DecimalAdp_.CalcPctStr(cur, end, "00.00");
    app.Usr_dlg().Prog_many("", "", fmt, ary);
  }

  /**
   * Launches the app and runs all commands: Cmd_init for each, then Cmd_bgn / Cmd_run / Cmd_end for each,
   * then Cmd_term for each; finally clears the command list. Any exception is rethrown wrapped.
   */
  public void Run() {
    try {
      app.Launch(); // HACK: bldr will be called by a gfs file which embeds "bldr.run" inside it; need to call Launch though before Run; DATE:2013-03-23
      long run_bgn = Env_.TickCount();

      // phase 1: init
      int init_len = cmd_mgr.Len();
      for (int idx = 0; idx < init_len; ++idx) {
        Xob_cmd init_cmd = cmd_mgr.Get_at(idx);
        init_cmd.Cmd_init(this);
      }

      // phase 2: bgn / run / end
      int run_len = cmd_mgr.Len(); // NOTE: refresh len b/c other cmds may have added new ones in Cmd_init
      for (int idx = 0; idx < run_len; ++idx) {
        Xob_cmd run_cmd = cmd_mgr.Get_at(idx);
        app.Usr_dlg().Note_many("", "", "cmd bgn: ~{0}", run_cmd.Cmd_key());
        long cmd_bgn = Env_.TickCount();
        run_cmd.Cmd_bgn(this);
        run_cmd.Cmd_run();
        run_cmd.Cmd_end();
        Env_.GarbageCollect();
        app.Usr_dlg().Note_many("", "", "cmd end: ~{0} ~{1}", run_cmd.Cmd_key(), TimeSpanAdp_.from_(cmd_bgn).XtoStrUiAbbrv());
      }

      // phase 3: term; uses the same length as phase 2
      for (int idx = 0; idx < run_len; ++idx) {
        Xob_cmd term_cmd = cmd_mgr.Get_at(idx);
        term_cmd.Cmd_term();
      }

      app.Usr_dlg().Note_many("", "", "bldr done: ~{0}", TimeSpanAdp_.from_(run_bgn).XtoStrUiAbbrv());
      cmd_mgr.Clear();
      if (pause_at_end && !Env_.Mode_testing()) {
        ConsoleAdp._.ReadLine("press enter to continue");
      }
    }
    catch (Exception e) {
      throw Exc_.new_exc(e, "bldr", "unknown error");
    }
  }

  /** Calls Cmd_end on every registered command. */
  private void Cancel() {
    int len = cmd_mgr.Len();
    for (int idx = 0; idx < len; ++idx) {
      Xob_cmd cmd = cmd_mgr.Get_at(idx);
      cmd.Cmd_end();
    }
  }

  /** Script entry point: getters return the requested object; setters and actions return this. */
  public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
    if (ctx.Match(k, Invk_pause_at_end_)) {pause_at_end = m.ReadBoolOrTrue("val"); return this;}
    if (ctx.Match(k, Invk_cmds)) return cmd_mgr;
    if (ctx.Match(k, Invk_wiki_cfg_bldr)) return wiki_cfg_bldr;
    if (ctx.Match(k, Invk_sort_mem_len_)) {sort_mem_len = gplx.ios.Io_size_.Load_int_(m); return this;}
    if (ctx.Match(k, Invk_dump_fil_len_)) {dump_fil_len = gplx.ios.Io_size_.Load_int_(m); return this;}
    if (ctx.Match(k, Invk_make_fil_len_)) {make_fil_len = gplx.ios.Io_size_.Load_int_(m); return this;}
    if (ctx.Match(k, Invk_run)) {Run(); return this;}
    if (ctx.Match(k, Invk_cancel)) {Cancel(); return this;}
    return GfoInvkAble_.Rv_unhandled;
  }

  private static final String
    Invk_cmds = "cmds"
  , Invk_wiki_cfg_bldr = "wiki_cfg_bldr"
  , Invk_pause_at_end_ = "pause_at_end_"
  , Invk_sort_mem_len_ = "sort_mem_len_"
  , Invk_dump_fil_len_ = "dump_fil_len_"
  , Invk_make_fil_len_ = "make_fil_len_"
  , Invk_run = "run"
  , Invk_cancel = "cancel"
  ;
}
/*
. make_fil_len: max size of made file; EX: /id/..../0000000001.csv will have max len of 64 KB
. dump_fil_len: max size of temp file; EX: /tmp/.../0000000001.csv will have max len of 1 MB
. sort_mem_len: max size of memory for external merge process; note the following
.. a contiguous range of memory of that size will be needed: "Bry_bfr.new_(sort_mem_len)" will be called
.. large sort_mem_len will result in smaller number of merge files
... EX: 16 MB will take en.wikipedia.org's 640 MB title files and generate 40 temp files of 8 MB each
.. number of merge files is number of open file channels during merge process
... 40 is a "reasonable" number; the 1st max is 512 (for older windows OS's) and 2048 for Windows XP; Linux seems to be about 7000
.. small sort_mem_len will use smaller buffer; 16 MB / 40 files -> 400 kb buffer for each file
... do not go under max page size for a given row
... for example, a 100 b buffer will fail if a given row is > 100 b (the entire row won't be loaded in memory)
.. smaller buffer will mean more refills which will require more I/O
... EX: 400 kb buffer will require at least 20 refills to read the entire 8 MB file
*/

View File

@@ -0,0 +1,137 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.ios.*; import gplx.xowa.tdbs.*;
/**
 * Make command that merges consecutive rows sharing the same key into one item, writes the items
 * into xdat files, and keeps a registry with one line per file:
 * file index | first key | last key | item count.
 *
 * The pending item lives in cur_bfr; its key is assumed to occupy cur_bfr[0, itm_key_end).
 */
public class Xob_make_cmd_site implements Io_make_cmd {
  Xob_xdat_file_wtr fil_wtr;
  Bry_bfr cur_bfr = Bry_bfr.new_(), reg_bfr = Bry_bfr.new_(), reg_key_0 = Bry_bfr.new_(512), reg_key_n = Bry_bfr.new_(512);
  int make_fil_max = 65 * Io_mgr.Len_kb, fil_count = 0, itm_count = 0;
  int itm_key_end = 0;	// end of the pending item's key, as an offset into cur_bfr
  Io_url reg_url;
  Gfo_usr_dlg usr_dlg;
  Io_url make_dir;
  private byte line_dlm = Byte_ascii.Nil;

  public Xob_make_cmd_site(Gfo_usr_dlg usr_dlg, Io_url make_dir, int make_fil_max) {
    this.usr_dlg = usr_dlg;
    this.make_dir = make_dir;
    this.make_fil_max = make_fil_max;
  }

  public Io_sort_cmd Make_dir_(Io_url v) {make_dir = v; return this;}
  /** Delimiter passed to the writer's index; when left as Nil, Sort_do takes it from the first reader it sees. */
  public byte Line_dlm() {return line_dlm;}
  public Xob_make_cmd_site Line_dlm_(byte v) {line_dlm = v; return this;}

  /** Resets counters and creates the registry url and file writer for make_dir. */
  public void Sort_bgn() {
    fil_count = itm_count = itm_key_end = 0;
    reg_url = make_dir.GenSubFil(Xotdb_dir_info_.Name_reg_fil);
    fil_wtr = Xob_xdat_file_wtr.new_file_(make_fil_max, make_dir);
  }

  /**
   * Processes the reader's current row: if its key equals the pending item's key, the rest of the
   * row is appended to the pending item; otherwise the pending item is moved to the file writer
   * and the row becomes the new pending item.
   */
  public void Sort_do(Io_line_rdr rdr) {
    if (line_dlm == Byte_ascii.Nil) line_dlm = rdr.Line_dlm();
    int rdr_key_bgn = rdr.Key_pos_bgn(), rdr_key_end = rdr.Key_pos_end();
    int rdr_key_len = rdr_key_end - rdr_key_bgn;
    int rdr_val_bgn = rdr_key_end;            // NOTE: no +1: want to include fld_dlm for below
    int rdr_val_end = rdr.Itm_pos_end() - 1;  // -1: ignore rdr_dlm
    if (Bry_.Match(cur_bfr.Bfr(), 0, itm_key_end, rdr.Bfr(), rdr_key_bgn, rdr_key_end)) // key is same; add rest of line as val
      cur_bfr.Add_mid(rdr.Bfr(), rdr_val_bgn, rdr_val_end);
    else {
      // NOTE(review): the flush check runs before the pending item is moved to fil_wtr,
      // whereas Do_bry moves the pending item first — confirm the difference is intended
      if (fil_wtr.FlushNeeded(cur_bfr.Len() + rdr_key_len)) Flush();
      byte[] bfr = rdr.Bfr();
      if (reg_key_0.Len() == 0) {             // first key of the file not yet captured
        if (cur_bfr.Len() == 0)
          reg_key_0.Add_mid(bfr, rdr_key_bgn, rdr_key_end);
        else
          reg_key_0.Add_mid(cur_bfr.Bfr(), 0, itm_key_end);
      }
      if (cur_bfr.Len() > 0) {                // move pending item to the file writer
        reg_key_n.Clear().Add_mid(cur_bfr.Bfr(), 0, itm_key_end);
        fil_wtr.Bfr().Add_bfr_and_clear(cur_bfr);
        fil_wtr.Add_idx(line_dlm);
      }
      cur_bfr.Add_mid(rdr.Bfr(), rdr.Itm_pos_bgn(), rdr.Itm_pos_end() - 1); // -1 to ignore closing newline
      itm_key_end = rdr_key_len;              // NOTE: must be set last
      ++itm_count;
    }
  }

  /**
   * Same merge logic as Sort_do, but for a row given directly as a byte range.
   * Items longer than 100 MB are not buffered; they are written straight to their own file.
   * @param bry     array holding the row
   * @param key_bgn start of the key in bry
   * @param key_end end of the key in bry
   * @param itm_bgn start of the row in bry
   * @param itm_end end of the row in bry, including the closing delimiter
   */
  public void Do_bry(byte[] bry, int key_bgn, int key_end, int itm_bgn, int itm_end) {
    int val_bgn = key_end;      // NOTE: no +1: want to include fld_dlm for below
    int val_end = itm_end - 1;  // -1: ignore rdr_dlm
    if (Bry_.Match(cur_bfr.Bfr(), 0, itm_key_end, bry, key_bgn, key_end)) // key is same; add rest of line as val
      cur_bfr.Add_mid(bry, val_bgn, val_end);
    else { // key changed;
      int itm_len = itm_end - itm_bgn;
      if (cur_bfr.Len() > 0) { // pending itm
        fil_wtr.Bfr().Add_bfr_and_clear(cur_bfr); // add cur_bfr to fil_bfr
        fil_wtr.Add_idx(line_dlm); // add cur_itm to hdr
        if (fil_wtr.FlushNeeded(cur_bfr.Len() + itm_len))
          Flush();
      }
      if (reg_key_0.Len() == 0) // regy.key_0 bfr is empty
        reg_key_0.Add_mid(bry, key_bgn, key_end); // update reg_0key_0
      reg_key_n.Clear().Add_mid(bry, key_bgn, key_end); // always update reg_key_n
      if (itm_len > 100 * Io_mgr.Len_mb)
        // NOTE(review): itm_key_end is not reset on this path although cur_bfr is left empty — confirm
        Flush_large(bry, itm_bgn, itm_end, itm_len);
      else {
        cur_bfr.Add_mid(bry, itm_bgn, itm_end - 1); // add incoming itm; -1 to ignore closing newline
        // FIX: itm_key_end is an offset into cur_bfr, which now holds bry[itm_bgn, itm_end - 1);
        // translate key_end from bry coordinates. Was "key_end", which is only right when itm_bgn == 0
        itm_key_end = key_end - itm_bgn; // NOTE: must be set last
        ++itm_count;
      }
    }
  }

  /** Moves the pending item to the file writer, flushes the last file, and appends the registry lines to the registry file. */
  public void Sort_end() {
    reg_key_n.Clear().Add_mid(cur_bfr.Bfr(), 0, itm_key_end);
    fil_wtr.Bfr().Add_bfr_and_clear(cur_bfr);
    fil_wtr.Add_idx(line_dlm);
    Flush();
    Io_mgr.I.AppendFilBfr(reg_url, reg_bfr);
    //fil_wtr.Rls(); cur_bfr.Rls(); fil_wtr.Rls(); reg_bfr.Rls(); reg_key_0.Rls(); reg_key_n.Rls();
  }

  /** Writes one oversized item directly to the writer's current file url, bypassing the writer's buffer. */
  private void Flush_large(byte[] bry, int itm_bgn, int itm_end, int itm_len) {
    ++itm_count;
    this.Flush_reg();
    fil_wtr.Add_idx_direct(itm_len, Byte_.Zero);
    Io_stream_wtr wtr = null;
    try {
      wtr = Io_stream_wtr_.file_(fil_wtr.Fil_url());
      wtr.Open();
      fil_wtr.FlushIdx(wtr);
      wtr.Write(bry, itm_bgn, itm_end);
      wtr.Flush();
      fil_wtr.Clear();
      fil_wtr.Url_gen_add();
    }
    finally {if (wtr != null) wtr.Rls();}
  }

  /** Records the registry line for the current file, then flushes the file itself. */
  private void Flush() {
    Flush_reg();
    Flush_fil();
  }

  /** Adds "fil_idx|key_0|key_n|itm_count\n" to the registry buffer and resets the per-file state. */
  private void Flush_reg() {
    reg_bfr
      .Add_int_variable(fil_count++).Add_byte(Byte_ascii.Pipe)
      .Add_bfr_and_preserve(reg_key_0).Add_byte(Byte_ascii.Pipe)
      .Add_bfr_and_preserve(reg_key_n).Add_byte(Byte_ascii.Pipe)
      .Add_int_variable(itm_count).Add_byte(Byte_ascii.Nl);
    itm_count = 0;
    reg_key_0.Clear();
  }

  /** Flushes the file writer; a progress message is printed for every 10th file. */
  private void Flush_fil() {
    if (fil_wtr.Fil_idx() % 10 == 0)
      usr_dlg.Prog_many("cmd_site", "prog", "saving: ~{0} ~{1}", reg_url.OwnerDir().NameOnly(), fil_wtr.Fil_url().NameOnly());
    fil_wtr.Flush(usr_dlg);
  }
}

View File

@@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.xowa.wikis.data.tbls.*;
// Package-private contract for a builder command that works on pages (Xowd_page_itm).
// NOTE(review): the method names mirror the Cmd_bgn / Cmd_run / Cmd_end lifecycle that Xob_bldr.Run drives for Xob_cmd,
// except that Cmd_run receives a page here; presumably it is called once per page — confirm against implementers.
interface Xowd_page_cmd {
// key that identifies the command
String Cmd_key();
// start-of-run hook; receives the builder
void Cmd_bgn(Xob_bldr bldr);
// handles one page
void Cmd_run(Xowd_page_itm page);
// end-of-run hook
void Cmd_end();
}

View File

@@ -0,0 +1,43 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.strings.*;
/**
 * Running statistics for one namespace: number of files, total size, and the largest / smallest
 * size seen together with the index at which each was seen.
 */
public class Xob_stat_itm implements NewAble {
  /** Field delimiter used by XtoStr. */
  public static final char Dlm = '|';
  /** Prototype instance handed to Get_by_or_new; its ns is never set. */
  public static final Xob_stat_itm _ = new Xob_stat_itm();

  private String ns;
  public int Fils;
  public long Size;
  // NOTE: the extremes are seeded with the int bounds even though the fields are long
  public long SizeMax = Int_.MinValue;
  public long SizeMin = Int_.MaxValue;
  public int SizeMaxIdx, SizeMinIdx;

  Xob_stat_itm() {}
  public Xob_stat_itm(String ns) {
    this.ns = ns;
  }

  public String Ns() {return ns;}

  /** NewAble factory: casts the key to String and uses it as the namespace of a new item. */
  public Object NewByKey(Object o) {
    String key = (String)o;
    return new Xob_stat_itm(key);
  }

  /** Counts one more file of the given size; idx is remembered if size is a new max or a new min. */
  public void Tally(long size, int idx) {
    ++Fils;
    Size = Size + size;
    if (size > SizeMax) {
      SizeMax = size;
      SizeMaxIdx = idx;
    }
    if (size < SizeMin) {
      SizeMin = size;
      SizeMinIdx = idx;
    }
  }

  /** Appends "ns|Fils|Size|SizeMax|SizeMaxIdx|SizeMin|SizeMinIdx" (no trailing delimiter) to sb. */
  public void XtoStr(String_bldr sb) {
    XtoStr_fld(sb, ns);
    XtoStr_fld(sb, Fils);
    XtoStr_fld(sb, Size);
    XtoStr_fld(sb, SizeMax);
    XtoStr_fld(sb, SizeMaxIdx);
    XtoStr_fld(sb, SizeMin);
    sb.Add(Int_.Xto_str(SizeMinIdx));
  }

  // each overload appends the value followed by Dlm and returns this
  Xob_stat_itm XtoStr_fld(String_bldr sb, long v) {
    sb.Add(Long_.Xto_str(v)).Add(Xob_stat_itm.Dlm);
    return this;
  }
  Xob_stat_itm XtoStr_fld(String_bldr sb, int v) {
    sb.Add(Int_.Xto_str(v)).Add(Xob_stat_itm.Dlm);
    return this;
  }
  Xob_stat_itm XtoStr_fld(String_bldr sb, String v) {
    sb.Add(v).Add(Xob_stat_itm.Dlm);
    return this;
  }
}

View File

@@ -0,0 +1,68 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.strings.*; import gplx.xowa.tdbs.*;
/** Registry of Xob_stat_type objects keyed by type id, with two text renderings of the collected statistics. */
public class Xob_stat_mgr {
  Ordered_hash regy = Ordered_hash_.new_();

  /** Returns the stat type registered for tid, registering a new one if there is none. */
  public Xob_stat_type GetOrNew(byte tid) {
    Xob_stat_type found = (Xob_stat_type)regy.Get_by(tid);
    if (found != null) return found;
    Xob_stat_type made = new Xob_stat_type(tid);
    regy.Add(tid, made);
    return made;
  }

  /**
   * Renders a table: a header row with each type name padded to 6 chars followed by "ns",
   * then one row per namespace holding each type's file count (zero-padded to 5) and the namespace.
   * NOTE: uses GetOrNew on every type, so a namespace missing from a type is added to that type.
   */
  public String Print(Xow_ns_mgr nsMgr) {
    String_bldr sb = String_bldr_.new_();
    int hdr_idx = 0;
    while (hdr_idx < regy.Count()) {
      Xob_stat_type hdr_typ = (Xob_stat_type)regy.Get_at(hdr_idx);
      sb.Add(String_.PadEnd(Xotdb_dir_info_.Tid_name(hdr_typ.Tid()), 6, " "));
      hdr_idx++;
    }
    sb.Add_str_w_crlf("ns");
    String[] nsAry = GetNmsAry(nsMgr);
    int nsLen = nsAry.length;
    for (int nsIdx = 0; nsIdx < nsLen; nsIdx++) {
      String ns = nsAry[nsIdx];
      int col_idx = 0;
      while (col_idx < regy.Count()) {
        Xob_stat_type col_typ = (Xob_stat_type)regy.Get_at(col_idx);
        Xob_stat_itm cell = (Xob_stat_itm)col_typ.GetOrNew(ns);
        sb.Add(Int_.Xto_str_pad_bgn_zero(cell.Fils, 5)).Add(" ");
        col_idx++;
      }
      sb.Add_str_w_crlf(ns);
    }
    return sb.XtoStr();
  }

  /** Concatenates the XtoStr output of every registered type, in registration order. */
  public String XtoStr() {
    String_bldr sb = String_bldr_.new_();
    int idx = 0;
    while (idx < regy.Count()) {
      Xob_stat_type typ = (Xob_stat_type)regy.Get_at(idx);
      typ.XtoStr(sb);
      idx++;
    }
    return sb.XtoStr();
  }

  /** Collects the distinct namespace names across all types, in first-seen order. nsMgr is not used. */
  String[] GetNmsAry(Xow_ns_mgr nsMgr) {
    Ordered_hash nsRegy = Ordered_hash_.new_();
    for (int typIdx = 0; typIdx < regy.Count(); typIdx++) {
      Xob_stat_type typ = (Xob_stat_type)regy.Get_at(typIdx);
      for (int itmIdx = 0; itmIdx < typ.Count(); itmIdx++) {
        Xob_stat_itm itm = (Xob_stat_itm)typ.GetAt(itmIdx);
        if (nsRegy.Has(itm.Ns())) continue;
        nsRegy.Add_as_key_and_val(itm.Ns());
      }
    }
    return (String[])nsRegy.To_ary(String.class);
  }
}

View File

@@ -0,0 +1,36 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.strings.*; import gplx.xowa.tdbs.*;
/** Statistics for one type id: an ordered registry of Xob_stat_itm keyed by namespace name. */
public class Xob_stat_type {
  /** Prototype instance; its tid is left at the default. */
  public static final Xob_stat_type _ = new Xob_stat_type();

  private byte tid;
  Ordered_hash regy = Ordered_hash_.new_();

  Xob_stat_type() {}
  public Xob_stat_type(byte tid) {
    this.tid = tid;
  }

  public byte Tid() {return tid;}
  public int Count() {return regy.Count();}

  /** Returns the item for ns; a missing item is created through the Xob_stat_itm prototype. */
  public Xob_stat_itm GetOrNew(String ns) {
    Object rv = regy.Get_by_or_new(ns, Xob_stat_itm._);
    return (Xob_stat_itm)rv;
  }

  public Xob_stat_itm GetAt(int i) {
    Object rv = regy.Get_at(i);
    return (Xob_stat_itm)rv;
  }

  /** Appends one line per item: the type name, the delimiter, the item's own fields, and a newline. */
  public void XtoStr(String_bldr sb) {
    int idx = 0;
    while (idx < regy.Count()) {
      Xob_stat_itm itm = (Xob_stat_itm)regy.Get_at(idx);
      sb.Add(Xotdb_dir_info_.Tid_name(tid)).Add(Xob_stat_itm.Dlm);
      itm.XtoStr(sb);
      sb.Add(Byte_ascii.Nl);
      idx++;
    }
  }
}

View File

@@ -0,0 +1,272 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.ios.*;
public class Xob_xdat_file {
/** Raw bytes of the file: header followed by the items. */
public byte[] Src() {
  return src;
}
private byte[] src;
// NOTE: src_len can be different than src.length (occurs when reusing brys)
public int Src_len() {
  return src_len;
}
public Xob_xdat_file Src_len_(int v) {
  src_len = v;
  return this;
}
private int src_len;
/**
 * Replaces the bytes of one item with v and rebuilds src (header + items).
 * The end offsets of the item and of every later item are shifted by the change in length.
 * NOTE(review): the old item is skipped up to and including its delimiter, so v presumably has to carry its own trailing delimiter — confirm with callers.
 * @param bfr scratch buffer used to assemble the new src; emptied by Xto_bry_and_clear
 * @param itm item to replace; supplies the item index and its begin / end positions in the current src
 * @param v   replacement bytes
 */
public Xob_xdat_file Update(Bry_bfr bfr, Xob_xdat_itm itm, byte[] v) {
  int count = itm_ends.length;
  int idx = itm.Itm_idx();
  int prv_end = 0;
  if (idx != 0) prv_end = itm_ends[idx - 1];
  int old_end = itm_ends[idx];
  int new_end = prv_end + v.length;
  itm_ends[idx] = new_end;
  int delta = new_end - old_end;
  int nxt = idx + 1;
  while (nxt < count) {	// shift all following items
    itm_ends[nxt] += delta;
    nxt++;
  }
  Src_rebuild_hdr(bfr, count);
  bfr.Add_mid(src, itm_0_bgn, itm.Itm_bgn());	// items before the updated one
  bfr.Add(v);
  bfr.Add_mid(src, itm.Itm_end() + 1, src.length); // NOTE: + 1 to skip nl
  src = bfr.Xto_bry_and_clear();
  return this;
}
/** Copies the first ary_len items out of src, one byte array per item, using the end offsets in itm_ends. */
byte[][] Src_extract_brys(int ary_len) {
  byte[][] brys = new byte[ary_len][];
  int cur_bgn = this.itm_0_bgn;
  int idx = 0;
  while (idx < ary_len) {
    int cur_end = itm_ends[idx] + itm_0_bgn;	// itm_ends is relative to the first item
    brys[idx] = Bry_.Mid(src, cur_bgn, cur_end);
    cur_bgn = cur_end;
    idx++;
  }
  return brys;
}
/**
 * Sorts the items with the given comparer and rebuilds src (header + items) and itm_ends in sorted order.
 * @param bfr      scratch buffer used to assemble the new src; emptied by Xto_bry_and_clear
 * @param comparer ordering applied to the items' byte arrays
 */
public void Sort(Bry_bfr bfr, gplx.lists.ComparerAble comparer) {
  int ary_len = itm_ends.length;
  byte[][] brys = Src_extract_brys(ary_len);	// must run before itm_ends / itm_0_bgn are changed
  Array_.Sort(brys, comparer);
  // FIX: recompute itm_ends for the sorted order BEFORE writing the header. Src_rebuild_hdr derives
  // each length from itm_ends, so calling it first wrote the pre-sort lengths in front of the sorted
  // items; Update and Insert already adjust itm_ends before rebuilding the header.
  int itm_end = 0;
  for (int i = 0; i < ary_len; i++) {
    itm_end += brys[i].length;
    itm_ends[i] = itm_end;
  }
  Src_rebuild_hdr(bfr, ary_len);
  itm_0_bgn = (ary_len * Len_idx_itm) + Len_itm_dlm;
  for (int i = 0; i < ary_len; i++)
    bfr.Add(brys[i]);
  src = bfr.Xto_bry_and_clear();
}
/**
 * Appends itm as the new last item: grows itm_ends by one slot and rebuilds src.
 * @param bfr scratch buffer used to assemble the new src
 */
public void Insert(Bry_bfr bfr, byte[] itm) {
  int old_len = itm_ends.length;
  int new_len = old_len + 1;
  itm_ends = (int[])Array_.Resize(itm_ends, new_len);
  int last_end = 0;
  if (old_len != 0) last_end = itm_ends[old_len - 1];
  itm_ends[old_len] = last_end + itm.length;
  Src_rebuild(bfr, new_len, itm);
}
/** Writes the header to bfr: for each of the first ary_len items its length in base85 (5 chars) plus a field delimiter, then the row delimiter. */
private void Src_rebuild_hdr(Bry_bfr bfr, int ary_len) {
  int prv_end = 0;
  int idx = 0;
  while (idx < ary_len) {
    int cur_end = itm_ends[idx];
    bfr.Add_base85_len_5(cur_end - prv_end).Add_byte(Dlm_hdr_fld);	// length = distance between consecutive ends
    prv_end = cur_end;
    idx++;
  }
  bfr.Add_byte(Dlm_row);
}
/** Rebuilds src in two steps: the header first, then the item bytes (with new_itm appended when it is not null). */
private void Src_rebuild(Bry_bfr bfr, int ary_len, byte[] new_itm) {
  this.Src_rebuild_hdr(bfr, ary_len);
  this.Src_rebuild_brys(bfr, ary_len, new_itm);
}
/**
 * Appends the item bytes to bfr (which is expected to already hold the header) and replaces src with the result.
 * When new_itm is not null, the last of the ary_len slots belongs to new_itm: only the preceding items
 * are copied from the old src and new_itm is appended after them.
 */
private void Src_rebuild_brys(Bry_bfr bfr, int ary_len, byte[] new_itm) {
  int cur_bgn = itm_0_bgn;
  boolean has_new_itm = new_itm != null;
  int copy_len = ary_len;
  if (has_new_itm) copy_len = ary_len - 1;
  int idx = 0;
  while (idx < copy_len) {
    int cur_end = itm_ends[idx] + itm_0_bgn;
    bfr.Add_mid(src, cur_bgn, cur_end);
    cur_bgn = cur_end;
    idx++;
  }
  if (has_new_itm)
    bfr.Add(new_itm);
  // the old src has been read completely; now the first-item offset can be moved to match the new header size
  itm_0_bgn = (ary_len * Len_idx_itm) + Len_itm_dlm;
  src = bfr.Xto_bry_and_clear();
}
static final byte Dlm_hdr_fld = Byte_ascii.Pipe, Dlm_row = Byte_ascii.Nl;
/**
 * Serializes the file (header + items) and writes it to url.
 * The writer is released even when writing fails; failures are rethrown wrapped with the url.
 */
public void Save(Io_url url) {
  Bry_bfr save_bfr = Bry_bfr.new_();
  Srl_save_bry(save_bfr);
  Io_stream_wtr stream = Io_stream_wtr_.new_by_url_(url);
  try {
    stream.Open();
    stream.Write(save_bfr.Bfr(), 0, save_bfr.Len());
    stream.Flush();
  }
  catch (Exception e) {
    throw Exc_.new_exc(e, "xo", "failed to save file", "url", url.Xto_api());
  }
  finally {
    stream.Rls();
  }
}
/** Writes the serialized form to bfr: a header regenerated from itm_ends, then everything in src from the first item onward. */
public void Srl_save_bry(Bry_bfr bfr) {
  int len = itm_ends.length;
  int prv_end = 0;
  int idx = 0;
  while (idx < len) {
    int cur_end = itm_ends[idx];
    int itm_len = cur_end - prv_end;
    bfr.Add_base85_len_5(itm_len).Add_byte(Dlm_hdr_fld);
    prv_end = cur_end;
    idx++;
  }
  bfr.Add_byte(Dlm_row);
  bfr.Add_mid(src, itm_0_bgn, src.length);
}
// Returns a copy of the i-th item's bytes (from the previous item's end to this item's end).
public byte[] Get_bry(int i) {
	int rel_bgn = 0;								// item 0 starts right after the header
	if (i != 0) rel_bgn = itm_ends[i - 1];
	int abs_bgn = itm_0_bgn + rel_bgn;
	int abs_end = itm_0_bgn + itm_ends[i];
	return Bry_.Mid(src, abs_bgn, abs_end);
}
// Number of items in the file (one cumulative end offset is stored per item).
public int Count() {return itm_ends.length;}
// Points itm at the idx-th item: sets its src, index, and absolute bgn / end positions within src.
// The end excludes the trailing Len_itm_dlm byte(s) of the item. Returns this for chaining.
public Xob_xdat_file GetAt(Xob_xdat_itm itm, int idx) {
	itm.Src_(src);
	itm.Itm_idx_(idx);
	int rel_bgn;
	if (idx == 0)
		rel_bgn = 0;
	else
		rel_bgn = itm_ends[idx - 1];
	itm.Itm_bgn_(itm_0_bgn + rel_bgn);
	itm.Itm_end_(itm_0_bgn + itm_ends[idx] - Len_itm_dlm);
	return this;
}
// Clears itm, binary-searches the items for lkp, and, if an index is returned, points itm at that item.
// When the search returns String_.Find_none, itm is left cleared. Returns this for chaining.
public Xob_xdat_file Find(Xob_xdat_itm itm, byte[] lkp, int lkp_bgn, byte lkp_dlm, boolean exact) {
	itm.Clear();
	int found_idx = Xob_xdat_file_.BinarySearch(itm_0_bgn, src, itm_ends, lkp, lkp_bgn, lkp_dlm, 1, exact, itm);
	if (found_idx != String_.Find_none)
		GetAt(itm, found_idx);
	return this;
}
// Drops the loaded data: src becomes null and itm_ends becomes the empty array. Returns this for chaining.
// NOTE(review): itm_0_bgn is not reset here -- confirm callers always Parse / Insert before reading again
public Xob_xdat_file Clear() {src = null; itm_ends = Int_.Ary_empty; return this;}
// itm_ends : cumulative end offset of each item, relative to itm_0_bgn
// itm_0_bgn: position in src where the first item starts (i.e. the header size)
private int[] itm_ends = Int_.Ary_empty; private int itm_0_bgn;
// Parses the header row of src and initializes itm_ends, itm_0_bgn and this.src.
// src     : raw file bytes (header row followed by item bodies)
// src_len : number of valid bytes in src; 0 throws immediately
// url     : only used in error messages
// Any exception raised while reading the header is rethrown wrapped with itm_count and url.
public Xob_xdat_file Parse(byte[] src, int src_len, Io_url url) {// SEE:NOTE_1;xdat format
if (src_len == 0) throw Exc_.new_("file cannot be empty for parse", "url", url.Raw());
// NOTE(review): tmp starts as the shared static Parse_tmp scratch array; concurrent Parse calls would write to the same array -- confirm single-threaded use
int itm_count = 0, tmp_len = Parse_tmp_len; int[] tmp = Parse_tmp;
try {
int slot_bgn = 0, slot_old = 0, slot_new = 0;
while (true) {
slot_bgn = itm_count * Len_idx_itm;	// each header slot is Len_idx_itm bytes wide
if (slot_bgn >= src_len) break;	// ran off the end without seeing the row delimiter
if (src[slot_bgn] == Byte_ascii.Nl) break;	// newline where a slot would start: end of header
int tmp_val = Base85_utl.XtoIntByAry(src, slot_bgn, slot_bgn + Offset_base85);
slot_new = slot_old + tmp_val;	// header stores lengths; accumulate into cumulative ends
int new_idx = itm_count + 1;
if (tmp_len < new_idx) {	// scratch too small: grow a local copy (the static array is not replaced)
tmp_len = new_idx * 2;
tmp = (int[])Array_.Resize(tmp, tmp_len);
}
tmp[itm_count] = slot_new;
itm_count = new_idx;
slot_old = slot_new;
}
int itm_ends_last = slot_new;
itm_ends = new int[itm_count];
for (int i = 0; i < itm_count; i++)
itm_ends[i] = tmp[i];
itm_0_bgn = slot_bgn + Len_itm_dlm;	// first item starts just after the header's newline
this.src = Bry_.Mid(src, 0, itm_ends_last + itm_0_bgn);	// keep only header + items; trailing bytes in src are dropped
} catch (Exception e) {throw Exc_.new_exc(e, "xo", "failed to parse idx", "itm_count", itm_count, "url", url.Raw());}
return this;
} private static final int Parse_tmp_len = 8 * 1024; static int[] Parse_tmp = new int[Parse_tmp_len];
static final int Len_itm_dlm = 1, Len_idx_itm = 6, Offset_base85 = 4; // 6 = 5 (base85_int) + 1 (new_line/pipe)
static final String GRP_KEY = "xowa.xdat_fil";
// Builds xdat bytes from orig, which is split on dlm; the first row is skipped (it is the empty header).
// Output = one base85 length + pipe per remaining row (length includes dlm), a newline,
// then each remaining row followed by dlm.
public static byte[] Rebuid_header(byte[] orig, byte[] dlm) {
	byte[][] rows = Bry_.Split(orig, dlm);
	int rows_len = rows.length;
	int dlm_len = dlm.length;
	Bry_bfr bfr = Bry_bfr.new_();
	// pass 1: header lengths; idx=1 skips the empty header row
	for (int idx = 1; idx < rows_len; idx++) {
		int len_with_dlm = rows[idx].length + dlm_len;
		bfr.Add_base85_len_5(len_with_dlm).Add_byte(Byte_ascii.Pipe);
	}
	bfr.Add_byte(Byte_ascii.Nl);
	// pass 2: row bodies; idx=1 skips the empty header row
	for (int idx = 1; idx < rows_len; idx++) {
		bfr.Add(rows[idx]);
		bfr.Add(dlm);
	}
	return bfr.Xto_bry_and_clear();
}
}
// Static helpers for Xob_xdat_file.
class Xob_xdat_file_ {
// Binary search over the items of an xdat byte array, comparing lkp against one delimited field of each item.
// itm_0_bgn   : position in src of the first item
// src         : file bytes
// itm_ends    : cumulative item ends, relative to itm_0_bgn; must not be empty
// lkp         : key to find; must not be null
// lkp_bgn     : offset within each item where the compared field starts
// lkp_dlm     : byte that terminates the compared field
// itm_end_adj : subtracted from each item's end before scanning
// exact       : when no item matches exactly, true returns String_.Find_none; false returns a neighboring index (lo or hi)
// xdat_itm    : Found_exact_y_() is called on it when an exact match is found
// returns the matching item index, a neighboring index, or String_.Find_none
public static int BinarySearch(int itm_0_bgn, byte[] src, int[] itm_ends, byte[] lkp, int lkp_bgn, byte lkp_dlm, int itm_end_adj, boolean exact, Xob_xdat_itm xdat_itm) {if (lkp == null) throw Exc_.new_null("lkp is null");
int itm_ends_len = itm_ends.length; if (itm_ends_len == 0) throw Exc_.new_("itm_ends_len cannot have 0 itms");
int lo = -1, hi = itm_ends_len - 1; // NOTE: -1 is necessary; see test
int itm_idx = (hi - lo) / 2;
int lkp_len = lkp.length;
int delta = 1;
boolean flagged = false;	// set once the search has made its single +1 / -1 step; the next pass with the same gap returns
while (true) {
int itm_bgn = itm_0_bgn + (itm_idx == 0 ? 0 : itm_ends[itm_idx - 1]);
int itm_end = itm_0_bgn + itm_ends[itm_idx] - itm_end_adj; // itm_end_adj to handle ttl .xdat and trailing \n
int fld_bgn = itm_bgn + lkp_bgn, lkp_pos = -1;
int comp = CompareAble_.Same;
for (int i = fld_bgn; i < itm_end; i++) { // see if current itm matches lkp; NOTE: that i < itm_end but will end much earlier (since itm_end includes page text)
byte b = src[i];
if (b == lkp_dlm) { // fld is done
if (lkp_pos != lkp_len - 1) comp = CompareAble_.More; // lkp has more chars than itm; lkp_dlm reached early
break;
}
lkp_pos = i - fld_bgn;
if (lkp_pos >= lkp_len) {
comp = CompareAble_.Less; // lkp has less chars than itm
break;
}
comp = (lkp[lkp_pos] & 0xff) - (b & 0xff); // subtract src[i] from lkp[lkp_pos] // PATCH.JAVA:need to convert to unsigned byte
if (comp != CompareAble_.Same) break; // if comp != 0 then not equal; break; otherwise if bytes are the same, then comp == 0;
}
// lkp sorts after the current item (or the item ran out before lkp did): move the lower bound up
if (comp > CompareAble_.Same || (comp == CompareAble_.Same && itm_end - fld_bgn < lkp_len)) {lo = itm_idx; delta = 1;}
else if (comp == CompareAble_.Same) {xdat_itm.Found_exact_y_(); return itm_idx;}
// lkp sorts before the current item: move the upper bound down
else if (comp < CompareAble_.Same) {hi = itm_idx; delta = -1;}
int itm_dif = hi - lo;
// if (itm_end - 1 > fld_bgn) Tfds.Write(comp, itm_dif, String_.new_u8(src, fld_bgn, itm_end - 1));
switch (itm_dif) {
case 0: return exact ? String_.Find_none : hi; // NOTE: can be 0 when src.length == 1 || 2; also, sometimes 0 in some situations
case -1:
if (flagged) return exact ? String_.Find_none : lo;
else {
itm_idx--;
flagged = true;
}
break;
case 1:
if (flagged) return exact ? String_.Find_none : hi;
else {
itm_idx++; // ++ to always take higher value when !exact???; EX: "ab,ad,af"
if (itm_idx >= itm_ends_len) return String_.Find_none; // NOTE: occurs when there is only 1 item
flagged = true;
}
break;
default:
itm_idx += ((itm_dif / 2) * delta);	// jump half the remaining gap in the direction of the last comparison
break;
}
}
}
}
/*
NOTE_1:xdat format
line 0 : delimited String of article lengths; each length is followed by a pipe; EX: "00012|00004|00005|\n"
line 1+: articles
pseudo example: (note that ints/dates will be replaced with base85 variants)
== BOF ==
00025|00024|00026|
2006-01-01 Ttl1 Abcd
2006-02-01 Ttl2 Abc
2006-03-01 Ttl3 Abcde
== EOF ==
other notes:
. itm_len is entire length of article including text, title, date and any other fields
. line 0 uses len instead of bgn or end b/c len is independent (single len can be changed without having to recalculate entire array)
. however, note that in memory, the cumulative item ends (itm_ends) are stored instead of the lengths; this makes article extraction quicker: getting the nth article means reading the nth item in the array;
. Parse is written for speed, not correctness; if correctness is needed, write a separate method that validates and call it before calling parse
*/

View File

@@ -0,0 +1,118 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*; import gplx.core.strings.*;
// Tests for Xob_xdat_file: positional reads, Find, Update, Insert, Sort and Rebuid_header.
// Fixtures are built by rdr_: the first String is the header row, the rest are items; each gets a trailing newline.
public class Xob_xdat_file_tst {
// Reads each item by index, then checks non-exact Find for keys that are present, below, between and above the stored keys.
@Test public void Find() {
Xob_xdat_file rdr = rdr_("!!!!%|!!!!%|!!!!%|!!!!%|!!!!%|", "0|b", "1|d", "2|f", "3|h", "4|j");
tst_ReadAt(rdr, 0, "0|b");
tst_ReadAt(rdr, 1, "1|d");
tst_ReadAt(rdr, 2, "2|f");
tst_ReadAt(rdr, 3, "3|h");
tst_ReadAt(rdr, 4, "4|j");
tst_Find(rdr, "b", 0, false);
tst_Find(rdr, "j", 4, false);
tst_Find(rdr, "a", 0, false);
tst_Find(rdr, "c", 1, false);
tst_Find(rdr, "k", 4, false);
}
// Replaces item 3 with a value that is one byte longer; the 4th header entry is expected to change from !!!!% to !!!!&.
@Test public void Update() {
Xob_xdat_file rdr = rdr_("!!!!%|!!!!%|!!!!%|!!!!%|!!!!%|", "0|b", "1|d", "2|f", "3|h", "4|j");
tst_Update(rdr, 3, "3|h1\n", String_.Concat_lines_nl_skip_last
( "!!!!%|!!!!%|!!!!%|!!!!&|!!!!%|"
, "0|b"
, "1|d"
, "2|f"
, "3|h1"
, "4|j"
, ""
));
}
// Appends a 6th item; the header is expected to gain one more entry and the item to appear last.
@Test public void Insert() {
Xob_xdat_file rdr = rdr_("!!!!%|!!!!%|!!!!%|!!!!%|!!!!%|", "0|b", "1|d", "2|f", "3|h", "4|j");
tst_Insert(rdr, "5|k\n", String_.Concat_lines_nl_skip_last
( "!!!!%|!!!!%|!!!!%|!!!!%|!!!!%|!!!!%|"
, "0|b"
, "1|d"
, "2|f"
, "3|h"
, "4|j"
, "5|k"
, ""
));
}
// Sorts shuffled items with a comparer that compares from byte 2 to the end of each item.
// NOTE: all items here have the same length, so this does not exercise header lengths changing position.
@Test public void Sort() {
Xob_xdat_file rdr = rdr_("!!!!%|!!!!%|!!!!%|!!!!%|!!!!%|", "4|j", "2|f", "0|b", "1|d", "3|h");
Bry_comparer_bgn_eos comparer = new Bry_comparer_bgn_eos(2);
tst_Sort(rdr, comparer, String_.Concat_lines_nl_skip_last
( "!!!!%|!!!!%|!!!!%|!!!!%|!!!!%|"
, "0|b"
, "1|d"
, "2|f"
, "3|h"
, "4|j"
, ""
));
}
// Input starts with an empty header row; the expected output has a generated header and the rows in their original order.
@Test public void Rebuild_header() {
String orig = String_.Concat_lines_nl("" , "4|j", "2|f", "0|b", "1|d", "3|h");
String expd = String_.Concat_lines_nl("!!!!%|!!!!%|!!!!%|!!!!%|!!!!%|" , "4|j", "2|f", "0|b", "1|d", "3|h");
Rebuild_header_tst(orig, expd);
}
private void Rebuild_header_tst(String orig, String expd) {
Tfds.Eq_str_lines(expd, String_.new_a7(Xob_xdat_file.Rebuid_header(Bry_.new_a7(orig), Bry_.new_a7("\n"))));
}
// scratch buffer shared by the Sort / Insert / Update helpers
Bry_bfr tmp = Bry_bfr.new_();
private void tst_Sort(Xob_xdat_file rdr, gplx.lists.ComparerAble comparer, String expd) {
rdr.Sort(tmp, comparer);
Chk_file(rdr, expd);
}
private void tst_Insert(Xob_xdat_file rdr, String new_val, String expd) {
rdr.Insert(tmp, Bry_.new_u8(new_val));
Chk_file(rdr, expd);
}
private void tst_Update(Xob_xdat_file rdr, int idx, String new_val, String expd) {
Xob_xdat_itm itm = new Xob_xdat_itm();
rdr.GetAt(itm, idx);
rdr.Update(tmp, itm, Bry_.new_u8(new_val));
Chk_file(rdr, expd);
}
// Saves rdr to mem/test.xdat, loads the file back as a String and compares it line-by-line with expd.
private void Chk_file(Xob_xdat_file rdr, String expd) {
Io_url url = Io_url_.new_fil_("mem/test.xdat");
rdr.Save(url);
String actl = Io_mgr.I.LoadFilStr(url);
Tfds.Eq_str_lines(expd, actl);
}
// Finds `find` in the field starting at offset 2 of each item (newline-terminated) and
// compares the first byte of the returned item, parsed as an int, with expd.
private void tst_Find(Xob_xdat_file rdr, String find, int expd, boolean exact) {
rdr.Find(itm, Bry_.new_u8(find), 2, Byte_ascii.Nl, exact);
int id = Bry_.Xto_int_or(Bry_.Mid(itm.Itm_bry(), 0, 1), -1);
Tfds.Eq(expd, id);
}
private void tst_ReadAt(Xob_xdat_file rdr, int i, String expd) {rdr.GetAt(itm, i); Tfds.Eq(expd, String_.new_u8(itm.Itm_bry()));}
// reusable item cursor for tst_Find / tst_ReadAt
Xob_xdat_itm itm = new Xob_xdat_itm();
// Builds a parsed Xob_xdat_file from lines: every line is appended with a trailing newline.
Xob_xdat_file rdr_(String... lines) {
String_bldr sb = String_bldr_.new_();
int len = lines.length;
for (int i = 0; i < len; i++) {
String line = lines[i];
sb.Add(line).Add_char_nl();
}
byte[] bry = Bry_.new_u8(sb.XtoStr());
return new Xob_xdat_file().Parse(bry, bry.length, Io_url_.Empty);
}
}

View File

@@ -0,0 +1,148 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.ios.*; import gplx.xowa.tdbs.*;
// Accumulates rows in a buffer together with an index of cumulative row ends, and flushes them to
// numbered files under root_dir. When at least one index entry exists, the flushed file starts with a
// header row (one base85 length + pipe per row, then a newline); otherwise only the raw buffer is written.
public class Xob_xdat_file_wtr {
	public static Xob_xdat_file_wtr new_file_(int fil_max, Io_url root_dir) {return new Xob_xdat_file_wtr(fil_max, root_dir, Io_stream_.Tid_raw);}
	public static Xob_xdat_file_wtr new_by_tid_(int fil_max, Io_url root_dir, byte dir_tid, byte tid) {return new Xob_xdat_file_wtr(fil_max, root_dir.GenSubDir(Xotdb_dir_info_.Tid_name(dir_tid) + Xotdb_dir_info.Wtr_dir(tid)), tid);}
	Xob_xdat_file_wtr(int fil_max, Io_url root_dir, byte wtr_tid) {
		this.fil_max = fil_max;
		this.root_dir = root_dir;
		fil_ext = Xotdb_dir_info.Wtr_ext(wtr_tid);
		bfr = Bry_bfr.new_(fil_max);
		idx = new int[fil_max / 8];				// ASSUME: any given row must at least be 8 bytes long
		Url_gen(fil_idx);						// set 1st url
		wtr = Io_stream_wtr_.new_by_tid_(wtr_tid);
	}	int fil_max; Io_stream_wtr wtr; byte[] fil_ext;
	public int Fil_idx() {return fil_idx;} public Xob_xdat_file_wtr Fil_idx_(int v) {fil_idx = v; return this;} private int fil_idx;
	public int Ns_ord_idx() {return ns_ord_idx;} public Xob_xdat_file_wtr Ns_ord_idx_(int v) {ns_ord_idx = v; return this;} private int ns_ord_idx; // NOTE: optional; needed for page cmd which will flush all wtrs, but needs ns_idx for stats
	@gplx.Internal protected Io_url Fil_url() {return fil_url;}
	@gplx.Internal protected int[] Idx() {return idx;} private int[] idx;
	public int Idx_pos() {return idx_pos;} private int idx_pos;
	public Bry_bfr Bfr() {return bfr;} Bry_bfr bfr;
	// Closes the row currently in the buffer: records the buffer length as the row's cumulative end.
	public Xob_xdat_file_wtr Add_idx(byte data_dlm) {return Add_idx_direct(bfr.Len(), data_dlm);}
	// Records itm_len as the next index entry. Unless data_dlm is Byte_ascii.Nil, data_dlm is first
	// appended to the buffer and counted in the recorded value.
	public Xob_xdat_file_wtr Add_idx_direct(int itm_len, byte data_dlm) {
		if (data_dlm != Byte_ascii.Nil) {		// write closing dlm for the data entry, unless Byte_ascii.Nil passed in
			bfr.Add_byte(data_dlm);
			++itm_len;
		}
		// resize idx if necessary; Math.max guards against a zero-length idx (fil_max < 8), where doubling would stay at 0
		if (idx_pos + 1 > idx.length) Idx_resize(Math.max(idx.length * 2, idx_pos + 1));
		idx[idx_pos++] = itm_len;
		return this;
	}
	public int Fil_len() {return ((idx_pos    ) * Len_idx_itm) + bfr.Len();}
	public boolean FlushNeeded(int writeLen) {return ((idx_pos + 1) * Len_idx_itm) + bfr.Len() + writeLen > fil_max;} // +1: pending entry will create new idx_itm
	// Writes the pending header (if any index entries exist) and buffer to the current url, then clears
	// state and advances to the next file url. Does nothing when the buffer is empty.
	public void Flush(Gfo_usr_dlg usr_dlg) {
		if (bfr.Len() == 0) return;				// nothing to flush
		if (this.Fil_len() > fil_max)			// NOTE: data can exceed prescribed len; EX: wikt:Category for Italian nouns is 1 MB+
			usr_dlg.Log_many(GRP_KEY, "flush_err", "--ctg exceeds len: expd_len=~{0} actl_len=~{1} url=~{2}", fil_max, this.Fil_len(), fil_url.Xto_api());	// expd = limit; actl = pending size
		try {
			wtr.Url_(fil_url).Open();
			if (idx_pos > 0)					// write idx; NOTE: if idx written, then .xdat; else .csv
				FlushIdx(wtr);
			wtr.Write(bfr.Bfr(), 0, bfr.Len());	// write data;
			wtr.Flush();
		}
		finally {wtr.Rls();}
		Clear();
		this.Url_gen(++fil_idx);
	}
	// Writes the header row to wtr: for each index entry, the row length (this end - previous end) as
	// base85 followed by a pipe; the row is terminated by a newline.
	public void FlushIdx(Io_stream_wtr wtr) {
		int idx_bry_len = (idx_pos * Len_idx_itm) + 1;	// 1=\n.length
		byte[] idx_bry = new byte[idx_bry_len];
		int prv_pos = 0;								// NOTE: prv_pos needed b/c idx[] stores data_end, not data_len
		for (int i = 0; i < idx_pos; i++) {
			int idx_bry_pos = i * Len_idx_itm;
			int cur_pos = idx[i];
			Base85_utl.XtoStrByAry(cur_pos - prv_pos, idx_bry, idx_bry_pos, Len_base85);
			idx_bry[idx_bry_pos + Len_base85] = Dlm_fld;
			prv_pos = cur_pos;
		}
		idx_bry[idx_bry_len - 1] = Byte_ascii.Nl;
		wtr.Write(idx_bry, 0, idx_bry_len);
	}
	static final int Len_idx_itm = 6, Len_base85 = 5;
	public void Clear() {idx_pos = 0; bfr.Clear();}
	public void Rls() {bfr.Rls(); idx = null;}
	public void Url_gen_add() {Url_gen(++fil_idx);}
	private void Url_gen(int newIdx) {fil_url = Xotdb_fsys_mgr.Url_fil(root_dir, newIdx, fil_ext);} Io_url fil_url; Io_url root_dir;
	private void Idx_resize(int newLen) {idx = (int[])Array_.Resize(idx, newLen);}
	static final String GRP_KEY = "xowa.bldr.xdat_wtr";
	private static final byte Dlm_fld = Byte_ascii.Pipe;
}
// In-place quicksort over the first ary_len elements of an Object array, ordered by a ComparerAble.
// NOTE: the array and comparer are held in instance fields of the shared singleton during the sort,
// so a single instance must not be used by two sorts at the same time.
class SortAlgo_quick {// quicksort
	Object[] ary; int ary_len; gplx.lists.ComparerAble comparer;
	// Sorts ary[0 .. ary_len - 1]; a null array or fewer than 2 elements is a no-op.
	public void Sort(Object[] ary, int ary_len, gplx.lists.ComparerAble comparer) {
		if (ary == null || ary_len < 2) return;
		this.ary = ary; this.ary_len = ary_len; this.comparer = comparer;
		Sort_recurse(0, ary_len - 1);
	}
	private void Sort_recurse(int lo, int hi) {
		int i = lo, j = hi;
		int mid_idx = lo + (hi-lo)/2;
		Object mid = ary[mid_idx];									// get mid itm
		while (i <= j) {											// divide into two lists
			// compare by sign, not by equality with Less / More: a comparer may return any negative / positive value
			while (comparer.compare(ary[i], mid) < CompareAble_.Same)	// if lhs.cur < mid, then get next from lhs
				i++;
			while (comparer.compare(ary[j], mid) > CompareAble_.Same)	// if rhs.cur > mid, then get next from rhs
				j--;
			// lhs.cur >= mid && rhs.cur <= mid; switch lhs.cur and rhs.cur; increase i and decrease j
			if (i <= j) {
				Object tmp = ary[i];
				ary[i] = ary[j];
				ary[j] = tmp;
				i++;
				j--;
			}
		}
		if (lo < j) Sort_recurse(lo, j);
		if (i < hi) Sort_recurse(i, hi);
	}
	public static final SortAlgo_quick _ = new SortAlgo_quick(); SortAlgo_quick() {}
}

View File

@@ -0,0 +1,54 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import org.junit.*;
// Test for Xob_xdat_file_wtr: rows are appended to the buffer, closed with Add_idx, and flushed to an in-memory file.
public class Xob_xdat_file_wtr_tst {
// Writes three rows (7, 8 and 8 bytes including the newline) and checks the flushed file:
// a header row with one entry per row followed by the three rows.
@Test public void Write() {
Io_mgr.I.InitEngine_mem();
Io_url dir = Io_url_.mem_dir_("mem/dir");
Xob_xdat_file_wtr wtr = Xob_xdat_file_wtr.new_file_(1000, dir);
tst_Write(wtr, "<1", "<1");
tst_Write(wtr, ">a", "<1>a");
tst_Write(wtr, ">b", "<1>a>b");
tst_Add_idx(wtr, 7);
wtr.Bfr().Add(Bry_.new_u8("<2>b>cc"));
tst_Add_idx(wtr, 15);
wtr.Bfr().Add(Bry_.new_u8("<3>c>dd"));
tst_Add_idx(wtr, 23);
tst_Flush(wtr, String_.Concat
( "!!!!(|!!!!)|!!!!)|\n"
, "<1>a>b\n"
, "<2>b>cc\n"
, "<3>c>dd\n"
));
}
// Appends val to the writer's buffer and checks the buffer's full contents.
private void tst_Write(Xob_xdat_file_wtr wtr, String val, String expd) {
wtr.Bfr().Add(Bry_.new_u8(val));
Tfds.Eq(expd, String_.new_u8(wtr.Bfr().Bfr(), 0, wtr.Bfr().Len()));
}
// Closes the current row with a newline and checks the last index entry (the cumulative end, including the newline).
private void tst_Add_idx(Xob_xdat_file_wtr wtr, int expd) {
wtr.Add_idx(Byte_ascii.Nl);
Tfds.Eq(expd, wtr.Idx()[wtr.Idx_pos() - 1]);
}
// Captures the url BEFORE flushing (Flush advances to the next url), flushes, and compares the file contents.
private void tst_Flush(Xob_xdat_file_wtr wtr, String expd) {
Io_url url = wtr.Fil_url();
wtr.Flush(Gfo_usr_dlg_.Test());
String actl = Io_mgr.I.LoadFilStr(url);
Tfds.Eq(expd, actl);
}
}

View File

@@ -0,0 +1,29 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
// Cursor describing one item inside an xdat byte array: the backing bytes, the item's index,
// and its bgn / end positions. Positions are -1 until set; setters return this for chaining.
public class Xob_xdat_itm {
	private byte[] src;
	private int itm_bgn = -1;
	private int itm_end = -1;
	private int itm_idx = -1;
	private boolean found_exact;

	public byte[] Src() {return src;}
	public Xob_xdat_itm Src_(byte[] v) {
		src = v;
		return this;
	}
	public int Itm_bgn() {return itm_bgn;}
	public Xob_xdat_itm Itm_bgn_(int v) {
		itm_bgn = v;
		return this;
	}
	public int Itm_end() {return itm_end;}
	public Xob_xdat_itm Itm_end_(int v) {
		itm_end = v;
		return this;
	}
	public int Itm_idx() {return itm_idx;}
	public Xob_xdat_itm Itm_idx_(int v) {
		itm_idx = v;
		return this;
	}
	// Resets every field to its initial value.
	public void Clear() {
		itm_idx = -1;
		itm_end = -1;
		itm_bgn = -1;
		src = null;
		found_exact = false;
	}
	public boolean Found_exact() {return found_exact;}
	public Xob_xdat_itm Found_exact_y_() {
		found_exact = true;
		return this;
	}
	// True while no begin position has been set.
	public boolean Missing() {return itm_bgn == -1;}
	// Copy of the item's bytes: src from itm_bgn to itm_end.
	public byte[] Itm_bry() {return Bry_.Mid(src, itm_bgn, itm_end);}
}

View File

@@ -0,0 +1,63 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.core.btries.*; import gplx.xowa.wikis.data.tbls.*;
// Dump worker that scans each page's text for registered "hook" byte sequences and hands control
// to the Xobd_parser_wkr that registered the hook.
public class Xobd_parser implements Xobd_wkr {
	private Btrie_slim_mgr trie = Btrie_slim_mgr.ci_ascii_();	// NOTE:ci.ascii:MW_const.en; ctg.v1 assumes [[Category:
	private List_adp wkr_list = List_adp_.new_();
	public String Wkr_key() {return KEY;} static final String KEY = "page_parser";
	public void Wkr_ini(Xob_bldr bldr) {}
	public void Wkr_add(Xobd_parser_wkr wkr) {wkr_list.Add(wkr);}
	// Starts every registered worker, then registers each of its hooks in the trie with the worker as the value.
	public void Wkr_bgn(Xob_bldr app) {
		int wkr_list_len = wkr_list.Count();
		for (int i = 0; i < wkr_list_len; i++) {
			Xobd_parser_wkr wkr = (Xobd_parser_wkr)wkr_list.Get_at(i);
			wkr.Wkr_bgn(app);
			int hooks_len = wkr.Wkr_hooks().Count();
			for (int j = 0; j < hooks_len; j++) {
				byte[] bry = (byte[])wkr.Wkr_hooks().Get_at(j);
				trie.Add_obj(bry, wkr);
			}
		}
	}
	// Walks the page text: where no hook matches, advance one byte; where a hook matches, the matching
	// worker runs and returns the position to resume from.
	public void Wkr_run(Xowd_page_itm page) {
		byte[] src = page.Text(); int src_len = src.length;
		int pos = 0;
		// "<" rather than "==": a worker that returns a position past the end must still terminate the scan
		while (pos < src_len) {
			Object o = trie.Match_bgn(src, pos, src_len);
			if (o == null)
				++pos;
			else {
				Xobd_parser_wkr wkr = (Xobd_parser_wkr)o;
				pos = wkr.Wkr_run(page, src, src_len, pos, trie.Match_pos());
			}
		}
	}
	// Ends every registered worker, in registration order.
	public void Wkr_end() {
		int wkr_list_len = wkr_list.Count();
		for (int i = 0; i < wkr_list_len; i++) {
			Xobd_parser_wkr wkr = (Xobd_parser_wkr)wkr_list.Get_at(i);
			wkr.Wkr_end();
		}
	}
	public void Wkr_print() {}
	public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
		throw Exc_.new_unimplemented();
	}
}

View File

@@ -0,0 +1,25 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.xowa.wikis.data.tbls.*;
// Worker plugged into Xobd_parser; it is invoked when one of its hooks is matched in a page's text.
public interface Xobd_parser_wkr extends GfoInvkAble {
// hooks (read by Xobd_parser as byte[] entries) that trigger this worker
Ordered_hash Wkr_hooks();
// called once by Xobd_parser.Wkr_bgn, before the hooks are read
void Wkr_bgn(Xob_bldr bldr);
// called on a hook match; Xobd_parser passes the match position as bgn and the trie's Match_pos as end,
// and resumes scanning at the returned position
int Wkr_run(Xowd_page_itm page, byte[] src, int src_len, int bgn, int end);
// called once by Xobd_parser.Wkr_end
void Wkr_end();
}

View File

@@ -0,0 +1,101 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.ios.*; import gplx.xowa.bldrs.*; import gplx.xowa.bldrs.xmls.*; import gplx.xowa.wikis.data.tbls.*;
// Build command that reads the wiki dump page-by-page and feeds every page to each registered Xobd_wkr.
public class Xobd_rdr implements Xob_cmd {
private Xob_bldr bldr; private Xowe_wiki wiki;
public Xobd_rdr(Xob_bldr bldr, Xowe_wiki wiki) {this.bldr = bldr; this.wiki = wiki;}
public String Cmd_key() {return KEY;} public static final String KEY = "dump_mgr";
// Calls Wkr_ini on every registered worker.
public void Cmd_init(Xob_bldr bldr) {
Xobd_wkr[] wkr_ary = (Xobd_wkr[])wkrs.To_ary(Xobd_wkr.class); int wkr_ary_len = wkr_ary.length;
for (int i = 0; i < wkr_ary_len; i++)
wkr_ary[i].Wkr_ini(bldr);
}
public void Cmd_bgn(Xob_bldr bldr) {}
public void Cmd_end() {}
// Main loop: Wkr_bgn on all workers, parse pages until the parser returns Bry_.NotFound, run all workers
// on each page, then Wkr_end on all workers.
// A failure inside a worker is logged and the loop continues with the next page; a failure in reading /
// parsing is logged and rethrown. The reader is always released.
public void Cmd_run() {
Xobd_wkr[] wkr_ary = (Xobd_wkr[])wkrs.To_ary(Xobd_wkr.class); int wkr_ary_len = wkr_ary.length;
for (int i = 0; i < wkr_ary_len; i++)
wkr_ary[i].Wkr_bgn(bldr);
Io_buffer_rdr fil = Io_buffer_rdr.Null; Xowd_page_itm page = new Xowd_page_itm(); Xow_ns_mgr ns_mgr = wiki.Ns_mgr();
Xob_xml_parser parser = bldr.Dump_parser().Data_bfr_len_(Io_mgr.Len_mb);
long fil_len = 0;
Gfo_usr_dlg usr_dlg = bldr.App().Usr_dlg();
try {
gplx.ios.Io_stream_rdr src_rdr = wiki.Import_cfg().Src_rdr();
fil = Io_buffer_rdr.new_(src_rdr, optRdrBfrSize);
fil_len = fil.Fil_len();
if (src_rdr.Tid() == gplx.ios.Io_stream_.Tid_bzip2) fil_len = (fil_len * 100) / 18; // HACK: no way to get actual file progress; assume 18% compression
// fil.Seek(bldr.Opts().ResumeAt());
int prv_pos = 0;
while (true) {
int cur_pos = parser.Parse_page(page, usr_dlg, fil, fil.Bfr(), prv_pos, ns_mgr); if (cur_pos == Bry_.NotFound) break;
// progress is printed only when the returned position went backwards; presumably this means the read buffer was refilled -- TODO confirm
if (cur_pos < prv_pos)
bldr.Print_prog_msg(fil.Fil_pos(), fil_len, 1, optRdrFillFmt, Int_.Xto_str_pad_bgn_zero((int)(fil.Fil_pos() / Io_mgr.Len_mb), Int_.DigitCount((int)(fil.Fil_len() / Io_mgr.Len_mb))), "", String_.new_u8(page.Ttl_full_db()));
prv_pos = cur_pos;
try {
for (int i = 0; i < wkr_ary_len; i++)
wkr_ary[i].Wkr_run(page);
}
// per-page failure: log percent|title|message and keep going with the next page
catch (Exception e) {
Exc_.Noop(e);
long dividend = fil.Fil_pos();
if (dividend >= fil_len) dividend = fil_len - 1; // prevent % from going over 100
String msg = DecimalAdp_.CalcPctStr(dividend, fil_len, "00.00") + "|" + String_.new_u8(page.Ttl_full_db()) + "|\n" + Err_.Message_lang(e) + "|" + Xot_tmpl_wtr.Err_string; Xot_tmpl_wtr.Err_string = "";
bldr.Usr_dlg().Log_wkr().Log_to_session(msg);
ConsoleAdp._.WriteLine(msg);
}
}
}
// failure outside the per-page worker calls: log, then abort the command
catch (Exception e) {
String msg = Err_.Message_lang(e);
bldr.Usr_dlg().Log_wkr().Log_to_session(msg);
ConsoleAdp._.WriteLine(msg);
throw Exc_.new_exc(e, "xo", "error while reading dump");
}
finally {fil.Rls();}
bldr.Usr_dlg().Prog_none("", "", "reading completed: performing post-processing clean-up");
for (int i = 0; i < wkr_ary_len; i++)
wkr_ary[i].Wkr_end();
}
// Calls Wkr_print on every registered worker.
public void Cmd_term() {
Xobd_wkr[] wkr_ary = (Xobd_wkr[])wkrs.To_ary(Xobd_wkr.class); int wkr_ary_len = wkr_ary.length;
for (int i = 0; i < wkr_ary_len; i++)
wkr_ary[i].Wkr_print();
}
// workers are keyed by Wkr_key
public void Wkr_add(Xobd_wkr wkr) {wkrs.Add(wkr.Wkr_key(), wkr);} private Ordered_hash wkrs = Ordered_hash_.new_();
public Xobd_wkr Wkr_get(String key) {return (Xobd_wkr)wkrs.Get_by(key);}
// Returns the shared Xobd_parser, creating and registering it as a worker on first use.
public Xobd_parser Page_parser_assert() {
if (page_parser == null) {
page_parser = new Xobd_parser();
this.Wkr_add(page_parser);
}
return page_parser;
} private Xobd_parser page_parser;
// Returns the last url in dir that matches filter, or null when nothing matches.
public static Io_url Find_fil_by(Io_url dir, String filter) {
Io_url[] fil_ary = Io_mgr.I.QueryDir_args(dir).FilPath_(filter).ExecAsUrlAry();
int fil_ary_len = fil_ary.length;
return fil_ary_len == 0 ? null : fil_ary[fil_ary_len - 1]; // return last
}
// size passed to Io_buffer_rdr.new_ when opening the dump
int optRdrBfrSize = 8 * Io_mgr.Len_mb;
// format of the progress message: MB read, then two more args (Cmd_run passes "" and the page title)
String optRdrFillFmt = "reading ~{0} MB: ~{1} ~{2}";
static final String GRP_KEY = "xowa.bldr.rdr";
public Object Invk(GfsCtx ctx, int ikey, String k, GfoMsg m) {
throw Exc_.new_unimplemented();
}
}

View File

@@ -0,0 +1,27 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.xowa.wikis.data.tbls.*;
// Worker driven by Xobd_rdr while a dump is read.
public interface Xobd_wkr extends GfoInvkAble {
// key under which Xobd_rdr registers and looks up the worker
String Wkr_key();
// called from Xobd_rdr.Cmd_init
void Wkr_ini(Xob_bldr bldr);
// called at the start of Xobd_rdr.Cmd_run, before any page is read
void Wkr_bgn(Xob_bldr bldr);
// called once for every page parsed from the dump
void Wkr_run(Xowd_page_itm page);
// called at the end of Xobd_rdr.Cmd_run, after the read loop
void Wkr_end();
// called from Xobd_rdr.Cmd_term
void Wkr_print();
}

View File

@@ -0,0 +1,41 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.ios.*; import gplx.lists.*;
// Static helpers that sort dump files with Io_sort: split the inputs into sorted chunks, then merge them.
public class Xobdc_merger {
	// Same as the 7-arg overload, using Io_sort_split_itm_sorter._ as the row comparer.
	public static void Basic(Gfo_usr_dlg usr_dlg, Io_url_gen dump_url_gen, Io_url sort_dir, int memory_max, Io_line_rdr_key_gen key_gen, Io_sort_cmd make_cmd) {
		Basic(usr_dlg, dump_url_gen, sort_dir, memory_max, Io_sort_split_itm_sorter._, key_gen, make_cmd);
	}
	// Splits the files of dump_url_gen into chunks under sort_dir, then merges the chunk files through make_cmd.
	public static void Basic(Gfo_usr_dlg usr_dlg, Io_url_gen dump_url_gen, Io_url sort_dir, int memory_max, ComparerAble row_comparer, Io_line_rdr_key_gen key_gen, Io_sort_cmd make_cmd) {
		Io_sort sorter = new Io_sort().Memory_max_(memory_max);
		Io_url_gen chunk_url_gen = Io_url_gen_.dir_(sort_dir);
		sorter.Split(usr_dlg, dump_url_gen, chunk_url_gen, row_comparer, key_gen);
		Io_url[] chunk_urls = chunk_url_gen.Prv_urls();
		sorter.Merge(usr_dlg, chunk_urls, row_comparer, key_gen, make_cmd);
	}
	// Runs Basic once per non-null writer: output goes to make_root/<ns>/<type>, sort chunks to tmp_root/<ns>/sort.
	public static void Ns(Gfo_usr_dlg usr_dlg, Xob_tmp_wtr[] ttl_wtrs, String type, Io_url tmp_root, Io_url make_root, int memory_max, Io_line_rdr_key_gen key_gen, Io_make_cmd make_cmd) {
		int wtrs_len = ttl_wtrs.length;
		for (int idx = 0; idx < wtrs_len; idx++) {
			Xob_tmp_wtr cur_wtr = ttl_wtrs[idx];
			if (cur_wtr != null) {
				Xow_ns ns = cur_wtr.Ns_itm();
				Io_url make_dir = make_root.GenSubDir_nest(ns.Num_str(), type);
				make_cmd.Make_dir_(make_dir);
				Io_url_gen dump_url_gen = cur_wtr.Url_gen();
				Io_url sort_dir = tmp_root.GenSubDir_nest(ns.Num_str(), "sort");
				Basic(usr_dlg, dump_url_gen, sort_dir, memory_max, key_gen, make_cmd);
			}
		}
	}
}

View File

@@ -0,0 +1,53 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.xowa; import gplx.*;
import gplx.ios.*; import gplx.lists.*;
import gplx.xowa.tdbs.*;
// Builds a registry with one row per file: fil_idx|first_key|last_key|itm_count
class Io_sort_filCmd_reg implements Io_sort_filCmd { // 123|bgn|end|1
	public Io_sort_filCmd_reg() {}
	// Counts the row and remembers its key, so that Fil_end can write the last key of the file.
	public void Bfr_add(Io_line_rdr stream) {
		++itm_count;
		int key_bgn = stream.Key_pos_bgn(), key_end = stream.Key_pos_end();
		int key_len = key_end - key_bgn;
		if (key_len > prv_key.length) prv_key = new byte[key_len * 2];	// grow: a key longer than the initial 1024 bytes would not fit in the buffer
		Bry_.Copy_by_pos(stream.Bfr(), key_bgn, key_end, prv_key, 0);
		prv_key_len = key_len;
	}	byte[] prv_key = new byte[1024]; int prv_key_len = 0;
	// Starts a registry row: file index and the key at the stream's current position.
	public void Fil_bgn(Io_line_rdr stream) {
		bfr.Add_int_variable(fil_idx++).Add_byte(Byte_ascii.Pipe);
		bfr.Add_mid(stream.Bfr(), stream.Key_pos_bgn(), stream.Key_pos_end()).Add_byte(Byte_ascii.Pipe);
	}
	// Finishes the registry row: last key seen by Bfr_add and the item count; resets the count.
	public void Fil_end() {
		bfr.Add_mid(prv_key, 0, prv_key_len).Add_byte(Byte_ascii.Pipe)
			.Add_int_variable(itm_count).Add_byte(Byte_ascii.Nl);
		itm_count = 0;
	}
	// Saves the accumulated registry rows to fil.
	public void Flush(Io_url fil) {
		Io_mgr.I.SaveFilBry(fil, bfr.Bfr(), bfr.Len());
	}	private Bry_bfr bfr = Bry_bfr.new_(); int fil_idx = 0; int itm_count = 0;
}
// Url generator that maps a running file index to a url under root_dir via Xotdb_fsys_mgr.Url_fil.
class Io_url_gen_nest implements gplx.ios.Io_url_gen {
	public Io_url Cur_url() {return cur_url;} Io_url cur_url;
	// Generates the url for the current index, makes it the current url, and advances the index.
	public Io_url Nxt_url() {cur_url = Xotdb_fsys_mgr.Url_fil(root_dir, fil_idx++, ext); return cur_url;}
	// Returns the urls for indexes 0 .. fil_idx - 1 (i.e. every url handed out by Nxt_url so far).
	public Io_url[] Prv_urls() {
		int len = fil_idx;
		Io_url[] rv = new Io_url[len];
		for (int i = 0; i < len; i++) {
			// use the loop index; incrementing fil_idx here would return the wrong urls, corrupt the generator state and run past rv
			rv[i] = Xotdb_fsys_mgr.Url_fil(root_dir, i, ext);
		}
		return rv;
	}
	public void Del_all() {if (Io_mgr.I.ExistsDir(root_dir)) Io_mgr.I.DeleteDirDeep(root_dir);}
	public Io_url_gen_nest(Io_url root_dir, String ext) {this.root_dir = root_dir; this.ext = Bry_.new_u8(ext);} Io_url root_dir; byte[] ext; int fil_idx;
}