mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
update Command-Line/dumps[D[D[D[D
This commit is contained in:
@@ -68,7 +68,7 @@
|
||||
<a href="#Requirements"><span class="tocnumber">2</span> <span class="toctext">Requirements</span></a>
|
||||
<ul>
|
||||
<li class="toclevel-2 tocsection-3">
|
||||
<a href="#commons.wikimedia.org_.28thum"><span class="tocnumber">2.1</span> <span class="toctext">commons.wikimedia.org (thum</span></a>
|
||||
<a href="#commons.wikimedia.org"><span class="tocnumber">2.1</span> <span class="toctext">commons.wikimedia.org</span></a>
|
||||
</li>
|
||||
<li class="toclevel-2 tocsection-4">
|
||||
<a href="#www.wikidata.org"><span class="tocnumber">2.2</span> <span class="toctext">www.wikidata.org</span></a>
|
||||
@@ -158,7 +158,7 @@
|
||||
<span class="mw-headline" id="Requirements">Requirements</span>
|
||||
</h2>
|
||||
<h3>
|
||||
<span class="mw-headline" id="commons.wikimedia.org_.28thum">commons.wikimedia.org (thum</span>
|
||||
<span class="mw-headline" id="commons.wikimedia.org">commons.wikimedia.org</span>
|
||||
</h3>
|
||||
<p>
|
||||
You will need the latest version of commons.wikimedia.org. Note that if you have an older version, you will have missing images or wrong size information.
|
||||
@@ -304,6 +304,10 @@
|
||||
<pre class='code'>
|
||||
app.bldr.pause_at_end_('n');
|
||||
app.scripts.run_file_by_type('xowa_cfg_app');
|
||||
app.cfg.set_temp('app', 'xowa.app.web.enabled', 'y');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.text', '0');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.html', '0');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.file', '0');
|
||||
app.bldr.cmds {
|
||||
// build commons database; this only needs to be done once, whenever commons is updated
|
||||
add ('commons.wikimedia.org' , 'util.cleanup') {delete_all = 'y';}
|
||||
@@ -391,9 +395,11 @@ app.bldr.cmds {
|
||||
// cleanup all downloaded files as well as temporary files
|
||||
add ('simple.wikipedia.org' , 'util.cleanup') {delete_tmp = 'y'; delete_by_match('*.xml|*.sql|*.bz2|*.gz');}
|
||||
|
||||
// OBSOLETE: use v2
|
||||
// v1 html generator
|
||||
// parse every page in the listed namespace and gather data on their lnkis.
|
||||
// this step will take the longest amount of time.
|
||||
/*
|
||||
add ('simple.wikipedia.org' , 'file.lnki_temp') {
|
||||
// save data every # of pages
|
||||
commit_interval = 10000;
|
||||
@@ -426,13 +432,14 @@ app.bldr.cmds {
|
||||
hzip_diff = 'y';
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
// v2 html generator; allows for multi-threaded / multi-machine builds
|
||||
/*
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.init') {cfg {ns_ids = '0|4|14';}}
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.init') {cfg {ns_ids = '0|4|14|8';}}
|
||||
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.exec') {
|
||||
cfg {
|
||||
num_wkrs = 8; load_all_templates = 'y'; cleanup_interval = 50; hzip_enabled = 'y'; hdiff_enabled ='y'; manual_now = '2016-08-01 01:02:03';
|
||||
load_all_imglinks = 'y';
|
||||
|
||||
// uncomment the following 3 lines if using the build script as a "worker" helping a "server"
|
||||
// num_pages_in_pool = 32000;
|
||||
@@ -443,8 +450,7 @@ app.bldr.cmds {
|
||||
|
||||
// note that if multi-machine mode is enabled, all worker directories must be manually copied to the server directory (a build command will be added later)
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.make');
|
||||
*/
|
||||
|
||||
|
||||
// aggregate the lnkis
|
||||
add ('simple.wikipedia.org' , 'file.lnki_regy');
|
||||
|
||||
@@ -492,7 +498,10 @@ app.bldr.run;
|
||||
<pre class='code'>
|
||||
app.bldr.pause_at_end_('n');
|
||||
app.scripts.run_file_by_type('xowa_cfg_app');
|
||||
app.cfgs.get('app.user.cfg.security.web_access_enabled', 'app').val = 'y';
|
||||
app.cfg.set_temp('app', 'xowa.app.web.enabled', 'y');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.text', '0');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.html', '0');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.file', '0');
|
||||
app.bldr.cmds {
|
||||
/*
|
||||
add ('www.wikidata.org' , 'util.cleanup') {delete_all = 'y';}
|
||||
@@ -502,56 +511,47 @@ app.bldr.cmds {
|
||||
add ('www.wikidata.org' , 'util.download') {dump_type = 'image';}
|
||||
add ('www.wikidata.org' , 'text.init');
|
||||
add ('www.wikidata.org' , 'text.page');
|
||||
add ('www.wikidata.org' , 'text.cat.core');
|
||||
add ('www.wikidata.org' , 'text.cat.link');
|
||||
add ('www.wikidata.org' , 'text.cat.hidden');
|
||||
add ('www.wikidata.org' , 'text.term');
|
||||
add ('www.wikidata.org' , 'text.css');
|
||||
add ('www.wikidata.org' , 'wiki.image');
|
||||
add ('www.wikidata.org' , 'file.page_regy') {build_commons = 'y'}
|
||||
add ('www.wikidata.org' , 'wiki.page_dump.make');
|
||||
add ('www.wikidata.org' , 'wiki.page_props');
|
||||
add ('www.wikidata.org' , 'wiki.categorylinks');
|
||||
add ('www.wikidata.org' , 'wiki.redirect') {commit_interval = 1000; progress_interval = 100; cleanup_interval = 100;}
|
||||
add ('www.wikidata.org' , 'util.cleanup') {delete_tmp = 'y'; delete_by_match('*.xml|*.sql|*.bz2|*.gz');}
|
||||
// add ('www.wikidata.org' , 'util.cleanup') {delete_tmp = 'y'; delete_by_match('*.xml|*.sql|*.bz2|*.gz');}
|
||||
add ('commons.wikimedia.org' , 'util.cleanup') {delete_all = 'y';}
|
||||
add ('commons.wikimedia.org' , 'util.download') {dump_type = 'pages-articles';}
|
||||
add ('commons.wikimedia.org' , 'util.download') {dump_type = 'image';}
|
||||
add ('commons.wikimedia.org' , 'util.download') {dump_type = 'categorylinks';}
|
||||
add ('commons.wikimedia.org' , 'util.download') {dump_type = 'page_props';}
|
||||
add ('commons.wikimedia.org' , 'util.download') {dump_type = 'image';}
|
||||
add ('commons.wikimedia.org' , 'text.init');
|
||||
add ('commons.wikimedia.org' , 'text.page');
|
||||
add ('commons.wikimedia.org' , 'text.cat.core');
|
||||
add ('commons.wikimedia.org' , 'text.cat.link');
|
||||
add ('commons.wikimedia.org' , 'text.cat.hidden');
|
||||
add ('commons.wikimedia.org' , 'text.term');
|
||||
add ('commons.wikimedia.org' , 'text.css');
|
||||
add ('commons.wikimedia.org' , 'wiki.image');
|
||||
add ('commons.wikimedia.org' , 'file.page_regy') {build_commons = 'y'}
|
||||
add ('commons.wikimedia.org' , 'wiki.page_dump.make');
|
||||
add ('commons.wikimedia.org' , 'wiki.redirect') {commit_interval = 1000; progress_interval = 100; cleanup_interval = 100;}
|
||||
*/
|
||||
/*
|
||||
// en.wikipedia.org
|
||||
// add ('commons.wikimedia.org' , 'util.cleanup') {delete_tmp = 'y'; delete_by_match('*.xml|*.sql|*.bz2|*.gz');}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'pages-articles';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'pagelinks';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'categorylinks';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'page_props';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'image';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'pagelinks';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'imagelinks';}
|
||||
*/
|
||||
/*
|
||||
// en.wikipedia.org
|
||||
add ('en.wikipedia.org' , 'text.init');
|
||||
add ('en.wikipedia.org' , 'text.page') {redirect_id_enabled = 'y';}
|
||||
add ('en.wikipedia.org' , 'text.search');
|
||||
add ('en.wikipedia.org' , 'text.css');
|
||||
add ('en.wikipedia.org' , 'text.cat.core');
|
||||
add ('en.wikipedia.org' , 'text.cat.link');
|
||||
add ('en.wikipedia.org' , 'text.cat.hidden');
|
||||
add ('en.wikipedia.org' , 'text.term');
|
||||
// add ('en.wikipedia.org' , 'wiki.redirect') {commit_interval = 1000; progress_interval = 100; cleanup_interval = 100;}
|
||||
add ('en.wikipedia.org' , 'wiki.image');
|
||||
add ('en.wikipedia.org' , 'wiki.page_dump.make');
|
||||
add ('en.wikipedia.org' , 'wiki.page_link');
|
||||
add ('en.wikipedia.org' , 'wiki.imagelinks');
|
||||
add ('en.wikipedia.org' , 'wiki.page_dump.make');
|
||||
add ('en.wikipedia.org' , 'wiki.redirect') {commit_interval = 1000; progress_interval = 100; cleanup_interval = 100;}
|
||||
add ('en.wikipedia.org' , 'wiki.page_link');
|
||||
add ('en.wikipedia.org' , 'search.page__page_score') {iteration_max = 100;}
|
||||
add ('en.wikipedia.org' , 'search.link__link_score') {page_rank_enabled = 'y';
|
||||
score_adjustment_mgr {
|
||||
@@ -566,32 +566,43 @@ app.bldr.cmds {
|
||||
}
|
||||
}
|
||||
add ('en.wikipedia.org' , 'search.word__link_count')
|
||||
/*
|
||||
// SELECT * FROM xowa_cfg WHERE cfg_key = 'props.modified_latest';
|
||||
add ('en.wikipedia.org' , 'file.lnki_temp') {
|
||||
commit_interval = 10000; progress_interval = 50; cleanup_interval = 50; select_size = 25;
|
||||
ns_ids = '0|4|14|100';
|
||||
hdump_bldr {enabled = 'y'; hzip_enabled = 'y'; hzip_diff = 'y';}
|
||||
}
|
||||
add ('en.wikipedia.org' , 'file.lnki_regy');
|
||||
add ('commons.wikimedia.org' , 'file.page_regy') {build_commons = 'y'}
|
||||
add ('en.wikipedia.org' , 'file.page_regy') {build_commons = 'n';}
|
||||
add ('en.wikipedia.org' , 'wiki.image');
|
||||
add ('en.wikipedia.org' , 'file.orig_regy');
|
||||
// SELECT * FROM orig_regy WHERE lnki_ttl = 'BSicon_CONTr.svg';
|
||||
// SELECT * FROM page_regy WHERE src_ttl = 'BSicon_CONTr.svg';
|
||||
add ('en.wikipedia.org' , 'file.xfer_temp.thumb');
|
||||
// SELECT Count(*) FROM xfer_regy WHERE xfer_status = 0;
|
||||
// SELECT * FROM xfer_regy WHERE xfer_status = 0 AND lnki_page_id = 372692; --en.w:Featured_picture_candidates
|
||||
add ('en.wikipedia.org' , 'file.xfer_regy');
|
||||
add ('en.wikipedia.org' , 'wiki.page_props');
|
||||
add ('en.wikipedia.org' , 'wiki.categorylinks');
|
||||
*/
|
||||
/*
|
||||
add ('en.wikipedia.org' , 'file.page_regy') {build_commons = 'n'}
|
||||
add ('en.wikipedia.org' , 'wiki.mass_parse.init') {cfg {ns_ids = '0|4|100|14|8';}}
|
||||
// add ('en.wikipedia.org' , 'wiki.mass_parse.resume');
|
||||
add ('en.wikipedia.org' , 'wiki.mass_parse.exec') {cfg {
|
||||
num_wkrs = 8; load_all_templates = 'y'; load_ifexists_ns = '*'; cleanup_interval = 25; hzip_enabled = 'y'; hdiff_enabled ='y'; manual_now = '2017-01-01 01:02:03';}
|
||||
// num_wkrs = 1; load_all_templates = 'n'; load_all_imglnks = 'n'; cleanup_interval = 50; hzip_enabled = 'y'; hdiff_enabled ='y'; manual_now = '2016-07-28 01:02:03';}
|
||||
}
|
||||
add ('en.wikipedia.org' , 'wiki.mass_parse.make');
|
||||
*/
|
||||
/*
|
||||
add ('en.wikipedia.org' , 'file.lnki_temp') {
|
||||
commit_interval = 10000; progress_interval = 50; cleanup_interval = 50; select_size = 25;
|
||||
ns_ids = '0|4|14|100|12|8|6|10|828|108|118|446|710|2300|2302|2600';
|
||||
hdump_bldr {enabled = 'y'; hzip_enabled = 'y'; hzip_diff = 'y';}
|
||||
}
|
||||
*/
|
||||
/*
|
||||
add ('commons.wikimedia.org' , 'file.page_regy') {build_commons = 'y'}
|
||||
add ('en.wikipedia.org' , 'file.page_regy') {build_commons = 'n';}
|
||||
add ('en.wikipedia.org' , 'file.lnki_regy');
|
||||
// add ('en.wikipedia.org' , 'wiki.image');
|
||||
add ('en.wikipedia.org' , 'file.orig_regy');
|
||||
add ('en.wikipedia.org' , 'file.xfer_temp.thumb');
|
||||
|
||||
add ('en.wikipedia.org' , 'file.xfer_regy');
|
||||
add ('en.wikipedia.org' , 'file.xfer_regy_update');
|
||||
*/
|
||||
/*
|
||||
add ('en.wikipedia.org' , 'file.fsdb_make') {
|
||||
commit_interval = 1000; progress_interval = 200; select_interval = 10000;
|
||||
ns_ids = '0|4|14|100';
|
||||
ns_ids = '0|4|100|14|8';
|
||||
// // specify whether original wiki databases are v1 (.sqlite3) or v2 (.xowa)
|
||||
// // src_bin_mgr__fsdb_version = 'v2';
|
||||
// src_bin_mgr__fsdb_version = 'v2';
|
||||
|
||||
// trg_bin_mgr__fsdb_version = 'v1';
|
||||
|
||||
@@ -603,6 +614,7 @@ app.bldr.cmds {
|
||||
}
|
||||
add ('en.wikipedia.org' , 'file.orig_reg');
|
||||
add ('en.wikipedia.org' , 'wiki.page_dump.drop');
|
||||
add ('en.wikipedia.org' , 'file.page_file_map.create');
|
||||
*/
|
||||
}
|
||||
app.bldr.run;
|
||||
@@ -614,6 +626,9 @@ app.bldr.run;
|
||||
<li>
|
||||
2016-10-12: explicitly set web_access_enabled to y
|
||||
</li>
|
||||
<li>
|
||||
2017-02-02: updated script for multi-threaded version and new options
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
|
||||
@@ -68,7 +68,7 @@
|
||||
<a href="#Requirements"><span class="tocnumber">2</span> <span class="toctext">Requirements</span></a>
|
||||
<ul>
|
||||
<li class="toclevel-2 tocsection-3">
|
||||
<a href="#commons.wikimedia.org_.28thum"><span class="tocnumber">2.1</span> <span class="toctext">commons.wikimedia.org (thum</span></a>
|
||||
<a href="#commons.wikimedia.org"><span class="tocnumber">2.1</span> <span class="toctext">commons.wikimedia.org</span></a>
|
||||
</li>
|
||||
<li class="toclevel-2 tocsection-4">
|
||||
<a href="#www.wikidata.org"><span class="tocnumber">2.2</span> <span class="toctext">www.wikidata.org</span></a>
|
||||
@@ -158,7 +158,7 @@
|
||||
<span class="mw-headline" id="Requirements">Requirements</span>
|
||||
</h2>
|
||||
<h3>
|
||||
<span class="mw-headline" id="commons.wikimedia.org_.28thum">commons.wikimedia.org (thum</span>
|
||||
<span class="mw-headline" id="commons.wikimedia.org">commons.wikimedia.org</span>
|
||||
</h3>
|
||||
<p>
|
||||
You will need the latest version of commons.wikimedia.org. Note that if you have an older version, you will have missing images or wrong size information.
|
||||
@@ -304,6 +304,10 @@
|
||||
<pre class='code'>
|
||||
app.bldr.pause_at_end_('n');
|
||||
app.scripts.run_file_by_type('xowa_cfg_app');
|
||||
app.cfg.set_temp('app', 'xowa.app.web.enabled', 'y');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.text', '0');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.html', '0');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.file', '0');
|
||||
app.bldr.cmds {
|
||||
// build commons database; this only needs to be done once, whenever commons is updated
|
||||
add ('commons.wikimedia.org' , 'util.cleanup') {delete_all = 'y';}
|
||||
@@ -391,9 +395,11 @@ app.bldr.cmds {
|
||||
// cleanup all downloaded files as well as temporary files
|
||||
add ('simple.wikipedia.org' , 'util.cleanup') {delete_tmp = 'y'; delete_by_match('*.xml|*.sql|*.bz2|*.gz');}
|
||||
|
||||
// OBSOLETE: use v2
|
||||
// v1 html generator
|
||||
// parse every page in the listed namespace and gather data on their lnkis.
|
||||
// this step will take the longest amount of time.
|
||||
/*
|
||||
add ('simple.wikipedia.org' , 'file.lnki_temp') {
|
||||
// save data every # of pages
|
||||
commit_interval = 10000;
|
||||
@@ -426,13 +432,14 @@ app.bldr.cmds {
|
||||
hzip_diff = 'y';
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
// v2 html generator; allows for multi-threaded / multi-machine builds
|
||||
/*
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.init') {cfg {ns_ids = '0|4|14';}}
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.init') {cfg {ns_ids = '0|4|14|8';}}
|
||||
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.exec') {
|
||||
cfg {
|
||||
num_wkrs = 8; load_all_templates = 'y'; cleanup_interval = 50; hzip_enabled = 'y'; hdiff_enabled ='y'; manual_now = '2016-08-01 01:02:03';
|
||||
load_all_imglinks = 'y';
|
||||
|
||||
// uncomment the following 3 lines if using the build script as a "worker" helping a "server"
|
||||
// num_pages_in_pool = 32000;
|
||||
@@ -443,8 +450,7 @@ app.bldr.cmds {
|
||||
|
||||
// note that if multi-machine mode is enabled, all worker directories must be manually copied to the server directory (a build command will be added later)
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.make');
|
||||
*/
|
||||
|
||||
|
||||
// aggregate the lnkis
|
||||
add ('simple.wikipedia.org' , 'file.lnki_regy');
|
||||
|
||||
@@ -492,7 +498,10 @@ app.bldr.run;
|
||||
<pre class='code'>
|
||||
app.bldr.pause_at_end_('n');
|
||||
app.scripts.run_file_by_type('xowa_cfg_app');
|
||||
app.cfgs.get('app.user.cfg.security.web_access_enabled', 'app').val = 'y';
|
||||
app.cfg.set_temp('app', 'xowa.app.web.enabled', 'y');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.text', '0');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.html', '0');
|
||||
app.cfg.set_temp('app', 'xowa.bldr.db.layout_size.file', '0');
|
||||
app.bldr.cmds {
|
||||
/*
|
||||
add ('www.wikidata.org' , 'util.cleanup') {delete_all = 'y';}
|
||||
@@ -502,56 +511,47 @@ app.bldr.cmds {
|
||||
add ('www.wikidata.org' , 'util.download') {dump_type = 'image';}
|
||||
add ('www.wikidata.org' , 'text.init');
|
||||
add ('www.wikidata.org' , 'text.page');
|
||||
add ('www.wikidata.org' , 'text.cat.core');
|
||||
add ('www.wikidata.org' , 'text.cat.link');
|
||||
add ('www.wikidata.org' , 'text.cat.hidden');
|
||||
add ('www.wikidata.org' , 'text.term');
|
||||
add ('www.wikidata.org' , 'text.css');
|
||||
add ('www.wikidata.org' , 'wiki.image');
|
||||
add ('www.wikidata.org' , 'file.page_regy') {build_commons = 'y'}
|
||||
add ('www.wikidata.org' , 'wiki.page_dump.make');
|
||||
add ('www.wikidata.org' , 'wiki.page_props');
|
||||
add ('www.wikidata.org' , 'wiki.categorylinks');
|
||||
add ('www.wikidata.org' , 'wiki.redirect') {commit_interval = 1000; progress_interval = 100; cleanup_interval = 100;}
|
||||
add ('www.wikidata.org' , 'util.cleanup') {delete_tmp = 'y'; delete_by_match('*.xml|*.sql|*.bz2|*.gz');}
|
||||
// add ('www.wikidata.org' , 'util.cleanup') {delete_tmp = 'y'; delete_by_match('*.xml|*.sql|*.bz2|*.gz');}
|
||||
add ('commons.wikimedia.org' , 'util.cleanup') {delete_all = 'y';}
|
||||
add ('commons.wikimedia.org' , 'util.download') {dump_type = 'pages-articles';}
|
||||
add ('commons.wikimedia.org' , 'util.download') {dump_type = 'image';}
|
||||
add ('commons.wikimedia.org' , 'util.download') {dump_type = 'categorylinks';}
|
||||
add ('commons.wikimedia.org' , 'util.download') {dump_type = 'page_props';}
|
||||
add ('commons.wikimedia.org' , 'util.download') {dump_type = 'image';}
|
||||
add ('commons.wikimedia.org' , 'text.init');
|
||||
add ('commons.wikimedia.org' , 'text.page');
|
||||
add ('commons.wikimedia.org' , 'text.cat.core');
|
||||
add ('commons.wikimedia.org' , 'text.cat.link');
|
||||
add ('commons.wikimedia.org' , 'text.cat.hidden');
|
||||
add ('commons.wikimedia.org' , 'text.term');
|
||||
add ('commons.wikimedia.org' , 'text.css');
|
||||
add ('commons.wikimedia.org' , 'wiki.image');
|
||||
add ('commons.wikimedia.org' , 'file.page_regy') {build_commons = 'y'}
|
||||
add ('commons.wikimedia.org' , 'wiki.page_dump.make');
|
||||
add ('commons.wikimedia.org' , 'wiki.redirect') {commit_interval = 1000; progress_interval = 100; cleanup_interval = 100;}
|
||||
*/
|
||||
/*
|
||||
// en.wikipedia.org
|
||||
// add ('commons.wikimedia.org' , 'util.cleanup') {delete_tmp = 'y'; delete_by_match('*.xml|*.sql|*.bz2|*.gz');}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'pages-articles';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'pagelinks';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'categorylinks';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'page_props';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'image';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'pagelinks';}
|
||||
add ('en.wikipedia.org' , 'util.download') {dump_type = 'imagelinks';}
|
||||
*/
|
||||
/*
|
||||
// en.wikipedia.org
|
||||
add ('en.wikipedia.org' , 'text.init');
|
||||
add ('en.wikipedia.org' , 'text.page') {redirect_id_enabled = 'y';}
|
||||
add ('en.wikipedia.org' , 'text.search');
|
||||
add ('en.wikipedia.org' , 'text.css');
|
||||
add ('en.wikipedia.org' , 'text.cat.core');
|
||||
add ('en.wikipedia.org' , 'text.cat.link');
|
||||
add ('en.wikipedia.org' , 'text.cat.hidden');
|
||||
add ('en.wikipedia.org' , 'text.term');
|
||||
// add ('en.wikipedia.org' , 'wiki.redirect') {commit_interval = 1000; progress_interval = 100; cleanup_interval = 100;}
|
||||
add ('en.wikipedia.org' , 'wiki.image');
|
||||
add ('en.wikipedia.org' , 'wiki.page_dump.make');
|
||||
add ('en.wikipedia.org' , 'wiki.page_link');
|
||||
add ('en.wikipedia.org' , 'wiki.imagelinks');
|
||||
add ('en.wikipedia.org' , 'wiki.page_dump.make');
|
||||
add ('en.wikipedia.org' , 'wiki.redirect') {commit_interval = 1000; progress_interval = 100; cleanup_interval = 100;}
|
||||
add ('en.wikipedia.org' , 'wiki.page_link');
|
||||
add ('en.wikipedia.org' , 'search.page__page_score') {iteration_max = 100;}
|
||||
add ('en.wikipedia.org' , 'search.link__link_score') {page_rank_enabled = 'y';
|
||||
score_adjustment_mgr {
|
||||
@@ -566,32 +566,43 @@ app.bldr.cmds {
|
||||
}
|
||||
}
|
||||
add ('en.wikipedia.org' , 'search.word__link_count')
|
||||
/*
|
||||
// SELECT * FROM xowa_cfg WHERE cfg_key = 'props.modified_latest';
|
||||
add ('en.wikipedia.org' , 'file.lnki_temp') {
|
||||
commit_interval = 10000; progress_interval = 50; cleanup_interval = 50; select_size = 25;
|
||||
ns_ids = '0|4|14|100';
|
||||
hdump_bldr {enabled = 'y'; hzip_enabled = 'y'; hzip_diff = 'y';}
|
||||
}
|
||||
add ('en.wikipedia.org' , 'file.lnki_regy');
|
||||
add ('commons.wikimedia.org' , 'file.page_regy') {build_commons = 'y'}
|
||||
add ('en.wikipedia.org' , 'file.page_regy') {build_commons = 'n';}
|
||||
add ('en.wikipedia.org' , 'wiki.image');
|
||||
add ('en.wikipedia.org' , 'file.orig_regy');
|
||||
// SELECT * FROM orig_regy WHERE lnki_ttl = 'BSicon_CONTr.svg';
|
||||
// SELECT * FROM page_regy WHERE src_ttl = 'BSicon_CONTr.svg';
|
||||
add ('en.wikipedia.org' , 'file.xfer_temp.thumb');
|
||||
// SELECT Count(*) FROM xfer_regy WHERE xfer_status = 0;
|
||||
// SELECT * FROM xfer_regy WHERE xfer_status = 0 AND lnki_page_id = 372692; --en.w:Featured_picture_candidates
|
||||
add ('en.wikipedia.org' , 'file.xfer_regy');
|
||||
add ('en.wikipedia.org' , 'wiki.page_props');
|
||||
add ('en.wikipedia.org' , 'wiki.categorylinks');
|
||||
*/
|
||||
/*
|
||||
add ('en.wikipedia.org' , 'file.page_regy') {build_commons = 'n'}
|
||||
add ('en.wikipedia.org' , 'wiki.mass_parse.init') {cfg {ns_ids = '0|4|100|14|8';}}
|
||||
// add ('en.wikipedia.org' , 'wiki.mass_parse.resume');
|
||||
add ('en.wikipedia.org' , 'wiki.mass_parse.exec') {cfg {
|
||||
num_wkrs = 8; load_all_templates = 'y'; load_ifexists_ns = '*'; cleanup_interval = 25; hzip_enabled = 'y'; hdiff_enabled ='y'; manual_now = '2017-01-01 01:02:03';}
|
||||
// num_wkrs = 1; load_all_templates = 'n'; load_all_imglnks = 'n'; cleanup_interval = 50; hzip_enabled = 'y'; hdiff_enabled ='y'; manual_now = '2016-07-28 01:02:03';}
|
||||
}
|
||||
add ('en.wikipedia.org' , 'wiki.mass_parse.make');
|
||||
*/
|
||||
/*
|
||||
add ('en.wikipedia.org' , 'file.lnki_temp') {
|
||||
commit_interval = 10000; progress_interval = 50; cleanup_interval = 50; select_size = 25;
|
||||
ns_ids = '0|4|14|100|12|8|6|10|828|108|118|446|710|2300|2302|2600';
|
||||
hdump_bldr {enabled = 'y'; hzip_enabled = 'y'; hzip_diff = 'y';}
|
||||
}
|
||||
*/
|
||||
/*
|
||||
add ('commons.wikimedia.org' , 'file.page_regy') {build_commons = 'y'}
|
||||
add ('en.wikipedia.org' , 'file.page_regy') {build_commons = 'n';}
|
||||
add ('en.wikipedia.org' , 'file.lnki_regy');
|
||||
// add ('en.wikipedia.org' , 'wiki.image');
|
||||
add ('en.wikipedia.org' , 'file.orig_regy');
|
||||
add ('en.wikipedia.org' , 'file.xfer_temp.thumb');
|
||||
|
||||
add ('en.wikipedia.org' , 'file.xfer_regy');
|
||||
add ('en.wikipedia.org' , 'file.xfer_regy_update');
|
||||
*/
|
||||
/*
|
||||
add ('en.wikipedia.org' , 'file.fsdb_make') {
|
||||
commit_interval = 1000; progress_interval = 200; select_interval = 10000;
|
||||
ns_ids = '0|4|14|100';
|
||||
ns_ids = '0|4|100|14|8';
|
||||
// // specify whether original wiki databases are v1 (.sqlite3) or v2 (.xowa)
|
||||
// // src_bin_mgr__fsdb_version = 'v2';
|
||||
// src_bin_mgr__fsdb_version = 'v2';
|
||||
|
||||
// trg_bin_mgr__fsdb_version = 'v1';
|
||||
|
||||
@@ -603,6 +614,7 @@ app.bldr.cmds {
|
||||
}
|
||||
add ('en.wikipedia.org' , 'file.orig_reg');
|
||||
add ('en.wikipedia.org' , 'wiki.page_dump.drop');
|
||||
add ('en.wikipedia.org' , 'file.page_file_map.create');
|
||||
*/
|
||||
}
|
||||
app.bldr.run;
|
||||
@@ -614,6 +626,9 @@ app.bldr.run;
|
||||
<li>
|
||||
2016-10-12: explicitly set web_access_enabled to y
|
||||
</li>
|
||||
<li>
|
||||
2017-02-02: updated script for multi-threaded version and new options
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user