mirror of
https://github.com/gnosygnu/xowa.git
synced 2026-03-02 03:49:30 +00:00
buildxowa_make_resume
This commit is contained in:
@@ -86,59 +86,62 @@
|
||||
<li class="toclevel-2 tocsection-6">
|
||||
<a href="#make_wiki"><span class="tocnumber">3.3</span> <span class="toctext">make_wiki</span></a>
|
||||
</li>
|
||||
<li class="toclevel-2 tocsection-7">
|
||||
<a href="#Resuming"><span class="tocnumber">3.4</span> <span class="toctext">Resuming</span></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toclevel-1 tocsection-7">
|
||||
<li class="toclevel-1 tocsection-8">
|
||||
<a href="#Appendix"><span class="tocnumber">4</span> <span class="toctext">Appendix</span></a>
|
||||
<ul>
|
||||
<li class="toclevel-2 tocsection-8">
|
||||
<li class="toclevel-2 tocsection-9">
|
||||
<a href="#Requirements"><span class="tocnumber">4.1</span> <span class="toctext">Requirements</span></a>
|
||||
<ul>
|
||||
<li class="toclevel-3 tocsection-9">
|
||||
<li class="toclevel-3 tocsection-10">
|
||||
<a href="#Hardware"><span class="tocnumber">4.1.1</span> <span class="toctext">Hardware</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-10">
|
||||
<li class="toclevel-3 tocsection-11">
|
||||
<a href="#Internet-connectivity"><span class="tocnumber">4.1.2</span> <span class="toctext">Internet-connectivity</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-11">
|
||||
<li class="toclevel-3 tocsection-12">
|
||||
<a href="#Pre-existing_image_databases_for_your_wiki_(optional)"><span class="tocnumber">4.1.3</span> <span class="toctext">Pre-existing image databases for your wiki (optional)</span></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toclevel-2 tocsection-12">
|
||||
<li class="toclevel-2 tocsection-13">
|
||||
<a href="#gfs_script"><span class="tocnumber">4.2</span> <span class="toctext">gfs script</span></a>
|
||||
</li>
|
||||
<li class="toclevel-2 tocsection-13">
|
||||
<li class="toclevel-2 tocsection-14">
|
||||
<a href="#Terms"><span class="tocnumber">4.3</span> <span class="toctext">Terms</span></a>
|
||||
<ul>
|
||||
<li class="toclevel-3 tocsection-14">
|
||||
<li class="toclevel-3 tocsection-15">
|
||||
<a href="#lnki"><span class="tocnumber">4.3.1</span> <span class="toctext">lnki</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-15">
|
||||
<li class="toclevel-3 tocsection-16">
|
||||
<a href="#orig"><span class="tocnumber">4.3.2</span> <span class="toctext">orig</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-16">
|
||||
<li class="toclevel-3 tocsection-17">
|
||||
<a href="#xfer"><span class="tocnumber">4.3.3</span> <span class="toctext">xfer</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-17">
|
||||
<li class="toclevel-3 tocsection-18">
|
||||
<a href="#fsdb"><span class="tocnumber">4.3.4</span> <span class="toctext">fsdb</span></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toclevel-2 tocsection-18">
|
||||
<li class="toclevel-2 tocsection-19">
|
||||
<a href="#Examples"><span class="tocnumber">4.4</span> <span class="toctext">Examples</span></a>
|
||||
<ul>
|
||||
<li class="toclevel-3 tocsection-19">
|
||||
<li class="toclevel-3 tocsection-20">
|
||||
<a href="#Simple_Wikipedia_example_with_documentation"><span class="tocnumber">4.4.1</span> <span class="toctext">Simple Wikipedia example with documentation</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-20">
|
||||
<li class="toclevel-3 tocsection-21">
|
||||
<a href="#Script:_gnosygnu's_actual_English_Wikipedia_script_(dirty;_provided_for_reference_only)"><span class="tocnumber">4.4.2</span> <span class="toctext">Script: gnosygnu's actual English Wikipedia script (dirty; provided for reference only)</span></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toclevel-1 tocsection-21">
|
||||
<li class="toclevel-1 tocsection-22">
|
||||
<a href="#Change_log"><span class="tocnumber">5</span> <span class="toctext">Change log</span></a>
|
||||
</li>
|
||||
</ul>
|
||||
@@ -193,7 +196,7 @@
|
||||
Create a text file in your xowa root folder called <code>make_xowa.gfs</code> with a text-editor.
|
||||
<ul>
|
||||
<li>
|
||||
For Windows, Notepad++ is recommended
|
||||
For Windows, Notepad++ is recommended, or any other text editor that does not use Windows line endings. (Do not use Notepad.)
|
||||
</li>
|
||||
<li>
|
||||
For other systems, you can use a text-editor like Atom, jEdit, or whatever you're most comfortable with
|
||||
@@ -207,9 +210,11 @@
|
||||
Run the following command. Make sure to match the jar path and jar file
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
<code>java -jar C:\xowa\xowa_windows_64.jar --app_mode cmd --cmd_file C:\xowa\make_xowa.gfs --show_license n --show_args n</code>
|
||||
</p>
|
||||
<dl>
|
||||
<dd>
|
||||
<code>java -jar C:\xowa\xowa_windows_64.jar --app_mode cmd --cmd_file C:\xowa\make_xowa.gfs --show_license n --show_args n</code>
|
||||
</dd>
|
||||
</dl>
|
||||
<ul>
|
||||
<li>
|
||||
Wait for the script to complete
|
||||
@@ -393,13 +398,36 @@ app.bldr.cmds {
|
||||
// v2 html generator; allows for multi-threaded / multi-machine builds
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.init') {cfg {ns_ids = '0|4|14|8';}}
|
||||
|
||||
// uncomment the next line to resume parsing. See === Resuming === below
|
||||
// add ('simple.wikipedia.org' , 'wiki.mass_parse.resume');
|
||||
|
||||
// NOTE: must change manual_now
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.exec') {
|
||||
cfg {
|
||||
num_wkrs = 8; load_all_templates = 'y'; cleanup_interval = 50; hzip_enabled = 'y'; hdiff_enabled ='y'; manual_now = '2020-02-01 01:02:03';
|
||||
// locks time to a specific value so all pages use the same time when calling Date.Now()
|
||||
manual_now = '2020-02-01 01:02:03';
|
||||
|
||||
// number of threads; set to 1 to skip multi-threaded behavior
|
||||
num_wkrs = 8;
|
||||
|
||||
// enables building full-text search indexes
|
||||
indexer_enabled = 'y';
|
||||
|
||||
// optimization; loads all templates in memory instead of loading each one from disk
|
||||
load_all_templates = 'y';
|
||||
|
||||
// optimization; loads all imglinks in memory instead of loading each one from disk
|
||||
// an imglink maps a given image (File:Abc.png) to a repo (commons vs local wiki) as well as a rename
|
||||
load_all_imglinks = 'y';
|
||||
|
||||
// uncomment the following 3 lines if using the build script as a "worker" helping a "server"
|
||||
|
||||
// number of pages after which XOWA empties cache
|
||||
cleanup_interval = 50;
|
||||
|
||||
// DEPRECATED: uncomment these 2 lines to use custom HTML zip compression
|
||||
// hzip_enabled = 'y';
|
||||
// hdiff_enabled ='y';
|
||||
|
||||
// uncomment these 3 lines if using the build script as a "worker" helping a "server"
|
||||
// num_pages_in_pool = 32000;
|
||||
// mgr_url = '\\server_machine_name\xowa\wiki\en.wikipedia.org\tmp\xomp\';
|
||||
// wkr_machine_name = 'worker_machine_1'
|
||||
@@ -463,6 +491,50 @@ app.bldr.run;
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<h3>
|
||||
<span class="mw-headline" id="Resuming">Resuming</span>
|
||||
</h3>
|
||||
<p>
|
||||
The <code>wiki.mass_parse.exec</code> command may take many hours. For English Wikipedia, it can take up to 5 days, even with 8 threads.
|
||||
</p>
|
||||
<p>
|
||||
During this time, the build can be canceled by any of the following:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Manual: User presses Ctrl+C
|
||||
</li>
|
||||
<li>
|
||||
Unanticipated: Process dies or machine shuts down
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
To resume the build, apply the following steps:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Comment out all commands before <code>wiki.mass_parse.exec</code> using a block comment
|
||||
<ul>
|
||||
<li>
|
||||
Place a <code>/*</code> before the line with 'util.cleanup'
|
||||
</li>
|
||||
<li>
|
||||
Place a <code>*/</code> after the line with 'wiki.mass_parse.init'
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
Uncomment the line for 'wiki.mass_parse.resume'
|
||||
</li>
|
||||
<li>
|
||||
Run the command-line again
|
||||
</li>
|
||||
</ul>
|
||||
<dl>
|
||||
<dd>
|
||||
<code>java -jar C:\xowa\xowa_windows_64.jar --app_mode cmd --cmd_file C:\xowa\make_xowa.gfs --show_license n --show_args n</code>
|
||||
</dd>
|
||||
</dl>
|
||||
<h2>
|
||||
<span class="mw-headline" id="Appendix">Appendix</span>
|
||||
</h2>
|
||||
|
||||
@@ -86,59 +86,62 @@
|
||||
<li class="toclevel-2 tocsection-6">
|
||||
<a href="#make_wiki"><span class="tocnumber">3.3</span> <span class="toctext">make_wiki</span></a>
|
||||
</li>
|
||||
<li class="toclevel-2 tocsection-7">
|
||||
<a href="#Resuming"><span class="tocnumber">3.4</span> <span class="toctext">Resuming</span></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toclevel-1 tocsection-7">
|
||||
<li class="toclevel-1 tocsection-8">
|
||||
<a href="#Appendix"><span class="tocnumber">4</span> <span class="toctext">Appendix</span></a>
|
||||
<ul>
|
||||
<li class="toclevel-2 tocsection-8">
|
||||
<li class="toclevel-2 tocsection-9">
|
||||
<a href="#Requirements"><span class="tocnumber">4.1</span> <span class="toctext">Requirements</span></a>
|
||||
<ul>
|
||||
<li class="toclevel-3 tocsection-9">
|
||||
<li class="toclevel-3 tocsection-10">
|
||||
<a href="#Hardware"><span class="tocnumber">4.1.1</span> <span class="toctext">Hardware</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-10">
|
||||
<li class="toclevel-3 tocsection-11">
|
||||
<a href="#Internet-connectivity"><span class="tocnumber">4.1.2</span> <span class="toctext">Internet-connectivity</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-11">
|
||||
<li class="toclevel-3 tocsection-12">
|
||||
<a href="#Pre-existing_image_databases_for_your_wiki_(optional)"><span class="tocnumber">4.1.3</span> <span class="toctext">Pre-existing image databases for your wiki (optional)</span></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toclevel-2 tocsection-12">
|
||||
<li class="toclevel-2 tocsection-13">
|
||||
<a href="#gfs_script"><span class="tocnumber">4.2</span> <span class="toctext">gfs script</span></a>
|
||||
</li>
|
||||
<li class="toclevel-2 tocsection-13">
|
||||
<li class="toclevel-2 tocsection-14">
|
||||
<a href="#Terms"><span class="tocnumber">4.3</span> <span class="toctext">Terms</span></a>
|
||||
<ul>
|
||||
<li class="toclevel-3 tocsection-14">
|
||||
<li class="toclevel-3 tocsection-15">
|
||||
<a href="#lnki"><span class="tocnumber">4.3.1</span> <span class="toctext">lnki</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-15">
|
||||
<li class="toclevel-3 tocsection-16">
|
||||
<a href="#orig"><span class="tocnumber">4.3.2</span> <span class="toctext">orig</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-16">
|
||||
<li class="toclevel-3 tocsection-17">
|
||||
<a href="#xfer"><span class="tocnumber">4.3.3</span> <span class="toctext">xfer</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-17">
|
||||
<li class="toclevel-3 tocsection-18">
|
||||
<a href="#fsdb"><span class="tocnumber">4.3.4</span> <span class="toctext">fsdb</span></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toclevel-2 tocsection-18">
|
||||
<li class="toclevel-2 tocsection-19">
|
||||
<a href="#Examples"><span class="tocnumber">4.4</span> <span class="toctext">Examples</span></a>
|
||||
<ul>
|
||||
<li class="toclevel-3 tocsection-19">
|
||||
<li class="toclevel-3 tocsection-20">
|
||||
<a href="#Simple_Wikipedia_example_with_documentation"><span class="tocnumber">4.4.1</span> <span class="toctext">Simple Wikipedia example with documentation</span></a>
|
||||
</li>
|
||||
<li class="toclevel-3 tocsection-20">
|
||||
<li class="toclevel-3 tocsection-21">
|
||||
<a href="#Script:_gnosygnu's_actual_English_Wikipedia_script_(dirty;_provided_for_reference_only)"><span class="tocnumber">4.4.2</span> <span class="toctext">Script: gnosygnu's actual English Wikipedia script (dirty; provided for reference only)</span></a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li class="toclevel-1 tocsection-21">
|
||||
<li class="toclevel-1 tocsection-22">
|
||||
<a href="#Change_log"><span class="tocnumber">5</span> <span class="toctext">Change log</span></a>
|
||||
</li>
|
||||
</ul>
|
||||
@@ -193,7 +196,7 @@
|
||||
Create a text file in your xowa root folder called <code>make_xowa.gfs</code> with a text-editor.
|
||||
<ul>
|
||||
<li>
|
||||
For Windows, Notepad++ is recommended
|
||||
For Windows, Notepad++ is recommended, or any other text editor that does not use Windows line endings. (Do not use Notepad.)
|
||||
</li>
|
||||
<li>
|
||||
For other systems, you can use a text-editor like Atom, jEdit, or whatever you're most comfortable with
|
||||
@@ -207,9 +210,11 @@
|
||||
Run the following command. Make sure to match the jar path and jar file
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
<code>java -jar C:\xowa\xowa_windows_64.jar --app_mode cmd --cmd_file C:\xowa\make_xowa.gfs --show_license n --show_args n</code>
|
||||
</p>
|
||||
<dl>
|
||||
<dd>
|
||||
<code>java -jar C:\xowa\xowa_windows_64.jar --app_mode cmd --cmd_file C:\xowa\make_xowa.gfs --show_license n --show_args n</code>
|
||||
</dd>
|
||||
</dl>
|
||||
<ul>
|
||||
<li>
|
||||
Wait for the script to complete
|
||||
@@ -393,13 +398,36 @@ app.bldr.cmds {
|
||||
// v2 html generator; allows for multi-threaded / multi-machine builds
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.init') {cfg {ns_ids = '0|4|14|8';}}
|
||||
|
||||
// uncomment the next line to resume parsing. See === Resuming === below
|
||||
// add ('simple.wikipedia.org' , 'wiki.mass_parse.resume');
|
||||
|
||||
// NOTE: must change manual_now
|
||||
add ('simple.wikipedia.org' , 'wiki.mass_parse.exec') {
|
||||
cfg {
|
||||
num_wkrs = 8; load_all_templates = 'y'; cleanup_interval = 50; hzip_enabled = 'y'; hdiff_enabled ='y'; manual_now = '2020-02-01 01:02:03';
|
||||
// locks time to a specific value so all pages use the same time when calling Date.Now()
|
||||
manual_now = '2020-02-01 01:02:03';
|
||||
|
||||
// number of threads; set to 1 to skip multi-threaded behavior
|
||||
num_wkrs = 8;
|
||||
|
||||
// enables building full-text search indexes
|
||||
indexer_enabled = 'y';
|
||||
|
||||
// optimization; loads all templates in memory instead of loading each one from disk
|
||||
load_all_templates = 'y';
|
||||
|
||||
// optimization; loads all imglinks in memory instead of loading each one from disk
|
||||
// an imglink maps a given image (File:Abc.png) to a repo (commons vs local wiki) as well as a rename
|
||||
load_all_imglinks = 'y';
|
||||
|
||||
// uncomment the following 3 lines if using the build script as a "worker" helping a "server"
|
||||
|
||||
// number of pages after which XOWA empties cache
|
||||
cleanup_interval = 50;
|
||||
|
||||
// DEPRECATED: uncomment these 2 lines to use custom HTML zip compression
|
||||
// hzip_enabled = 'y';
|
||||
// hdiff_enabled ='y';
|
||||
|
||||
// uncomment these 3 lines if using the build script as a "worker" helping a "server"
|
||||
// num_pages_in_pool = 32000;
|
||||
// mgr_url = '\\server_machine_name\xowa\wiki\en.wikipedia.org\tmp\xomp\';
|
||||
// wkr_machine_name = 'worker_machine_1'
|
||||
@@ -463,6 +491,50 @@ app.bldr.run;
|
||||
</ul>
|
||||
</li>
|
||||
</ul>
|
||||
<h3>
|
||||
<span class="mw-headline" id="Resuming">Resuming</span>
|
||||
</h3>
|
||||
<p>
|
||||
The <code>wiki.mass_parse.exec</code> command may take many hours. For English Wikipedia, it can take up to 5 days, even with 8 threads.
|
||||
</p>
|
||||
<p>
|
||||
During this time, the build can be canceled by any of the following:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Manual: User presses Ctrl+C
|
||||
</li>
|
||||
<li>
|
||||
Unanticipated: Process dies or machine shuts down
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
To resume the build, apply the following steps:
|
||||
</p>
|
||||
<ul>
|
||||
<li>
|
||||
Comment out all commands before <code>wiki.mass_parse.exec</code> using a block comment
|
||||
<ul>
|
||||
<li>
|
||||
Place a <code>/*</code> before the line with 'util.cleanup'
|
||||
</li>
|
||||
<li>
|
||||
Place a <code>*/</code> after the line with 'wiki.mass_parse.init'
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
<li>
|
||||
Uncomment the line for 'wiki.mass_parse.resume'
|
||||
</li>
|
||||
<li>
|
||||
Run the command-line again
|
||||
</li>
|
||||
</ul>
|
||||
<dl>
|
||||
<dd>
|
||||
<code>java -jar C:\xowa\xowa_windows_64.jar --app_mode cmd --cmd_file C:\xowa\make_xowa.gfs --show_license n --show_args n</code>
|
||||
</dd>
|
||||
</dl>
|
||||
<h2>
|
||||
<span class="mw-headline" id="Appendix">Appendix</span>
|
||||
</h2>
|
||||
|
||||
Reference in New Issue
Block a user