mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
815 lines
37 KiB
HTML
815 lines
37 KiB
HTML
<!DOCTYPE html>
|
|
<html dir="ltr">
|
|
<head>
  <meta http-equiv="content-type" content="text/html;charset=UTF-8">
  <title>Options/Import - XOWA</title>
  <link rel="shortcut icon" href="https://gnosygnu.github.io/xowa/xowa_logo.png">
  <link rel="stylesheet" href="https://gnosygnu.github.io/xowa/xowa_common.css" type="text/css">

  <!-- Styles for the option tables: label column, form controls, "..." buttons. -->
  <style data-source="xowa" type="text/css">
    .options_row_label {font-weight: bold; vertical-align: text-top; text-align: right; width: 200px;}
    .options_table {border: 1px solid #AAAAAA; border-collapse: collapse; color: black; margin: 1em 0; background-color: #F9F9F9;}
    .options_table td {border: 1px solid #AAAAAA; padding: 0.2em;}

    /* Every control shares the same padding and border. */
    .options_textbox,
    .options_textbox_num,
    .options_textarea,
    .options_select,
    .options_checkbox,
    .options_button {padding: 2px; border: 1px solid black;}

    .options_textbox_num {text-align: right;}
    .options_button {background: white; height: 22px; margin-left: 1px; vertical-align: top;}
    .options_readonly {background-color: #F9F9F9;}
  </style>

  <!-- Styles for the category buttons and the tab strip. -->
  <style data-source="xowa" type="text/css">
    /* Category buttons in the top navigation row. */
    .optionsButton {
      width: 100px;
      height: 40px;
      /* Generic fallback so systems without Segoe UI do not drop to the browser default font. */
      font-family: 'Segoe UI', sans-serif;
      font-size: 12px;
      margin-left: -5px;
      cursor: pointer;
      background-color: lightgray;
      text-align: center;
    }

    .optionsLink {
      color: black !important;
      text-decoration: none;
    }

    /* An active button also matches .optionsButton, so only the differing properties are listed. */
    .optionsButton.active {
      background-color: dimgray;
      color: white !important;
      border: solid 1px dimgray;
    }

    /* More specific than .optionsLink, so this !important wins on the active button. */
    .optionsButton.active a {
      color: white !important;
    }

    .tabs {
      width: 100%;
      margin-top: 10px;
    }

    .tab-links ul {
      margin-left: -40px;
    }

    .tab-links li {
      margin: 0;
      float: left;
      list-style: none;
    }

    .tab-links a {
      padding: 9px 15px;
      background: #f0f0f0;
      font-family: 'Segoe UI', sans-serif;
      font-size: 13px;
      color: black !important;
      text-decoration: none;
      /* The rule used to declare border-radius twice; this later value is the one that took effect. */
      border-radius: 3px;
      border-top: solid 1px gray;
      border-left: solid 1px gray;
      border-right: solid 1px gray;
    }

    .tab-links a:hover {
      background: #B0B0B0;
      text-decoration: none;
    }

    li.active a,
    li.active a:hover {
      background: #fff;
      color: #4c4c4c;
      font-weight: bold;
    }

    .tab-content {
      padding: 15px;
      border-radius: 3px;
      background: #fff;
      border-top: solid 1px gray;
      margin-top: 5px;
      min-height: 300px;
    }
  </style>
</head>
|
|
<body class="mediawiki ltr sitedir-ltr ns-0 ns-subject skin-vector action-submit vector-animateLayout" spellcheck="false">
|
|
<div id="mw-page-base" class="noprint"></div>
|
|
<div id="mw-head-base" class="noprint"></div>
|
|
<div id="content" class="mw-body">
|
|
<h1 id="firstHeading" class="firstHeading"><span>Options/Import</span></h1>
|
|
<div id="bodyContent" class="mw-body-content">
|
|
<div id="siteSub">From XOWA: the free, open-source, offline wiki application</div>
|
|
<div id="contentSub"></div>
|
|
<div id="mw-content-text" lang="en" dir="ltr" class="mw-content-ltr">
|
|
|
|
<div>
  <!-- Top-level option categories. The table is layout only; the "active" cell marks the category of this page. -->
  <table role="presentation">
    <tr>
      <td class="optionsButton">
        <a class="optionsLink" href="/wiki/Options/Window">Apps (basic)</a>
      </td>
      <td class="optionsButton">
        <a class="optionsLink" href="/wiki/Options/Security">Apps (advanced)</a>
      </td>
      <td class="optionsButton active">
        <a class="optionsLink" href="/wiki/Options/Import" aria-current="page">Wiki</a>
      </td>
      <td class="optionsButton">
        <a class="optionsLink" href="/wiki/Options/HTML">HTML</a>
      </td>
      <td class="optionsButton">
        <a class="optionsLink" href="/wiki/Options/Content_apps">External apps</a>
      </td>
      <td class="optionsButton">
        <a class="optionsLink" href="/wiki/Options/Search_suggest">Modules</a>
      </td>
      <td class="optionsButton">
        <a class="optionsLink" href="/wiki/Options/Math">Extensions</a>
      </td>
      <td class="optionsButton">
        <a class="optionsLink" href="/wiki/Options/Dev">Misc</a>
      </td>
    </tr>
  </table>
</div>
|
|
<div class="tabs">
|
|
<div>
  <!-- Pages within the "Wiki" category; the "active" item is the page being shown. -->
  <ul class="tab-links" style="margin-left:0px;">
    <li class="active">
      <a href="/wiki/Options/Import" aria-current="page">Import</a>
    </li>
    <li>
      <a href="/wiki/Options/Files">Files</a>
    </li>
    <li>
      <a href="/wiki/Options/HTML_databases">HTML databases</a>
    </li>
    <li>
      <a href="/wiki/Options/Page_sync">Page sync</a>
    </li>
    <li>
      <a href="/wiki/Options/Category">Category</a>
    </li>
    <li>
      <a href="/wiki/Options/Import_Dansguardian">Import Dansguardian</a>
    </li>
  </ul>
</div><br>
|
|
<div class='tab-content'>
|
|
<!-- Table of contents: one entry per section heading id on this page. -->
<div id="toc" class="toc">
  <div id="toctitle">
    <h2>Contents</h2>
  </div>
  <ul>
    <li class="toclevel-1 tocsection-1">
      <a href="#Wiki_setup"><span class="tocnumber">1</span> <span class="toctext">Wiki setup</span></a>
    </li>
    <li class="toclevel-1 tocsection-2">
      <a href="#Import_process"><span class="tocnumber">2</span> <span class="toctext">Import process</span></a>
    </li>
    <li class="toclevel-1 tocsection-3">
      <a href="#PageRank"><span class="tocnumber">3</span> <span class="toctext">PageRank</span></a>
    </li>
    <li class="toclevel-1 tocsection-4">
      <a href="#Database_layout"><span class="tocnumber">4</span> <span class="toctext">Database layout</span></a>
    </li>
    <li class="toclevel-1 tocsection-5">
      <a href="#Decompression_apps"><span class="tocnumber">5</span> <span class="toctext">Decompression apps</span></a>
    </li>
    <li class="toclevel-1 tocsection-6">
      <a href="#Notes"><span class="tocnumber">6</span> <span class="toctext">Notes</span></a>
    </li>
  </ul>
</div>
|
|
<!-- Save command link. The icon is decorative (the visible text "Save" names the link), hence the empty alt. -->
<!-- NOTE(review): the icon src is an absolute file:///C:/ path and will only resolve on a machine with XOWA installed there. -->
<div style="float:right; margin-right:10px; margin-top:5px;">
  <p>
    <a href="xowa-cmd:app.user.prefs.save();" accesskey="s"><img src="file:///C:/xowa/bin/any/xowa/file/app.window/save.png" alt="" width="16" height="16">Save</a>
  </p>
</div><br>
|
|
|
|
<h2>
  <span class="mw-headline" id="Wiki_setup">Wiki setup</span>
</h2>
<!-- Each control carries an xowa_prop attribute naming the setting it is bound to. -->
<table class="options_table">
  <tr>
    <td class="options_row_label">
      Page storage format: <sup id="cite_ref-data_storage_format_0-0" class="reference"><a href="#cite_note-data_storage_format-0">[1]</a></sup>
    </td>
    <td>
      <select xowa_prop="xowa.api.bldr.wiki.import.zip_tid_text" xowa_prop_list="xowa.api.bldr.wiki.import.zip_tid_list" class="options_textbox" size="3" id="xowa_prop_0">
        <option value="raw">text</option>
        <option value="gzip" selected>gzip</option>
        <option value="bzip2">bzip2</option>
        <option value="xz">xz</option>
      </select>
    </td>
  </tr>
</table>
|
|
<h2>
  <span class="mw-headline" id="Import_process">Import process</span>
</h2>
<!-- Each control carries an xowa_prop attribute naming the setting it is bound to. -->
<table class="options_table">
  <tr>
    <td class="options_row_label">
      Dump servers: <sup id="cite_ref-dump_server_urls_1-0" class="reference"><a href="#cite_note-dump_server_urls-1">[2]</a></sup>
    </td>
    <td>
      <!-- Textarea content is whitespace-significant: keep the URLs unindented, one per line. -->
      <textarea xowa_prop="app.setup.dumps.server_urls" class="options_textarea" style="width: 400px; height:72px;" id="xowa_prop_1">
https://dumps.wikimedia.org/,
http://dumps.wikimedia.your.org/,
http://wikipedia.c3sl.ufpr.br/,
http://ftp.fi.muni.cz/pub/wikimedia/
</textarea>
    </td>
  </tr>
  <tr>
    <td class="options_row_label">
      Import bz2 by stdout: <sup id="cite_ref-import_bz2_by_stdout_2-0" class="reference"><a href="#cite_note-import_bz2_by_stdout-2">[3]</a></sup>
    </td>
    <td>
      <input xowa_prop="app.setup.dumps.import_bz2_by_stdout" type="checkbox" class="options_checkbox" id="xowa_prop_2" checked>
    </td>
  </tr>
  <tr>
    <td class="options_row_label">
      Import bz2 by stdout process:<sup id="cite_ref-3" class="reference"><a href="#cite_note-3">[4]</a></sup>
    </td>
    <td>
      <!-- Input and button stay on one line so no whitespace is rendered between them. -->
      <input xowa_prop="app.fsys.apps.decompress_bz2_by_stdout.cmd" class="options_textbox" style="width: 380px; margin-bottom:2px;" type="xowa_io" id="xowa_prop_3" value='C:\xowa\bin\windows_64\7-zip\7za'><button type="button" id="xowa_prop_3_io" class="options_button" aria-label="Select a file" onclick='xowa_io_select("file", "xowa_prop_3", "Please select a file.");'>...</button>
      <p>
        <input xowa_prop="app.fsys.apps.decompress_bz2_by_stdout.args" class="options_textbox" style="width: 380px;" id="xowa_prop_4" value='x -so "~{src}"'>
      </p>
    </td>
  </tr>
  <tr>
    <td class="options_row_label">
      Custom wiki commands: <sup id="cite_ref-custom_cmds_4-0" class="reference"><a href="#cite_note-custom_cmds-4">[5]</a></sup>
    </td>
    <td>
      <input xowa_prop="app.setup.dumps.custom_cmds" class="options_textbox" style="width: 400px;" id="xowa_prop_5" value="wiki.download,wiki.import">
    </td>
  </tr>
  <tr>
    <td class="options_row_label">
      Download xowa_common.css: <sup id="cite_ref-download_xowa_common_css_5-0" class="reference"><a href="#cite_note-download_xowa_common_css-5">[6]</a></sup>
    </td>
    <td>
      <input xowa_prop="app.setup.dumps.css_commons_download" type="checkbox" class="options_checkbox" id="xowa_prop_6" checked>
    </td>
  </tr>
  <tr>
    <td class="options_row_label">
      Delete xml file after import: <sup id="cite_ref-delete_xml_file_6-0" class="reference"><a href="#cite_note-delete_xml_file-6">[7]</a></sup>
    </td>
    <td>
      <input xowa_prop="app.setup.dumps.delete_xml_file" type="checkbox" class="options_checkbox" id="xowa_prop_7" checked>
    </td>
  </tr>
</table>
|
|
<h2>
  <span class="mw-headline" id="PageRank">PageRank</span>
</h2>
<!-- Each control carries an xowa_prop attribute naming the setting it is bound to. -->
<table class="options_table">
  <tr>
    <td class="options_row_label">
      PageRank iteration max: <sup id="cite_ref-page_rank-iteration_max_7-0" class="reference"><a href="#cite_note-page_rank-iteration_max-7">[8]</a></sup>
    </td>
    <td>
      <input xowa_prop="xowa.api.bldr.wiki.import.page_rank.iteration_max" class="options_textbox" style="width: 400px;" id="xowa_prop_8" value="0">
    </td>
  </tr>
</table>
|
|
<h2>
  <span class="mw-headline" id="Database_layout">Database layout</span>
</h2>
<!-- All three rows cite the same note ([9]); each is bound to its own layout_*_max setting. -->
<table class="options_table">
  <tr>
    <td class="options_row_label">
      Max file size for single text database: <sup id="cite_ref-layout_text_max_8-0" class="reference"><a href="#cite_note-layout_text_max-8">[9]</a></sup>
    </td>
    <td>
      <input xowa_prop="xowa.api.bldr.wiki.import.layout_text_max" class="options_textbox" style="width: 400px;" id="xowa_prop_9" value="0">
    </td>
  </tr>
  <tr>
    <td class="options_row_label">
      Max file size for single file database: <sup id="cite_ref-layout_text_max_8-1" class="reference"><a href="#cite_note-layout_text_max-8">[9]</a></sup>
    </td>
    <td>
      <input xowa_prop="xowa.api.bldr.wiki.import.layout_file_max" class="options_textbox" style="width: 400px;" id="xowa_prop_10" value="0">
    </td>
  </tr>
  <tr>
    <td class="options_row_label">
      Max file size for single html database: <sup id="cite_ref-layout_text_max_8-2" class="reference"><a href="#cite_note-layout_text_max-8">[9]</a></sup>
    </td>
    <td>
      <input xowa_prop="xowa.api.bldr.wiki.import.layout_html_max" class="options_textbox" style="width: 400px;" id="xowa_prop_11" value="0">
    </td>
  </tr>
</table>
|
|
<h2>
  <span class="mw-headline" id="Decompression_apps">Decompression apps</span>
</h2>
<!-- One row per archive type. Each row binds a command input (with a file-picker button) and an arguments input
     to that type's own setting via xowa_prop. Input and button stay on one line so no whitespace is rendered between them. -->
<table class="options_table">
  <tr>
    <td class="options_row_label">
      Decompress bz2 file
      <p>
        <sup id="cite_ref-11" class="reference"><a href="#cite_note-11">[10]</a></sup>
      </p>
    </td>
    <td>
      <input xowa_prop="app.fsys.apps.decompress_bz2.cmd" class="options_textbox" style="width: 380px; margin-bottom:2px;" type="xowa_io" id="xowa_prop_12" value='C:\xowa\bin\windows_64\7-zip\7za'><button type="button" id="xowa_prop_12_io" class="options_button" aria-label="Select a file" onclick='xowa_io_select("file", "xowa_prop_12", "Please select a file.");'>...</button>
      <p>
        <input xowa_prop="app.fsys.apps.decompress_bz2.args" class="options_textbox" style="width: 380px;" id="xowa_prop_13" value='x -y -r "~{src}" -o"~{trg_dir}"'>
      </p>
    </td>
  </tr>
  <tr>
    <td class="options_row_label">
      Decompress zip file
      <p>
        <sup id="cite_ref-12" class="reference"><a href="#cite_note-12">[11]</a></sup>
      </p>
    </td>
    <td>
      <!-- Bound to decompress_zip; this row previously reused the bz2 keys, so editing it would have changed the bz2 setting. -->
      <!-- NOTE(review): key name follows the decompress_bz2 / decompress_gz pattern; confirm against the XOWA app config. -->
      <input xowa_prop="app.fsys.apps.decompress_zip.cmd" class="options_textbox" style="width: 380px; margin-bottom:2px;" type="xowa_io" id="xowa_prop_14" value='C:\xowa\bin\windows_64\7-zip\7za'><button type="button" id="xowa_prop_14_io" class="options_button" aria-label="Select a file" onclick='xowa_io_select("file", "xowa_prop_14", "Please select a file.");'>...</button>
      <p>
        <input xowa_prop="app.fsys.apps.decompress_zip.args" class="options_textbox" style="width: 380px;" id="xowa_prop_15" value='x -y -r "~{src}" -o"~{trg_dir}"'>
      </p>
    </td>
  </tr>
  <tr>
    <td class="options_row_label">
      Decompress gz file
      <p>
        <sup id="cite_ref-13" class="reference"><a href="#cite_note-13">[12]</a></sup>
      </p>
    </td>
    <td>
      <input xowa_prop="app.fsys.apps.decompress_gz.cmd" class="options_textbox" style="width: 380px; margin-bottom:2px;" type="xowa_io" id="xowa_prop_16" value='C:\xowa\bin\windows_64\7-zip\7za'><button type="button" id="xowa_prop_16_io" class="options_button" aria-label="Select a file" onclick='xowa_io_select("file", "xowa_prop_16", "Please select a file.");'>...</button>
      <p>
        <input xowa_prop="app.fsys.apps.decompress_gz.args" class="options_textbox" style="width: 380px;" id="xowa_prop_17" value='x -y -r "~{src}" -o"~{trg_dir}"'>
      </p>
    </td>
  </tr>
</table>
|
|
<h2>
|
|
<span class="mw-headline" id="Notes">Notes</span>
|
|
</h2>
|
|
<ol class="references">
|
|
<li id="cite_note-data_storage_format-0">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-data_storage_format_0-0">^</a></span> <span class="reference-text">Choose one of the following: (default is <code>.gz</code>)</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><b>text</b>: fastest for reading but has no compression. Simple Wikipedia will be 300 MB</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text"><b>gzip</b>: (default) fast for reading and has compression. Simple Wikipedia will be 100 MB</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text"><b>bzip2</b>: very slow for reading but has best compression. Simple Wikipedia will be 85 MB (Note: The performance is very noticeable. Please try this with Simple Wikipedia first before using on a large wiki.)</span>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li id="cite_note-dump_server_urls-1">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-dump_server_urls_1-0">^</a></span> <span class="reference-text">Enter a list of server urls separated by a comma and newline.</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text">The default value is:</span>
|
|
</li>
|
|
</ul>
|
|
<pre>
<span class="reference-text">http://dumps.wikimedia.your.org/,
http://dumps.wikimedia.org/,
http://wikipedia.c3sl.ufpr.br/,
http://ftp.fi.muni.cz/pub/wikimedia/
</span>
</pre>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text">Note that servers are prioritized from left-to-right. In the default example, <b>your.org</b> will be tried first. If it is offline, then the next server -- <b>dumps.wikimedia.org</b> -- will be tried, etc.</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text">See <a href="http://xowa.org/home/wiki/App/Import/Download/Dump_servers.html" id="xolnki_2" title="App/Import/Download/Dump servers">App/Import/Download/Dump_servers</a> for more info</span>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li id="cite_note-import_bz2_by_stdout-2">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-import_bz2_by_stdout_2-0">^</a></span> <span class="reference-text"><b>NOTE 1: this option only applies if the "Custom wiki commands" option is <code>wiki.download,wiki.import</code> (wiki.unzip must be removed)</b><br>
|
|
Select the method for importing a wiki dump bz2 file. (default is <code>checked</code>)</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><b>checked</b> : import through a native process's stdout. This will be faster, but may not work on all Operating Systems. A 95 MB file takes 85 seconds</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text"><b>unchecked</b>: import through the Apache Commons Java bz2 compression library. This will be slower, but will work on all Operating Systems. A 95 MB file takes 215 seconds.</span>
|
|
</li>
|
|
</ul><span class="reference-text"><b>NOTE 2: lbzip2 (Many thanks to Anselm for making this suggestion, as well as compiling the data to support it. See <a href="http://sourceforge.net/p/xowa/tickets/263/?limit=10&amp;page=6#f2fb/dcb6" rel="nofollow" class="external free">http://sourceforge.net/p/xowa/tickets/263/?limit=10&amp;page=6#f2fb/dcb6</a>)</b> Linux users should consider using lbzip2, as lbzip2 has significant performance differences (30% in many cases).</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text">install lbzip2</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text">(Debian) <code>sudo apt-get install lbzip2</code></span>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text">change "Import bz2 by stdout process" to</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><code>lbzip2</code></span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text"><code>-dkc "~{src}"</code></span>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li id="cite_note-3">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-3">^</a></span> <span class="reference-text">Process used to decompress bz2 by stdout. Recommended: Operating System default</span>
|
|
</li>
|
|
<li id="cite_note-custom_cmds-4">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-custom_cmds_4-0">^</a></span> <span class="reference-text">Select custom commands: (default is <code>wiki.download,wiki.unzip,wiki.import</code>)<br>
|
|
<b>Short version:</b></span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text">For fast imports, but high disk space usage, use <code>wiki.download,wiki.unzip,wiki.import</code></span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text">For slow imports, but low disk space usage, use <code>wiki.download,wiki.import</code></span>
|
|
</li>
|
|
</ul><span class="reference-text"><b>Long version:</b> Enter a list of commands separated by a comma. Valid commands are listed below. Note that simple.wikipedia.org is used for all examples, but the commands apply to any wiki.</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><code>wiki.download</code>: downloads the wiki data dump from the dump server</span>
|
|
</li>
|
|
</ul>
|
|
<dl>
|
|
<dd>
|
|
<span class="reference-text">A file will be generated in "/xowa/wiki/simple.wikipedia.org/simplewiki-latest-pages-articles.xml.bz2"</span>
|
|
</dd>
|
|
</dl>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><code>wiki.unzip</code>: unzips an xml file from the wiki data dump</span>
|
|
</li>
|
|
</ul>
|
|
<dl>
|
|
<dd>
|
|
<span class="reference-text">A file will be created for "/xowa/wiki/simple.wikipedia.org/simplewiki-latest-pages-articles.xml" (assuming the corresponding .xml.bz2 exists)</span>
|
|
</dd>
|
|
<dd>
|
|
<span class="reference-text">If this step is omitted, then XOWA will read directly from the .bz2 file. Although this will use less space (no .xml file to unzip), it will be significantly slower. <b>Also, due to a program limitation, the progress percentage will not be accurate. It may hover at 99.99% for several minutes</b></span>
|
|
</dd>
|
|
</dl>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><code>wiki.import</code>: imports the xml file</span>
|
|
</li>
|
|
</ul>
|
|
<dl>
|
|
<dd>
|
|
<span class="reference-text">A wiki will be imported from "/xowa/wiki/simple.wikipedia.org/simplewiki-latest-pages-articles.xml"</span>
|
|
</dd>
|
|
</dl><span class="reference-text">The following lists possible combinations:</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><code>wiki.download,wiki.unzip,wiki.import</code> AKA: <b>fastest</b></span>
|
|
</li>
|
|
</ul>
|
|
<dl>
|
|
<dd>
|
|
<span class="reference-text">This is the default. Note that this will be the fastest to set up, but will take more space. For example, English Wikipedia will set up in 5 hours and require at least 45 GB of temp space</span>
|
|
</dd>
|
|
</dl>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><code>wiki.download,wiki.import</code> AKA: <b>smallest</b></span>
|
|
</li>
|
|
</ul>
|
|
<dl>
|
|
<dd>
|
|
<span class="reference-text">This will read directly from the bz2 file. Note that this will use the least disk space, but will take more time. For example, English Wikipedia will set up in 8 hours but will only use 5 GB of temp space</span>
|
|
</dd>
|
|
</dl>
|
|
</li>
|
|
<li id="cite_note-download_xowa_common_css-5">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-download_xowa_common_css_5-0">^</a></span> <span class="reference-text">Affects the xowa_common.css in /xowa/user/anonymous/wiki/wiki_name/html/. Occurs when importing a wiki. (default is <code>checked</code>)</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><b>checked</b> : downloads xowa_common.css from the Wikimedia servers. Note that this stylesheet will be the latest copy but it may cause unexpected formatting in XOWA.</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text"><b>unchecked</b>: (default) copies xowa_common.css from /xowa/bin/any/html/html/import/. Note that this stylesheet is the one XOWA is coded against. It is the most stable, but will not have the latest logo</span>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li id="cite_note-delete_xml_file-6">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-delete_xml_file_6-0">^</a></span> <span class="reference-text">(Only relevant for wiki.unzip) Choose one of the following: (default is <code>checked</code>)</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><b>checked</b> : (default) the .xml file is automatically deleted once the import process completes</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text"><b>unchecked</b>: the .xml file is untouched</span>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li id="cite_note-page_rank-iteration_max-7">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-page_rank-iteration_max_7-0">^</a></span> <span class="reference-text">Specify one of the following: (default is <code>0</code>)</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><b>0</b> : (default) page rank is disabled</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text"><b>(number greater than 1)</b>: page rank will be calculated until it is finished or the maximum number of iterations is reached. For more info, see <a href="http://xowa.org/home/wiki/Help/Features/Search/Build.html" id="xolnki_3" title="Help/Features/Search/Build">Help/Features/Search/Build</a></span>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li id="cite_note-layout_text_max-8">
|
|
<span class="mw-cite-backlink">^ <sup><a href="#cite_ref-layout_text_max_8-0">a</a></sup> <sup><a href="#cite_ref-layout_text_max_8-1">b</a></sup> <sup><a href="#cite_ref-layout_text_max_8-2">c</a></sup></span> <span class="reference-text">Enter a number in MB to represent the cutoff for generating sets of page databases as one file or many files (default is <code>1500</code>)<br>
|
|
XOWA generates three types of page databases:</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text"><b>text</b>: These are Wikitext databases and have entries like ''italics''. They have <code>-text-</code> in their file name.</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text"><b>html</b>: These are the html-dump databases and have entries like &lt;i&gt;italics&lt;/i&gt;. They have <code>-html-</code> in their file name</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text"><b>file</b>: These are image databases which have the raw binary images. They have <code>-file-</code> in their file name</span>
|
|
</li>
|
|
</ul><span class="reference-text"><br>
|
|
Different wikis will have different numbers of databases for a given set.</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text">For small wikis, XOWA generates one database for the entire wiki. For example, Simple Wikipedia will just have "simple.wikipedia.org-text.xowa". This way is preferred as it is simpler.</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text">For large wikis, XOWA generates many databases for the entire wiki. For example, English Wikipedia will have "en.wikipedia.org-text-ns.000.xowa", "en.wikipedia.org-text-ns.000-db.002.xowa", etc. This way is necessary, because some file-systems don't support large databases. For example, creating an "en.wikipedia.org-text.xowa" file will generate a 20 GB file. This 20 GB file will generally fail on flash drives (FAT32), as well as Android (SQLite library allows 2 GB max)</span>
|
|
</li>
|
|
</ul><span class="reference-text"><br>
|
|
These options can force XOWA to generate a wiki using either one database (Simple Wikipedia style) or many databases (English Wikipedia style). It does this by using a cutoff for the XML database dump<br>
|
|
For example, 1500 means that a wiki with a dump file size of 1.5 GB or less will generate a single file. Any wiki with a dump file size larger than 1.5 GB will generate multiple files.</span>
|
|
<ul>
|
|
<li>
|
|
<span class="reference-text">If you always want to generate a set with only one file, set the value to a large number like 999,999 (999 GB)</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text">If you always want to generate a set with multiple files, set the value to 0.</span>
|
|
</li>
|
|
<li>
|
|
<span class="reference-text">Otherwise, set the value to a cutoff. Wikis below that cutoff will be "single file"; wikis above it will be "multiple files"</span>
|
|
</li>
|
|
</ul>
|
|
</li>
|
|
<li id="cite_note-11">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-11">^</a></span> <span class="reference-text">Decompress bz2 file (needed for importing dumps). Recommended: <a href="http://7-zip.org/" rel="nofollow" class="external text">7-zip</a></span>
|
|
</li>
|
|
<li id="cite_note-12">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-12">^</a></span> <span class="reference-text">Decompress zip file (needed for importing dumps). Recommended: <a href="http://7-zip.org/" rel="nofollow" class="external text">7-zip</a></span>
|
|
</li>
|
|
<li id="cite_note-13">
|
|
<span class="mw-cite-backlink"><a href="#cite_ref-13">^</a></span> <span class="reference-text">Decompress gz file (needed for importing dumps). Recommended: <a href="http://7-zip.org/" rel="nofollow" class="external text">7-zip</a></span>
|
|
</li>
|
|
</ol>
|
|
</div>
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<!-- Page header with the namespace tab. Element names are kept as div because the external stylesheet may select on them. -->
<div id="mw-head" class="noprint">
  <div id="left-navigation">
    <div id="p-namespaces" class="vectorTabs">
      <h3>Namespaces</h3>
      <ul>
        <li id="ca-nstab-main" class="selected"><span><a id="ca-nstab-main-href" href="index.html">Page</a></span></li>
      </ul>
    </div>
  </div>
</div>
|
|
|
|
<!-- Sidebar: logo plus one "portal" per link group. Element names are kept as div because the external stylesheet may select on them. -->
<div id="mw-panel" class="noprint">
  <div id="p-logo">
    <!-- The link has no text content, so aria-label provides its accessible name. -->
    <a style="background-image: url(https://gnosygnu.github.io/xowa/xowa_logo.png);" href="http://xowa.org/" title="Visit the main page" aria-label="Visit the main page"></a>
  </div>

  <div class="portal" id="xowa-portal-home">
    <h3>XOWA</h3>
    <div class="body">
      <ul>
        <li><a href="http://xowa.org/index.html" title="Visit the main page">Main page</a></li>
        <li><a href="http://xowa.org/screenshots.html" title="See screenshots of XOWA">Screenshots</a></li>
        <li><a href="https://www.youtube.com/watch?v=q0qbXYXEH6M" title="See a video of XOWA Desktop in action">Video</a></li>
        <li><a href="http://xowa.org/home/wiki/Help/Download_XOWA.html" title="Download the XOWA application">Download XOWA</a></li>
        <li><a href="http://xowa.org/home/wiki/Dashboard/Image_databases.html" title="Download offline wikis and image databases">Download wikis</a></li>
      </ul>
    </div>
  </div>

  <div class="portal" id="xowa-portal-started">
    <h3>Getting started</h3>
    <div class="body">
      <ul>
        <!-- Double-quoted so the apostrophe in "XOWA's" does not end the attribute value. -->
        <li><a href="http://xowa.org/home/wiki/App/Setup/System_requirements.html" title="Get XOWA's system requirements">Requirements</a></li>
        <li><a href="http://xowa.org/home/wiki/App/Setup/Installation.html" title="Get instructions for installing XOWA">Installation</a></li>
        <li><a href="http://xowa.org/home/wiki/App/Import/Simple_Wikipedia.html" title="Learn how to set up Simple Wikipedia">Simple Wikipedia</a></li>
        <li><a href="http://xowa.org/home/wiki/App/Import/English_Wikipedia.html" title="Learn how to set up English Wikipedia">English Wikipedia</a></li>
        <li><a href="http://xowa.org/home/wiki/App/Import/Other_wikis.html" title="Learn how to set up other Wikipedias">Other Wikipedias</a></li>
      </ul>
    </div>
  </div>

  <div class="portal" id="xowa-portal-android">
    <h3>Android</h3>
    <div class="body">
      <ul>
        <li><a href="http://xowa.org/home/wiki/Android/Setup.html" title="Setup XOWA on your Android device">Setup</a></li>
        <li><a href="https://www.youtube.com/watch?v=jsMTBxGweUw" title="See a video of XOWA Android in action">Video</a></li>
      </ul>
    </div>
  </div>

  <div class="portal" id="xowa-portal-help">
    <h3>Help</h3>
    <div class="body">
      <ul>
        <li><a href="http://xowa.org/home/wiki/Help/About.html" title="Get more information about XOWA">About</a></li>
        <li><a href="http://xowa.org/home/wiki/Help/Contents.html" title="View a list of help topics">Contents</a></li>
        <li><a href="http://xowa.org/home/wiki/Help/Media.html" title="Read what others have written about XOWA">Media</a></li>
        <li><a href="http://xowa.org/home/wiki/Help/Feedback.html" title="Questions? Comments? Leave feedback for XOWA">Feedback</a></li>
      </ul>
    </div>
  </div>

  <div class="portal" id="xowa-portal-blog">
    <h3>Blog</h3>
    <div class="body">
      <ul>
        <!-- Was title='Follow XOWA''s ...': the doubled apostrophe closed the attribute early. -->
        <li><a href="http://xowa.org/home/wiki/Blog.html" title="Follow XOWA's development process">Current</a></li>
      </ul>
    </div>
  </div>

  <div class="portal" id="xowa-portal-links">
    <h3>Links</h3>
    <div class="body">
      <ul>
        <li><a href="http://dumps.wikimedia.org/backup-index.html" title="Get wiki database dumps directly from Wikimedia">Wikimedia dumps</a></li>
        <li><a href="https://archive.org/search.php?query=xowa" title="Search archive.org for XOWA files">XOWA @ archive.org</a></li>
        <li><a href="http://en.wikipedia.org" title="Visit Wikipedia (and compare to XOWA!)">English Wikipedia</a></li>
      </ul>
    </div>
  </div>

  <div class="portal" id="xowa-portal-donate">
    <h3>Donate</h3>
    <div class="body">
      <ul>
        <li><a href="https://archive.org/donate/index.php" title="Support archive.org!">archive.org</a></li><!-- listed first due to recent fire damages: http://blog.archive.org/2013/11/06/scanning-center-fire-please-help-rebuild/ -->
        <li><a href="https://donate.wikimedia.org/wiki/Special:FundraiserRedirector" title="Support Wikipedia!">Wikipedia</a></li>
        <!-- <li><a href="" title="Support XOWA! (but only after you've supported archive.org and Wikipedia)">XOWA</a></li> -->
      </ul>
    </div>
  </div>
</div>
|
|
</body>
|
|
</html> |