mirror of
https://github.com/gnosygnu/xowa.git
synced 2024-10-27 20:34:16 +00:00
Mass_parse: Clear all mem when ns changes else outofmemory error
This commit is contained in:
parent
bac00076b5
commit
6ee274efd6
@ -111,7 +111,7 @@ public class Xomp_parse_wkr implements Gfo_invk {
|
|||||||
// if ns changed and prv_ns is main
|
// if ns changed and prv_ns is main
|
||||||
if (cur_ns != prv_ns) {
|
if (cur_ns != prv_ns) {
|
||||||
if (prv_ns == gplx.xowa.wikis.nss.Xow_ns_.Tid__main)
|
if (prv_ns == gplx.xowa.wikis.nss.Xow_ns_.Tid__main)
|
||||||
wiki.Cache_mgr().Free_mem__wbase(); // NOTE: clears page and wbase cache only; needed else OutOfMemory error for en.w in 25th hour; DATE:2017-01-07
|
wiki.Cache_mgr().Free_mem__all(); // NOTE: clears page and wbase cache only; needed else OutOfMemory error for en.w in 25th hour; DATE:2017-01-07
|
||||||
prv_ns = cur_ns;
|
prv_ns = cur_ns;
|
||||||
}
|
}
|
||||||
Xoae_page wpg = Xoae_page.New(wiki, ttl);
|
Xoae_page wpg = Xoae_page.New(wiki, ttl);
|
||||||
|
@ -230,12 +230,12 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
case Byte_ascii.Angle_bgn: found = Found__angle; break;
|
case Byte_ascii.Angle_bgn: found = Found__angle; break;
|
||||||
case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
|
case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
|
||||||
|
|
||||||
// PORT:"elseif ( $curChar == $currentClosing )"
|
// PORTED: "elseif ( $curChar == $currentClosing )"
|
||||||
case Byte_ascii.Curly_end: found = Found__close; break;
|
case Byte_ascii.Curly_end: found = Found__close; break;
|
||||||
case Byte_ascii.Brack_end: found = Found__close; break;
|
case Byte_ascii.Brack_end: found = Found__close; break;
|
||||||
case Byte_ascii.Bang: found = Found__close; break;
|
case Byte_ascii.Bang: found = Found__close; break;
|
||||||
|
|
||||||
// PORT:"elseif ( isset( $this->rules[$curChar] ) )"
|
// PORTED: "elseif ( isset( $this->rules[$curChar] ) )"
|
||||||
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
|
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
|
||||||
case Byte_ascii.Brack_bgn: {found = Found__open; rule = rule_brack; break;}
|
case Byte_ascii.Brack_bgn: {found = Found__open; rule = rule_brack; break;}
|
||||||
case Byte_ascii.Dash: {found = Found__open; rule = rule_langv; break;}
|
case Byte_ascii.Dash: {found = Found__open; rule = rule_langv; break;}
|
||||||
@ -257,7 +257,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Determine element name
|
// Determine element name
|
||||||
// PORT: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--)
|
// PORTED: $elementsRegex = "~($xmlishRegex)(?:\s|\/>|>)|(!--)~iA"; EX: "(span|div)(?:\s|\/>|>)|(!--)
|
||||||
Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
|
Xomw_prepro_elem element = (Xomw_prepro_elem)elements_trie.Match_at(trv, src, i + 1, src_len);
|
||||||
if (element == null) {
|
if (element == null) {
|
||||||
// Element name missing or not listed
|
// Element name missing or not listed
|
||||||
|
234
400_xowa/src/gplx/xowa/parsers/mws/tblws/Xomw_tblw_wkr.java
Normal file
234
400_xowa/src/gplx/xowa/parsers/mws/tblws/Xomw_tblw_wkr.java
Normal file
@ -0,0 +1,234 @@
|
|||||||
|
/*
|
||||||
|
XOWA: the XOWA Offline Wiki Application
|
||||||
|
Copyright (C) 2012 gnosygnu@gmail.com
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Affero General Public License as
|
||||||
|
published by the Free Software Foundation, either version 3 of the
|
||||||
|
License, or (at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Affero General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
package gplx.xowa.parsers.mws.tblws; import gplx.*; import gplx.xowa.*; import gplx.xowa.parsers.*; import gplx.xowa.parsers.mws.*;
|
||||||
|
public class Xomw_tblw_wkr {
|
||||||
|
private final Bry_bfr bfr = Bry_bfr_.New(), tmp_bfr = Bry_bfr_.New();
|
||||||
|
private final List_adp
|
||||||
|
td_history = List_adp_.New()
|
||||||
|
, last_tag_history = List_adp_.New()
|
||||||
|
, tr_history = List_adp_.New()
|
||||||
|
, tr_attributes = List_adp_.New()
|
||||||
|
, has_opened_tr = List_adp_.New()
|
||||||
|
;
|
||||||
|
private static final byte[]
|
||||||
|
Bry__tblw_end = Bry_.new_a7("|}"), Bry__tr = Bry_.new_a7("|-"), Bry__th = Bry_.new_a7("|+")
|
||||||
|
, Bry__special_case = Bry_.new_a7("<table>\n<tr><td></td></tr>\n</table>")
|
||||||
|
;
|
||||||
|
private static final int Len__special_case = Bry__special_case.length;
|
||||||
|
public byte[] Do_table_stuff(byte[] src) {
|
||||||
|
int src_len = src.length;
|
||||||
|
byte[][] lines = Bry_split_.Split(src, 0, src_len, Byte_ascii.Nl_bry); // PORTED: $lines = StringUtils::explode( "\n", $text );
|
||||||
|
int lines_len = lines.length;
|
||||||
|
|
||||||
|
// PORTED:member variables
|
||||||
|
// $td_history = []; // Is currently a td tag open?
|
||||||
|
// $last_tag_history = []; // Save history of last lag activated (td, th or caption)
|
||||||
|
// $tr_history = []; // Is currently a tr tag open?
|
||||||
|
// $tr_attributes = []; // history of tr attributes
|
||||||
|
// $has_opened_tr = []; // Did this table open a <tr> element?
|
||||||
|
|
||||||
|
int indent_level = 0; // indent level of the table
|
||||||
|
byte[] chars_2 = new byte[2];
|
||||||
|
for (int i = 0; i < lines_len; i++) {
|
||||||
|
byte[] line_orig = lines[i];
|
||||||
|
byte[] line = Bry_.Trim(line_orig);
|
||||||
|
|
||||||
|
int line_len = line.length;
|
||||||
|
if (line_len == 0) { // empty line, go to next line
|
||||||
|
bfr.Add(line_orig).Add_byte_nl();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
byte char_0 = line[0];
|
||||||
|
chars_2[0] = line[0];
|
||||||
|
if (line_len > 1) chars_2[1] = line[1];
|
||||||
|
|
||||||
|
boolean is_indented_table = false;
|
||||||
|
// ":*" , "\s*" , "{|" , ".*"
|
||||||
|
if (is_indented_table) {
|
||||||
|
// if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
|
||||||
|
// // First check if we are starting a new table
|
||||||
|
// $indent_level = strlen( $matches[1] );
|
||||||
|
//
|
||||||
|
// $attributes = $this->mStripState->unstripBoth( $matches[2] );
|
||||||
|
// $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );
|
||||||
|
//
|
||||||
|
// line_orig = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
|
||||||
|
// array_push( $td_history, false );
|
||||||
|
// array_push( $last_tag_history, '' );
|
||||||
|
// array_push( $tr_history, false );
|
||||||
|
// array_push( $tr_attributes, '' );
|
||||||
|
// array_push( $has_opened_tr, false );
|
||||||
|
}
|
||||||
|
else if (td_history.Len() == 0) {
|
||||||
|
// Don't do any of the following
|
||||||
|
bfr.Add(line_orig).Add_byte_nl();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (Bry_.Eq(chars_2, Bry__tblw_end)) {
|
||||||
|
// We are ending a table
|
||||||
|
line = tmp_bfr.Add_str_a7("</table>").Add_mid(line, 2, line.length).To_bry_and_clear();
|
||||||
|
byte[] last_tag = (byte[])List_adp_.Pop(last_tag_history);
|
||||||
|
|
||||||
|
if (has_opened_tr.Len() == 0) {
|
||||||
|
line = tmp_bfr.Add_str_a7("<tr><td></td></tr>").Add(line).To_bry_and_clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tr_history.Len() > 0) {
|
||||||
|
List_adp_.Pop(tr_history);
|
||||||
|
line = tmp_bfr.Add_str_a7("</tr>").Add(line).To_bry_and_clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (td_history.Len() > 0) {
|
||||||
|
List_adp_.Pop(td_history);
|
||||||
|
line = tmp_bfr.Add_str_a7("</").Add(last_tag).Add_str_a7(">").Add(line).To_bry_and_clear();
|
||||||
|
}
|
||||||
|
List_adp_.Pop(tr_attributes);
|
||||||
|
line_orig = tmp_bfr.Add(line).Add(Bry_.Repeat_bry(Bry_.new_a7("</dd></dl>"), indent_level)).To_bry_and_clear();
|
||||||
|
}
|
||||||
|
else if (Bry_.Eq(chars_2, Bry__tr)) {
|
||||||
|
// // Now we have a table row
|
||||||
|
// $line = preg_replace( '#^\|-+#', '', $line );
|
||||||
|
//
|
||||||
|
// // Whats after the tag is now only attributes
|
||||||
|
// $attributes = $this->mStripState->unstripBoth( $line );
|
||||||
|
// $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
|
||||||
|
// array_pop( $tr_attributes );
|
||||||
|
// array_push( $tr_attributes, $attributes );
|
||||||
|
//
|
||||||
|
// $line = '';
|
||||||
|
// $last_tag = array_pop( $last_tag_history );
|
||||||
|
// array_pop( $has_opened_tr );
|
||||||
|
// array_push( $has_opened_tr, true );
|
||||||
|
//
|
||||||
|
// if ( array_pop( $tr_history ) ) {
|
||||||
|
// $line = '</tr>';
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if ( array_pop( $td_history ) ) {
|
||||||
|
// $line = "</{$last_tag}>{$line}";
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// line_orig = $line;
|
||||||
|
// array_push( $tr_history, false );
|
||||||
|
// array_push( $td_history, false );
|
||||||
|
// array_push( $last_tag_history, '' );
|
||||||
|
}
|
||||||
|
else if ( char_0 == Byte_ascii.Pipe
|
||||||
|
|| char_0 == Byte_ascii.Bang
|
||||||
|
|| Bry_.Eq(chars_2, Bry__th)
|
||||||
|
) {
|
||||||
|
// This might be cell elements, td, th or captions
|
||||||
|
// if ( $first_two === '|+' ) {
|
||||||
|
// $first_character = '+';
|
||||||
|
// $line = substr( $line, 2 );
|
||||||
|
// } else {
|
||||||
|
// $line = substr( $line, 1 );
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Implies both are valid for table headings.
|
||||||
|
// if ( $first_character === '!' ) {
|
||||||
|
// $line = StringUtils::replaceMarkup( '!!', '||', $line );
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // Split up multiple cells on the same line.
|
||||||
|
// // FIXME : This can result in improper nesting of tags processed
|
||||||
|
// // by earlier parser steps.
|
||||||
|
// $cells = explode( '||', $line );
|
||||||
|
//
|
||||||
|
// line_orig = '';
|
||||||
|
//
|
||||||
|
// // Loop through each table cell
|
||||||
|
// foreach ( $cells as $cell ) {
|
||||||
|
// $previous = '';
|
||||||
|
// if ( $first_character !== '+' ) {
|
||||||
|
// $tr_after = array_pop( $tr_attributes );
|
||||||
|
// if ( !array_pop( $tr_history ) ) {
|
||||||
|
// $previous = "<tr{$tr_after}>\n";
|
||||||
|
// }
|
||||||
|
// array_push( $tr_history, true );
|
||||||
|
// array_push( $tr_attributes, '' );
|
||||||
|
// array_pop( $has_opened_tr );
|
||||||
|
// array_push( $has_opened_tr, true );
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// $last_tag = array_pop( $last_tag_history );
|
||||||
|
//
|
||||||
|
// if ( array_pop( $td_history ) ) {
|
||||||
|
// $previous = "</{$last_tag}>\n{$previous}";
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// if ( $first_character === '|' ) {
|
||||||
|
// $last_tag = 'td';
|
||||||
|
// } elseif ( $first_character === '!' ) {
|
||||||
|
// $last_tag = 'th';
|
||||||
|
// } elseif ( $first_character === '+' ) {
|
||||||
|
// $last_tag = 'caption';
|
||||||
|
// } else {
|
||||||
|
// $last_tag = '';
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// array_push( $last_tag_history, $last_tag );
|
||||||
|
//
|
||||||
|
// // A cell could contain both parameters and data
|
||||||
|
// $cell_data = explode( '|', $cell, 2 );
|
||||||
|
//
|
||||||
|
// // Bug 553: Note that a '|' inside an invalid link should not
|
||||||
|
// // be mistaken as delimiting cell parameters
|
||||||
|
// if ( strpos( $cell_data[0], '[[' ) !== false ) {
|
||||||
|
// $cell = "{$previous}<{$last_tag}>{$cell}";
|
||||||
|
// } elseif ( count( $cell_data ) == 1 ) {
|
||||||
|
// $cell = "{$previous}<{$last_tag}>{$cell_data[0]}";
|
||||||
|
// } else {
|
||||||
|
// $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
|
||||||
|
// $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
|
||||||
|
// $cell = "{$previous}<{$last_tag}{$attributes}>{$cell_data[1]}";
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// line_orig .= $cell;
|
||||||
|
// array_push( $td_history, true );
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
bfr.Add(line_orig).Add_byte_nl();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Closing open td, tr && table
|
||||||
|
while (td_history.Len() > 0) {
|
||||||
|
if (tr_history.Len() > 0) {
|
||||||
|
List_adp_.Del_at_last(tr_history);
|
||||||
|
bfr.Add_str_a7("</td>\n");
|
||||||
|
}
|
||||||
|
if (has_opened_tr.Len() == 0) {
|
||||||
|
bfr.Add_str_a7("<tr><td></td></tr>\n");
|
||||||
|
}
|
||||||
|
bfr.Add_str_a7("</table>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove trailing line-ending (b/c)
|
||||||
|
if (bfr.Get_at_last_or_nil_if_empty() == Byte_ascii.Nl) {
|
||||||
|
bfr.Del_by_1();
|
||||||
|
}
|
||||||
|
|
||||||
|
// special case: don't return empty table
|
||||||
|
if ( bfr.Len() == Len__special_case
|
||||||
|
&& Bry_.Eq(bfr.Bfr(), 0, Len__special_case, Bry__special_case)) {
|
||||||
|
return Bry_.Empty;
|
||||||
|
}
|
||||||
|
return bfr.To_bry_and_clear();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user