mirror of
https://github.com/gnosygnu/xowa.git
synced 2025-06-02 07:24:19 +00:00
Mw_parse: Support bracket, template, and tplarg
This commit is contained in:
parent
08eb098422
commit
b989ff63df
@ -235,6 +235,16 @@ public class Bry_find_ {
|
|||||||
cur++;
|
cur++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
public static int Find_fwd_while(byte[] src, int cur, int end, byte[] while_bry) {
|
||||||
|
int while_len = while_bry.length;
|
||||||
|
while (true) {
|
||||||
|
if (cur == end) return cur;
|
||||||
|
for (int i = 0; i < while_len; i++) {
|
||||||
|
if (while_bry[i] != src[i + cur]) return cur;
|
||||||
|
}
|
||||||
|
cur += while_len;
|
||||||
|
}
|
||||||
|
}
|
||||||
public static int Find_fwd_until(byte[] src, int cur, int end, byte until_byte) {
|
public static int Find_fwd_until(byte[] src, int cur, int end, byte until_byte) {
|
||||||
while (true) {
|
while (true) {
|
||||||
if ( cur == end
|
if ( cur == end
|
||||||
|
@ -37,7 +37,8 @@ class Xomw_prepro_stack {
|
|||||||
public Xomw_prepro_part Get_current_part() {
|
public Xomw_prepro_part Get_current_part() {
|
||||||
if (top == null) {
|
if (top == null) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
return top.Get_current_part();
|
return top.Get_current_part();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -45,7 +46,7 @@ class Xomw_prepro_stack {
|
|||||||
public void Push(Xomw_prepro_piece item) {
|
public void Push(Xomw_prepro_piece item) {
|
||||||
stack.Add(item);
|
stack.Add(item);
|
||||||
this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
|
this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
|
||||||
accum.Clear().Add(top.Get_accum());
|
accum = top.Get_accum();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Xomw_prepro_piece Pop() {
|
public Xomw_prepro_piece Pop() {
|
||||||
@ -56,10 +57,11 @@ class Xomw_prepro_stack {
|
|||||||
|
|
||||||
Xomw_prepro_piece rv = (Xomw_prepro_piece)stack.Get_at(len - 1);
|
Xomw_prepro_piece rv = (Xomw_prepro_piece)stack.Get_at(len - 1);
|
||||||
stack.Del_at(len - 1);
|
stack.Del_at(len - 1);
|
||||||
|
len--;
|
||||||
|
|
||||||
if (len > 0) {
|
if (len > 0) {
|
||||||
this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
|
this.top = (Xomw_prepro_piece)stack.Get_at(stack.Len() - 1);
|
||||||
accum.Clear().Add(top.Get_accum());
|
accum = top.Get_accum();
|
||||||
} else {
|
} else {
|
||||||
this.top = null;
|
this.top = null;
|
||||||
this.accum = root_accum;
|
this.accum = root_accum;
|
||||||
@ -69,7 +71,7 @@ class Xomw_prepro_stack {
|
|||||||
|
|
||||||
public void Add_part(byte[] bry) {
|
public void Add_part(byte[] bry) {
|
||||||
top.Add_part(bry);
|
top.Add_part(bry);
|
||||||
accum.Clear().Add(top.Get_accum());
|
accum = top.Get_accum();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Xomw_prepro_flags Get_flags() {
|
public Xomw_prepro_flags Get_flags() {
|
||||||
@ -103,12 +105,13 @@ class Xomw_prepro_piece {
|
|||||||
this.count = count;
|
this.count = count;
|
||||||
this.start_pos = start_pos;
|
this.start_pos = start_pos;
|
||||||
this.line_start = line_start;
|
this.line_start = line_start;
|
||||||
|
parts.Add(new Xomw_prepro_part(Bry_.Empty));
|
||||||
}
|
}
|
||||||
public Xomw_prepro_part Get_current_part() {
|
public Xomw_prepro_part Get_current_part() {
|
||||||
return (Xomw_prepro_part)parts.Get_at(parts.Len() - 1);
|
return (Xomw_prepro_part)parts.Get_at(parts.Len() - 1);
|
||||||
}
|
}
|
||||||
public byte[] Get_accum() {
|
public Bry_bfr Get_accum() {
|
||||||
return Get_current_part().bry;
|
return Get_current_part().bfr;
|
||||||
}
|
}
|
||||||
public void Add_part(byte[] bry) {
|
public void Add_part(byte[] bry) {
|
||||||
parts.Add(new Xomw_prepro_part(bry));
|
parts.Add(new Xomw_prepro_part(bry));
|
||||||
@ -126,13 +129,13 @@ class Xomw_prepro_piece {
|
|||||||
public byte[] Break_syntax(Bry_bfr tmp_bfr, int opening_count) {
|
public byte[] Break_syntax(Bry_bfr tmp_bfr, int opening_count) {
|
||||||
byte[] rv = Bry_.Empty;
|
byte[] rv = Bry_.Empty;
|
||||||
if (Bry_.Eq(open, Byte_ascii.Nl_bry)) {
|
if (Bry_.Eq(open, Byte_ascii.Nl_bry)) {
|
||||||
rv = ((Xomw_prepro_part)parts.Get_at(0)).bry;
|
rv = ((Xomw_prepro_part)parts.Get_at(0)).bfr.To_bry();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (opening_count == -1) {
|
if (opening_count == -1) {
|
||||||
opening_count = count;
|
opening_count = count;
|
||||||
}
|
}
|
||||||
rv = Bry_.Repeat_bry(open, opening_count);
|
tmp_bfr.Add(Bry_.Repeat_bry(open, opening_count));
|
||||||
|
|
||||||
// concat parts with "|"
|
// concat parts with "|"
|
||||||
boolean first = true;
|
boolean first = true;
|
||||||
@ -145,7 +148,7 @@ class Xomw_prepro_piece {
|
|||||||
else {
|
else {
|
||||||
tmp_bfr.Add_byte_pipe();
|
tmp_bfr.Add_byte_pipe();
|
||||||
}
|
}
|
||||||
tmp_bfr.Add(part.bry);
|
tmp_bfr.Add(part.bfr.To_bry());
|
||||||
}
|
}
|
||||||
rv = tmp_bfr.To_bry_and_clear();
|
rv = tmp_bfr.To_bry_and_clear();
|
||||||
}
|
}
|
||||||
@ -154,9 +157,9 @@ class Xomw_prepro_piece {
|
|||||||
}
|
}
|
||||||
class Xomw_prepro_part {
|
class Xomw_prepro_part {
|
||||||
public Xomw_prepro_part(byte[] bry) {
|
public Xomw_prepro_part(byte[] bry) {
|
||||||
this.bry = bry;
|
bfr.Add(bry);
|
||||||
}
|
}
|
||||||
public final byte[] bry;
|
public final Bry_bfr bfr = Bry_bfr_.New();
|
||||||
public int Eqpos = -1;
|
public int Eqpos = -1;
|
||||||
public int comment_end = -1;
|
public int comment_end = -1;
|
||||||
public int visual_end = -1;
|
public int visual_end = -1;
|
||||||
|
@ -21,7 +21,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
private final Bry_bfr tmp_bfr = Bry_bfr_.New();
|
||||||
private final List_adp comments_list = List_adp_.New();
|
private final List_adp comments_list = List_adp_.New();
|
||||||
private final Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7();
|
private final Hash_adp_bry xmlish_elems = Hash_adp_bry.ci_a7();
|
||||||
private final Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs();
|
private final Hash_adp_bry xmlish_allow_missing_end_tag = Hash_adp_bry.cs().Add_many_str("includeonly", "noinclude", "onlyinclude");
|
||||||
private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
|
private final Hash_adp_bry no_more_closing_tag = Hash_adp_bry.cs();
|
||||||
// private final Btrie_slim_mgr search_dflt_trie = Btrie_slim_mgr.cs().Add_many_int(0, "[", "{", "<", "\n"); // $searchBase = "[{<\n";
|
// private final Btrie_slim_mgr search_dflt_trie = Btrie_slim_mgr.cs().Add_many_int(0, "[", "{", "<", "\n"); // $searchBase = "[{<\n";
|
||||||
private final Xomw_prepro_stack stack = new Xomw_prepro_stack();
|
private final Xomw_prepro_stack stack = new Xomw_prepro_stack();
|
||||||
@ -47,7 +47,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
|
|
||||||
public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
|
public byte[] Preprocess_to_xml(byte[] src, boolean for_inclusion) {
|
||||||
xmlish_elems.Clear(); // TODO.XO: parser->getStripList();
|
xmlish_elems.Clear(); // TODO.XO: parser->getStripList();
|
||||||
xmlish_allow_missing_end_tag.Add_many_str("includeonly", "noinclude", "onlyinclude");
|
// PERF: xmlish_allow_missing_end_tag.Add_many_str("includeonly", "noinclude", "onlyinclude")
|
||||||
boolean enable_only_include = false;
|
boolean enable_only_include = false;
|
||||||
|
|
||||||
Hash_adp_bry ignored_tags, ignored_elements;
|
Hash_adp_bry ignored_tags, ignored_elements;
|
||||||
@ -108,6 +108,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7();
|
Btrie_slim_mgr elements_trie = Btrie_slim_mgr.ci_a7();
|
||||||
Btrie_slim_mgr elements_end_trie = Btrie_slim_mgr.ci_a7();
|
Btrie_slim_mgr elements_end_trie = Btrie_slim_mgr.ci_a7();
|
||||||
|
|
||||||
|
byte[] cur_char = Bry_.Empty;
|
||||||
byte[] cur_closing = Bry_.Empty;
|
byte[] cur_closing = Bry_.Empty;
|
||||||
byte[] inner = null;
|
byte[] inner = null;
|
||||||
|
|
||||||
@ -126,7 +127,6 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
find_only_include = false;
|
find_only_include = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
byte[] cur_char = Bry_.Empty;
|
|
||||||
Xomw_prepro_rule rule = null;
|
Xomw_prepro_rule rule = null;
|
||||||
if (fake_line_start) {
|
if (fake_line_start) {
|
||||||
found = Found__line_bgn;
|
found = Found__line_bgn;
|
||||||
@ -208,25 +208,33 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
|
boolean match = false;
|
||||||
cur_char = cur_char_itm.bry;
|
if (cur_closing != Bry_.Empty) {
|
||||||
switch (cur_char_itm.type) {
|
if (Bry_.Match(src, i, i + cur_closing.length, cur_closing)) {
|
||||||
case Byte_ascii.Pipe: found = Found__pipe; break;
|
match = true;
|
||||||
case Byte_ascii.Eq: found = Found__equals; break;
|
found = Found__close;
|
||||||
case Byte_ascii.Angle_bgn: found = Found__angle; break;
|
cur_char = cur_closing;
|
||||||
case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
|
}
|
||||||
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
|
}
|
||||||
case Byte_ascii.Brack_bgn: {found = Found__open; rule = rule_brack; break;}
|
else {
|
||||||
case Byte_ascii.Dash: {found = Found__open; rule = rule_langv; break;}
|
Xomw_prepro_curchar_itm cur_char_itm = (Xomw_prepro_curchar_itm)cur_char_trie.Match_at(trv, src, i, src_len);
|
||||||
default:
|
if (cur_char_itm != null) {
|
||||||
if (Bry_.Eq(cur_char, cur_closing)) {
|
match = true;
|
||||||
found = Found__close;
|
cur_char = cur_char_itm.bry;
|
||||||
|
switch (cur_char_itm.type) {
|
||||||
|
case Byte_ascii.Pipe: found = Found__pipe; break;
|
||||||
|
case Byte_ascii.Eq: found = Found__equals; break;
|
||||||
|
case Byte_ascii.Angle_bgn: found = Found__angle; break;
|
||||||
|
case Byte_ascii.Nl: found = in_heading ? Found__line_end : Found__line_bgn; break;
|
||||||
|
case Byte_ascii.Curly_bgn: {found = Found__open; rule = rule_curly; break;}
|
||||||
|
case Byte_ascii.Brack_bgn: {found = Found__open; rule = rule_brack; break;}
|
||||||
|
case Byte_ascii.Dash: {found = Found__open; rule = rule_langv; break;}
|
||||||
}
|
}
|
||||||
else {
|
}
|
||||||
i++;
|
}
|
||||||
continue;
|
if (!match) {
|
||||||
}
|
i++;
|
||||||
break;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -528,7 +536,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
}
|
}
|
||||||
else if (found == Found__open) {
|
else if (found == Found__open) {
|
||||||
// count opening brace characters
|
// count opening brace characters
|
||||||
int count = Bry_find_.Find_fwd(src, cur_char, i, src_len);
|
int count = Bry_find_.Find_fwd_while(src, i, src_len, cur_char) - i;
|
||||||
|
|
||||||
// we need to add to stack only if opening brace count is enough for one of the rules
|
// we need to add to stack only if opening brace count is enough for one of the rules
|
||||||
if (count >= rule.min) {
|
if (count >= rule.min) {
|
||||||
@ -550,7 +558,7 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
else if (found == Found__close) {
|
else if (found == Found__close) {
|
||||||
Xomw_prepro_piece piece = stack.top;
|
Xomw_prepro_piece piece = stack.top;
|
||||||
// lets check if there are enough characters for closing brace
|
// lets check if there are enough characters for closing brace
|
||||||
int count = Bry_find_.Find_fwd(src, cur_char, i, src_len);
|
int count = Bry_find_.Find_fwd_while(src, i, src_len, cur_char) - i;
|
||||||
int max_count = piece.count;
|
int max_count = piece.count;
|
||||||
if (count > max_count) count = max_count;
|
if (count > max_count) count = max_count;
|
||||||
|
|
||||||
@ -582,13 +590,13 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
byte[] element = null;
|
byte[] element = null;
|
||||||
if (name_type == Xomw_prepro_rule.Name__null) {
|
if (name_type == Xomw_prepro_rule.Name__null) {
|
||||||
// No element, just literal text
|
// No element, just literal text
|
||||||
piece.Break_syntax(tmp_bfr, matching_count);
|
tmp_bfr.Add(piece.Break_syntax(tmp_bfr, matching_count));
|
||||||
element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear();
|
element = tmp_bfr.Add(Bry_.Repeat_bry(rule.end, matching_count)).To_bry_and_clear();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// Create XML element; Note: $parts is already XML, does not need to be encoded further
|
// Create XML element; Note: $parts is already XML, does not need to be encoded further
|
||||||
List_adp parts = piece.parts;
|
List_adp parts = piece.parts;
|
||||||
byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bry;
|
byte[] title = ((Xomw_prepro_part)parts.Get_at(0)).bfr.To_bry_and_clear();
|
||||||
parts.Del_at(0);
|
parts.Del_at(0);
|
||||||
|
|
||||||
// The invocation is at the start of the line if lineStart is set in the stack, and all opening brackets are used up.
|
// The invocation is at the start of the line if lineStart is set in the stack, and all opening brackets are used up.
|
||||||
@ -609,12 +617,13 @@ public class Xomw_prepro_wkr { // TS.UNSAFE:caching for repeated calls
|
|||||||
for (int j = 0; j < parts_len; j++) {
|
for (int j = 0; j < parts_len; j++) {
|
||||||
Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j);
|
Xomw_prepro_part part = (Xomw_prepro_part)parts.Get_at(j);
|
||||||
if (part.Eqpos != -1) {
|
if (part.Eqpos != -1) {
|
||||||
byte[] arg_key = Bry_.Mid(part.bry, 0, part.Eqpos);
|
byte[] part_bry = part.bfr.To_bry();
|
||||||
byte[] arg_val = Bry_.Mid(part.bry, part.Eqpos + 1);
|
byte[] arg_key = Bry_.Mid(part_bry, 0, part.Eqpos);
|
||||||
|
byte[] arg_val = Bry_.Mid(part_bry, part.Eqpos + 1);
|
||||||
tmp_bfr.Add_str_a7("<part><name>").Add(arg_key).Add_str_a7("</name>=<value>").Add(arg_val).Add_str_a7("</value></part>");
|
tmp_bfr.Add_str_a7("<part><name>").Add(arg_key).Add_str_a7("</name>=<value>").Add(arg_val).Add_str_a7("</value></part>");
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
tmp_bfr.Add_str_a7("<part><name index=\"").Add_int_variable(arg_idx).Add_str_a7("\" /><value>{").Add(part.bry).Add_str_a7("}</value></part>");
|
tmp_bfr.Add_str_a7("<part><name index=\"").Add_int_variable(arg_idx).Add_str_a7("\" /><value>{").Add(part.bfr.To_bry()).Add_str_a7("}</value></part>");
|
||||||
arg_idx++;
|
arg_idx++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,9 +22,15 @@ public class Xomw_prepro_wkr__tst {
|
|||||||
@Test public void Text() {
|
@Test public void Text() {
|
||||||
fxt.Test__parse("abc", "<root>abc</root>");
|
fxt.Test__parse("abc", "<root>abc</root>");
|
||||||
}
|
}
|
||||||
// @Test public void Brack() {
|
@Test public void Brack() {
|
||||||
// fxt.Test__parse("a[[b]]c", "<root>abc</root>");
|
fxt.Test__parse("a[[b]]c", "<root>a[[b]]c</root>");
|
||||||
// }
|
}
|
||||||
|
@Test public void Template() {
|
||||||
|
fxt.Test__parse("a{{b}}c", "<root>a<template lineStart=\"1\"><title>b</title></template>c</root>");
|
||||||
|
}
|
||||||
|
@Test public void Tplarg() {
|
||||||
|
fxt.Test__parse("a{{{b}}}c", "<root>a<tplarg lineStart=\"1\"><title>b</title></tplarg>c</root>");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
class Xomw_prepro_wkr__fxt {
|
class Xomw_prepro_wkr__fxt {
|
||||||
private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();
|
private final Xomw_prepro_wkr wkr = new Xomw_prepro_wkr();
|
||||||
|
Loading…
Reference in New Issue
Block a user