1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00
Files
gnosygnu_xowa/400_xowa/src/gplx/langs/phps/Php_lxr.java
2015-11-01 20:50:05 -05:00

283 lines
13 KiB
Java

/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012 gnosygnu@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps; import gplx.*; import gplx.langs.*;
import gplx.core.btries.*; import gplx.core.log_msgs.*;
// Contract for a PHP lexer: it registers a hook with the parser's trie and builds tokens for the text it recognizes.
interface Php_lxr {
// Returns the lexer's type id; the implementations in this file return one of the Php_lxr_.Tid_* constants.
int Lxr_tid();
// Registers the lexer. The implementations in this file add their hook bytes to trie, and most of them also flag a byte in parser_interrupts.
void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts);
// Supplies the source bytes, their length, and the token worker / token factory that later Lxr_make calls use.
void Lxr_bgn(byte[] src, int src_len, Php_tkn_wkr tkn_wkr, Php_tkn_factory tkn_factory);
// Tries to build a token that starts at bgn. cur appears to be the position just past the matched hook (NOTE(review): confirm against the caller).
// The implementations in this file return the position after the token, or Php_parser.NotFound when they reject the match.
int Lxr_make(Php_ctx ctx, int bgn, int cur);
}
/** Type ids that the lexers in this file return from {@code Lxr_tid()}. */
class Php_lxr_ {
	public static final byte Tid_declaration = 1;
	public static final byte Tid_ws = 2;
	public static final byte Tid_comment = 3;
	public static final byte Tid_var = 4;
	public static final byte Tid_sym = 5;
	public static final byte Tid_keyword = 6;
	public static final byte Tid_num = 7;
	public static final byte Tid_quote = 8;
}
/**
 * Shared base for the lexers in this file: keeps the per-parse state handed to Lxr_bgn
 * so that subclasses can read it from their Lxr_make implementations.
 */
abstract class Php_lxr_base implements Php_lxr {
	/** Source bytes being lexed. */
	protected byte[] src;
	/** Length of {@code src} as supplied by the caller of Lxr_bgn. */
	protected int src_len;
	/** Receives the tokens (and warning messages) produced by the lexer. */
	protected Php_tkn_wkr tkn_wkr;
	/** Creates the token objects passed to {@code tkn_wkr}. */
	protected Php_tkn_factory tkn_factory;

	public abstract int Lxr_tid();
	public abstract void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts);
	public abstract int Lxr_make(Php_ctx ctx, int bgn, int cur);

	/** Stores the four arguments in the matching protected fields; does nothing else. */
	public void Lxr_bgn(byte[] src, int src_len, Php_tkn_wkr tkn_wkr, Php_tkn_factory tkn_factory) {
		this.src = src;
		this.src_len = src_len;
		this.tkn_wkr = tkn_wkr;
		this.tkn_factory = tkn_factory;
	}
}
/** Lexer hooked on "<?php"; emits a declaration token that spans the hook and the line breaks following it. */
class Php_lxr_declaration extends Php_lxr_base {
	private static final byte[] Bry_declaration = Bry_.new_a7("<?php");

	@Override public int Lxr_tid() {return Php_lxr_.Tid_declaration;}

	/** Adds the "<?php" hook to the trie and flags "<" as a parser interrupt. */
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(Bry_declaration, this);
		parser_interrupts[Byte_ascii.Lt] = Php_parser_interrupt.Char;
	}

	/**
	 * Consumes the run of \n / \r bytes starting at cur and emits a declaration token for bgn..end-of-run.
	 * Returns Php_parser.NotFound when a byte other than \n / \r directly follows the hook.
	 * When the source ends right at cur the token is still emitted.
	 */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		int pos = cur;
		while (pos != src_len && Is_line_break(src[pos]))
			++pos;
		boolean stopped_before_end = pos != src_len;
		boolean line_break_seen = pos != cur;
		if (stopped_before_end && !line_break_seen)
			return Php_parser.NotFound;	// EX: "<?phpx"; hook is not followed by a line break
		tkn_wkr.Process(tkn_factory.Declaration(bgn, pos));
		return pos;
	}

	private static boolean Is_line_break(byte b) {
		return b == Byte_ascii.Nl || b == Byte_ascii.Cr;
	}
}
/** Lexer for whitespace runs; each instance is hooked on one whitespace byte (space, \n, \t or \r). */
class Php_lxr_ws extends Php_lxr_base {
	public static final byte[] Bry_ws_space = Bry_.new_a7(" ");
	public static final byte[] Bry_ws_nl = Bry_.new_a7("\n");
	public static final byte[] Bry_ws_tab = Bry_.new_a7("\t");
	public static final byte[] Bry_ws_cr = Bry_.new_a7("\r");

	private byte ws_tid;
	private byte[] ws_bry;

	/** Picks the hook bytes for ws_tid; an id other than the four Php_tkn_ws ids below leaves the hook null. */
	public Php_lxr_ws(byte ws_tid) {
		this.ws_tid = ws_tid;
		byte[] hook = Hook_for(ws_tid);
		if (hook != null)
			ws_bry = hook;
	}

	private static byte[] Hook_for(byte tid) {
		if (tid == Php_tkn_ws.Tid_space) return Bry_ws_space;
		if (tid == Php_tkn_ws.Tid_nl) return Bry_ws_nl;
		if (tid == Php_tkn_ws.Tid_tab) return Bry_ws_tab;
		if (tid == Php_tkn_ws.Tid_cr) return Bry_ws_cr;
		return null;
	}

	public byte Ws_tid() {return ws_tid;}
	public byte[] Ws_bry() {return ws_bry;}
	@Override public int Lxr_tid() {return Php_lxr_.Tid_ws;}

	/** Adds the hook to the trie and flags its first byte as a parser interrupt. */
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(ws_bry, this);
		parser_interrupts[ws_bry[0]] = Php_parser_interrupt.Char;
	}

	/**
	 * Extends the match over every following space / \n / \t / \r byte (any mix of them) and emits
	 * one whitespace token tagged with this lexer's ws_tid. Returns the position after the run.
	 */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		int pos = cur;
		while (pos != src_len) {
			byte b = src[pos];
			boolean is_ws = b == Byte_ascii.Space || b == Byte_ascii.Nl || b == Byte_ascii.Tab || b == Byte_ascii.Cr;
			if (!is_ws) break;
			++pos;
		}
		tkn_wkr.Process(tkn_factory.Ws(bgn, pos, ws_tid));
		return pos;
	}
}
/** Lexer for comments; each instance handles one comment style: multi-line, "//" or "#". */
class Php_lxr_comment extends Php_lxr_base {
	public static final Gfo_msg_itm Dangling_comment = Gfo_msg_itm_.new_warn_(Php_parser.Log_nde, "dangling_comment", "dangling_comment");
	public static final byte[] Bry_bgn_mult = Bry_.new_a7("/*"), Bry_bgn_slash = Bry_.new_a7("//"), Bry_bgn_hash = Bry_.new_a7("#")
		, Bry_end_mult = Bry_.new_a7("*/"), Bry_end_nl = Bry_.new_a7("\n");

	private byte comment_tid;
	private byte[] comment_bgn;
	private byte[] comment_end;

	/** Selects the opening hook and the terminating sequence for comment_tid; other ids leave both null. */
	public Php_lxr_comment(byte comment_tid) {
		this.comment_tid = comment_tid;
		switch (comment_tid) {
			case Php_tkn_comment.Tid_mult:	comment_bgn = Bry_bgn_mult;		comment_end = Bry_end_mult;	break;
			case Php_tkn_comment.Tid_slash:	comment_bgn = Bry_bgn_slash;	comment_end = Bry_end_nl;	break;
			case Php_tkn_comment.Tid_hash:	comment_bgn = Bry_bgn_hash;		comment_end = Bry_end_nl;	break;
		}
	}

	@Override public int Lxr_tid() {return Php_lxr_.Tid_comment;}

	/** Adds the opening hook to the trie; flags both "/" and "#" as parser interrupts regardless of comment style. */
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(comment_bgn, this);
		parser_interrupts[Byte_ascii.Slash] = Php_parser_interrupt.Char;
		parser_interrupts[Byte_ascii.Hash] = Php_parser_interrupt.Char;
	}

	public byte Comment_tid() {return comment_tid;}
	public byte[] Comment_bgn() {return comment_bgn;}
	public byte[] Comment_end() {return comment_end;}

	/**
	 * Emits a comment token from bgn through the terminating sequence and returns the position after it.
	 * When no terminator exists, a Dangling_comment warning is sent to the token worker and the rest of
	 * the source is treated as the comment.
	 */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		// Search from cur, not bgn: starting at bgn lets the "*" of the opening "/*" pair with a following "/",
		// so that "/*/ text */" would be closed after three bytes. cur is expected to sit just past the opening hook.
		int end = Bry_find_.Find_fwd(src, comment_end, cur);
		if (end == Bry_find_.Not_found) {
			tkn_wkr.Msg_many(src, bgn, cur, Php_lxr_comment.Dangling_comment, comment_tid, comment_end);
			cur = src_len;	// NOTE: terminating sequence not found; assume rest of src is comment
		}
		else
			cur = end + comment_end.length;
		tkn_wkr.Process(tkn_factory.Comment(bgn, cur, comment_tid));
		return cur;
	}
}
/** Lexer hooked on "$"; emits a variable token covering the "$" and the name bytes after it. */
class Php_lxr_var extends Php_lxr_base {
	private static final byte[] Bry_var = Bry_.new_a7("$");

	@Override public int Lxr_tid() {return Php_lxr_.Tid_var;}

	/** Adds the "$" hook to the trie and flags "$" as a parser interrupt. */
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(Bry_var, this);
		parser_interrupts[Byte_ascii.Dollar] = Php_parser_interrupt.Char;
	}

	/**
	 * Advances over ASCII digits, ASCII letters and "_" starting at cur, then emits a variable token for
	 * bgn..end-of-name. Always succeeds; returns the position after the name (cur itself when no name byte follows).
	 */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		int pos = cur;
		while (pos != src_len && Is_name_byte(src[pos]))
			++pos;
		tkn_wkr.Process(tkn_factory.Var(bgn, pos));
		return pos;
	}

	// range checks rely on the Byte_ascii digit / letter constants being the contiguous ASCII codes
	private static boolean Is_name_byte(byte b) {
		if (b >= Byte_ascii.Num_0 && b <= Byte_ascii.Num_9) return true;
		if (b >= Byte_ascii.Ltr_A && b <= Byte_ascii.Ltr_Z) return true;
		if (b >= Byte_ascii.Ltr_a && b <= Byte_ascii.Ltr_z) return true;
		return b == Byte_ascii.Underline;
	}
}
/** Lexer for a fixed symbol: the hook text itself is the whole token. */
class Php_lxr_sym extends Php_lxr_base {
	private byte[] hook;
	byte tkn_tid;

	/** hook_str is the symbol text to match; tkn_tid is the token type id passed to the factory. */
	public Php_lxr_sym(String hook_str, byte tkn_tid) {
		this.hook = Bry_.new_a7(hook_str);
		this.tkn_tid = tkn_tid;
	}

	@Override public int Lxr_tid() {return Php_lxr_.Tid_sym;}

	/** Adds the hook to the trie and flags its first byte as a parser interrupt. */
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(hook, this);
		parser_interrupts[hook[0]] = Php_parser_interrupt.Char;
	}

	/** Emits a generic token for bgn..cur and returns cur unchanged; no further bytes are consumed. */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		tkn_wkr.Process(tkn_factory.Generic(bgn, cur, tkn_tid));
		return cur;
	}
}
/** Lexer for quoted strings; each instance handles one quote byte (apostrophe or double quote). */
class Php_lxr_quote extends Php_lxr_base {
	public static final Gfo_msg_itm Dangling_quote = Gfo_msg_itm_.new_warn_(Php_parser.Log_nde, "dangling_quote", "dangling_quote");
	public static final byte[] Quote_bry_single = Bry_.new_a7("'"), Quote_bry_double = Bry_.new_a7("\"");

	private byte quote_tid;
	private byte[] quote_bry;

	/** quote_tid is the quote byte itself; any value other than apostrophe / double quote leaves the hook null. */
	public Php_lxr_quote(byte quote_tid) {
		this.quote_tid = quote_tid;
		if (quote_tid == Byte_ascii.Apos)
			quote_bry = Quote_bry_single;
		else if (quote_tid == Byte_ascii.Quote)
			quote_bry = Quote_bry_double;
	}

	@Override public int Lxr_tid() {return Php_lxr_.Tid_quote;}

	/** Adds the quote hook to the trie and flags the quote byte as a parser interrupt. */
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(quote_bry, this);
		parser_interrupts[quote_tid] = Php_parser_interrupt.Char;
	}

	public byte Quote_tid() {return quote_tid;}
	public byte[] Quote_bry() {return quote_bry;}

	/**
	 * Finds the closing quote, skipping quotes escaped by a backslash, and emits a quote token for
	 * bgn..position-after-closing-quote. When no closing quote exists, a Dangling_quote warning is sent
	 * to the token worker and the token runs to the end of the source.
	 */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		for (;;) {
			int end = Bry_find_.Find_fwd(src, quote_bry, cur);
			if (end == Bry_find_.Not_found) {
				tkn_wkr.Msg_many(src, bgn, cur, Php_lxr_quote.Dangling_quote, quote_tid, quote_bry);
				cur = src_len;	// NOTE: closing quote not found; assume rest of src belongs to the quote
				break;
			}
			if (!Is_escaped(end)) {
				cur = end + quote_bry.length;
				break;
			}
			cur = end + 1;	// escaped quote; resume search directly after it
		}
		tkn_wkr.Process(tkn_factory.Quote(bgn, cur, quote_tid));
		return cur;
	}

	// Counts the backslashes directly before quote_pos: an odd count escapes the quote (EX: \' and \\\'), an even count does not (EX: \\').
	private boolean Is_escaped(int quote_pos) {
		int idx = quote_pos - 1;
		if (src[idx] != Byte_ascii.Backslash) return false;
		int backslash_count = 0;
		while (idx > -1 && src[idx] == Byte_ascii.Backslash) {
			++backslash_count;
			--idx;
		}
		return backslash_count % 2 == 1;
	}
}
/** Lexer for a fixed keyword; the match only counts when the byte after the keyword cannot continue a word. */
class Php_lxr_keyword extends Php_lxr_base {
	private byte[] hook;
	byte tkn_tid;

	/** hook_str is the keyword text to match; tkn_tid is the token type id passed to the factory. */
	public Php_lxr_keyword(String hook_str, byte tkn_tid) {
		this.hook = Bry_.new_a7(hook_str);
		this.tkn_tid = tkn_tid;
	}

	@Override public int Lxr_tid() {return Php_lxr_.Tid_keyword;}

	/** Adds the keyword to the trie; unlike the symbol lexer, no parser interrupt is flagged. */
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		trie.Add_obj(hook, this);
	}

	/**
	 * Emits a generic token for bgn..cur when the keyword is followed by end-of-source or a word-ending byte.
	 * Returns Php_parser.NotFound otherwise; EX: 'nulla'; 'null0'.
	 */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		boolean more_src = cur < src_len;
		if (more_src && !Is_word_end(src[cur]))
			return Php_parser.NotFound;	// num, ltr, underline or extended utf8 byte follows; treat keyword as false match
		tkn_wkr.Process(tkn_factory.Generic(bgn, cur, tkn_tid));
		return cur;
	}

	// Bytes that may directly follow a keyword; EX: 'null '; 'null='; etc.
	// NOTE(review): ":" is not listed, so 'true:' is rejected as a false match; confirm this is intentional.
	private static boolean Is_word_end(byte b) {
		switch (b) {
			case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr:
			case Byte_ascii.Hash: case Byte_ascii.Slash:
			case Byte_ascii.Quote: case Byte_ascii.Apos:
			case Byte_ascii.Bang: case Byte_ascii.Dollar: case Byte_ascii.Percent: case Byte_ascii.Amp:
			case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star: case Byte_ascii.Plus:
			case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Semic:
			case Byte_ascii.Lt: case Byte_ascii.Eq: case Byte_ascii.Gt: case Byte_ascii.Question: case Byte_ascii.At:
			case Byte_ascii.Brack_bgn: case Byte_ascii.Backslash: case Byte_ascii.Brack_end: case Byte_ascii.Pow: case Byte_ascii.Tick:
			case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde:
				return true;
			default:
				return false;
		}
	}
}
/** Lexer hooked on the ASCII digits; emits a number token covering a run of digits. */
class Php_lxr_num extends Php_lxr_base {
	// Report the number type id; this previously returned Php_lxr_.Tid_keyword, which made number lexers indistinguishable from keyword lexers.
	@Override public int Lxr_tid() {return Php_lxr_.Tid_num;}

	/** Adds each digit "0".."9" to the trie as a one-byte hook; no parser interrupt is flagged. */
	@Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {
		for (int i = 0; i < 10; i++)
			trie.Add_obj(new byte[] {(byte)(i + Byte_ascii.Num_0)}, this);
	}

	/**
	 * Advances over the ASCII digits starting at cur and emits a number token for bgn..end-of-run.
	 * Always succeeds; returns the position after the last digit. Only digits are consumed, so signs,
	 * decimal points and exponents are not part of the token.
	 */
	@Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) {
		while (cur != src_len) {
			byte b = src[cur];
			if (b < Byte_ascii.Num_0 || b > Byte_ascii.Num_9) break;	// first non-digit ends the number
			++cur;
		}
		tkn_wkr.Process(tkn_factory.Num(bgn, cur));
		return cur;
	}
}