/* XOWA: the XOWA Offline Wiki Application Copyright (C) 2012 gnosygnu@gmail.com This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see . */ package gplx.langs.phps; import gplx.*; import gplx.langs.*; import gplx.core.btries.*; import gplx.core.log_msgs.*; interface Php_lxr { int Lxr_tid(); void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts); void Lxr_bgn(byte[] src, int src_len, Php_tkn_wkr tkn_wkr, Php_tkn_factory tkn_factory); int Lxr_make(Php_ctx ctx, int bgn, int cur); } class Php_lxr_ { public static final byte Tid_declaration = 1, Tid_ws = 2, Tid_comment = 3, Tid_var = 4, Tid_sym = 5, Tid_keyword = 6, Tid_num = 7, Tid_quote = 8; } abstract class Php_lxr_base implements Php_lxr { protected byte[] src; protected int src_len; protected Php_tkn_wkr tkn_wkr; protected Php_tkn_factory tkn_factory; public abstract int Lxr_tid(); public abstract void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts); public void Lxr_bgn(byte[] src, int src_len, Php_tkn_wkr tkn_wkr, Php_tkn_factory tkn_factory) {this.src = src; this.src_len = src_len; this.tkn_wkr = tkn_wkr; this.tkn_factory = tkn_factory;} public abstract int Lxr_make(Php_ctx ctx, int bgn, int cur); } class Php_lxr_declaration extends Php_lxr_base { @Override public int Lxr_tid() {return Php_lxr_.Tid_declaration;} @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) { trie.Add_obj(Bry_declaration, this); parser_interrupts[Byte_ascii.Lt] = Php_parser_interrupt.Char; } @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) { boolean loop = true; boolean ws_found = false; while (loop) { if (cur == src_len) break; byte b = src[cur]; switch (b) { case Byte_ascii.Nl: case Byte_ascii.Cr: ws_found = true; ++cur; break; default: if (ws_found) loop = false; else return Php_parser.NotFound; break; } } tkn_wkr.Process(tkn_factory.Declaration(bgn, cur)); return cur; } private static final byte[] Bry_declaration = Bry_.new_a7(" -1; i--) { // count preceding backslashes if (src[i] == Byte_ascii.Backslash) ++backslash_count; else break; } if (backslash_count % 2 == 1) { // odd backslashes; this means that ' is escaped; EX: \' and \\\'; note that even backslashes means not escaped; EX: \\' end_quote = false; cur = end + 1; } } if (end_quote) { cur = end + quote_bry.length; break; } } } tkn_wkr.Process(tkn_factory.Quote(bgn, cur, quote_tid)); return cur; } public static final Gfo_msg_itm Dangling_quote = Gfo_msg_itm_.new_warn_(Php_parser.Log_nde, "dangling_quote", "dangling_quote"); public static final byte[] Quote_bry_single = Bry_.new_a7("'"), Quote_bry_double = Bry_.new_a7("\""); } class Php_lxr_keyword extends Php_lxr_base { public Php_lxr_keyword(String hook_str, byte tkn_tid) {this.hook = Bry_.new_a7(hook_str); this.tkn_tid = tkn_tid;} private byte[] hook; byte tkn_tid; @Override public int Lxr_tid() {return Php_lxr_.Tid_keyword;} @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) {trie.Add_obj(hook, this);} @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) { if (cur < src_len) { byte next_byte = src[cur]; switch (next_byte) { // valid characters for end of word; EX: 'null '; 'null='; etc.. case Byte_ascii.Space: case Byte_ascii.Tab: case Byte_ascii.Nl: case Byte_ascii.Cr: case Byte_ascii.Hash: case Byte_ascii.Slash: case Byte_ascii.Quote: case Byte_ascii.Apos: case Byte_ascii.Bang: case Byte_ascii.Dollar: case Byte_ascii.Percent: case Byte_ascii.Amp: case Byte_ascii.Paren_bgn: case Byte_ascii.Paren_end: case Byte_ascii.Star: case Byte_ascii.Plus: case Byte_ascii.Comma: case Byte_ascii.Dash: case Byte_ascii.Dot: case Byte_ascii.Semic: case Byte_ascii.Lt: case Byte_ascii.Eq: case Byte_ascii.Gt: case Byte_ascii.Question: case Byte_ascii.At: case Byte_ascii.Brack_bgn: case Byte_ascii.Backslash: case Byte_ascii.Brack_end: case Byte_ascii.Pow: case Byte_ascii.Tick: case Byte_ascii.Curly_bgn: case Byte_ascii.Pipe: case Byte_ascii.Curly_end: case Byte_ascii.Tilde: break; default: // num,ltr or extended utf8 character sequence; treat keyword as false match; EX: 'nulla'; 'null0' return Php_parser.NotFound; } } tkn_wkr.Process(tkn_factory.Generic(bgn, cur, tkn_tid)); return cur; } } class Php_lxr_num extends Php_lxr_base { @Override public int Lxr_tid() {return Php_lxr_.Tid_keyword;} @Override public void Lxr_ini(Btrie_slim_mgr trie, Php_parser_interrupt[] parser_interrupts) { for (int i = 0; i < 10; i++) trie.Add_obj(new byte[] {(byte)(i + Byte_ascii.Num_0)}, this); } @Override public int Lxr_make(Php_ctx ctx, int bgn, int cur) { boolean loop = true; while (loop) { if (cur == src_len) break; byte b = src[cur]; switch (b) { case Byte_ascii.Num_0: case Byte_ascii.Num_1: case Byte_ascii.Num_2: case Byte_ascii.Num_3: case Byte_ascii.Num_4: case Byte_ascii.Num_5: case Byte_ascii.Num_6: case Byte_ascii.Num_7: case Byte_ascii.Num_8: case Byte_ascii.Num_9: ++cur; break; default: loop = false; break; } } tkn_wkr.Process(tkn_factory.Num(bgn, cur)); return cur; } }