1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-09-28 14:30:51 +00:00

Graph: Add stripComments and removeTrailingCommas

This commit is contained in:
gnosygnu 2017-09-24 17:02:32 -04:00
parent e9381c9b38
commit 209601744e
3 changed files with 175 additions and 1 deletions

View File

@ -25,8 +25,14 @@ public class Graph_xnde implements Xox_xnde {
ctx.Para().Process_block__xnde(xnde.Tag(), Xop_xnde_tag.Block_end);
}
// Writes the graph node to bfr as: Html__div_bgn + cleaned JSON + Html__div_end.
// The JSON is the span of src between the end of the open tag and the start of the close tag,
// passed through Json_fmtr.clean before being written.
// Only the cleaned bytes are emitted; writing the raw span as well would duplicate the payload
// and put the uncleaned text back into the output.
public void Xtn_write(Bry_bfr bfr, Xoae_app app, Xop_ctx ctx, Xoh_html_wtr html_wtr, Xoh_wtr_ctx hctx, Xoae_page wpg, Xop_xnde_tkn xnde, byte[] src) {
	// cleanup json
	byte[] json = Bry_.Mid(src, xnde.Tag_open_end(), xnde.Tag_close_bgn());
	Bry_bfr tmp_bfr = Bry_bfr_.New();
	json = Json_fmtr.clean(tmp_bfr, json);

	// add to bfr
	bfr.Add(Html__div_bgn);
	bfr.Add(json);
	bfr.Add(Html__div_end);
}
// Class-wide log worker; starts out as Xop_log_basic_wkr.Null and, being public and non-final, can be reassigned by other code.
// NOTE(review): Null is presumably a do-nothing logger -- confirm against Xop_log_basic_wkr.
public static Xop_log_basic_wkr Log_wkr = Xop_log_basic_wkr.Null;

View File

@ -0,0 +1,98 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.graphs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
// Lenient JSON pre-processor: strips "//" and "/* */" comments and removes trailing commas
// (a comma whose next non-whitespace byte is "]" or "}"). It is a strip filter, not a validating parser.
public class Json_fmtr {
	// Returns a copy of json with comments and trailing commas removed.
	//   tmp_bfr: scratch buffer that receives the kept chunks; the result is taken from it via To_bry_and_clear
	//   json:    raw bytes to clean; must not be null
	// Bytes inside double-quoted strings are never modified.
	// Known limitation: the trailing-comma check only skips whitespace, so a comma followed by a comment
	// and then a closing bracket is kept.
	public static byte[] clean(Bry_bfr tmp_bfr, byte[] json) {
		int maxLen = json.length;
		int mark = 0;            // start of the pending chunk that has not yet been copied to tmp_bfr
		int commentBgn = -1;     // index of the "/" that opened the current comment
		boolean inString = false;
		boolean inComment = false;
		boolean multiline = false;
		for (int idx = 0; idx < maxLen; idx++) {
			switch (json[idx]) {
				case Byte_ascii.Quote: {
					// A quote is escaped only when preceded by an ODD number of backslashes;
					// checking just one byte back would misread "a\\" (escaped backslash, then closing quote).
					if (!inComment && !Is_escaped(json, idx)) {
						// Either started or ended a String
						inString = !inString;
					}
					break;
				}
				case Byte_ascii.Slash: {
					if (inString) {
						continue;
					}
					byte lookAhead = (idx + 1 < maxLen) ? json[idx + 1] : Byte_ascii.Null;
					byte lookBehind = (idx - 1 >= 0) ? json[idx - 1] : Byte_ascii.Null;
					if (	!inComment
						&&	(lookAhead == Byte_ascii.Slash || lookAhead == Byte_ascii.Star)
						) {
						// Transition into a comment
						// Add characters seen to buffer
						tmp_bfr.Add_mid(json, mark, idx);
						commentBgn = idx;
						// Consume the look ahead character
						idx++;
						// Track state
						inComment = true;
						multiline = lookAhead == Byte_ascii.Star;
					}
					else if (	multiline
						&&	lookBehind == Byte_ascii.Star
						&&	idx - commentBgn >= 3	// the closing "*" must not be the "*" of the opener; else "/*/" would count as a full comment
						) {
						// Found the end of the current comment
						mark = idx + 1;
						inComment = false;
						multiline = false;
					}
					break;
				}
				case Byte_ascii.Nl:
					if (inComment && !multiline) {
						// Found the end of the current comment
						mark = idx + 1;
						inComment = false;
					}
					break;
				case Byte_ascii.Comma: { // remove trailing commas of the form {a,}; note that FormatJson.php does this in a separate regex call; '/,([ \t]*[}\]][^"\r\n]*([\r\n]|$)|[ \t]*[\r\n][ \t\r\n]*[}\]])/'
					if (inComment || inString) continue;
					int peek_next = Bry_find_.Find_fwd_while_ws(json, idx + 1, maxLen);
					if (	peek_next != maxLen
						&&	(	json[peek_next] == Byte_ascii.Brack_end
							||	json[peek_next] == Byte_ascii.Curly_end)
						) {
						// Add characters seen to buffer
						tmp_bfr.Add_mid(json, mark, idx);
						// position after comma
						mark = idx + 1;
					}
					break;
				}
			}
		}
		if (inComment) {
			// Comment ends with input
			// Technically we should check to ensure that we aren't in
			// a multiline comment that hasn't been properly ended, but this
			// is a strip filter, not a validating parser.
			mark = maxLen;
		}
		// Add final chunk to buffer before returning
		tmp_bfr.Add_mid(json, mark, maxLen);
		return tmp_bfr.To_bry_and_clear();
	}
	// Returns true when the byte at pos is preceded by an odd-length run of backslashes,
	// i.e. the last backslash of the run escapes it rather than being escaped itself.
	private static boolean Is_escaped(byte[] src, int pos) {
		int backslashes = 0;
		for (int i = pos - 1; i >= 0 && src[i] == Byte_ascii.Backslash; i--) {
			backslashes++;
		}
		return (backslashes & 1) == 1;
	}
}

View File

@ -0,0 +1,70 @@
/*
XOWA: the XOWA Offline Wiki Application
Copyright (C) 2012-2017 gnosygnu@gmail.com
XOWA is licensed under the terms of the General Public License (GPL) Version 3,
or alternatively under the terms of the Apache License Version 2.0.
You may use XOWA according to either of these licenses as is most appropriate
for your project on a case-by-case basis.
The terms of each license can be found in the source code repository:
GPLv3 License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-GPLv3.txt
Apache License: https://github.com/gnosygnu/xowa/blob/master/LICENSE-APACHE2.txt
*/
package gplx.xowa.xtns.graphs; import gplx.*; import gplx.xowa.*; import gplx.xowa.xtns.*;
import org.junit.*; import gplx.core.tests.*;
// Tests for Json_fmtr.clean: comment stripping and trailing-comma removal.
// The single-argument Test_clean asserts that the input comes back unchanged.
public class Json_fmtr_tst {
	private final Json_fmtr_fxt fxt = new Json_fmtr_fxt();
	@Test public void Comments() {
		// basic: // \n
		fxt.Test_clean("a//b\nc", "ac");
		// basic: /* */
		fxt.Test_clean("a/*b*/c", "ac");
		// unterminated: /* */
		fxt.Test_clean("a/*bc", "a");
		// ignore inside quote: // \n
		fxt.Test_clean("\"a//b\nc\"");
		// ignore inside quote: /* */
		fxt.Test_clean("\"a/*b*/c\"");
		// ignore quotes inside quotes else inside-quote turns off quotes and comment will be stripped
		fxt.Test_clean("\"a\\\"/*b*/c\"");
	}
	@Test public void Trailing_commas() {
		// remove: ]
		fxt.Test_clean("[a,]", "[a]");
		// remove: }
		fxt.Test_clean("{a,}", "{a}");
		// remove: ws
		fxt.Test_clean("[a \t,\t ]", "[a \t\t ]");
		// ignore: normal
		fxt.Test_clean("a,b");
		// ignore: String; the trailing comma sits inside quotes, so it must be kept
		fxt.Test_clean("\"[b,]\"");
		// ignore: comment block: /* */
		fxt.Test_clean("/*[b,]*/", "");
		// ignore: comment block: /* (unterminated; everything up to end of input is dropped)
		fxt.Test_clean("/*[b,]*", "");
		// ignore: comment block: // \n
		fxt.Test_clean("//[b,]\n", "");
	}
}
// Test fixture for Json_fmtr: runs clean on a String and compares the resulting bytes.
class Json_fmtr_fxt {
	// scratch buffer handed to Json_fmtr.clean on every call
	private final Bry_bfr tmp_bfr = Bry_bfr_.New();

	// Asserts that cleaning src yields src itself (nothing is stripped).
	public void Test_clean(String src) {
		Test_clean(src, src);
	}

	// Asserts that cleaning src yields expd; both are converted with Bry_.new_u8.
	public void Test_clean(String src, String expd) {
		byte[] src_bry = Bry_.new_u8(src);
		byte[] cleaned = Json_fmtr.clean(tmp_bfr, src_bry);
		byte[] expd_bry = Bry_.new_u8(expd);
		Gftest.Eq__bry(expd_bry, cleaned);
	}
}