1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2026-03-02 03:49:30 +00:00

Mw_parse: Add clean_url and associated functions to sanitizer

This commit is contained in:
gnosygnu
2017-01-30 09:51:17 -05:00
parent c77e8a4374
commit 9a5c70b506
15 changed files with 914 additions and 265 deletions

View File

@@ -62,6 +62,12 @@ public class Gfo_url_encoder_ {
return new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.N)
.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
}
public static Gfo_url_encoder_mkr New__php_urlencode() {
	// Builder intended to mirror php's urlencode; see http://php.net/manual/en/function.urlencode.php
	// The php manual describes urlencode as:
	// "Returns a String in which all non-alphanumeric characters except -_. have been replaced with a percent (%) sign followed by two hex digits and spaces encoded as plus (+) signs"
	// NOTE(review): Init_common(Bool_.Y) is presumably what registers the alphanumerics and "-_." as pass-through bytes -- confirm against Gfo_url_encoder_mkr
	Gfo_url_encoder_mkr mkr = new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y);

	// space is the one byte given a dedicated substitute: it is mapped to "+"
	return mkr.Init__diff__one(Byte_ascii.Space, Byte_ascii.Plus);
}
private static Gfo_url_encoder_mkr New__http_url_ttl() {
	// Builder for the Http_url_ttl encoder: initialized with Percent, then given the common setup with the flag set to Bool_.Y.
	// NOTE(review): the meaning of the Init_common flag is not visible here -- confirm against Gfo_url_encoder_mkr
	Gfo_url_encoder_mkr rv = new Gfo_url_encoder_mkr().Init(Byte_ascii.Percent).Init_common(Bool_.Y);
	return rv;
}
@@ -103,5 +109,6 @@ public class Gfo_url_encoder_ {
, Http_url = Gfo_url_encoder_.New__http_url().Make()
, Http_url_ttl = Gfo_url_encoder_.New__http_url_ttl().Make()
, Mw_ttl = Gfo_url_encoder_.New__mw_ttl().Make()
, Php_urlencode = Gfo_url_encoder_.New__php_urlencode().Make()
;
}

View File

@@ -16,7 +16,7 @@ You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package gplx.langs.phps.utls; import gplx.*; import gplx.langs.*; import gplx.langs.phps.*;
import gplx.core.btries.*;
import gplx.core.btries.*; import gplx.core.brys.*;
import gplx.core.primitives.*;
public class Php_preg_ {
public static byte[][] Split(Int_list list, byte[] src, int src_bgn, int src_end, byte[] dlm, boolean extend) {
@@ -72,4 +72,41 @@ public class Php_preg_ {
}
return null;
}
// Replaces every span of bry's source that find_trie matches with repl_bry.
//
// Scans bry.src from bry.src_bgn up to (but not including) bry.src_end. At each position the trie is asked
// for a match; on a hit, the unmatched bytes since the previous hit plus repl_bry are appended to tmp and
// scanning resumes at trv.Pos(). On a miss, the cursor advances by the length reported by
// Utf8_.Len_of_char_by_1st_byte for the current byte.
//
// bry       : holder of src / src_bgn / src_end; only updated (via Set_by_bfr) when at least one match occurred
// tmp       : scratch buffer that receives the rebuilt bytes; NOTE(review): presumably expected to be empty on entry -- confirm with callers
// find_trie : trie of byte sequences to search for
// trv       : trie result holder; read via Pos() after a successful match to get the resume position
// repl_bry  : bytes written in place of each match
public static void Replace(Bry_tmp bry, Bry_bfr tmp, Btrie_slim_mgr find_trie, Btrie_rv trv, byte[] repl_bry) {
	byte[] src = bry.src;
	int src_end = bry.src_end;
	int cur = bry.src_bgn;
	int prv = cur;          // start of the unmatched run that has not been copied to tmp yet
	boolean dirty = false;  // true once at least one match has been replaced

	// bound with "<" instead of testing "cur == src_end": cur can advance by more than 1 (multi-byte length from the 1st byte),
	// so a truncated multi-byte sequence at the end of the span would step over src_end and keep reading past it
	while (cur < src_end) {
		byte b = src[cur];
		Object o = find_trie.Match_at_w_b0(trv, b, src, cur, src_end);
		if (o == null) {
			// no match at this position: skip the whole char
			cur += gplx.core.intls.Utf8_.Len_of_char_by_1st_byte(b);
			continue;
		}

		// match: flush the pending unmatched bytes, emit the replacement, and resume after the match
		dirty = true;
		tmp.Add_mid(src, prv, cur);
		tmp.Add(repl_bry);
		cur = trv.Pos();
		prv = cur;
	}

	// only touch bry if something was replaced; otherwise the original src is left as-is
	if (dirty) {
		tmp.Add_mid(src, prv, src_end); // trailing unmatched bytes after the last match
		bry.Set_by_bfr(tmp);
	}
}
}

View File

@@ -44,7 +44,7 @@ public class Php_str_ {
if (max == -1) max = src_len;
int rv = 0;
for (int i = bgn; i < src_len; i++) {
if (find[src[i]] && rv < max)
if (find[src[i] & 0xFF] && rv < max) // PATCH.JAVA:need to convert to unsigned byte
rv++;
else
break;
@@ -94,7 +94,7 @@ public class Php_str_ {
if (max == -1) max = Int_.Max_value;
int rv = 0;
for (int i = bgn - 1; i > -1; i--) {
if (find[src[i]] && rv < max)
if (find[src[i] & 0xFF] && rv < max) // PATCH.JAVA:need to convert to unsigned byte; mask the byte value (not the index i) so bytes >= 0x80 index find at 128..255 instead of a negative slot
rv++;
else
break;