diff --git a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_mgr.java b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_mgr.java
index a015e7ad6..0b66e2fd5 100644
--- a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_mgr.java
+++ b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_mgr.java
@@ -29,10 +29,13 @@ class Fs_root_mgr {
}
public Fs_root_wkr Wkr() {return wkr;}
public boolean Find_file(Xof_fsdb_itm fsdb_itm) {
+ // get orig; exit if not found in dir
byte[] orig_ttl = fsdb_itm.Orig_ttl();
Orig_fil_row orig_itm = wkr.Get_by_ttl(orig_ttl);
if (orig_itm == Orig_fil_row.Null) return false;
- fsdb_itm.Change_repo(Xof_repo_tid_.Tid__local, wiki.Domain_bry());
+
+ // update orig
+ fsdb_itm.Init_at_orig(Xof_repo_tid_.Tid__local, wiki.Domain_bry(), orig_ttl, Xof_ext_.new_by_id_(orig_itm.Ext_id()), orig_itm.W(), orig_itm.H(), null);
Io_url orig_url = orig_itm.Url();
if (fsdb_itm.File_is_orig()) {
fsdb_itm.Html_size_(orig_itm.W(), orig_itm.H());
diff --git a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr.java b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr.java
index c040d4e80..ac9937ee7 100644
--- a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr.java
+++ b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr.java
@@ -64,19 +64,57 @@ class Fs_root_wkr {
}
private Orig_fil_mgr Init_fs_fil_mgr() { // NOTE: need to read entire dir, b/c ttl may be "A.png", but won't know which subdir
Orig_fil_mgr rv = new Orig_fil_mgr();
- Io_url[] fils = Io_mgr.Instance.QueryDir_args(orig_dir).Recur_(recurse).ExecAsUrlAry();
- int fils_len = fils.length;
- for (int i = 0; i < fils_len; i++) {
- Io_url fil = fils[i];
- byte[] fil_name_bry = To_fil_bry(fil);
- Orig_fil_row fil_itm = rv.Get_by_ttl(fil_name_bry);
- if (fil_itm != Orig_fil_row.Null) {
- Gfo_usr_dlg_.Instance.Warn_many("", "", "file already exists: cur=~{0} new=~{1}", fil_itm.Url().Raw(), fil.Raw());
- continue;
+ Io_url orig_changes_log = orig_dir.GenSubFil("xowa.orig.changes.log"); // log of files renamed by this scan; one "change_type|old_path" line is appended per rename
+
+ // loop over the entries directly under orig_dir; DirInclude_(true) presumably makes the query return subdirs as well as files
+ Io_url[] sub_dirs = Io_mgr.Instance.QueryDir_args(orig_dir).DirInclude_(true).ExecAsUrlAry();
+ int sub_dirs_len = sub_dirs.length;
+ for (int i = 0; i < sub_dirs_len; i++) {
+ Io_url sub_dir = sub_dirs[i];
+ if (String_.Len(sub_dir.NameOnly()) != 1) continue; // only scan entries whose name is exactly 1 char; EX: "/orig_dir/a/" is scanned, "/orig_dir/math/" is skipped
+
+ // loop over all fils in that 1-char dir (recursion is controlled by the recurse field)
+ Io_url[] fils = Io_mgr.Instance.QueryDir_args(sub_dir).Recur_(recurse).ExecAsUrlAry();
+ int fils_len = fils.length;
+ for (int j = 0; j < fils_len; j++) {
+ Io_url fil = fils[j];
+ byte[] fil_name_bry = Bry_.new_u8(fil.NameAndExt());
+
+ String orig_change_type = null;
+ // if file name has a space, replace it with an underscore and remember that a rename is needed
+ if (Bry_.Has(fil_name_bry, Byte_ascii.Space)) {
+ fil_name_bry = Bry_.Replace(fil_name_bry, Byte_ascii.Space, Byte_ascii.Underline);
+ orig_change_type = "space_to_underscore";
+ }
+
+ // TOMBSTONE: code below had unit_test, but not sure if needed; file's title should be title-cased, but it's possible to be lower-case for "File:" namespaces with case_match; DATE:2017-02-01
+ // if url's first char is lowercase, uppercase it;
+ // byte b_0 = fil_name_bry[0];
+ // if (b_0 >= Byte_ascii.Ltr_a && b_0 <= Byte_ascii.Ltr_z) {
+ // fil_name_bry = Bry_.Ucase__1st(fil_name_bry);
+ // orig_change_type = "ucase_1st";
+ // }
+
+ // if name changed above, rename the file on disk and append a line to the changes log
+ if (orig_change_type != null) {
+ Io_url new_url = fil.GenNewNameAndExt(String_.new_u8(fil_name_bry));
+ Io_mgr.Instance.MoveFil_args(fil, new_url, true).Exec(); // NOTE(review): 3rd arg is presumably an overwrite flag; if so, a pre-existing file at new_url is replaced before the already-seen check below; confirm
+ Io_mgr.Instance.AppendFilStr(orig_changes_log, orig_change_type + "|" + fil.Raw() + "\n"); // logs the change type and the old (pre-rename) path only
+ fil = new_url;
+ }
+
+ // if a file with this name was already added, warn and keep the first one
+ Orig_fil_row fil_itm = rv.Get_by_ttl(fil_name_bry);
+ if (fil_itm != Orig_fil_row.Null) {
+ Gfo_usr_dlg_.Instance.Warn_many("", "", "file already exists: cur=~{0} new=~{1}", fil_itm.Url().Raw(), fil.Raw());
+ continue;
+ }
+
+ // build a row from the url, name and ext id, and add it to the returned mgr
+ Xof_ext ext = Xof_ext_.new_by_ttl_(fil_name_bry);
+ fil_itm = Orig_fil_row.New_by_fs(fil, fil_name_bry, ext.Id());
+ rv.Add(fil_itm);
}
- Xof_ext ext = Xof_ext_.new_by_ttl_(fil_name_bry);
- fil_itm = Orig_fil_row.New_by_fs(fil, fil_name_bry, ext.Id());
- rv.Add(fil_itm);
}
return rv;
}
@@ -106,10 +144,4 @@ class Fs_root_wkr {
}
private static final String Cfg_grp_root_dir = "xowa.root_dir", Cfg_key_fil_id_next = "fil_id_next";
public static final String Url_orig_dir = "~{orig_dir}";
- public static byte[] To_fil_bry(Io_url url) {
- byte[] rv = Bry_.new_u8(url.NameAndExt());
- rv = Bry_.Replace(rv, Byte_ascii.Space, Byte_ascii.Underline);
- rv = Bry_.Ucase__1st(rv);
- return rv;
- }
}
diff --git a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr_tst.java b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr_tst.java
index 2619f6cee..57f163571 100644
--- a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr_tst.java
+++ b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr_tst.java
@@ -30,11 +30,6 @@ public class Fs_root_wkr_tst {
fxt.Init_fs("mem/dir/sub1/A1.png", 200, 100);
fxt.Test_get("A1.png", fxt.itm_().Url_("mem/dir/sub1/A1.png").Size_(200, 100));
}
- @Test public void Xto_fil_bry() {
- fxt.Test_xto_fil_bry("/dir/A.png" , "A.png"); // basic
- fxt.Test_xto_fil_bry("/dir/A b.png" , "A_b.png"); // lower
- fxt.Test_xto_fil_bry("/dir/a.png" , "A.png"); // title
- }
}
class Fs_root_wkr_fxt {
private Fs_root_wkr root_dir = new Fs_root_wkr();
@@ -61,10 +56,6 @@ class Fs_root_wkr_fxt {
gplx.gfui.SizeAdp img_size = gplx.gfui.SizeAdp_.new_(w, h);
Io_mgr.Instance.SaveFilStr(url, img_size.To_str());
}
- public void Test_xto_fil_bry(String url_str, String expd) {
- Io_url url = Io_url_.new_fil_(url_str);
- Tfds.Eq(expd, String_.new_u8(Fs_root_wkr.To_fil_bry(url)));
- }
}
class Orig_fil_mok {
private int uid = -1;
diff --git a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
index 5ea23fc3f..f268f9ef8 100644
--- a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
+++ b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java
@@ -19,13 +19,21 @@ package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import g
import gplx.core.btries.*; import gplx.core.primitives.*;
import gplx.langs.phps.utls.*;
import gplx.xowa.mws.htmls.*;
-// TODO.XO: add proto-rel; EX: [//a.org b]
+/* TODO.XO
+ * P8: url = sanitizer.Clean_url(url);
+ * P8: The characters '<' and '>' (which were escaped by removeHTMLtags()) should not be included in URLs, per RFC 2396
+ * P7: add proto-rel; EX: [//a.org b]
+ * P7: list( $dtrail, $trail ) = Linker::splitTrail( $trail );
+ * P3: $langObj->formatNum( ++$this->mAutonumber );
+ * P2: $this->getConverterLanguage()->markNoConversion( $text );
+*/
public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
private final Bry_bfr tmp;
private Btrie_slim_mgr protocol_trie; private final Btrie_rv trv = new Btrie_rv();
private int autonumber;
private final Xomw_parser parser;
private final Xomw_linker linker;
+ // private final Xomw_sanitizer sanitizer;
private final Xomw_atr_mgr attribs = new Xomw_atr_mgr();
private Xomw_regex_url regex_url;
private Xomw_regex_space regex_space;
@@ -33,12 +41,14 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
this.parser = parser;
this.tmp = parser.Tmp();
this.linker = parser.Linker();
+ // this.sanitizer = parser.Sanitizer();
}
public void Init_by_wiki(Btrie_slim_mgr protocol_trie, Xomw_regex_url regex_url, Xomw_regex_space regex_space) {
this.protocol_trie = protocol_trie;
this.regex_url = regex_url;
this.regex_space = regex_space;
}
+ // XO.MW:SYNC:1.29; DATE:2017-02-01
public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
// XO.PBFR
Bry_bfr src_bfr = pbfr.Src();
@@ -150,12 +160,10 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
// If the link text is an image URL, replace it with an <img> tag
// This happened by accident in the original parser, but some people used it extensively
- // TODO.XO:
- //$img = $this->maybeMakeExternalImage( $text );
- //if ( $img !== false ) {
- // $text = $img;
- //}
- //
+ // XO.MW.UNSUPPORTED.NON-WMF: not supporting images from freeform url; (EX: "http://a.org/image.png" -> "<img src='http://a.org/image.png'/>"); haven't seen this used on WMF wikis
+ // $img = $this->maybeMakeExternalImage( $text );
+ // if ($img !== false) $text = $img;
+
//$dtrail = '';
// Set linktype for CSS - if URL==text, link is essentially free
@@ -181,7 +189,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
// $text = $this->getConverterLanguage()->markNoConversion( $text );
// TODO.XO:
- // $url = Sanitizer::cleanUrl( $url );
+ // url = sanitizer.Clean_url(url);
bfr.Add_mid(src, prv, lnke_bgn);
prv = cur;
@@ -191,6 +199,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
// This was changed in August 2004
linker.Make_external_link(bfr, Bry_.Mid(src, url_bgn, url_end), Bry_.Mid(src, text_bgn, text_end), Bool_.N, link_type, parser.Get_external_link_attribs(attribs), Bry_.Empty);
+ // XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
// Register link in the output Object.
// Replace unnecessary URL escape codes with the referenced character
// This prevents spammers from hiding links from the filters