diff --git a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_mgr.java b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_mgr.java index a015e7ad6..0b66e2fd5 100644 --- a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_mgr.java +++ b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_mgr.java @@ -29,10 +29,13 @@ class Fs_root_mgr { } public Fs_root_wkr Wkr() {return wkr;} public boolean Find_file(Xof_fsdb_itm fsdb_itm) { + // get orig; exit if not found in dir byte[] orig_ttl = fsdb_itm.Orig_ttl(); Orig_fil_row orig_itm = wkr.Get_by_ttl(orig_ttl); if (orig_itm == Orig_fil_row.Null) return false; - fsdb_itm.Change_repo(Xof_repo_tid_.Tid__local, wiki.Domain_bry()); + + // update orig + fsdb_itm.Init_at_orig(Xof_repo_tid_.Tid__local, wiki.Domain_bry(), orig_ttl, Xof_ext_.new_by_id_(orig_itm.Ext_id()), orig_itm.W(), orig_itm.H(), null); Io_url orig_url = orig_itm.Url(); if (fsdb_itm.File_is_orig()) { fsdb_itm.Html_size_(orig_itm.W(), orig_itm.H()); diff --git a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr.java b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr.java index c040d4e80..ac9937ee7 100644 --- a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr.java +++ b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr.java @@ -64,19 +64,57 @@ class Fs_root_wkr { } private Orig_fil_mgr Init_fs_fil_mgr() { // NOTE: need to read entire dir, b/c ttl may be "A.png", but won't know which subdir Orig_fil_mgr rv = new Orig_fil_mgr(); - Io_url[] fils = Io_mgr.Instance.QueryDir_args(orig_dir).Recur_(recurse).ExecAsUrlAry(); - int fils_len = fils.length; - for (int i = 0; i < fils_len; i++) { - Io_url fil = fils[i]; - byte[] fil_name_bry = To_fil_bry(fil); - Orig_fil_row fil_itm = rv.Get_by_ttl(fil_name_bry); - if (fil_itm != Orig_fil_row.Null) { - Gfo_usr_dlg_.Instance.Warn_many("", "", "file already exists: cur=~{0} new=~{1}", fil_itm.Url().Raw(), fil.Raw()); - continue; + Io_url orig_changes_log = orig_dir.GenSubFil("xowa.orig.changes.log"); + + // 
loop over all dirs in orig_dir + Io_url[] sub_dirs = Io_mgr.Instance.QueryDir_args(orig_dir).DirInclude_(true).ExecAsUrlAry(); + int sub_dirs_len = sub_dirs.length; + for (int i = 0; i < sub_dirs_len; i++) { + Io_url sub_dir = sub_dirs[i]; + if (String_.Len(sub_dir.NameOnly()) != 1) continue; // only look at subdirs with 1 char; EX: "/orig_dir/a/" vs "/orig_dir/math/" + + // loop over all fils in that 1-char dir + Io_url[] fils = Io_mgr.Instance.QueryDir_args(sub_dir).Recur_(recurse).ExecAsUrlAry(); + int fils_len = fils.length; + for (int j = 0; j < fils_len; j++) { + Io_url fil = fils[j]; + byte[] fil_name_bry = Bry_.new_u8(fil.NameAndExt()); + + String orig_change_type = null; + // if url has space, replace it with underscore + if (Bry_.Has(fil_name_bry, Byte_ascii.Space)) { + fil_name_bry = Bry_.Replace(fil_name_bry, Byte_ascii.Space, Byte_ascii.Underline); + orig_change_type = "space_to_underscore"; + } + + // TOMBSTONE: code below had unit_test, but not sure if needed; file's title should be title-cased, but it's possible to be lower-case for "File:" namespaces with case_match; DATE:2017-02-01 + // if url's first char is lowercase, uppercase it; + // byte b_0 = fil_name_bry[0]; + // if (b_0 >= Byte_ascii.Ltr_a && b_0 <= Byte_ascii.Ltr_z) { + // fil_name_bry = Bry_.Ucase__1st(fil_name_bry); + // orig_change_type = "ucase_1st"; + // } + + // if changed above, rename it and log it + if (orig_change_type != null) { + Io_url new_url = fil.GenNewNameAndExt(String_.new_u8(fil_name_bry)); + Io_mgr.Instance.MoveFil_args(fil, new_url, true).Exec(); + Io_mgr.Instance.AppendFilStr(orig_changes_log, orig_change_type + "|" + fil.Raw() + "\n"); + fil = new_url; + } + + // if file already seen, ignore it + Orig_fil_row fil_itm = rv.Get_by_ttl(fil_name_bry); + if (fil_itm != Orig_fil_row.Null) { + Gfo_usr_dlg_.Instance.Warn_many("", "", "file already exists: cur=~{0} new=~{1}", fil_itm.Url().Raw(), fil.Raw()); + continue; + } + + // add it to cache + Xof_ext ext = 
Xof_ext_.new_by_ttl_(fil_name_bry); + fil_itm = Orig_fil_row.New_by_fs(fil, fil_name_bry, ext.Id()); + rv.Add(fil_itm); } - Xof_ext ext = Xof_ext_.new_by_ttl_(fil_name_bry); - fil_itm = Orig_fil_row.New_by_fs(fil, fil_name_bry, ext.Id()); - rv.Add(fil_itm); } return rv; } @@ -106,10 +144,4 @@ class Fs_root_wkr { } private static final String Cfg_grp_root_dir = "xowa.root_dir", Cfg_key_fil_id_next = "fil_id_next"; public static final String Url_orig_dir = "~{orig_dir}"; - public static byte[] To_fil_bry(Io_url url) { - byte[] rv = Bry_.new_u8(url.NameAndExt()); - rv = Bry_.Replace(rv, Byte_ascii.Space, Byte_ascii.Underline); - rv = Bry_.Ucase__1st(rv); - return rv; - } } diff --git a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr_tst.java b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr_tst.java index 2619f6cee..57f163571 100644 --- a/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr_tst.java +++ b/400_xowa/src/gplx/xowa/files/fsdb/fs_roots/Fs_root_wkr_tst.java @@ -30,11 +30,6 @@ public class Fs_root_wkr_tst { fxt.Init_fs("mem/dir/sub1/A1.png", 200, 100); fxt.Test_get("A1.png", fxt.itm_().Url_("mem/dir/sub1/A1.png").Size_(200, 100)); } - @Test public void Xto_fil_bry() { - fxt.Test_xto_fil_bry("/dir/A.png" , "A.png"); // basic - fxt.Test_xto_fil_bry("/dir/A b.png" , "A_b.png"); // lower - fxt.Test_xto_fil_bry("/dir/a.png" , "A.png"); // title - } } class Fs_root_wkr_fxt { private Fs_root_wkr root_dir = new Fs_root_wkr(); @@ -61,10 +56,6 @@ class Fs_root_wkr_fxt { gplx.gfui.SizeAdp img_size = gplx.gfui.SizeAdp_.new_(w, h); Io_mgr.Instance.SaveFilStr(url, img_size.To_str()); } - public void Test_xto_fil_bry(String url_str, String expd) { - Io_url url = Io_url_.new_fil_(url_str); - Tfds.Eq(expd, String_.new_u8(Fs_root_wkr.To_fil_bry(url))); - } } class Orig_fil_mok { private int uid = -1; diff --git a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java index 5ea23fc3f..f268f9ef8 
100644 --- a/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java +++ b/400_xowa/src/gplx/xowa/mws/parsers/lnkes/Xomw_lnke_wkr.java @@ -19,13 +19,21 @@ package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import g import gplx.core.btries.*; import gplx.core.primitives.*; import gplx.langs.phps.utls.*; import gplx.xowa.mws.htmls.*; -// TODO.XO: add proto-rel; EX: [//a.org b] +/* TODO.XO + * P8: url = sanitizer.Clean_url(url); + * P8: The characters '<' and '>' (which were escaped by removeHTMLtags()) should not be included in URLs, per RFC 2396 + * P7: add proto-rel; EX: [//a.org b] + * P7: list( $dtrail, $trail ) = Linker::splitTrail( $trail ); + * P3: $langObj->formatNum( ++$this->mAutonumber ); + * P2: $this->getConverterLanguage()->markNoConversion( $text ); +*/ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls private final Bry_bfr tmp; private Btrie_slim_mgr protocol_trie; private final Btrie_rv trv = new Btrie_rv(); private int autonumber; private final Xomw_parser parser; private final Xomw_linker linker; + // private final Xomw_sanitizer sanitizer; private final Xomw_atr_mgr attribs = new Xomw_atr_mgr(); private Xomw_regex_url regex_url; private Xomw_regex_space regex_space; @@ -33,12 +41,14 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls this.parser = parser; this.tmp = parser.Tmp(); this.linker = parser.Linker(); + // this.sanitizer = parser.Sanitizer(); } public void Init_by_wiki(Btrie_slim_mgr protocol_trie, Xomw_regex_url regex_url, Xomw_regex_space regex_space) { this.protocol_trie = protocol_trie; this.regex_url = regex_url; this.regex_space = regex_space; } + // XO.MW:SYNC:1.29; DATE:2017-02-01 public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) { // XO.PBFR Bry_bfr src_bfr = pbfr.Src(); @@ -150,12 +160,10 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls // If the link text is an image URL, replace it with an <img> tag // This happened by accident in the original parser, but some people
used it extensively - // TODO.XO: - //$img = $this->maybeMakeExternalImage( $text ); - //if ( $img !== false ) { - // $text = $img; - //} - // + // XO.MW.UNSUPPORTED.NON-WMF: not supporting images from freeform url; (EX: "http://a.org/image.png" -> ""); haven't seen this used on WMF wikis + // $img = $this->maybeMakeExternalImage( $text ); + // if ($img !== false) $text = $img; + //$dtrail = ''; // Set linktype for CSS - if URL==text, link is essentially free @@ -181,7 +189,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls // $text = $this->getConverterLanguage()->markNoConversion( $text ); // TODO.XO: - // $url = Sanitizer::cleanUrl( $url ); + // url = sanitizer.Clean_url(url); bfr.Add_mid(src, prv, lnke_bgn); prv = cur; @@ -191,6 +199,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls // This was changed in August 2004 linker.Make_external_link(bfr, Bry_.Mid(src, url_bgn, url_end), Bry_.Mid(src, text_bgn, text_end), Bool_.N, link_type, parser.Get_external_link_attribs(attribs), Bry_.Empty); + // XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions? // Register link in the output Object. // Replace unnecessary URL escape codes with the referenced character // This prevents spammers from hiding links from the filters