1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

Custom_wiki: Rename file if title has spaces to use underscore

This commit is contained in:
gnosygnu 2017-02-01 07:42:32 -05:00
parent 32b4fafe10
commit 3581371d29
4 changed files with 71 additions and 36 deletions

View File

@ -29,10 +29,13 @@ class Fs_root_mgr {
}
// Accessor for the wkr member.
public Fs_root_wkr Wkr() {
	return wkr;
}
public boolean Find_file(Xof_fsdb_itm fsdb_itm) {
// get orig; exit if not found in dir
byte[] orig_ttl = fsdb_itm.Orig_ttl();
Orig_fil_row orig_itm = wkr.Get_by_ttl(orig_ttl);
if (orig_itm == Orig_fil_row.Null) return false;
fsdb_itm.Change_repo(Xof_repo_tid_.Tid__local, wiki.Domain_bry());
// update orig
fsdb_itm.Init_at_orig(Xof_repo_tid_.Tid__local, wiki.Domain_bry(), orig_ttl, Xof_ext_.new_by_id_(orig_itm.Ext_id()), orig_itm.W(), orig_itm.H(), null);
Io_url orig_url = orig_itm.Url();
if (fsdb_itm.File_is_orig()) {
fsdb_itm.Html_size_(orig_itm.W(), orig_itm.H());

View File

@ -64,19 +64,57 @@ class Fs_root_wkr {
}
// Builds and returns an Orig_fil_mgr holding one Orig_fil_row per file title found by scanning orig_dir.
// File names containing spaces are renamed on disk to use underscores, and each rename is appended to "xowa.orig.changes.log" in orig_dir.
// NOTE(review): this span is rendered from a diff; the flat loop that calls To_fil_bry (directly below) and the nested sub_dir loop appear interleaved, and braces do not balance as shown -- confirm against the committed file before editing.
private Orig_fil_mgr Init_fs_fil_mgr() { // NOTE: need to read entire dir, b/c ttl may be "A.png", but won't know which subdir
Orig_fil_mgr rv = new Orig_fil_mgr();
// flat scan: every file returned for orig_dir is keyed by To_fil_bry(fil)
Io_url[] fils = Io_mgr.Instance.QueryDir_args(orig_dir).Recur_(recurse).ExecAsUrlAry();
int fils_len = fils.length;
for (int i = 0; i < fils_len; i++) {
Io_url fil = fils[i];
byte[] fil_name_bry = To_fil_bry(fil);
Orig_fil_row fil_itm = rv.Get_by_ttl(fil_name_bry);
if (fil_itm != Orig_fil_row.Null) {
Gfo_usr_dlg_.Instance.Warn_many("", "", "file already exists: cur=~{0} new=~{1}", fil_itm.Url().Raw(), fil.Raw());
continue;
// log file that records every on-disk rename performed by this scan
Io_url orig_changes_log = orig_dir.GenSubFil("xowa.orig.changes.log");
// loop over all dirs in orig_dir
Io_url[] sub_dirs = Io_mgr.Instance.QueryDir_args(orig_dir).DirInclude_(true).ExecAsUrlAry();
int sub_dirs_len = sub_dirs.length;
for (int i = 0; i < sub_dirs_len; i++) {
Io_url sub_dir = sub_dirs[i];
if (String_.Len(sub_dir.NameOnly()) != 1) continue; // only look at subdirs with 1 char; EX: "/orig_dir/a/" vs "/orig_dir/math/"
// loop over all fils in that 1-char dir
Io_url[] fils = Io_mgr.Instance.QueryDir_args(sub_dir).Recur_(recurse).ExecAsUrlAry();
int fils_len = fils.length;
for (int j = 0; j < fils_len; j++) {
Io_url fil = fils[j];
// cache key is built from the file's name-and-extension only
byte[] fil_name_bry = Bry_.new_u8(fil.NameAndExt());
// stays null unless the name is altered below; a non-null value triggers the rename + log step
String orig_change_type = null;
// if url has space, replace it with underscore
if (Bry_.Has(fil_name_bry, Byte_ascii.Space)) {
fil_name_bry = Bry_.Replace(fil_name_bry, Byte_ascii.Space, Byte_ascii.Underline);
orig_change_type = "space_to_underscore";
}
// TOMBSTONE: code below had unit_test, but not sure if needed; file's title should be title-cased, but it's possible to be lower-case for "File:" namespaces with case_match; DATE:2017-02-01
// if url's first char is lowercase, uppercase it;
// byte b_0 = fil_name_bry[0];
// if (b_0 >= Byte_ascii.Ltr_a && b_0 <= Byte_ascii.Ltr_z) {
// fil_name_bry = Bry_.Ucase__1st(fil_name_bry);
// orig_change_type = "ucase_1st";
// }
// if changed above, rename it and log it
// NOTE(review): the move runs before the "already seen" check below; if the third arg of MoveFil_args means overwrite, an existing file that already has the underscore name would be replaced -- confirm
if (orig_change_type != null) {
Io_url new_url = fil.GenNewNameAndExt(String_.new_u8(fil_name_bry));
Io_mgr.Instance.MoveFil_args(fil, new_url, true).Exec();
// log line is "<change_type>|<path before rename>"; the new path is not written
Io_mgr.Instance.AppendFilStr(orig_changes_log, orig_change_type + "|" + fil.Raw() + "\n");
fil = new_url;
}
// if file already seen, ignore it
Orig_fil_row fil_itm = rv.Get_by_ttl(fil_name_bry);
if (fil_itm != Orig_fil_row.Null) {
Gfo_usr_dlg_.Instance.Warn_many("", "", "file already exists: cur=~{0} new=~{1}", fil_itm.Url().Raw(), fil.Raw());
continue;
}
// add it to cache
Xof_ext ext = Xof_ext_.new_by_ttl_(fil_name_bry);
fil_itm = Orig_fil_row.New_by_fs(fil, fil_name_bry, ext.Id());
rv.Add(fil_itm);
}
Xof_ext ext = Xof_ext_.new_by_ttl_(fil_name_bry);
fil_itm = Orig_fil_row.New_by_fs(fil, fil_name_bry, ext.Id());
rv.Add(fil_itm);
}
return rv;
}
@ -106,10 +144,4 @@ class Fs_root_wkr {
}
private static final String Cfg_grp_root_dir = "xowa.root_dir", Cfg_key_fil_id_next = "fil_id_next";
public static final String Url_orig_dir = "~{orig_dir}";
// Derives title bytes from a file url: takes the url's name-and-extension, swaps spaces for underscores, then passes the result through Bry_.Ucase__1st.
public static byte[] To_fil_bry(Io_url url) {
	byte[] name_and_ext = Bry_.new_u8(url.NameAndExt());
	byte[] underscored = Bry_.Replace(name_and_ext, Byte_ascii.Space, Byte_ascii.Underline);
	return Bry_.Ucase__1st(underscored);
}
}

View File

@ -30,11 +30,6 @@ public class Fs_root_wkr_tst {
fxt.Init_fs("mem/dir/sub1/A1.png", 200, 100);
fxt.Test_get("A1.png", fxt.itm_().Url_("mem/dir/sub1/A1.png").Size_(200, 100));
}
// Feeds url / expected-title pairs to fxt.Test_xto_fil_bry in order.
@Test public void Xto_fil_bry() {
	String[][] cases = new String[][]
	{ {"/dir/A.png"  , "A.png"}		// basic: name unchanged
	, {"/dir/A b.png", "A_b.png"}	// space becomes underscore
	, {"/dir/a.png"  , "A.png"}		// 1st letter upper-cased
	};
	for (int i = 0; i < cases.length; i++) {
		String[] pair = cases[i];
		fxt.Test_xto_fil_bry(pair[0], pair[1]);
	}
}
}
class Fs_root_wkr_fxt {
private Fs_root_wkr root_dir = new Fs_root_wkr();
@ -61,10 +56,6 @@ class Fs_root_wkr_fxt {
gplx.gfui.SizeAdp img_size = gplx.gfui.SizeAdp_.new_(w, h);
Io_mgr.Instance.SaveFilStr(url, img_size.To_str());
}
// Builds a file url from url_str, runs it through Fs_root_wkr.To_fil_bry, and compares the result (as a String) against expd.
public void Test_xto_fil_bry(String url_str, String expd) {
	byte[] actl_bry = Fs_root_wkr.To_fil_bry(Io_url_.new_fil_(url_str));
	String actl = String_.new_u8(actl_bry);
	Tfds.Eq(expd, actl);
}
}
class Orig_fil_mok {
private int uid = -1;

View File

@ -19,13 +19,21 @@ package gplx.xowa.mws.parsers.lnkes; import gplx.*; import gplx.xowa.*; import g
import gplx.core.btries.*; import gplx.core.primitives.*;
import gplx.langs.phps.utls.*;
import gplx.xowa.mws.htmls.*;
// TODO.XO: add proto-rel; EX: [//a.org b]
/* TODO.XO
* P8: url = sanitizer.Clean_url(url);
* P8: The characters '<' and '>' (which were escaped by removeHTMLtags()) should not be included in URLs, per RFC 2396.
* P7: add proto-rel; EX: [//a.org b]
* P7: list( $dtrail, $trail ) = Linker::splitTrail( $trail );
* P3: $langObj->formatNum( ++$this->mAutonumber );
* P2: $this->getConverterLanguage()->markNoConversion( $text );
*/
public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
private final Bry_bfr tmp;
private Btrie_slim_mgr protocol_trie; private final Btrie_rv trv = new Btrie_rv();
private int autonumber;
private final Xomw_parser parser;
private final Xomw_linker linker;
// private final Xomw_sanitizer sanitizer;
private final Xomw_atr_mgr attribs = new Xomw_atr_mgr();
private Xomw_regex_url regex_url;
private Xomw_regex_space regex_space;
@ -33,12 +41,14 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
this.parser = parser;
this.tmp = parser.Tmp();
this.linker = parser.Linker();
// this.sanitizer = parser.Sanitizer();
}
// Stores the protocol trie and the url / space regex helpers passed in by the caller on this worker.
public void Init_by_wiki(Btrie_slim_mgr protocol_trie, Xomw_regex_url regex_url, Xomw_regex_space regex_space) {
	this.regex_space = regex_space;
	this.regex_url = regex_url;
	this.protocol_trie = protocol_trie;
}
// XO.MW:SYNC:1.29; DATE:2017-02-01
public void Replace_external_links(Xomw_parser_ctx pctx, Xomw_parser_bfr pbfr) {
// XO.PBFR
Bry_bfr src_bfr = pbfr.Src();
@ -150,12 +160,10 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
// If the link text is an image URL, replace it with an <img> tag
// This happened by accident in the original parser, but some people used it extensively
// TODO.XO:
//$img = $this->maybeMakeExternalImage( $text );
//if ( $img !== false ) {
// $text = $img;
//}
//
// XO.MW.UNSUPPORTED.NON-WMF: not supporting images from freeform url; (EX: "http://a.org/image.png" -> "<img>"); haven't seen this used on WMF wikis
// $img = $this->maybeMakeExternalImage( $text );
// if ($img !== false) $text = $img;
//$dtrail = '';
// Set linktype for CSS - if URL==text, link is essentially free
@ -181,7 +189,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
// $text = $this->getConverterLanguage()->markNoConversion( $text );
// TODO.XO:
// $url = Sanitizer::cleanUrl( $url );
// url = sanitizer.Clean_url(url);
bfr.Add_mid(src, prv, lnke_bgn);
prv = cur;
@ -191,6 +199,7 @@ public class Xomw_lnke_wkr {// THREAD.UNSAFE: caching for repeated calls
// This was changed in August 2004
linker.Make_external_link(bfr, Bry_.Mid(src, url_bgn, url_end), Bry_.Mid(src, text_bgn, text_end), Bool_.N, link_type, parser.Get_external_link_attribs(attribs), Bry_.Empty);
// XO.MW.UNSUPPORTED.HOOK: registers link for processing by other extensions?
// Register link in the output Object.
// Replace unnecessary URL escape codes with the referenced character
// This prevents spammers from hiding links from the filters