<a href="#Luaj_2.0.3_errors_fixed_in_3.0"><span class="tocnumber">3.1</span> <span class="toctext">Luaj 2.0.3 errors fixed in 3.0</span></a>
<ul>
<li class="toclevel-3 tocsection-5">
<a href="#os.time_doesn.27t_handle_dates_before_1970"><span class="tocnumber">3.1.1</span> <span class="toctext">os.time doesn't handle dates before 1970</span></a>
</li>
<li class="toclevel-3 tocsection-6">
<a href="#pairs.next_fails_when_setting_val_to_null"><span class="tocnumber">3.1.2</span> <span class="toctext">pairs.next fails when setting val to null</span></a>
</li>
</ul>
</li>
<li class="toclevel-2 tocsection-7">
<a href="#Luaj_2.0.3_features_removed_from_3.0"><span class="tocnumber">3.2</span> <span class="toctext">Luaj 2.0.3 features removed from 3.0</span></a>
<a href="#automatic_arg_variable_in_varargs_function_deprecated"><span class="tocnumber">3.2.6</span> <span class="toctext">automatic arg variable in varargs function deprecated</span></a>
<a href="#os.date_does_not_accept_UTC_format"><span class="tocnumber">3.3.1</span> <span class="toctext">os.date does not accept UTC format</span></a>
</li>
<li class="toclevel-3 tocsection-16">
<a href="#string.gsub_fails_with_out_of_bounds_error"><span class="tocnumber">3.3.2</span> <span class="toctext">string.gsub fails with out_of_bounds error</span></a>
</li>
<li class="toclevel-3 tocsection-17">
<a href="#string.gsub_fails_if_string_is_empty"><span class="tocnumber">3.3.3</span> <span class="toctext">string.gsub fails if string is empty</span></a>
</li>
<li class="toclevel-3 tocsection-18">
<a href="#string.format_ignores_precision_for_double_args"><span class="tocnumber">3.3.4</span> <span class="toctext">string.format ignores precision for double args</span></a>
<a href="#string.tonumber_should_trim_all_whitespace"><span class="tocnumber">3.3.6</span> <span class="toctext">string.tonumber should trim all whitespace</span></a>
</li>
<li class="toclevel-3 tocsection-21">
<a href="#multi-byte_strings_not_fully_supported"><span class="tocnumber">3.3.7</span> <span class="toctext">multi-byte strings not fully supported</span></a>
The luaj_xowa.jar was built using the source at <a href="http://sourceforge.net/projects/luaj/files/luaj-3.0/3.0-beta2/luaj-3.0-beta2.zip/download" rel="nofollow" class="external free">http://sourceforge.net/projects/luaj/files/luaj-3.0/3.0-beta2/luaj-3.0-beta2.zip/download</a>.
</p>
<p>
Its source is not currently included with XOWA. It is available at the following location: <a href="https://sourceforge.net/projects/xowa/files/support/luaj/" rel="nofollow" class="external free">https://sourceforge.net/projects/xowa/files/support/luaj/</a>
if (srclen == 0) return varargsOf(src, LuaValue.ZERO); // exit early
</pre>
<h4>
<span class="mw-headline" id="string.format_ignores_precision_for_double_args">string.format ignores precision for double args</span>
</h4>
<ul>
<li>
fix : Convert calls will show full precision for numbers; EX:w:Tomato
</li>
<li>
file: /src/core/org/luaj/vm2/lib/StringLib.java
</li>
</ul>
<pre>
FormatDesc fdsc = new FormatDesc(args, fmt, i );
int fdsc_bgn = i;
</pre>
<pre style="overflow:auto">
old:
case 'G':
fdsc.format( result, args.checkdouble( arg ) );
new:
case 'G':
String fmt_str = new String(fmt.m_bytes, fdsc_bgn - 1, fdsc.length + 1); // -1 to include %; +1 to account for included %; basically get everything between % and f; EX: a%.1fb -> %.1f
return new String(bytes, offset, length, java.nio.charset.Charset.forName("UTF-8"));
}
/**
 * Counts how many UTF-8 bytes are needed to encode the given UTF-16 chars.
 * The per-char byte width is supplied by LuaString.Utf16_Len_by_char; a width
 * of 4 marks a surrogate pair, so the char that follows it is skipped.
 *
 * @param chars UTF-16 chars to measure
 * @return total number of UTF-8 bytes
 */
public static int lengthAsUtf8(char[] chars) {
    // COMMENTED: does not handle 3+ byte chars
    // int i,b;
    // char c;
    // for ( i=b=chars.length; --i>=0; )
    // if ( (c=chars[i]) >=0x80 )
    // b += (c>=0x800)? 2: 1;
    // return b;
    final int charCount = chars.length;
    int byteTotal = 0;
    int pos = 0;
    while (pos < charCount) {
        final int width = LuaString.Utf16_Len_by_char(chars[pos]);
        byteTotal += width;
        // 4 bytes means surrogate pair: step over both chars; otherwise step over one
        pos += (width == 4) ? 2 : 1;
    }
    return byteTotal;
}
public static int encodeToUtf8(char[] chars, int nchars, byte[] bytes, int off) {
// COMMENTED: does not handle 4+ byte chars; already using Encode_by_int, so might as well be consistent
// char c;
// int j = off;
// for ( int i=0; i<nchars; i++ ) {
// if ( (c = chars[i]) < 0x80 ) {
// bytes[j++] = (byte) c;
// } else if ( c < 0x800 ) {
// bytes[j++] = (byte) (0xC0 | ((c>>6) & 0x1f));
// bytes[j++] = (byte) (0x80 | ( c & 0x3f));
// } else {
// bytes[j++] = (byte) (0xE0 | ((c>>12) & 0x0f));
// bytes[j++] = (byte) (0x80 | ((c>>6) & 0x3f));
// bytes[j++] = (byte) (0x80 | ( c & 0x3f));
// }
// }
// return j - off;
int bry_idx = off;
int i = 0;
while (i < nchars) {
char c = chars[i];
int bytes_read = Utf16_Encode_char(c, chars, i, bytes, bry_idx);
bry_idx += bytes_read;
i += bytes_read == 4 ? 2 : 1; // 4 bytes; surrogate pair; skip next char;
}
return nchars; // NOTE: code returned # of bytes which is wrong; Globals.UTF8Stream.read caches rv as j which is used as index to char[] not byte[]; will throw out of bounds exception if bytes returned
else throw new RuntimeException("UTF-16 int must be between 0 and 2097152; char=" + c);
}
/**
 * Maps the first byte of a UTF-8 sequence to the sequence length in bytes. SEE:w:UTF-8
 *
 * Unsigned byte value and result:
 *   0-191   : 1 (this includes 128-191, which are reported as length 1 rather than rejected)
 *   192-223 : 2
 *   224-239 : 3
 *   240-247 : 4
 *
 * @param b first byte of the sequence (signed Java byte)
 * @return 1, 2, 3 or 4
 * @throws RuntimeException if the unsigned value is 248 or higher
 */
public static int Utf8_Len_of_char_by_1st_byte(byte b) {
    final int lead = b & 0xff; // PATCH.JAVA: Java bytes are signed; widen to the unsigned value
    if (lead < 192) return 1;
    if (lead < 224) return 2;
    if (lead < 240) return 3;
    if (lead < 248) return 4;
    throw new RuntimeException("invalid initial utf8 byte; byte=" + b);
}
/**
 * Decodes one UTF-8 sequence starting at ary[pos] into its integer code point.
 * The bit pattern of the first byte selects a 1-, 2-, 3- or 4-byte sequence;
 * each following byte contributes its low 6 bits.
 *
 * @param ary byte array holding the UTF-8 data
 * @param pos index of the first byte of the sequence
 * @return the decoded code point
 * @throws RuntimeException if the first byte matches none of the four patterns
 */
public static int Utf16_Decode_to_int(byte[] ary, int pos) {
    final byte lead = ary[pos];
    if ((lead & 0x80) == 0) {       // 0xxxxxxx: single byte
        return lead;
    }
    if ((lead & 0xE0) == 0xC0) {    // 110xxxxx: two bytes
        final int upper = (lead & 0x1f) << 6;
        final int lower = ary[pos + 1] & 0x3f;
        return upper | lower;
    }
    if ((lead & 0xF0) == 0xE0) {    // 1110xxxx: three bytes
        int cp = lead & 0x0f;
        cp = (cp << 6) | (ary[pos + 1] & 0x3f);
        cp = (cp << 6) | (ary[pos + 2] & 0x3f);
        return cp;
    }
    if ((lead & 0xF8) == 0xF0) {    // 11110xxx: four bytes
        int cp = lead & 0x07;
        cp = (cp << 6) | (ary[pos + 1] & 0x3f);
        cp = (cp << 6) | (ary[pos + 2] & 0x3f);
        cp = (cp << 6) | (ary[pos + 3] & 0x3f);
        return cp;
    }
    throw new RuntimeException("invalid utf8 byte: byte=" + lead);
}
/**
 * Encodes the integer code point c as UTF-8 into src, starting at src[pos].
 *
 * Values below 128 take 1 byte, below 2048 take 2 bytes, below 65536 take 3 bytes
 * and below 2097152 take 4 bytes.
 *
 * @param c   code point to encode; must be between 0 and 2097151
 * @param src destination byte array
 * @param pos index in src of the first byte to write
 * @return number of bytes written (1 to 4)
 * @throws RuntimeException if c is negative or is 2097152 or greater
 */
public static int Utf16_Encode_int(int c, byte[] src, int pos) {
    // Reject negatives up front: previously they failed the 1-byte test and fell into
    // the 2-byte branch, silently writing two invalid bytes instead of raising the range error.
    if (c < 0) {
        throw new RuntimeException("UTF-16 int must be between 0 and 2097152; char=" + c);
    }
    if (c < 128) {
        src[pos] = (byte)c;
        return 1;
    }
    else if (c < 2048) {
        src[pos] = (byte)(0xC0 | (c >> 6));
        src[++pos] = (byte)(0x80 | (c & 0x3F));
        return 2;
    }
    else if (c < 65536) {
        src[pos] = (byte)(0xE0 | (c >> 12));
        src[++pos] = (byte)(0x80 | ((c >> 6) & 0x3F));
        src[++pos] = (byte)(0x80 | (c & 0x3F));
        return 3;
    }
    else if (c < 2097152) {
        src[pos] = (byte)(0xF0 | (c >> 18));
        src[++pos] = (byte)(0x80 | ((c >> 12) & 0x3F));
        src[++pos] = (byte)(0x80 | ((c >> 6) & 0x3F));
        src[++pos] = (byte)(0x80 | (c & 0x3F));
        return 4;
    }
    else throw new RuntimeException("UTF-16 int must be between 0 and 2097152; char=" + c);
}
/**
 * Encodes the UTF-16 char value c, located at c_ary[c_pos], as UTF-8 into b_ary
 * starting at b_ary[b_pos].
 *
 * Values below 128 take 1 byte and values below 2048 take 2 bytes. A value in
 * 0xD800-0xDBFF is combined with the char at c_ary[c_pos + 1] through
 * Utf16_Surrogate_merge and written as 4 bytes, so a return of 4 means two chars
 * were consumed. Every other value is written as 3 bytes.
 *
 * @param c     char value to encode
 * @param c_ary char array that c was read from; consulted only for surrogate pairs
 * @param c_pos index of c within c_ary
 * @param b_ary destination byte array
 * @param b_pos index in b_ary of the first byte to write
 * @return number of bytes written (1 to 4)
 * @throws RuntimeException if c is in 0xD800-0xDBFF and no char follows it in c_ary
 */
public static int Utf16_Encode_char(int c, char[] c_ary, int c_pos, byte[] b_ary, int b_pos) {
    if ((c > -1)
        && (c < 128)) {
        b_ary[b_pos] = (byte)c;
        return 1;
    }
    else if (c < 2048) {
        b_ary[b_pos] = (byte)(0xC0 | (c >> 6));
        b_ary[++b_pos] = (byte)(0x80 | (c & 0x3F));
        return 2;
    }
    else if ((c >= 0xD800)      // 55296
        && (c <= 0xDBFF)) {     // 56319; 56320 is 0xDC00, not 0xDFFF
        // The partner char is read from c_pos + 1, so that index must exist. Checking only
        // c_pos let a surrogate in the last slot escape as ArrayIndexOutOfBoundsException.
        if (c_pos + 1 >= c_ary.length)
            throw new RuntimeException("incomplete surrogate pair at end of string; char=" + c);
        int nxt_char = c_ary[c_pos + 1];
        int v = Utf16_Surrogate_merge(c, nxt_char);
        b_ary[b_pos] = (byte)(0xF0 | (v >> 18));
        b_ary[++b_pos] = (byte)(0x80 | ((v >> 12) & 0x3F));
        b_ary[++b_pos] = (byte)(0x80 | ((v >> 6) & 0x3F));
        b_ary[++b_pos] = (byte)(0x80 | (v & 0x3F));
        return 4;
    }
    else {
        b_ary[b_pos] = (byte)(0xE0 | (c >> 12));
        b_ary[++b_pos] = (byte)(0x80 | ((c >> 6) & 0x3F));
        b_ary[++b_pos] = (byte)(0x80 | (c & 0x3F));
        return 3;
    }
}
private static int Utf16_Surrogate_merge(int hi, int lo) { // REF: http://perldoc.perl.org/Encode/Unicode.html
int bytes_len = c_might_be_utf8 ? LuaString.Utf8_Len_of_char_by_1st_byte((byte)c) : 1;
if (bytes_len > 1) { // c is 1st byte of utf8 multi-byte sequence; read required number of bytes and convert to char; EX: left-arrow is serialized in z as 226,134,144; c is currently 226; read 134 and 144 and convert to left-arrow
2015-10-11: Increased LUAI_MAXVALUES from 200 to 249; otherwise &lt;ref&gt; fails, because the function citation0 in en.wikipedia.org/wiki/Module:Citation/CS1 uses more than 200 local variables.
<ul>
<li>
See: <a href="http://www.lua.org/source/5.1/luaconf.h.html" rel="nofollow" class="external free">http://www.lua.org/source/5.1/luaconf.h.html</a>. LUAI_MAXVARS is the maximum number of local variables per function (must be smaller than 250).
<li><a href="http://dumps.wikimedia.org/backup-index.html" title="Get wiki database dumps directly from Wikimedia">Wikimedia dumps</a></li>
<li><a href="https://archive.org/search.php?query=xowa" title="Search archive.org for XOWA files">XOWA @ archive.org</a></li>
<li><a href="http://en.wikipedia.org" title="Visit Wikipedia (and compare to XOWA!)">English Wikipedia</a></li>
</ul>
</div>
</div>
<div class="portal" id="xowa-portal-donate">
<h3>Donate</h3>
<div class="body">
<ul>
<li><a href="https://archive.org/donate/index.php" title="Support archive.org!">archive.org</a></li><!-- listed first due to recent fire damages: http://blog.archive.org/2013/11/06/scanning-center-fire-please-help-rebuild/ -->
<input type="image" height="14" src="https://www.paypalobjects.com/en_US/i/btn/btn_donate_SM.gif" border="0" name="submit" alt="PayPal - The safer, easier way to pay online!">