return new String(bytes, offset, length, java.nio.charset.Charset.forName("UTF-8"));
}
/**
 * Counts the bytes required to store {@code chars} as UTF-8.
 * Every element is sized independently by {@code LuaString.Utf8_len_by_int} and the sizes are summed.
 * NOTE: an earlier inline version (since removed) counted at most three bytes per char;
 * sizing is now delegated to the shared helper.
 *
 * @param chars characters to measure
 * @return sum of the per-char byte lengths
 */
public static int lengthAsUtf8(char[] chars) {
    final int charCount = chars.length;
    int byteCount = 0;
    int idx = 0;
    while (idx < charCount) {
        byteCount += LuaString.Utf8_len_by_int(chars[idx]);
        idx++;
    }
    return byteCount;
}
/**
 * Writes the first {@code nchars} elements of {@code chars} into {@code bytes} as UTF-8,
 * beginning at index {@code off}. Each element is encoded by
 * {@code LuaString.Utf8_encode_by_int}, and the value it returns advances the write position.
 * NOTE: an earlier inline encoder (since removed) emitted at most three bytes per char;
 * it was replaced so that encoding stays consistent with Utf8_encode_by_int.
 *
 * @param chars  source characters
 * @param nchars number of leading elements of {@code chars} to encode
 * @param bytes  destination buffer
 * @param off    index in {@code bytes} of the first byte written
 * @return {@code nchars}, i.e. the number of chars consumed and NOT the number of bytes written
 */
public static int encodeToUtf8(char[] chars, int nchars, byte[] bytes, int off) {
    int writePos = off;
    int readPos = 0;
    while (readPos < nchars) {
        // the encoder reports how many bytes it emitted for this char
        writePos += LuaString.Utf8_encode_by_int(chars[readPos], bytes, writePos);
        readPos++;
    }
    // NOTE: the byte count used to be returned, which is wrong; Globals.UTF8Stream.read caches the
    // return value as j and uses it as an index into a char[] (not a byte[]), so returning bytes
    // would throw an out-of-bounds exception
    return nchars;
}
/**
 * Returns how many bytes the UTF-8 form of {@code charAsInt} occupies (1 to 6).
 * Thresholds are the exclusive upper bounds 2^7, 2^11, 2^16, 2^21 and 2^26;
 * anything at or above 2^26 is reported as 6. Negative values fall under the first bound and yield 1.
 *
 * @param charAsInt value to measure
 * @return byte length between 1 and 6
 */
public static int Utf8_len_by_int(int charAsInt) {
    if (charAsInt < 0x80) {        // below 2^7
        return 1;
    }
    if (charAsInt < 0x800) {       // below 2^11
        return 2;
    }
    if (charAsInt < 0x10000) {     // below 2^16
        return 3;
    }
    if (charAsInt < 0x200000) {    // below 2^21
        return 4;
    }
    if (charAsInt < 0x4000000) {   // below 2^26
        return 5;
    }
    return 6;
}
/**
 * Returns the sequence length (1 to 6) implied by a byte when it is read as the first byte
 * of a UTF-8 sequence. The byte is treated as unsigned.
 * Values 0x00-0xBF yield 1, 0xC0-0xDF yield 2, 0xE0-0xEF yield 3, 0xF0-0xF7 yield 4,
 * 0xF8-0xFB yield 5 and 0xFC-0xFF yield 6.
 *
 * @param b candidate first byte
 * @return sequence length between 1 and 6
 */
public static int Utf8_len_by_byte(byte b) {
    final int lead = b & 0xff;   // unsigned view, 0..255
    if (lead < 0xC0) {
        return 1;
    }
    if (lead < 0xE0) {
        return 2;
    }
    if (lead < 0xF0) {
        return 3;
    }
    if (lead < 0xF8) {
        return 4;
    }
    if (lead < 0xFC) {
        return 5;
    }
    return 6;                    // 0xFC..0xFF
}
/**
 * Decodes the UTF-8 sequence that begins at {@code ary[pos]} into an int.
 * The first byte selects the sequence length (1 to 6 bytes) and supplies the high payload bits;
 * each following byte contributes its low six bits. The following bytes are not checked for
 * the 10xxxxxx form, and no bounds check is done beyond the array's own.
 * A first byte in 0x80-0xBF is returned as its unsigned value without reading further.
 *
 * @param ary bytes holding the sequence
 * @param pos index of the first byte of the sequence
 * @return the decoded value
 */
public static int Utf8_decode_to_int(byte[] ary, int pos) {
    final int lead = ary[pos] & 0xFF;
    if (lead < 0xC0) {
        // 0x00-0x7F is a single-byte value; 0x80-0xBF is passed through as-is
        return lead;
    }

    final int trailing;   // how many bytes follow the first one
    int value;            // payload bits taken from the first byte
    if (lead < 0xE0) {
        trailing = 1;
        value = lead & 0x1f;
    } else if (lead < 0xF0) {
        trailing = 2;
        value = lead & 0x0f;
    } else if (lead < 0xF8) {
        trailing = 3;
        value = lead & 0x07;
    } else if (lead < 0xFC) {
        trailing = 4;
        value = lead & 0x03;
    } else {
        // 0xFC-0xFF: two low bits are kept, so 0xFE/0xFF spill into the int's top bits
        trailing = 5;
        value = lead & 0x03;
    }

    // shift in six bits per following byte, first to last
    for (int k = 1; k <= trailing; k++) {
        value = (value << 6) | (ary[pos + k] & 0x3f);
    }
    return value;
}
// NOTE(review): this signature declares an encoder taking (charAsInt, src, pos) and returning int,
// but the body below never reads those parameters and has no return statement. It uses c,
// c_might_be_utf8, current, buff, nbuff and nextChar(), none of which are declared in this
// method — presumably the encoder's own body was lost from this listing and the body of a
// lexer routine was spliced in; TODO restore both from the original sources before relying on it.
public static int Utf8_encode_by_int(int charAsInt, byte[] src, int pos) {
// Length of the sequence that c would start, looked up from c as a byte; forced to 1 when c_might_be_utf8 is false.
int bytes_len = c_might_be_utf8 ? LuaString.Utf8_len_by_byte((byte)c) : 1;
if (bytes_len > 1) { // c is 1st byte of utf8 multi-byte sequence; read required number of bytes and convert to char; EX: ← is serialized in z as 226,134,144; c is currently 226; read 134 and 144 and convert to ←
temp_bry[0] = (byte)c;
// Collect the remaining bytes: nextChar() is called once per byte and current is stored after each call.
for (int i = 1; i < bytes_len; i++) {
nextChar();
temp_bry[i] = (byte)current;
}
// Replace the first byte held in c with the decoded value of the whole sequence.
c = LuaString.Utf8_decode_to_int(temp_bry, 0);
}
// Grow buff through LuaC.realloc when it is null or has no room for one more char.
if ( buff == null || nbuff + 1 > buff.length )
buff = LuaC.realloc( buff, nbuff*2+1 );
// Append c narrowed to char; values above 0xFFFF lose their high bits in this cast.
buff[nbuff++] = (char)c;
}
// Scratch buffer that the multi-byte branch above fills before passing it to LuaString.Utf8_decode_to_int;
// its size of 6 equals the largest length Utf8_len_by_byte returns.
// NOTE(review): static and mutable, so every caller shares one buffer — confirm it is only used from a single thread.
private static byte[] temp_bry = new byte[6];
</pre>
=== build.xml ===
* note: this change is needed to get luaj to compile with the String.format(String, double) call