[java]
/**
 * Guesses the text encoding of a file.
 *
 * <p>The first three bytes are checked for a UTF-16LE, UTF-16BE or UTF-8 byte order mark.
 * If none is present, up to the first 4096 bytes are scanned for the byte sequences that
 * encode the character U+7684 in GBK, UTF-8, UTF-16LE, UTF-16BE and BIG5; the encoding
 * whose sequence occurs strictly more often than every other one is returned.
 *
 * <p>The file pointer is moved by this method and is not restored.
 *
 * @param file the file to inspect
 * @return the detected encoding, or {@code Encoding.GBK} when the file is shorter than
 *         three bytes, when no candidate has a strictly highest count, or when an
 *         exception occurs while reading
 */
public static Encoding determineEncoding(RandomAccessFile file) {
    Encoding enc = Encoding.GBK;
    try {
        file.seek(0);
        if (file.length() < 3) {
            return enc;
        }

        byte[] bom = new byte[3]; // byte order mark
        // readFully guarantees all three bytes are present; a plain read() may return fewer.
        file.readFully(bom);
        int bom0 = bom[0] & 0xFF;
        int bom1 = bom[1] & 0xFF;
        int bom2 = bom[2] & 0xFF;

        if (bom0 == 0xFF && bom1 == 0xFE) {
            enc = Encoding.UTF16LE;
        } else if (bom0 == 0xFE && bom1 == 0xFF) {
            enc = Encoding.UTF16BE;
        } else if (bom0 == 0xEF && bom1 == 0xBB && bom2 == 0xBF) {
            enc = Encoding.UTF8;
        } else {
            // No BOM: fall back to counting encodings of U+7684 in the head of the file.
            file.seek(0);
            byte[] bs = new byte[4096];
            int filled = 0;
            // read() may return fewer bytes than requested, so loop until full or EOF.
            while (filled < bs.length) {
                int n = file.read(bs, filled, bs.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }

            int gbkCount = 0;
            int big5Count = 0;
            int utf16leCount = 0;
            int utf16beCount = 0;
            int utf8Count = 0;

            // Only the bytes actually read are scanned. After a match the index skips the
            // remaining bytes of that sequence so they are not re-examined.
            for (int i = 0; i + 1 < filled; ++i) {
                int b0 = bs[i] & 0xFF;
                int b1 = bs[i + 1] & 0xFF;
                if (b0 == 0xB5 && b1 == 0xC4) {
                    ++gbkCount;
                    ++i;
                } else if (i + 2 < filled && b0 == 0xE7 && b1 == 0x9A
                        && (bs[i + 2] & 0xFF) == 0x84) {
                    ++utf8Count;
                    i += 2;
                } else if (b0 == 0x84 && b1 == 0x76) {
                    // U+7684 with the low byte first.
                    ++utf16leCount;
                    ++i;
                } else if (b0 == 0x76 && b1 == 0x84) {
                    // U+7684 with the high byte first.
                    ++utf16beCount;
                    ++i;
                } else if (b0 == 0xAA && b1 == 0xBA) {
                    ++big5Count;
                    ++i;
                }
            }

            Encoding[] candidates = {
                Encoding.GBK, Encoding.UTF8, Encoding.UTF16LE, Encoding.UTF16BE, Encoding.BIG5
            };
            int[] counts = {gbkCount, utf8Count, utf16leCount, utf16beCount, big5Count};

            // A candidate wins only if its count is strictly greater than all others;
            // on a tie (including all zeros) the GBK default is kept.
            int best = 0;
            boolean unique = true;
            for (int k = 1; k < counts.length; ++k) {
                if (counts[k] > counts[best]) {
                    best = k;
                    unique = true;
                } else if (counts[k] == counts[best]) {
                    unique = false;
                }
            }
            if (unique) {
                enc = candidates[best];
            }
        }
    } catch (Exception ex) {
        // Best effort: detection failures fall back to the default encoding, but the
        // cause is passed to the logger so it is not lost.
        Log.e("File ERROR", "encoding detection failed.", ex);
    }
    return enc;
}
public static Encoding determineEncoding(RandomAccessFile file) {
Encoding enc = Encoding.GBK;
try {
file.seek(0);
if(file.length() < 3) return enc;
byte[] bom = new byte[3]; //byte order mark
file.read(bom);
if((bom[0] & 0XFF) == 0xFF && (bom[1] & 0XFF) == 0xFE)
enc = Encoding.UTF16LE;
else if((bom[0] & 0XFF) == 0xFE && (bom[1] & 0XFF) == 0xFF)
en