java unicode 编码转换两种方法(二)

2014-11-24 03:07:57 · 作者: · 浏览: 1
ffer.append('\\'); outBuffer.append('t');

break;

case '\n':outBuffer.append('\\'); outBuffer.append('n');

break;

case '\r':outBuffer.append('\\'); outBuffer.append('r');

break;

case '\f':outBuffer.append('\\'); outBuffer.append('f');

break;

case '=': // Fall through

case ':': // Fall through

case '#': // Fall through

case '!':

outBuffer.append('\\'); outBuffer.append(aChar);

break;

default:

if ((aChar < 0x0020) || (aChar > 0x007e)) {

outBuffer.append('\\');

outBuffer.append('u');

outBuffer.append(toHex((aChar >> 12) & 0xF));

outBuffer.append(toHex((aChar >> & 0xF));

outBuffer.append(toHex((aChar >> 4) & 0xF));

outBuffer.append(toHex( aChar & 0xF));

} else {

outBuffer.append(aChar);

}

}

}

return outBuffer.toString();

}

/**
 * Decodes an escaped string by handing its characters to the
 * {@code char[]} overload of {@code fromUnicode}.
 *
 * @param str the escaped text; must not be {@code null}
 * @return whatever the {@code char[]} overload returns for the full
 *         character range of {@code str}
 */
public static String fromUnicode(String str) {
    final char[] source = str.toCharArray();
    // A 1024-char scratch buffer is passed along, as the overload expects one.
    final char[] scratch = new char[1024];
    return fromUnicode(source, 0, source.length, scratch);
}

/*

* Converts encoded \uxxxx to unicode chars

* and changes special saved chars to their original forms

*/

public static String fromUnicode(char[] in, int off, int len, char[] convtBuf) {

if (convtBuf.length < len) {

int newLen = len * 2;

if (newLen < 0) {

newLen = Integer.MAX_VALUE;

}

convtBuf = new char[newLen];

}

char aChar;

char[] out = convtBuf;

int outLen = 0;

int end = off + len;

while (off < end) {

aChar = in[off++];

if (aChar == '\\') {

aChar = in[off++];

if (aChar == 'u') {

// Read the xxxx

int value = 0;

for (int i = 0; i < 4; i++) {

aChar = in[off++];

switch (aChar) {

case '0':

case '1':

case '2':

case '3':

case '4':

case '5':

case '6':

case '7':

case '8':

case '9':

value = (value << 4) + aChar - '0';

break;

case 'a':

case 'b':

case 'c':

case 'd':

case 'e':

case 'f':

value = (value << 4) + 10 + aChar - 'a';

break;

case 'A':

case 'B':

case 'C':

case 'D':

case 'E':

case 'F':

value = (value << 4) + 10 + aChar - 'A';

break;

default:

throw new IllegalArgumentException(

"Malformed \\uxxxx encoding.");

}

}

out[outLen++] = (char) value;

} else {

if (aChar == 't') {

aChar = '\t';

} else if (aChar == 'r') {

aChar = '\r';

} else if (aChar == 'n') {

aChar = '\n';

} else if (aChar == 'f') {

aChar = '\f';

}

out[outLen++] = aChar;

}

} else {

out[outLen++] = (char) aChar;

}

}

return new String(out, 0, outLen);

}

}

五、附加

下例是一个说明:无论字符原来用何种本地字符集表示,在Unicode字符集中都被表示成相同的编码。或者说,Unicode字符集和语言的种类无关。

public class ch {

public static void main(String[] args) {

// TODO Auto-generated method stub

try {

String str = "中";

String CHARSET = "GB2312";

// String CHARSET = "SHIFT-JIS";

char nativeChars[] = str.toCharArray();

java.nio.charset.Charset nativeCharset = java.nio.charset.Charset.forName(CHARSET);

java.nio.CharBuffer nativeCharBuffer = java.nio.CharBuffer.wrap(nativeChars);

java.nio.charset.CharsetEncoder encoder = nativeCharset.newEncoder();

java.nio.ByteBuffer nativeBytebuffer = encoder.encode(nativeCharBuffer);

byte[] nativeBytes = nativeBytebuffer.array();

System.out.println("\n#----- " + CHARSET + " encoding output -----#");

for (int i = 0; i < nativeBytes.length; i++) {

System.out.print(Integer.toHexString('\u00FF' & nativeBytes[i]).toUpperCase());

}

java.nio.charset.CharsetDecoder unicodeDecoder = nativeCharset.newDecoder();

java.nio.CharBuffer unicodeCharbuffer = unicodeDecoder.decode(nativeBytebuffer);

char unicodeChars[] = unicodeCharbuffer.array();

System.out.println("\n#----- Unicode encoding output -----#");

for (int i = 0; i < unicodeChars.length; i++) {

System.out.print(In