✎ 编程开发网

JAVA版StarDict星际译王简单实现(二)

2014-11-24 11:20:01 · 作者: · 浏览: 308

标签: JAVA StarDict 星际简单实现

到单词字符长度

return flag;

}

/**

* 通过偏移位置offset和长度length 来从dict文件中获取data内容UTF-8编码的字符

* @param offset 要读取的内容的起始偏移，为字节数

* @param length 要读取的内容的数据块大小，为字节数

* @return 字节数组的data int

public static byte[] get_data(int[] offset, int[] length)

{

long oft = offset[0];

long len = length[0];

long skip;

byte data_buf[] = new byte[length[0]];

System.out.println("This word's" + "offset:" + offset[0] + "len:"

+ length[0]);

try

{

isdict.reset();

long valuedata = isdict.available();

if (valuedata < oft + len)

{

System.out.println("No so much value data! " + valuedata);

}

// skip=isdict.skip(oft);

skip = skipBytesFromStream(isdict, oft);

if (skip != oft)

{

System.out.println("Skip" + skip + " dict file error!");

}

if (isdict.read(data_buf) == -1)

{

System.out.println("Arrive at the end of file!");

}

// // Unicode

// StringBuffer sb = new StringBuffer();

// int size =isdict.read(data_buf);

// for (int j = 0; j < size;)

// {

// int l = data_buf[j++];

// int h = data_buf[j++];

// char c = (char) ((l & 0xff) | ((h << 8) & 0xff00));

// sb.append(c);

// }

// // return sb.toString();

}

catch (Exception e)

{

data_buf = null;

System.out.println("dict file read error!");

e.printStackTrace();

}

if (data_buf == null)

{

return null;

}

return data_buf;

}

/**

* utf8解码参考自http://hi.baidu.com/leo10086/item/d6853813373b19001994ec24 用法:

* 假如 newContent 为UTF8编码的字符串 byte[] b = newContent.getBytes(); newContent =

* URLEncoder.UTF8Decode( b, 0, b.length );

* @param in 要进行解码的UTF8编码的字节数组

* @param offset

* @param length

* @return

public static String UTF8Decode(byte in[], int offset, int length)

{

StringBuffer buff = new StringBuffer();

int max = offset + length;

for (int i = offset; i < max; i++)

{

char c = 0;

if ((in[i] & 0x80) == 0)

{

c = (char) in[i];

} else if ((in[i] & 0xe0) == 0xc0) // 11100000

{

c |= ((in[i] & 0x1f) << 6); // 00011111

i++;

c |= ((in[i] & 0x3f) << 0); // 00111111

} else if ((in[i] & 0xf0) == 0xe0) // 11110000

{

c |= ((in[i] & 0x0f) << 12); // 00001111

i++;

c |= ((in[i] & 0x3f) << 6); // 00111111

i++;

c |= ((in[i] & 0x3f) << 0); // 00111111

} else if ((in[i] & 0xf8) == 0xf0) // 11111000

{

c |= ((in[i] & 0x07) << 18); // 00000111 (move 18, not 16 )

i++;

c |= ((in[i] & 0x3f) << 12); // 00111111

i++;

c |= ((in[i] & 0x3f) << 6); // 00111111

i++;

c |= ((in[i] & 0x3f) << 0); // 00111111

} else

{

c = ' ';

}

buff.append(c);

}

return buff.toString();

}

public static byte[] UTF8Encode(String str)

{

ByteArrayOutputStream bos = new ByteArrayOutputStream();

try

{

int strlen = str.length();

for (int i = 0; i < strlen; i++)

{

char t = str.charAt(i);

int c = 0;

c |= (t & 0xffff);

if (c >= 0 && c < 0x80)

{

bos.write((byte) (c & 0xff));

} else if (c > 0x7f && c < 0x800)

{

bos.write((byte) (((c >>> 6) & 0x1f) | 0xc0));

bos.write((byte) (((c >>> 0) & 0x3f) | 0x80));

} else if (c > 0x7ff && c < 0x10000)

{

bos.write((byte) (((c >>> 12) & 0x0f) | 0xe0)); // <--

// correction

// (mb)

bos.write((byte) (((c >>> 6) & 0x3f) | 0x80));

bos.write((byte) (((c >>> 0) & 0x3f) | 0x80));

} else if (c > 0x00ffff && c < 0xfffff)

{

bos.write((byte) (((c >>> 18) & 0x07) | 0xf0));

bos.write((byte) (((c >>> 12) & 0x3f) | 0x80));

bos.write((byte) (((c >>> 6) & 0x3f) | 0x80));

bos.write((byte) (((c >>> 0) & 0x3f) | 0x80));

}

bos.flush();

}

catch (Exception e)

{

}

return bos.toByteArray();

}

/**

* 将UTF-8字节数据转化为Unicode字

首页上一页 1 2 3 4 5 6 下一页尾页 2/6/6

上一篇为什么Java要增加lambda表达式

下一篇 flyway框架对数据库迁徙的支持配..