详解Pinyin4.jar包下的源码(二)

2014-11-24 11:24:52 · 作者: · 浏览: 18
PinyinHelper类中有一个静态属性unicodeToHanyuPinyinTable和一个静态块,如下:
/**
 * Properties table filled by {@link #initializeTable()} from the
 * unicode_to_hanyu_pinyin.txt resource.
 */
private static Properties unicodeToHanyuPinyinTable = null;

// Class-initialization hook: load the table if it has not been created yet.
static {
    if (null == unicodeToHanyuPinyinTable) {
        initializeTable();
    }
}
代码很容易理解:当用于保存映射表的Properties对象为空时,进行下面的初始化过程:
/**
 * Creates {@code unicodeToHanyuPinyinTable} and loads it from the classpath
 * resource {@code /pinyindb/unicode_to_hanyu_pinyin.txt}.
 *
 * Loading is best-effort: if the resource is missing or cannot be read, the
 * problem is reported on standard error and the table is left in whatever
 * state it reached (empty when the resource is missing).
 */
private static void initializeTable()
{
    final String resourceName = "/pinyindb/unicode_to_hanyu_pinyin.txt";
    unicodeToHanyuPinyinTable = new Properties();

    // getResourceAsStream returns null when the resource is absent; report
    // that explicitly instead of failing later with an unrelated stream error.
    java.io.InputStream rawStream = PinyinHelper.class.getResourceAsStream(resourceName);
    if (rawStream == null) {
        new FileNotFoundException("Classpath resource not found: " + resourceName).printStackTrace();
        return;
    }

    BufferedInputStream bufferedStream = new BufferedInputStream(rawStream);
    try {
        unicodeToHanyuPinyinTable.load(bufferedStream);
    } catch (IOException ex) {
        ex.printStackTrace();
    } finally {
        // Always release the underlying resource stream.
        try {
            bufferedStream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing a read-only stream fails.
        }
    }
}
上面读取根类路径下的文件,让unicodeToHanyuPinyinTable表不为空
PinyinHelper类的核心方法只有一个:toHanyuPinyinString,其它方法都是辅助性方法,原理见下面的注释
/**
 * Builds a string in which every character that has a pinyin reading is
 * replaced by the first reading returned by
 * {@code getFirstHanyuPinyinString}; characters without a reading are
 * copied through unchanged.
 *
 * The input is walked by Unicode code point, so a character encoded as a
 * surrogate pair is looked up and emitted once rather than being processed
 * a second time through its low surrogate.
 *
 * @param str          the character sequence to convert
 * @param outputFormat formatting options passed on to the pinyin lookup
 * @param seperater    caller-chosen separator appended after each converted
 *                     character, except when that character ends the input
 * @return the converted string
 */
public static String toHanyuPinyinString(String str, HanyuPinyinOutputFormat outputFormat, String seperater)
{
    StringBuilder resultPinyinStrBuf = new StringBuilder();
    final int length = str.length();
    int i = 0;
    while (i < length) {
        int codepointOfChar = str.codePointAt(i);
        // Index of the next code point: 1 char ahead for BMP, 2 for a surrogate pair.
        int nextIndex = i + Character.charCount(codepointOfChar);
        String mainPinyinStrOfChar = getFirstHanyuPinyinString(codepointOfChar, outputFormat);
        if (mainPinyinStrOfChar != null) {
            resultPinyinStrBuf.append(mainPinyinStrOfChar);
            // No trailing separator after the final character of the input.
            if (nextIndex < length) {
                resultPinyinStrBuf.append(seperater);
            }
        } else {
            // No reading found: keep the original character as-is.
            resultPinyinStrBuf.appendCodePoint(codepointOfChar);
        }
        i = nextIndex;
    }
    return resultPinyinStrBuf.toString();
}
下面讲一下上面调用的getFirstHanyuPinyinString方法
/**
 * Returns the first element of the array produced by
 * {@code getHanyuPinyinStringArray} for the given code point.
 *
 * @param codepoint    the Unicode code point to look up
 * @param outputFormat formatting options passed on to the array lookup
 * @return the first pinyin string, or {@code null} when the lookup yields
 *         {@code null} or an empty array
 */
private static String getFirstHanyuPinyinString(int codepoint, HanyuPinyinOutputFormat outputFormat)
{
    final String[] candidates = getHanyuPinyinStringArray(codepoint, outputFormat);
    if (candidates == null || candidates.length == 0) {
        return null;
    }
    return candidates[0];
}
其中getHanyuPinyinStringArray方法用于获取映射表中对应的拼音数组,源码如下:
private static String[] getHanyuPinyinStringArray(int codepoint, HanyuPinyinOutputFormat outputFormat)
{
String pinyinRecord = getHanyuPinyinRecord(codepoint);//根据具体的unicode获取对应的拼音字符串,去掉括号的形式,形如:4E36 (zhu3,dian3) ,则得到"(zhu3,dian3)"这个字什
if (pinyinRecord != null) {
int indexOfLeftBracket = pinyinRecord.indexOf("(");
int indexOfRightBracket = pinyinRecord.lastIndexOf(")");
String stripedString = pinyinRecord.substring(indexOfLeftBracket + 1, indexOfRightBracket);//去掉左右括号,以上面的例子,获得"zhu3,dian3"; www.2cto.com
//下面根据outputFormat格式对象,对结果进行格式公替换操作
if (HanyuPinyinVCharType.WITH_V == outputFormat.getVCharType())
stripedString = stripedString.replaceAll("u:", "v");
else if (HanyuPinyinVCharType.WITH_U_UNICODE == outputFormat.getVCharType()) {
stripedString = stripedString.replaceAll("u:", "ü");
}
if (HanyuPinyinToneType.WITHOUT_TONE == outputFormat.getToneType()) {
stripedString = stripedString.replaceAll("[1-5]", "");
}
if (HanyuPinyinCaseType.UPPERCASE == outputFormat.getCaseType()) {
stripedStr