tempChunk.addWord(tempWord);
string remain = src.substr(ChineseLength);
mmseg_recursion(remain, tempChunk);
tempChunk.list.pop_back();
} else {
termslist = content[singleWordStr];
set
vector
// for (setIt = termslist.begin(); setIt != termslist.end(); setIt++) {
// termsVector.push_back(*setIt);
// }
// sort(termsVector.begin(), termsVector.end());
// int sizeVec = termsVector.size();
for (setIt = termslist.begin(); setIt != termslist.end(); setIt++) {
tempStr = *setIt;
size_t foundit = src.find(tempStr, 0);
if (foundit == 0 && tempStr != singleWordStr) {
tempWord.setValue(tempStr);
tempChunk.addWord(tempWord);
if (tempChunk.getWordNumber() > (minChunkWordNumber)) {
tempChunk.list.pop_back();
return;
}
// If the term covers all remaining characters of the string,
// record the finished chunk and return.
if (tempStr.length() == src.length()) {
chunklist.push_back(tempChunk);
if (minChunkWordNumber > tempChunk.getWordNumber()) {
minChunkWordNumber = tempChunk.getWordNumber();
}
return;
}
string remain = src.substr(tempStr.length());
mmseg_recursion(remain, tempChunk);
tempChunk.list.pop_back();
}
}
// Handle the case where the leading single character is taken as a word by itself.
tempStr = singleWordStr;
tempWord.setValue(tempStr);
if (tempChunk.getWordNumber() > (minChunkWordNumber)) {
tempChunk.list.pop_back();
return;
}
string remain = src.substr(tempStr.length());
mmseg_recursion(remain, tempChunk);
tempChunk.list.pop_back();
}
}
}
vector
vector
chunklist.clear();
minChunkWordNumber = 0x7ffffff0;
Chunk tempChunk;
vector
int min = 0x7fffffff;
// cout << min;
mmseg_recursion(src, tempChunk);
int chunkListSize = chunklist.size();
if (chunkListSize == 1) {
return chunklist.at(0).getVectorString();
} else {
for (int i = 0; i < chunkListSize; i++) {
if (chunklist.at(i).getWordNumber() < min) {
min = chunklist.at(i).getWordNumber();
indexInChunkList.clear();
indexInChunkList.push_back(i);
} else if (chunklist.at(i).getWordNumber() == min) {
indexInChunkList.push_back(i);
}
}
// Rule 1: pick the chunk with the maximum average word length; a unique chunk with the fewest words wins.
if (indexInChunkList.size() == 1) {
return chunklist.at(indexInChunkList.at(0)).getVectorString();
} else {
// Rule 2: find the chunk with the least variance.
double minVariance = min *