全文检索模型算法实现 (一)

2014-11-24 02:46:51 · 作者: · 浏览: 5


全文检索是一种将文件中所有文本与检索项匹配的文字资料检索方法。全文检索系统是按照全文检索理论建立起来的用于提供全文检索服务的软件系统。


[cpp]
#include
#include
#include

//Status是函数的类型,其值是函数结果状态代码
typedef int Status;
#define INT_MAX 32767
#define LINESIZE 80//设一行字符数为80
#define MAXKEYLEN 26//关键字的长度
#define MAXNUM 100//统计单词的最大数
typedef struct{
char *ch; //关键字
int num; //关键字的长度
char *info;//关键字有关信息
}KeysType; //关键字类型

typedef enum{LEAF,BRANCH} NodeKind;//结点种类:{叶子,分支}

typedef struct DLTNode{
char symbol;
struct DLTNode *next; //指向兄弟结点的指针
NodeKind kind; //结点标志
union{
struct DLTNode *first;//分支结点的孩子链指针
struct {
int idx; //叶子结点的count数组下标指针
KeysType infoptr;
};
};
}DLTNode,*DLTree; //双链树类型
char line[LINESIZE];//用于缓存文章中每行的字符串
struct{
int times;
KeysType info;
}count[MAXNUM];
int NUM=0;//记录整个文章的单词总数
char ch1[17][100]={"a","an","the","them","there","here","they","are","that","this","those","what","which","why","then","and","these"};

void InitTree(DLTree &root){
//初始化键树
root=new DLTNode;
root->next =NULL;
root->first=NULL;
}//InitTree

Status Insert_DLTree(DLTree &root,KeysType K,int &n){
//指针root所指双链树中已含n个关键字,若不存在和K相同的关键字,
//则将关键字K插入到双链树中相应位置,树中关键字个数n增1且返回TRUE;..
//否则不再插入返回FALSE
DLTree p=root->first,f=root,pre,s;
int j=0;
while (p && j pre=NULL;
while (p && p->symbol pre=p;
p=p->next ;
}
if (p && p->symbol ==K.ch[j]){
f=p;
p=p->first ;
j++;
}//找到后进入键树的下一层,即查找和K.ch[j+1]相同的结点
else{//没有找到和K.ch[j]相同的结点,插入K.ch[j]
s=new DLTNode;
s->kind =BRANCH;
s->symbol =K.ch [j++];
if (pre) pre->next =s;
else f->first =s;
s->next =p;
s->first =NULL;
p=s;
break;
}//else
}//while
if (p&&j==K.num && p->first&&p->first ->kind ==LEAF) return FALSE;
else{//键树中已存在相同前缀的单词,插入由剩余字符构成的单支树
while (j<=K.num ){
s=new DLTNode;
s->next =NULL;
if (p){
s->next =p->next ;
p->first =s;
p=s;
}
else {
f->first =s;
p=s;
}
if(j s->kind =BRANCH;
s->symbol =K.ch [j++];
s->first =NULL;
}
else{
s->symbol ='$';
s->kind =LEAF;
n++;
s->idx =n;
s->infoptr.ch =count[s->idx ].info.ch =K.ch ;
s->infoptr.info =count[s->idx ].info.info =K.info ;
s->infoptr.num =count[s->idx ].info.num=K.num ;
count[s->idx ].times=0;
j++;
}
}//while
return TRUE;
}//else
}//Insert_DLTree


void CreatDLTree(DLTree &T,KeysType *key,int &n){
//建立键树模型
for (int i=0;i<=16;i++){
key[i].ch=ch1[i];
key[i].info=NULL;
key[i].num =strlen(key[i].c