读书人

字符串处置-Trie树

发布时间: 2012-08-29 08:40:14 作者: rapoo

字符串处理----Trie树

Trie树,又称单词查找树键树,是一种树形结构,是一种哈希树的变种。典型应用是用于统计和排序大量的字符串(但不仅限于字符串),所以经常被搜索引擎系统用于文本词频统计。它的优点是:最大限度地减少无谓的字符串比较,查询效率比哈希表高。

特性:

1)根节点不包含字符,除根节点外每一个节点都只包含一个字符。

#include<stdio.h>#include<malloc.h>#define NUMBER 26#define WORD_LENGTH 15 struct TrieNode{int count;struct TrieNode * next[NUMBER];};struct TrieNode * root=NULL;void Insert_TrieNode(char *chr){int index,i; struct TrieNode * cur =root,*node;while(*chr!='\0'){index= *chr-'a';if(cur->next[index]==NULL){node=(struct TrieNode*)malloc(sizeof(struct TrieNode));node->count=0;for(i=0;i<NUMBER;i++){node->next[i]=NULL;}cur->next[index]=node;}cur=cur->next[index];chr++;}cur->count++;}void Search_Word(char *chr){int index;char *ch=chr;structTrieNode * cur =root;while(*chr){ index = *chr-'a';if(cur->next[index]==NULL){printf("Not find word %s in Trie Tree\n",chr);return;}cur=cur->next[index];chr++;}printf("Success,word %s=%d times\n",ch,cur->count);}void DFS_Trie(struct TrieNode *node,char *queue){int i;char *head=queue;for(i=0;i<NUMBER;i++){if(node->next[i]!=NULL){while(*queue!='\0')queue++;*queue=(char)(i+'a');queue=head;if(node->next[i]->count>0){printf("%s\t%d\n",queue,node->next[i]->count);}//else//{DFS_Trie(node->next[i],queue);//}head=queue;while(*(queue+1)!='\0')queue++;*queue='\0';queue=head;}}}int main(){int i;char *chr[]={"inn", "intt", "at", "age", "adv", "ant","age"};char *queue = (char*)calloc(WORD_LENGTH,sizeof(char));root = (struct TrieNode*)malloc(sizeof(struct TrieNode));for(i=0;i<NUMBER;i++){root->next[i]=NULL;}for(i=0;i<7;i++){Insert_TrieNode(chr[i]);}Search_Word("age");Search_Word("hello");DFS_Trie(root,queue);return 0;}void ToLower(char *ch){while(*ch!='\0'){if('A'<=*ch&&*ch<='Z')*ch=*ch+32;ch++;}}


读书人网 >互联网

热点推荐