當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

统计字符串中单词个数的算法优化

發布時間：2023/12/19 编程问答 27 豆豆

生活随笔收集整理的這篇文章主要介紹了统计字符串中单词个数的算法优化小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

要求：輸入一個字符串，統計每個單詞的個數。單詞間用空格隔開，可多個空格，寫出自己認為高效的算法。

例如：輸入：I love love China
輸出為：
I： 1
love： 2
China： 1

首先想到的還是模擬的方法，就是用struct把出現過的單詞緩存起來，然后再輸入文本中遍歷到新單詞的時候，遍歷一次struct，看這個單詞是不是已經存，做相關處理。
如果輸入文本中有n個字母，不重復的字母為m個，則算法復雜度為O(nm^2) 最好情況是m =1 ，最差情況是m=n 其實現代碼如下：

1
2 #include <stdio.h>
3 #include <string.h>
4 ?struct struct_words{
5 char word[20];
6 int count;
7 };
8 ?int main(){
9 char string[100];
10 char c;
11 struct struct_words words[20];
12 int i = 0, k = 0 , ws =0;
13
14 for(; i < 20; i++){
15 words[i].word[0] = '\0';
16 words[i].count = 0;
17 }
18 puts("please input words.");
19 gets(string);
20 puts("=============開始取詞================");
21
22 i = 0;
23 do{
24 c = string[i];
25 if(c != ' ' && c !='\0'){
26 words[k].word[ws] = c;
27 words[k].count = 1;
28 ws ++;
29 }else{
30 words[k].word[ws] = '\0';
31 ws = 0;
32 k ++;
33 }
34 i ++;
35 }while(c!='\0');lda
36
37
38 puts("=========== 合并相同的單詞 ==============");
39 for(i = 0; words[i].word[0] != '\0' ; i++){
40 puts(words[i].word);
41 if( words[i].count >= 1)
42 for(k = i; words[k].word[0] != '\0'; k++){
43 if(strcmp(words[i].word, words[k].word) == 0
44 && words[k].count == 1){
45 words[k].count --;
46 words[i].count ++;
47 }
48 }
49 }
50
51 puts("=============== End ==============");
52 for(i = 0;words[i].word[0] != '\0' ;i++){
53 if(words[i].count != 0 )
54 printf("%s:\t\t%d\n",words[i].word, words[i].count);
55 }
56 return(0);
57 }

然后呢，做一下優化，恩路是遍歷用戶的輸入文本是必須的，但是，單詞的緩存和出現次數的統計是可以使用hash算法來優化的，借用hash算法的特性，使復雜度立刻就降低到了 O(n),實現代碼如下：

#include <stdio.h>
#include <string.h>
#define N 100

struct struct_words{
char word[100];
int count;
};

int hash(char* key)
{
unsigned long h=0;
while(*key)
{
h=(h<<4)+*key++;
unsigned long g=h & 0xF0000000L;
if(g)
h^=g>>24;
h&=~g;
}
return h&N;
}
int main(){
char string[1000];
char current_word[100];
char c;
struct struct_words words[200];
int i = 0, k = 0 , ws =0 , key;
int keys[100];

for(; i < 200; i++){
words[i].word[0] = '\0';
words[i].count = 0;
}
puts("=============輸入一些單詞，用空格隔開================");
gets(string);

i = 0;
do{
c = string[i];
//如果第一個單詞前有空格，跳過去
if( ws == 0 && c == ' ') {i++ ; continue;}
if(c != ' ' && c !='\0'){
current_word[ws] = c;
ws ++;
}else{
current_word[ws] = '\0';
key = hash(current_word);
if(words[key].count == 0){
strcpy(words[key].word, current_word);
keys[k] = key;
k++;
}
words[key].count ++;
ws = 0;
}
i ++;
}while(c != '\0');

printf("%d" ,k);
puts("===============打印結果 ==============");
for(i = 0 ; i < k ;i++){
printf("%s:\t\t%d\n",words[keys[i]].word, words[keys[i]].count);
}
puts("=============== End ==============");
return 0;
}

呵呵，弄了近三個小時，發現Linux下gdb不熟太痛苦了，加油！

轉載于:https://www.cnblogs.com/amboyna/archive/2009/12/05/1617387.html

總結

以上是生活随笔為你收集整理的统计字符串中单词个数的算法优化的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： iPhone设置黑名单防垃圾短信骚扰
下一篇：自用Excel VBA技巧整理