日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 >内容正文

编程问答

hash编码

發布時間:2023/12/10 编程问答 23 豆豆
生活随笔 收集整理的這篇文章主要介紹了 hash编码 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
?

常用的字符串Hash函數還有ELFHash,APHash等等,都是十分簡單有效的方法。這些函數使用位運算使得每一個字符都對最后的函數值產生影響。另外還有以MD5和SHA1為代表的雜湊函數,這些函數幾乎不可能找到碰撞。

常用字符串哈希函數有 BKDRHash,APHash,DJBHash,JSHash,RSHash,SDBMHash,PJWHash,ELFHash等等。對于以上幾種哈希函數,我對其進行了一個小小的評測。

Hash函數數據1數據2數據3數據4數據1得分數據2得分數據3得分數據4得分平均分
BKDRHash20477448196.5510090.9582.0592.64
APHash23475449396.5588.4610051.2886.28
DJBHash22497547496.5592.31010083.43
JSHash14476150610084.6296.8317.9581.94
RSHash10486150510010051.5820.5175.96
SDBMHash32484950493.192.3157.0123.0872.41
PJWHash302648785130043.89021.95
ELFHash302648785130043.89021.95

其中數據1為100000個字母和數字組成的隨機串哈希沖突個數。數據2為100000個有意義的英文句子哈希沖突個數。數據3為數據1的哈希值與 1000003(大素數)求模后存儲到線性表中沖突的個數。數據4為數據1的哈希值與10000019(更大素數)求模后存儲到線性表中沖突的個數。

經過比較,得出以上平均得分。平均數為平方平均數??梢园l現,BKDRHash無論是在實際效果還是編碼實現中,效果都是最突出的。APHash也是較為優秀的算法。DJBHash,JSHash,RSHash與SDBMHash各有千秋。PJWHash與ELFHash效果最差,但得分相似,其算法本質是相似的。

HashFun.h

unsigned?int?SDBMHash(char?*str)
{
????unsigned?int?hash?=?0;

????while?(*str)
????{
????????//?equivalent?to:?hash?=?65599*hash?+?(*str++);
????????hash?=?(*str++)?+?(hash?<<?6)?+?(hash?<<?16)?-?hash;
????}

????return?(hash?&?0x7FFFFFFF);
}

//?RS?Hash?Function
unsigned?int?RSHash(char?*str)
{
????unsigned?int?b?=?378551;
????unsigned?int?a?=?63689;
????unsigned?int?hash?=?0;

????while?(*str)
????{
????????hash?=?hash?*?a?+?(*str++);
????????a?*=?b;
????}

????return?(hash?&?0x7FFFFFFF);
}

//?JS?Hash?Function
unsigned?int?JSHash(char?*str)
{
????unsigned?int?hash?=?1315423911;

????while?(*str)
????{
????????hash?^=?((hash?<<?5)?+?(*str++)?+?(hash?>>?2));
????}

????return?(hash?&?0x7FFFFFFF);
}

//?P.?J.?Weinberger?Hash?Function
unsigned?int?PJWHash(char?*str)
{
????unsigned?int?BitsInUnignedInt?=?(unsigned?int)(sizeof(unsigned?int)?*?8);
????unsigned?int?ThreeQuarters????=?(unsigned?int)((BitsInUnignedInt??*?3)?/?4);
????unsigned?int?OneEighth????????=?(unsigned?int)(BitsInUnignedInt?/?8);
????unsigned?int?HighBits?????????=?(unsigned?int)(0xFFFFFFFF)?<<?(BitsInUnignedInt?-?OneEighth);
????unsigned?int?hash?????????????=?0;
????unsigned?int?test?????????????=?0;

????while?(*str)
????{
????????hash?=?(hash?<<?OneEighth)?+?(*str++);
????????if?((test?=?hash?&?HighBits)?!=?0)
????????{
????????????hash?=?((hash?^?(test?>>?ThreeQuarters))?&?(~HighBits));
????????}
????}

????return?(hash?&?0x7FFFFFFF);
}

//?ELF?Hash?Function
unsigned?int?ELFHash(char?*str)
{
????unsigned?int?hash?=?0;
????unsigned?int?x????=?0;

????while?(*str)
????{
????????hash?=?(hash?<<?4)?+?(*str++);
????????if?((x?=?hash?&?0xF0000000L)?!=?0)
????????{
????????????hash?^=?(x?>>?24);
????????????hash?&=?~x;
????????}
????}

????return?(hash?&?0x7FFFFFFF);
}

//?BKDR?Hash?Function
unsigned?int?BKDRHash(char?*str)
{
????unsigned?int?seed?=?131;?//?31?131?1313?13131?131313?etc..
????unsigned?int?hash?=?0;

????while?(*str)
????{
????????hash?=?hash?*?seed?+?(*str++);
????}

????return?(hash?&?0x7FFFFFFF);
}

//?DJB?Hash?Function
unsigned?int?DJBHash(char?*str)
{
????unsigned?int?hash?=?5381;

????while?(*str)
????{
????????hash?+=?(hash?<<?5)?+?(*str++);
????}

????return?(hash?&?0x7FFFFFFF);
}

//?AP?Hash?Function
unsigned?int?APHash(char?*str)
{
????unsigned?int?hash?=?0;
????int?i;

????for?(i=0;?*str;?i++)
????{
????????if?((i?&?1)?==?0)
????????{
????????????hash?^=?((hash?<<?7)?^?(*str++)?^?(hash?>>?3));
????????}
????????else
????????{
????????????hash?^=?(~((hash?<<?11)?^?(*str++)?^?(hash?>>?5)));
????????}
????}
????return?(hash?&?0x7FFFFFFF);
}

以上為轉載內容 以下是自己測試Hash算法效果的函數。

main.cpp

#include<iostream>
#include<fstream>
#include<cstdlib>
#include<cstring>
#include<ctime>
#include"HashFun.h"
using?namespace?std;
/*
??從Dict.txt讀入英文單詞表,總共24018個單詞
??測試哈希函數的分布情況

*/

void?ReadDict(char?**?dict,int&?len)
{
??fstream?dictFile;
??dictFile.open("Dict.txt",ios::in);
??if(!dictFile)??{??cout<<"OpenFailed."<<endl;??exit(-1);}
??char?tmpString[50];
??len=0;
??int?sl;
??while(!dictFile.eof())
??{
????dictFile.getline(tmpString,50);

????sl=strlen(tmpString);
????if(sl>0?&&?tmpString[sl-1]=='\r')
????????????{
????????????????tmpString[sl-1]='\0';
????????????????--sl;
????????????}
????if(sl>0)
????{
????????dict[len]=new?char[sl];
????????????if(dict[len]==NULL)?{???cout<<"Memory?fail."<<endl;exit(-1);}
????????strcpy(dict[len],tmpString);
????????//cout<<dict[len]<<endl;
????????len++;
????}
??}
??dictFile.close();
}

const?int?MaxHashLen=49999;
void?Analise(unsigned?int?HashHeight[])
{
????const?unsigned?int?AreaNum=20;
????unsigned?int?AreaHeight[AreaNum];
????const?unsigned?int?MaxRecordHeight=10;???????//記錄各自出現高度超過10的放在一起統計
????unsigned??int?NumOfHeight[MaxRecordHeight+1];
????memset(AreaHeight,0,sizeof(AreaHeight));
????memset(NumOfHeight,0,?sizeof(NumOfHeight));
????unsigned?int?step?=?MaxHashLen/AreaNum+1,wordNum=0,MaxHeight=0;

????for(int?i=0;i<MaxHashLen;i++)
????{
????????wordNum+=HashHeight[i];
????????if(?HashHeight[i]>=MaxRecordHeight?)
????????????NumOfHeight[MaxRecordHeight]++;
????????else
????????????NumOfHeight[HashHeight[i]]++;
????????if(HashHeight[i]>MaxHeight)
????????????MaxHeight=HashHeight[i];

????????AreaHeight[i/step]+=HashHeight[i];

????}
????int?searchtime=0;
????for(unsigned?int?i=1;i<MaxRecordHeight;i++)
????????searchtime+=(i+1)*i*NumOfHeight[i];?//有i*NumOfHeight[i]個的單詞在高度為i的鏈表里
????searchtime+=((MaxRecordHeight+MaxHeight)/2+1)*(MaxRecordHeight+MaxHeight)/2*NumOfHeight[MaxRecordHeight];
????searchtime/=2;
????cout<<"--------Hash分布如下"<<endl;
????for(unsigned?int?i=0;i<AreaNum;i++)
????????cout<<"[?"<<i*step<<"?,?"<<(i+1)*?step?<<"?]:?"<<AreaHeight[i]<<endl;

????cout<<"--------鏈表高度情況"<<endl;
????for(unsigned?int?i=0;i<MaxRecordHeight;i++)
????????cout<<"高度為:?"<<i<<?"的鏈表個數為?"?<<NumOfHeight[i]<<endl;
????cout<<"高度大于?"<<MaxRecordHeight<<?"的鏈表個數為?"?<<NumOfHeight[MaxRecordHeight]<<endl;

????cout<<"--------鏈表最高高度:"<<MaxHeight<<endl;
????cout<<"--------單詞表個數為:"<<wordNum<<endl;
????cout<<"--------平均鏈表高度:"<<float(wordNum)/(MaxHashLen-NumOfHeight[0])<<endl;
????cout<<"--------鏈表的利用率:"<<1-float(NumOfHeight[0])/MaxHashLen<<endl;
????cout<<"--------平均查找次數:"<<1.0*searchtime/wordNum<<endl;
}

int?main()
{
????char?*dict[24100];????//存儲單詞表
??int?wordNum;
??unsigned?int?HashHeight[MaxHashLen],hashvalue;
??memset(HashHeight,0,sizeof(HashHeight));
??ReadDict(dict,wordNum);
????unsigned?int?startTime,curTime;
????startTime=clock();
????for(int?i=0;i<wordNum;i++)
????{
????????hashvalue?=?SDBMHash(?dict[i]?)?%?MaxHashLen;
//????????hashvalue?=?RSHash(?dict[i]?)?%?MaxHashLen;
//????????hashvalue?=?JSHash(?dict[i]?)?%?MaxHashLen;
//????????hashvalue?=?PJWHash(?dict[i]?)?%?MaxHashLen;
//????????hashvalue?=?ELFHash(?dict[i]?)?%?MaxHashLen;
//????????hashvalue?=?BKDRHash(?dict[i]?)?%?MaxHashLen;
//????????hashvalue?=?DJBHash(?dict[i]?)?%?MaxHashLen;
//????????hashvalue?=?APHash(?dict[i]?)?%?MaxHashLen;
????????HashHeight[?hashvalue?]++;
????????//cout<<hashvalue<<endl;
????}
????curTime=clock();
????Analise(HashHeight);
????cout<<"cost?time:"<<(curTime-startTime)/1000?<<"ms"<<endl;
????//for(int?i=0;i<MaxHashLen;i++)
??????//??cout<<HashHeight[i]<<endl;
??return?0;
}

我的測試說明

測試的內容,是從英漢詞典Dict.txt讀取24018個單詞,將哈希值對49999求余,觀察Hash算法的散列情況

英漢詞典Dict.txt在這里下載:http://wenku.baidu.com/view/4ff90a9851e79b8968022695.html

測試結果如下:

方法最高高度平均查找次數
BKDRHash61.2383
SDBMHash51.2387
JSHash61.2397
DJBHash51.2403
RSHash51.2417
PJWHash51.2427
ELFHash51.2427
APHash61.2436

轉自:

http://apps.hi.baidu.com/share/detail/39630021

http://www.cnblogs.com/atlantis13579/archive/2010/02/06/1664792.html

轉載于:https://www.cnblogs.com/dongtian0359/archive/2011/07/10/2102192.html

總結

以上是生活随笔為你收集整理的hash编码的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。