/*
unsigned char b[16];
MD5(c,strlen(c),b);
return(b[0]|(b[1]<<8)|(b[2]<<16)|(b[3]<<24));
*/
n=0x100;
while (*c)
{
v=n|(*c);
n+=0x100;
r= (int)((v>>2)^v)&0x0f;
ret=(ret(32-r));
ret&=0xFFFFFFFFL;
ret^=v*v;
c++;
}
return((ret>>16)^ret);
}
[cpp]
// MySql中出现的字符串Hash函数
#ifndef NEW_HASH_FUNCTION
/* Calc hashvalue for a key */
static uint calc_hashnr(const byte *key,uint length)
{
register uint nr=1, nr2=4;
while (length--)
{
nr^= (((nr & 63)+nr2)*((uint) (uchar) *key++))+ (nr << 8);
nr2+=3;
}
return((uint) nr);
}
/* Calc hashvalue for a key, case indepenently */
static uint calc_hashnr_caseup(const byte *key,uint length)
{
register uint nr=1, nr2=4;
while (length--)
{
nr^= (((nr & 63)+nr2)*((uint) (uchar) toupper(*key++)))+ (nr << 8);
nr2+=3;
}
return((uint) nr);
}
#else
/*
* Fowler/Noll/Vo hash
*
* The basis of the hash algorithm was taken from an idea sent by email to the
* IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and
* Glenn Fowler (gsf@research.att.com). Landon Curt Noll (chongo@toad.com)
* later improved on their algorithm.
*
* The magic is in the interesting relationship between the special prime
* 16777619 (2^24 + 403) and 2^32 and 2^8.
*
* This hash produces the fewest collisions of any function that we've seen so
* far, and works well on both numbers and strings.
*/
uint calc_hashnr(const byte *key, uint len)
{
const byte *end=key+len;
uint hash;
for (hash = 0; key < end; key++)
{
hash *= 16777619;
hash ^= (uint) *(uchar*) key;
}
return (hash);
}
uint calc_hashnr_caseup(const byte *key, uint len)
{
const byte *end=key+len;
uint hash;
for (hash = 0; key < end; key++)
{
hash *= 16777619;
hash ^= (uint) (uchar) toupper(*key);
}
return (hash);
}
#endif
3. Hash 表
我们接下去分析Poco中Hash表的实现。Poco中实现了3种哈希表,分别是SimpleHashTable, HashTable,LinearHashTable。它们的实现对应了当出现冲突时,解决冲突的不同方法。首先我们看一下通用的解决方法。
1. 线性探测。当出现碰撞时,顺序依次查询后续位置,直到找到空位。《利用线性探测法构造散列表》
2. 双重散列法。当使用第一个散列Hash函数,出现碰撞时,用第二个散列函数去寻找空位
3. 拉链法。出现碰撞的时候,使用list存储碰撞数据
4. 线性哈希,linear hash。立刻分裂或者延迟分裂。通过分裂,控制桶的高度,每次分裂时,会重新散列碰撞元素。《linear hashing》
SimpleHashTable的实现对应了方法一;HashTable对应了方法3;LinearHashTable对应了方法4。
3.1 SimpleHashTable
从类图里我们看到,SimpleHashTable是一个HashEntry容器, 内部定义如下:
[cpp]
std::vector _entries
当插入新数据时,首先根据hash值,计算空位,然后存储;如果发现冲突,顺着计算的hash值按地址顺序依次寻找空位;如_entries容器无空位,则抛出异常。
[cpp]
UInt32 insert(const Key& key, const Value& value)
/// Returns the hash value of the inserted item.
/// Throws an exception if the entry was already inserted
{
UInt32 hsh = hash(key);
insertRaw(key, hsh, value);
return hsh;
}
Value& insertRaw(const Key& key, UInt32 hsh, const Value& value)
/// Returns the hash value of the inserted item.
/// Throws an exception if the entry was already inserted
{
UInt32 pos = hsh;
if (!_entries[pos])
_entries[pos] = new HashEntry(key, value);
else