大数据过滤及判断算法 -- Bitmap / Bloomfilter(二)

2014-11-24 10:13:16 · 作者: · 浏览: 1
rn 0;
register size_t hash = 1315423911;
while (size_t ch = (size_t)*str++) {
hash = ((hash << 5) ^ (hash >> 27)) ^ ch;
}
return hash;
}

typedef size_t (*HASH_FUNC)(const char*);

HASH_FUNC HASH[] = {
BKDR_hash,FNV_hash,DEK_hash
};


void bloom_filter_mark(BloomFilter* bf, const char* v)
{
HashResult *hr = (HashResult*)calloc(1,sizeof(HashResult)+(sizeof(size_t)*HASH_RESULT));

for (int i=0;i!=HASH_RESULT;++i) {
hr->result[i] = (HASH[i](v)) % BLOOM;
// set the binary bit to 1
bf[hr->result[i]/8] |= 0x80UL >> (hr->result[i]%8);
//printf("**%lu|hash-%d[%lu]|offset[%X]\n",HASH[i](v),i,hr->result[i],bf[hr->result[i]/8]);
}

free(hr);
}

bool bloom_filter_check(BloomFilter* bf, const char* v)
{
HashResult *hr = (HashResult*)calloc(1,sizeof(HashResult)+(sizeof(size_t)*HASH_RESULT));

size_t in = HASH_RESULT;
for (int i=0;i!=HASH_RESULT;++i) {
hr->result[i] = HASH[i](v) % BLOOM;
//printf("**%lu|%X\n",hr->result[i],bf[hr->result[i]/8]);
// check this bit is "1" or not
if (bf[hr->result[i]/8] & (0x80UL >> (hr->result[i]%8)))
in--;
}

free(hr);
return in == 0;

}

int main()
{
// std::cout< // std::cout< // std::cout<
BloomFilter* bloom = new (std::nothrow) BloomFilter[BLOOM];
if (NULL == bloom)
printf("No Space to build BloomFilter\n"),exit(0);

printf("BloomFilter Calloc Memory Ok\n");

for(int i=0;i!=1000000;i++) {
char buf[16] = {0};
sprintf(buf,"%d",i);
bloom_filter_mark(bloom,buf);
}
printf("BloomFilter Build Ok\n");

for(int i=999995;i!=1000010;i++) {
char buf[16] = {0};
sprintf(buf,"%d",i);
if (bloom_filter_check(bloom,buf))
printf("[FOUND] %d\n",i);
}

delete bloom;

return 0;
}