五月天青色头像情侣网名,国产亚洲av片在线观看18女人,黑人巨茎大战俄罗斯美女,扒下她的小内裤打屁股

歡迎光臨散文網 會員登陸 & 注冊

C語言實現HyperLogLog

2023-06-25 13:21 作者:機器朗讀  | 我要投稿


#include <stdio.h>
#include <stdlib.h>
#include <math.h> // gcc -lm for log() 

#define REGISTER_SIZE 16                  // number of hash bits used to select a register (16 by default)
#define ARRAY_SIZE (1 << REGISTER_SIZE)   // number of entries in the register array

/* State of one HyperLogLog sketch. */
typedef struct {
    unsigned int* registers;    // register array; hyperloglog_update keeps the largest rank seen per slot
    int size;                   // number of entries in registers
    int b;                      // number of hash bits used for the register index (set to REGISTER_SIZE)
} HyperLogLog;

/*
 * djb2 string hash: starts at 5381 and folds each byte in as
 * hash = hash * 33 + byte.
 *
 * data: NUL-terminated string; must not be NULL.
 * Returns the hash of all bytes before the terminator. The arithmetic is
 * unsigned, so it wraps instead of overflowing.
 */
unsigned int hash_function(const char* data) {
    const unsigned char* p = (const unsigned char*)data;
    unsigned int hash = 5381;

    /* Read bytes as unsigned char so that bytes >= 0x80 produce the same
     * hash whether plain char is signed or unsigned on the target. */
    while (*p != '\0') {
        hash = ((hash << 5) + hash) + *p; /* hash * 33 + c */
        p++;
    }

    return hash;
}

// 創(chuàng)建并初始化HyperLogLog數(shù)據(jù)結(jié)構(gòu)
HyperLogLog* hyperloglog_create() {
 ? ?HyperLogLog* hll = (HyperLogLog*)malloc(sizeof(HyperLogLog));
 ? ?hll->registers = (unsigned int*)calloc(ARRAY_SIZE, sizeof(unsigned int));
 ? ?hll->size = ARRAY_SIZE;
 ? ?hll->b = REGISTER_SIZE;

 ? ?return hll;
}

/*
 * Release a HyperLogLog sketch: frees the register array and then the
 * structure itself.
 *
 * hll: sketch to free; NULL is accepted and ignored, mirroring free().
 */
void hyperloglog_destroy(HyperLogLog* hll) {
    if (hll == NULL) {
        return;
    }
    free(hll->registers);
    free(hll);
}

/*
 * Count the leading zero bits of value when it is viewed as a
 * max_bits-wide field occupying the low-order bits.
 *
 * value:    the field to inspect; bits above max_bits are ignored.
 * max_bits: width of the field in bits. Values <= 0 yield 0; values above
 *           32 are treated as 32.
 *
 * Returns a count in [0, max_bits]; max_bits itself means every bit of the
 * field is zero.
 */
int leading_zeros(unsigned int value, int max_bits) {
    if (max_bits <= 0) {
        return 0;
    }
    if (max_bits > 32) {
        max_bits = 32;
    }

    /* Start at the top bit of the field, not at bit 31: the caller passes a
     * right-aligned value, so testing bit 31 would never see a set bit. */
    unsigned int probe = 1u << (max_bits - 1);
    int count = 0;
    while (count < max_bits && (value & probe) == 0) {
        probe >>= 1;
        count++;
    }
    return count;
}

/*
 * Record one element in the sketch.
 *
 * The element is hashed with hash_function(). The low bits of the hash
 * (masked with size - 1) select a register; the remaining high bits are
 * passed to leading_zeros(), and that result plus one is stored in the
 * register if it is larger than the current value.
 *
 * hll:  sketch to update; NULL is ignored.
 * data: NUL-terminated string to add; NULL is ignored.
 */
void hyperloglog_update(HyperLogLog* hll, const char* data) {
    if (hll == NULL || data == NULL) {
        return;
    }

    unsigned int hash = hash_function(data);
    unsigned int index = hash & (unsigned int)(hll->size - 1);
    unsigned int value = hash >> REGISTER_SIZE;

    /* Kept unsigned so the comparison with the register does not mix
     * signed and unsigned operands. */
    unsigned int rank = (unsigned int)leading_zeros(value, 32 - REGISTER_SIZE) + 1u;
    if (rank > hll->registers[index]) {
        hll->registers[index] = rank;
    }
}

/*
 * Estimate the number of distinct elements recorded in the sketch.
 *
 * Computes alpha * m^2 / sum(2^-register[i]) with m = hll->size, then:
 *   - if the result is <= 2.5 * m and at least one register is still zero,
 *     replaces it with m * ln(m / zero_registers);
 *   - if the result is above 2^32 / 30 (and below 2^32), applies
 *     -2^32 * ln(1 - E / 2^32).
 *
 * hll: sketch to read; NULL, a NULL register array or a non-positive size
 *      yields 0.
 *
 * Returns the estimate as a double.
 */
double hyperloglog_estimate(HyperLogLog* hll) {
    if (hll == NULL || hll->registers == NULL || hll->size <= 0) {
        return 0.0;
    }

    const double m = (double)hll->size;
    const double two_pow_32 = 4294967296.0;
    const double alpha = 0.7213 / (1 + 1.079 / m);

    double sum = 0;
    int zero_count = 0;
    for (int i = 0; i < hll->size; i++) {
        unsigned int reg = hll->registers[i];
        /* ldexp(1.0, -reg) is 2^-reg without an integer shift, which would
         * be undefined for shift counts >= the width of int. */
        sum += ldexp(1.0, -(int)reg);
        if (reg == 0) {
            zero_count++;
        }
    }

    double estimate = alpha * m * m / sum;

    if (estimate <= 2.5 * m) {
        /* Small-range correction, only possible while empty registers remain. */
        if (zero_count != 0) {
            estimate = m * log(m / zero_count);
        }
    } else if (estimate > (1.0 / 30.0) * two_pow_32 && estimate < two_pow_32) {
        /* Large-range correction. Skipped once the raw estimate reaches
         * 2^32, where the logarithm's argument would be <= 0. */
        estimate = -two_pow_32 * log(1 - estimate / two_pow_32);
    }

    return estimate;
}

int main() {
 ? ?HyperLogLog* hll = hyperloglog_create();

 ? ?// 更新寄存器
 ? ?hyperloglog_update(hll, "data1");
 ? ?hyperloglog_update(hll, "data2");
 ? ?hyperloglog_update(hll, "data3");

 ? ?// 估計(jì)基數(shù)
 ? ?double estimate = hyperloglog_estimate(hll);
 ? ?printf("Estimated cardinality: %.0f\n", estimate);

 ? ?hyperloglog_destroy(hll);
 ? ?return 0;
}


C語言實現HyperLogLog的評論 (共 條)

分享到微博請遵守國家法律
平谷区| 綦江县| 海淀区| 清原| 会同县| 缙云县| 海丰县| 张家口市| 富源县| 庆元县| 同仁县| 怀柔区| 衡阳县| 天气| 永宁县| 台东市| 建湖县| 崇仁县| 昭平县| 西宁市| 宝兴县| 东乡族自治县| 江都市| 于田县| 香格里拉县| 广汉市| 行唐县| 营山县| 宁阳县| 平顶山市| 循化| 西和县| 谢通门县| 钟山县| 松滋市| 绵竹市| 宾阳县| 天柱县| 广宁县| 六盘水市| 永靖县|