Bouncer / bouncer / linear_sketches / hashing_alg.c
hashing_alg.c
Raw
/*******************************************************/
/*                 Hashing algorithms                  */
/*******************************************************/

// https://www.geeksforgeeks.org/program-to-find-the-next-prime-number/
bool isPrime(uint32_t n)
{
    // Corner cases
    if (n <= 1)  return false;
    if (n <= 3)  return true;
    // This is checked so that we can skip 
    // middle five numbers in below loop
    if (n%2 == 0 || n%3 == 0) return false;
    for (uint32_t i=5; i*i<=n; i=i+6)
        if (n%i == 0 || n%(i+2) == 0)
           return false;
    return true;
}

// https://www.geeksforgeeks.org/program-to-find-the-next-prime-number/
uint32_t nextPrime(uint32_t N)
{
    // Base case
    if (N <= 1)
        return 2;
    uint32_t prime = N;
    bool found = false;
    // Loop continuously until isPrime returns
    // true for a number greater than n
    while (!found) {
        prime++;
        if (isPrime(prime))
            found = true;
    }
    return prime;
}


/*******************************************************/
/*  Carter & Wegman, arbitrary k independence          */
/*******************************************************/

uint32_t* generate_random_bits_CW(uint32_t num_hash_buckets, uint32_t instantiations, uint8_t independence_degree, uint16_t seed) // number of random bits sequences to return  
{
    uint64_t i; 

    srand(seed); 

    uint32_t* hash_table; 
    if ((hash_table = (uint32_t*) calloc(instantiations * independence_degree,sizeof(uint32_t))) == NULL){ 
        return NULL;
    }
    for(i = 0; i < instantiations * independence_degree; i ++){
        hash_table[i] = rand() / (RAND_MAX / num_hash_buckets + 1); // generate random number between 0 and prime for all indices
    }
    return hash_table;
}

uint32_t retrieve_hash_index_CW(uint32_t num_hash_buckets, int token, uint32_t* rand_bits) // pointer into the table of random bits
{
    uint64_t hash_val = 0;  // to prevent overflow 
    for(uint32_t i = 0; i < num_hash_buckets; i ++){
        hash_val += pow(token, i) * rand_bits[i]; 
    }
    hash_val = hash_val% num_hash_buckets; // modulus the number of buckets 
    return (uint32_t) hash_val; 
}

// M. DIETZFELBINGER, Universal hashing and k-wise independent random variables via integer arithmetic without primes
/*******************************************************/
/*        2 independent hashing M. DIETZFELBINGER,     */
/*******************************************************/

uint32_t* generate_random_bits_D(uint32_t num_hash_buckets, uint32_t instantiations, uint8_t independence_degree, uint16_t seed) // number of random bits sequences to return  
{
    // assumes input can be represented with at most 32 bits. 
    uint64_t i; 
    uint32_t* hash_table; 

    srand(seed); 
    
    if ((hash_table = (uint32_t*) calloc(instantiations * 2,sizeof(uint32_t))) == NULL){ 
        return NULL;
    }
    for(i = 0; i < instantiations * independence_degree; i ++){
        hash_table[i] = rand(); // pick a random number in 2^{input bit size}, 2^32
    }
    return hash_table;
}

uint32_t retrieve_hash_index_D(uint32_t num_hash_buckets, int token, uint32_t* rand_bits) // pointer into the table of random bits
{
    uint32_t a, b;  // to prevent overflow 
    a = rand_bits[0]; 
    b = rand_bits[1]; 
    uint16_t hash_bits = log2(num_hash_buckets); 
    return (a * token + b) >> (32 - hash_bits);  
}