Bouncer / bouncer / linear_sketches / countmin.c
countmin.c
Raw
/*******************************************************/
/*           Countmin linear sketch class              */
/*******************************************************/

static void
CountMin_dealloc(CountMin *self)
{
    free(self->X);
    free(self->hash_table);
    Py_TYPE(self)->tp_free((PyObject *) self);
}

static PyObject *
CountMin_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    // initialize attributes to 0
    CountMin *self;
    self = (CountMin *) type->tp_alloc(type, 0);

    if (self == NULL) {
        Py_DECREF(self); // error checking 
        return NULL;
    }
    
    return (PyObject *) self;
}


static int
CountMin_init(CountMin *self, PyObject *args, PyObject *kwds) // add n as parameter?
{
    float epsilon, delta; 
    uint32_t s, t; 
    uint16_t rand = 1; 
    const char* version = "CW"; 
    PyObject* hasher = Py_BuildValue("s#", version, strlen(version));;
    static char *kwlist[] = {"epsilon", "delta", "hasher", NULL};

    // passing in arguments 
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "ff|OI", kwlist, &epsilon, &delta, &hasher, &rand)){ // optional argument for hasher, default is "tabulation"
        PyErr_SetString(MemoryError, "Unable to parse parameters");
        return -1;
    }

    // extracting out arguments that super can handle
    PyObject * super_args;
    if( (super_args = PyTuple_Pack(2, PyFloat_FromDouble((double) epsilon), PyFloat_FromDouble((double) delta))) == NULL){
        PyErr_NoMemory();
        return -1;
    }

    // super().__init__(epsilon, delta)
    if((LinearSketch_init(&self->super, super_args, NULL)) != 0){
        PyErr_NoMemory();
        return -1;
    }
  
    Py_XDECREF(super_args);

    if (!PyUnicode_Check(hasher)) {
        PyErr_SetString(PyExc_TypeError,
                        "The version attribute must be a string");
        return -1;
    }

    epsilon = byte_to_float(self->super.epsilon);
    delta = byte_to_float(self->super.delta); 

    s = (uint32_t) ceil(2/ (float) epsilon); 
    t = ceil( log2(1/(float) delta));

    self->seed = rand; 

    // generate random int from seed 
    
    const char* passed_in_vesion = PyUnicode_DATA(hasher);
    Py_DECREF(hasher);

    if( strcmp(passed_in_vesion, "CW") == 0){  // CW for Carter & Wegman 
        // either if it was passed in or not, default value 
        s = nextPrime(s); 
        if((self->hash_table = generate_random_bits_CW(s,  t, 2, self->seed)) == NULL){
            PyErr_SetString(MemoryError, "Unable to allocate space, terminating");
            return -1; 
        } 
        self->hash_bit = 0; 
    }
    else if( strcmp(passed_in_vesion, "mult-shift") == 0){  // 
        if( (self->hash_table = generate_random_bits_D(s,  t, 2, self->seed)) == NULL){
            PyErr_SetString(MemoryError, "Unable to allocate space, terminating");
            return -1; 
        } 
        self->hash_bit = 1; 
    }
    else{ 
        PyErr_SetString(VersionError, "version must be set to: CW, mult-shift"); // need to fill in options later 
        return -1;
    }

    // generating  sketch structure 
    if ((self->X = (uint32_t*) calloc(s * t, sizeof(uint32_t))) == NULL){ 
        PyErr_SetString(MemoryError, "Unable to allocate space, terminating");
        return -1;
    }

    return 0;
}


static PyObject *
CountMin_update(CountMin *self, PyObject * args)
{
    int token, count; 
    float epsilon, delta; 
    uint32_t s, t, index, (*hash_retriever)(); 

    if (!PyArg_ParseTuple(args, "ii", &token, &count)) { 
        PyErr_SetString(TokenError, "Must pass in a token and count integer");
        // raise error must pass a token integer and a count integer 
        Py_DECREF(self);
        return NULL; 
    }

    epsilon = byte_to_float(self->super.epsilon);
    delta = byte_to_float(self->super.delta); 

    s = (uint32_t) ceil(2/ (float) epsilon); 
    t = ceil( log2(1/(float) delta));
    
    if(self->hash_bit == 0){ // CW
        s = nextPrime(s); 
        hash_retriever = &retrieve_hash_index_CW; 
        }
    else{ // 
        hash_retriever = &retrieve_hash_index_D; 
    }

    for(uint32_t i = 0; i < t; i++){
        index = hash_retriever(s, token, &self->hash_table[i * s]); 

        if(self->X[ (i * s) + index] <= UINT32_MAX - count){
            self->X[ (i * s) + index] += count;
        }
        else{
            self->X[ (i * s) + index] = UINT32_MAX;
        }

    }
    return Py_None; 
}

static PyObject *
CountMin_query(CountMin *self, PyObject * args)
{
    float epsilon, delta; 
    uint32_t s, t, index, (*hash_retriever)(), min_count = INT_MAX; // max value for uint32_t type 
    int token, count; 

    if (!PyArg_ParseTuple(args, "i", &token, &count)) { 
        PyErr_SetString(TokenError, "Must pass in a token to query as integer");
        // raise error must pass a token integer and a count integer 
        return NULL; 
    }

    epsilon = byte_to_float(self->super.epsilon);
    delta = byte_to_float(self->super.delta); 

    s = (uint32_t) ceil(2/ (float) epsilon); 
    t = ceil( log2(1/(float) delta));
    
    if(self->hash_bit == 0){ // CW
        s = nextPrime(s); 
        hash_retriever = &retrieve_hash_index_CW; 
    }
    else{ // mult-shift
        hash_retriever = &retrieve_hash_index_D; 
    }

    for(uint32_t i = 0; i < t; i++){
        index = hash_retriever(s, token, &self->hash_table[i * s]); 
        min_count = MIN(min_count, self->X[ (i * s) + index]);
    }
        
    return PyLong_FromLong(min_count); 
}

static PyObject *CountMin_combine(PyObject *self, PyObject *args, PyObject *kwds){
    static char *kwlist[] = {"other", NULL};
    PyObject* other; 
    int tuple_val; 
    PyObject *type_one, *type_two;  
    PyObject *tuple_one, *tuple_two; 
    float epsilon, delta; 
    uint32_t s, t, (*hash_retriever)();
    
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &other)){
        // raise error
        return NULL; 
    }

    type_one = PyObject_Type(self);
    type_two = PyObject_Type(other); 

    // check if object types are the same 
    if( (PyObject_RichCompareBool(type_one, type_two, Py_EQ) != 1)){
        PyErr_SetString(PyExc_TypeError,
                        "Unable to combine different types");
    }

    Py_DECREF(type_one);
    Py_DECREF(type_two);

    const char* s_ = "shape";
    PyObject* shape_input = Py_BuildValue("s#", s_, strlen(s_));

    tuple_one = PyObject_GetAttr(self, shape_input); 
    tuple_two = PyObject_GetAttr(other, shape_input); 

    Py_DECREF(shape_input);

    if((tuple_one == NULL) || (tuple_two == NULL)){
        PyErr_SetString(PyExc_TypeError,
                        "Unable to get attribute shape");
    }

    // check if object shapes are the same
    tuple_val = PyObject_RichCompareBool(tuple_one, tuple_two, Py_EQ);
    if(tuple_val != 1){
        PyErr_SetString(PyExc_TypeError,
                        "Cannot combine Morris objects of different shapes");
    }

    Py_DECREF(tuple_one);
    Py_DECREF(tuple_two);

    epsilon = byte_to_float( ((CountMin *) self)->super.epsilon);
    delta = byte_to_float(((CountMin *) self)->super.delta); 

    if( (epsilon != byte_to_float(((CountMin *) other)->super.epsilon))  ||
       (delta != byte_to_float(((CountMin *) other)->super.delta))   ){
        PyErr_SetString(PyExc_TypeError,
                        "Cannot combine Morris different epsilon/delta values");
    }

    if(((CountMin *) self)->seed !=((CountMin *) other)->seed){
        PyErr_SetString(PyExc_TypeError,
                        "Cannot combine CountMin objects with different hash functions. Seeds need to be the same.");
    }

    s = (uint32_t) ceil(2/ (float) epsilon); 
    t = ceil( log2(1/(float) delta));

   

    if(((CountMin *) self)->hash_bit == 0){
        s = nextPrime(s); // only needs to be done once, since sizes are the same. 
        hash_retriever = &retrieve_hash_index_CW; 
    }
    else{ // mult-shift
        hash_retriever = &retrieve_hash_index_D; 
    }

    // add checks for upper size limits
    for(uint32_t i = 0; i < t; i++){
        for(uint32_t j = 0; i < j; i++){
            if(((CountMin *) self)->X[(i * s) + j] <= UINT32_MAX - ((CountMin *) other)->X[(i * s) + j]){
                ((CountMin *) self)->X[(i * s) + j] += ((CountMin *) other)->X[(i * s) + j];
            }
            else{
                ((CountMin *) self)->X[(i * s) + j] = UINT32_MAX;
            }
        }
    }
    return Py_None; 
    
}