/*******************************************************/ /* Countmin linear sketch class */ /*******************************************************/ static void CountMin_dealloc(CountMin *self) { free(self->X); free(self->hash_table); Py_TYPE(self)->tp_free((PyObject *) self); } static PyObject * CountMin_new(PyTypeObject *type, PyObject *args, PyObject *kwds) { // initialize attributes to 0 CountMin *self; self = (CountMin *) type->tp_alloc(type, 0); if (self == NULL) { Py_DECREF(self); // error checking return NULL; } return (PyObject *) self; } static int CountMin_init(CountMin *self, PyObject *args, PyObject *kwds) // add n as parameter? { float epsilon, delta; uint32_t s, t; uint16_t rand = 1; const char* version = "CW"; PyObject* hasher = Py_BuildValue("s#", version, strlen(version));; static char *kwlist[] = {"epsilon", "delta", "hasher", NULL}; // passing in arguments if (!PyArg_ParseTupleAndKeywords(args, kwds, "ff|OI", kwlist, &epsilon, &delta, &hasher, &rand)){ // optional argument for hasher, default is "tabulation" PyErr_SetString(MemoryError, "Unable to parse parameters"); return -1; } // extracting out arguments that super can handle PyObject * super_args; if( (super_args = PyTuple_Pack(2, PyFloat_FromDouble((double) epsilon), PyFloat_FromDouble((double) delta))) == NULL){ PyErr_NoMemory(); return -1; } // super().__init__(epsilon, delta) if((LinearSketch_init(&self->super, super_args, NULL)) != 0){ PyErr_NoMemory(); return -1; } Py_XDECREF(super_args); if (!PyUnicode_Check(hasher)) { PyErr_SetString(PyExc_TypeError, "The version attribute must be a string"); return -1; } epsilon = byte_to_float(self->super.epsilon); delta = byte_to_float(self->super.delta); s = (uint32_t) ceil(2/ (float) epsilon); t = ceil( log2(1/(float) delta)); self->seed = rand; // generate random int from seed const char* passed_in_vesion = PyUnicode_DATA(hasher); Py_DECREF(hasher); if( strcmp(passed_in_vesion, "CW") == 0){ // CW for Carter & Wegman // either if it was passed in or not, default value s = nextPrime(s); if((self->hash_table = generate_random_bits_CW(s, t, 2, self->seed)) == NULL){ PyErr_SetString(MemoryError, "Unable to allocate space, terminating"); return -1; } self->hash_bit = 0; } else if( strcmp(passed_in_vesion, "mult-shift") == 0){ // if( (self->hash_table = generate_random_bits_D(s, t, 2, self->seed)) == NULL){ PyErr_SetString(MemoryError, "Unable to allocate space, terminating"); return -1; } self->hash_bit = 1; } else{ PyErr_SetString(VersionError, "version must be set to: CW, mult-shift"); // need to fill in options later return -1; } // generating sketch structure if ((self->X = (uint32_t*) calloc(s * t, sizeof(uint32_t))) == NULL){ PyErr_SetString(MemoryError, "Unable to allocate space, terminating"); return -1; } return 0; } static PyObject * CountMin_update(CountMin *self, PyObject * args) { int token, count; float epsilon, delta; uint32_t s, t, index, (*hash_retriever)(); if (!PyArg_ParseTuple(args, "ii", &token, &count)) { PyErr_SetString(TokenError, "Must pass in a token and count integer"); // raise error must pass a token integer and a count integer Py_DECREF(self); return NULL; } epsilon = byte_to_float(self->super.epsilon); delta = byte_to_float(self->super.delta); s = (uint32_t) ceil(2/ (float) epsilon); t = ceil( log2(1/(float) delta)); if(self->hash_bit == 0){ // CW s = nextPrime(s); hash_retriever = &retrieve_hash_index_CW; } else{ // hash_retriever = &retrieve_hash_index_D; } for(uint32_t i = 0; i < t; i++){ index = hash_retriever(s, token, &self->hash_table[i * s]); if(self->X[ (i * s) + index] <= UINT32_MAX - count){ self->X[ (i * s) + index] += count; } else{ self->X[ (i * s) + index] = UINT32_MAX; } } return Py_None; } static PyObject * CountMin_query(CountMin *self, PyObject * args) { float epsilon, delta; uint32_t s, t, index, (*hash_retriever)(), min_count = INT_MAX; // max value for uint32_t type int token, count; if (!PyArg_ParseTuple(args, "i", &token, &count)) { PyErr_SetString(TokenError, "Must pass in a token to query as integer"); // raise error must pass a token integer and a count integer return NULL; } epsilon = byte_to_float(self->super.epsilon); delta = byte_to_float(self->super.delta); s = (uint32_t) ceil(2/ (float) epsilon); t = ceil( log2(1/(float) delta)); if(self->hash_bit == 0){ // CW s = nextPrime(s); hash_retriever = &retrieve_hash_index_CW; } else{ // mult-shift hash_retriever = &retrieve_hash_index_D; } for(uint32_t i = 0; i < t; i++){ index = hash_retriever(s, token, &self->hash_table[i * s]); min_count = MIN(min_count, self->X[ (i * s) + index]); } return PyLong_FromLong(min_count); } static PyObject *CountMin_combine(PyObject *self, PyObject *args, PyObject *kwds){ static char *kwlist[] = {"other", NULL}; PyObject* other; int tuple_val; PyObject *type_one, *type_two; PyObject *tuple_one, *tuple_two; float epsilon, delta; uint32_t s, t, (*hash_retriever)(); if (!PyArg_ParseTupleAndKeywords(args, kwds, "O", kwlist, &other)){ // raise error return NULL; } type_one = PyObject_Type(self); type_two = PyObject_Type(other); // check if object types are the same if( (PyObject_RichCompareBool(type_one, type_two, Py_EQ) != 1)){ PyErr_SetString(PyExc_TypeError, "Unable to combine different types"); } Py_DECREF(type_one); Py_DECREF(type_two); const char* s_ = "shape"; PyObject* shape_input = Py_BuildValue("s#", s_, strlen(s_)); tuple_one = PyObject_GetAttr(self, shape_input); tuple_two = PyObject_GetAttr(other, shape_input); Py_DECREF(shape_input); if((tuple_one == NULL) || (tuple_two == NULL)){ PyErr_SetString(PyExc_TypeError, "Unable to get attribute shape"); } // check if object shapes are the same tuple_val = PyObject_RichCompareBool(tuple_one, tuple_two, Py_EQ); if(tuple_val != 1){ PyErr_SetString(PyExc_TypeError, "Cannot combine Morris objects of different shapes"); } Py_DECREF(tuple_one); Py_DECREF(tuple_two); epsilon = byte_to_float( ((CountMin *) self)->super.epsilon); delta = byte_to_float(((CountMin *) self)->super.delta); if( (epsilon != byte_to_float(((CountMin *) other)->super.epsilon)) || (delta != byte_to_float(((CountMin *) other)->super.delta)) ){ PyErr_SetString(PyExc_TypeError, "Cannot combine Morris different epsilon/delta values"); } if(((CountMin *) self)->seed !=((CountMin *) other)->seed){ PyErr_SetString(PyExc_TypeError, "Cannot combine CountMin objects with different hash functions. Seeds need to be the same."); } s = (uint32_t) ceil(2/ (float) epsilon); t = ceil( log2(1/(float) delta)); if(((CountMin *) self)->hash_bit == 0){ s = nextPrime(s); // only needs to be done once, since sizes are the same. hash_retriever = &retrieve_hash_index_CW; } else{ // mult-shift hash_retriever = &retrieve_hash_index_D; } // add checks for upper size limits for(uint32_t i = 0; i < t; i++){ for(uint32_t j = 0; i < j; i++){ if(((CountMin *) self)->X[(i * s) + j] <= UINT32_MAX - ((CountMin *) other)->X[(i * s) + j]){ ((CountMin *) self)->X[(i * s) + j] += ((CountMin *) other)->X[(i * s) + j]; } else{ ((CountMin *) self)->X[(i * s) + j] = UINT32_MAX; } } } return Py_None; }