#include "ssb_utils.h"

#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <curand.h>
#include <cuda.h>
#include <assert.h>

// Slot index of key X in a table of Y slots whose smallest expected key is Z.
// Arguments are parenthesized so that expression arguments expand correctly.
#define HASH_WM(X,Y,Z) (((X) - (Z)) % (Y))

// Aborts with file/line and the CUDA error string when a runtime call fails.
#define DATE_SORT_CUDA_CHECK(call)                                          \
  do {                                                                      \
    cudaError_t err_ = (call);                                              \
    if (err_ != cudaSuccess) {                                              \
      fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,         \
              cudaGetErrorString(err_));                                    \
      abort();                                                              \
    }                                                                       \
  } while (0)

using namespace std;

/**
 * Counts how many times each key occurs in fact_key.
 *
 * hash_table is laid out as interleaved pairs: entry 2*h holds the key and
 * entry 2*h + 1 holds its occurrence count, where h = HASH_WM(key, num_slots,
 * val_min). One thread handles one tuple; threads past num_tuples do nothing.
 *
 * Precondition (not checked): every key is >= val_min, otherwise the slot
 * index is negative and the access is out of bounds.
 */
__global__ void count_date(int *fact_key, int num_tuples, int *hash_table, int num_slots, int val_min) {
  int offset = blockIdx.x * blockDim.x + threadIdx.x;
  if (offset < num_tuples) {
    int key = fact_key[offset];
    int hash = HASH_WM(key, num_slots, val_min);
    // Publish the key only if the slot is still empty. The compare-and-swap
    // replaces a non-atomic "check then write" that raced between threads.
    atomicCAS(&hash_table[hash << 1], 0, key);
    atomicAdd(&hash_table[(hash << 1) + 1], 1);
  }
}

/**
 * Inserts every key of dim_key into the key half of hash_table (entry 2*h),
 * leaving the count half untouched. A slot that already holds a non-zero key
 * is not overwritten. One thread handles one tuple.
 *
 * Precondition (not checked): every key is >= val_min.
 */
__global__ void build_date(int *dim_key, int num_tuples, int *hash_table, int num_slots, int val_min) {
  int offset = blockIdx.x * blockDim.x + threadIdx.x;
  if (offset < num_tuples) {
    int key = dim_key[offset];
    int hash = HASH_WM(key, num_slots, val_min);
    atomicCAS(&hash_table[hash << 1], 0, key);
  }
}

/**
 * Overwrites runs of fact_key with a single value per run.
 *
 * hash_table here is a compacted table of num_tuples (value, start) pairs.
 * Thread `offset` writes its value into fact_key[start, end), where `end` is
 * the start of the next pair, or final_N for the last pair. Pairs whose value
 * is 0 write nothing. The last thread prints its pair for debugging.
 */
__global__ void modify_date(int *fact_key, int *hash_table, int num_tuples, int final_N) {
  int offset = blockIdx.x * blockDim.x + threadIdx.x;
  if (offset < num_tuples) {
    bool is_last = (offset == num_tuples - 1);
    int start_idx = hash_table[(offset << 1) + 1];
    // The last pair has no successor, so its run ends at final_N.
    int end_idx = is_last ? final_N : hash_table[(offset << 1) + 3];
    int val = hash_table[(offset << 1)];

    if (is_last)
      printf("%d %d %d %d\n", offset, val, start_idx, end_idx);

    // The value test is loop-invariant, so it is evaluated once.
    if (val != 0) {
      for (int i = start_idx; i < end_idx; i++) {
        fact_key[i] = val;
      }
    }
  }
}

/**
 * Rewrites the lo_orderdate column so that equal dates are contiguous, in
 * increasing slot (date key) order, and stores it back under the same name.
 *
 * Steps: build a direct-mapped table over the key range
 * [19920101, 19981230] from d_datekey, count occurrences of each key in
 * lo_orderdate on the GPU, turn the counts into start offsets on the CPU,
 * then fill each run on the GPU.
 */
int main () {
  int* h_lo_orderdate = loadColumnPinned<int>("lo_orderdate", LO_LEN);
  int* h_d_datekey = loadColumnPinned<int>("d_datekey", D_LEN);

  int *lo_orderdate, *d_datekey;
  DATE_SORT_CUDA_CHECK(cudaMalloc((void**) &lo_orderdate, LO_LEN * sizeof(int)));
  DATE_SORT_CUDA_CHECK(cudaMalloc((void**) &d_datekey, D_LEN * sizeof(int)));

  DATE_SORT_CUDA_CHECK(cudaMemcpy(lo_orderdate, h_lo_orderdate, LO_LEN * sizeof(int), cudaMemcpyHostToDevice));
  DATE_SORT_CUDA_CHECK(cudaMemcpy(d_datekey, h_d_datekey, D_LEN * sizeof(int), cudaMemcpyHostToDevice));

  // One slot per integer in the key range, each slot being a (key, count) pair.
  int d_val_len = 19981230 - 19920101 + 1;
  int d_val_min = 19920101;

  int* ht_d;
  DATE_SORT_CUDA_CHECK(cudaMalloc((void**) &ht_d, 2 * d_val_len * sizeof(int)));
  DATE_SORT_CUDA_CHECK(cudaMemset(ht_d, 0, 2 * d_val_len * sizeof(int)));

  build_date<<<(D_LEN + 127)/128, 128>>>(d_datekey, D_LEN, ht_d, d_val_len, d_val_min);
  DATE_SORT_CUDA_CHECK(cudaGetLastError());

  count_date<<<(LO_LEN+ 127)/128, 128>>>(lo_orderdate, LO_LEN, ht_d, d_val_len, d_val_min);
  DATE_SORT_CUDA_CHECK(cudaGetLastError());

  DATE_SORT_CUDA_CHECK(cudaDeviceSynchronize());

  cout << "Build and count date done" << endl;

  int* h_ht_d;
  DATE_SORT_CUDA_CHECK(cudaHostAlloc((void**) &h_ht_d, 2 * d_val_len * sizeof(int), cudaHostAllocDefault));
  DATE_SORT_CUDA_CHECK(cudaMemcpy(h_ht_d, ht_d, 2 * d_val_len * sizeof(int), cudaMemcpyDeviceToHost));

  // Number of distinct dates the original code assumed for this scale factor.
  int len;
  if (SF % 10 == 0) len = 2556;
  else len = 2568;

  // Size the compacted table from the slots that are actually populated, so
  // the fill loop cannot overrun the buffer and the kernel never reads
  // uninitialized pairs.
  int num_dates = 0;
  for (int i = 0; i < 2 * d_val_len; i += 2) {
    if (h_ht_d[i] != 0) num_dates++;
  }

  if (num_dates != len) {
    cerr << "Warning: found " << num_dates << " populated date slots, expected " << len << endl;
  }

  if (num_dates == 0) {
    cerr << "No date keys found; lo_orderdate left unchanged" << endl;
    return 1;
  }

  int* modify_ht_d;
  DATE_SORT_CUDA_CHECK(cudaHostAlloc((void**) &modify_ht_d, 2 * num_dates * sizeof(int), cudaHostAllocDefault));

  // Compact the populated slots into (key, start offset) pairs; `temp` is the
  // running total of counts seen so far.
  int temp = 0;
  int k = 0;
  for (int i = 0; i < 2 * d_val_len; i+=2) {
    if (h_ht_d[i] != 0) {
      modify_ht_d[k] = h_ht_d[i];
      modify_ht_d[k+1] = temp;
      temp += h_ht_d[i+1];
      cout << k << " " << modify_ht_d[k] << " " << modify_ht_d[k+1] << " " << LO_LEN << endl;
      cout << h_ht_d[i] << " " << h_ht_d[i+1] << endl;
      k+=2;
    }
  }

  cout << "CPU portion done" << endl;

  int* d_modify_ht_d;
  DATE_SORT_CUDA_CHECK(cudaMalloc((void**) &d_modify_ht_d, 2 * num_dates * sizeof(int)));
  DATE_SORT_CUDA_CHECK(cudaMemcpy(d_modify_ht_d, modify_ht_d, 2 * num_dates * sizeof(int), cudaMemcpyHostToDevice));

  modify_date<<<(num_dates + 127)/128, 128>>>(lo_orderdate, d_modify_ht_d, num_dates, LO_LEN);
  DATE_SORT_CUDA_CHECK(cudaGetLastError());

  // Blocking copy: also waits for modify_date and surfaces its execution errors.
  DATE_SORT_CUDA_CHECK(cudaMemcpy(h_lo_orderdate, lo_orderdate, LO_LEN * sizeof(int), cudaMemcpyDeviceToHost));

  cout << "Modify date" << endl;

  storeColumn<int>("lo_orderdate", LO_LEN, h_lo_orderdate);

  // Release everything allocated in this function. The column buffers returned
  // by loadColumnPinned are left alone because their allocator is not visible here.
  DATE_SORT_CUDA_CHECK(cudaFree(d_modify_ht_d));
  DATE_SORT_CUDA_CHECK(cudaFree(ht_d));
  DATE_SORT_CUDA_CHECK(cudaFree(d_datekey));
  DATE_SORT_CUDA_CHECK(cudaFree(lo_orderdate));
  DATE_SORT_CUDA_CHECK(cudaFreeHost(modify_ht_d));
  DATE_SORT_CUDA_CHECK(cudaFreeHost(h_ht_d));

  return 0;
}