#include "../headers/varintdecode.h"

/*
 * <x86intrin.h> is a GCC/Clang header. MSVC exposes its intrinsics
 * (including _BitScanForward, used below) through <intrin.h>.
 */
#if defined(_MSC_VER)
#  include <intrin.h>
#else
#  include <x86intrin.h>
#endif

/*
 * ALIGNED(x): compiler-specific spelling of "align this object to x bytes".
 * Left undefined on compilers that are neither MSVC nor GNU-compatible.
 */
#if defined(_MSC_VER)
#  define ALIGNED(x) __declspec(align(x))
#elif defined(__GNUC__)
#  define ALIGNED(x) __attribute__ ((aligned(x)))
#endif

/*
 * SIMDCOMP_CTZ(result, mask): statement-like macro that stores in `result`
 * the number of trailing zero bits of the 32-bit value `mask`, or 32 when
 * `mask` is zero.
 *
 * Both branches evaluate `mask` exactly once and use a prefixed temporary so
 * that caller variables (e.g. one named `index`) cannot be shadowed.
 * __builtin_ctz is undefined for a zero argument, so the GNU branch checks
 * for zero explicitly to match the MSVC branch.
 */
#if defined(_MSC_VER)
/* 64-bit needs extending */
#  define SIMDCOMP_CTZ(result, mask) do { \
        unsigned long simdcomp_ctz_index; \
        if (!_BitScanForward(&simdcomp_ctz_index, (mask))) { \
            (result) = 32U; \
        } else { \
            (result) = (uint32_t)simdcomp_ctz_index; \
        } \
    } while (0)
#else
#  define SIMDCOMP_CTZ(result, mask) do { \
        const uint32_t simdcomp_ctz_mask = (uint32_t)(mask); \
        (result) = (simdcomp_ctz_mask != 0U) \
            ? (uint32_t)__builtin_ctz(simdcomp_ctz_mask) \
            : 32U; \
    } while (0)
#endif

/*
 * One entry of combined_lookup: a pair of byte-sized values.
 * NOTE(review): presumably `index` selects a decode pattern and
 * `bytes_consumed` is the number of input bytes that pattern uses --
 * confirm against the decoder that reads this table.
 */
typedef struct index_bytes_consumed {
    uint8_t index;
    uint8_t bytes_consumed;
} index_bytes_consumed;

/*
 * Table of {index, bytes_consumed} pairs, aligned to 0x1000 bytes.
 * The entries are data and are reproduced unchanged.
 */
static index_bytes_consumed combined_lookup[] ALIGNED(0x1000) = {
{0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {60, 10}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9},
{25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {58, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {54, 10}, {83, 7}, {160, 5}, {7, 9}, {46, 10}, {30, 10}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {142, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {57, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {53, 10}, {83, 7}, {160, 5}, {6, 8}, {45, 10}, {29, 10}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {51, 10}, {82, 6}, {160, 5}, {5, 8}, {43, 10}, {27, 10}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {39, 10}, {23, 10}, {122, 8}, {15, 10}, {62, 11}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {140, 10}, {145, 2}, {153, 6}, {149, 
6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {134, 10}, {72, 8}, {116, 10}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {138, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {132, 10}, {71, 7}, {114, 10}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {126, 10}, {69, 7}, {108, 10}, {90, 10}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {169, 10}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, 
{106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {61, 11}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {59, 11}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {55, 11}, {83, 7}, {160, 5}, {7, 9}, {47, 11}, {31, 11}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {143, 11}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, 
{127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {101, 7}, {83, 7}, {160, 5}, {6, 8}, {95, 7}, {77, 7}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {100, 6}, {82, 6}, {160, 5}, {5, 8}, {94, 6}, {76, 6}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {92, 6}, {74, 6}, {122, 8}, {68, 6}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {141, 11}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {135, 11}, {72, 8}, {117, 11}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {156, 5}, {71, 7}, {152, 5}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {155, 4}, {69, 7}, {151, 4}, {147, 4}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 
0}, {0, 4}, {0, 5}, {0, 5}, {0, 4}, {0, 5}, {0, 4}, {0, 4}, {0, 5}, {0, 5}, {0, 3}, {0, 3}, {0, 5}, {0, 2}, {0, 5}, {0, 5}, {0, 0}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {60, 10}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {58, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {54, 10}, {83, 7}, {160, 5}, {7, 9}, {46, 10}, {30, 10}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 
3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {142, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {57, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {53, 10}, {83, 7}, {160, 5}, {6, 8}, {45, 10}, {29, 10}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {51, 10}, {82, 6}, {160, 5}, {5, 8}, {43, 10}, {27, 10}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {39, 10}, {23, 10}, {122, 8}, {15, 10}, {63, 12}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {140, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {134, 10}, {72, 8}, {116, 10}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 
6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {138, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {132, 10}, {71, 7}, {114, 10}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {126, 10}, {69, 7}, {108, 10}, {90, 10}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {169, 10}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 
8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {101, 7}, {83, 7}, {160, 5}, {7, 9}, {95, 7}, {77, 7}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {144, 12}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {101, 7}, {83, 7}, {160, 5}, {6, 8}, {95, 7}, {77, 7}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, 
{145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {100, 6}, {82, 6}, {160, 5}, {5, 8}, {94, 6}, {76, 6}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {92, 6}, {74, 6}, {122, 8}, {68, 6}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {156, 5}, {72, 8}, {152, 5}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {156, 5}, {71, 7}, {152, 5}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {155, 4}, {69, 7}, {151, 4}, {147, 4}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 5}, {0, 6}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 6}, {0, 5}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 4}, {0, 6}, {0, 6}, {0, 4}, {0, 6}, {0, 4}, {0, 4}, {0, 6}, {0, 6}, {0, 3}, {0, 3}, {0, 6}, {0, 2}, {0, 6}, {0, 6}, {0, 0}, {0, 4}, {0, 5}, {0, 5}, {0, 4}, {0, 5}, {0, 4}, {0, 4}, {0, 5}, {0, 5}, {0, 3}, {0, 3}, {0, 5}, {0, 2}, {0, 5}, {0, 5}, {0, 0}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 
8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {60, 10}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {58, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {54, 10}, {83, 7}, {160, 5}, {7, 9}, {46, 10}, {30, 10}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {142, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 
4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {57, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {53, 10}, {83, 7}, {160, 5}, {6, 8}, {45, 10}, {29, 10}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {51, 10}, {82, 6}, {160, 5}, {5, 8}, {43, 10}, {27, 10}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {39, 10}, {23, 10}, {122, 8}, {15, 10}, {62, 11}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {140, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {134, 10}, {72, 8}, {116, 10}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {138, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {132, 10}, {71, 7}, {114, 10}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {126, 10}, {69, 7}, {108, 10}, {90, 10}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 
6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {169, 10}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {61, 11}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {59, 11}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, 
{153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {55, 11}, {83, 7}, {160, 5}, {7, 9}, {47, 11}, {31, 11}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {143, 11}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {101, 7}, {83, 7}, {160, 5}, {6, 8}, {95, 7}, {77, 7}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {100, 6}, {82, 6}, {160, 5}, {5, 8}, {94, 6}, {76, 6}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {92, 6}, {74, 6}, {122, 8}, {68, 6}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {141, 11}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {135, 11}, {72, 8}, {117, 11}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, 
{146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {156, 5}, {71, 7}, {152, 5}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {155, 4}, {69, 7}, {151, 4}, {147, 4}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {0, 4}, {0, 5}, {0, 5}, {0, 4}, {0, 5}, {0, 4}, {0, 4}, {0, 5}, {0, 5}, {0, 3}, {0, 3}, {0, 5}, {0, 2}, {0, 5}, {0, 5}, {0, 0}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 
6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {60, 10}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {58, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {54, 10}, {83, 7}, {160, 5}, {7, 9}, {46, 10}, {30, 10}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {142, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {57, 10}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 
8}, {119, 7}, {10, 8}, {53, 10}, {83, 7}, {160, 5}, {6, 8}, {45, 10}, {29, 10}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {51, 10}, {82, 6}, {160, 5}, {5, 8}, {43, 10}, {27, 10}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {39, 10}, {23, 10}, {122, 8}, {15, 10}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {140, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {134, 10}, {72, 8}, {116, 10}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {100, 6}, {82, 6}, {160, 5}, {4, 7}, {94, 6}, {76, 6}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {92, 6}, {74, 6}, {121, 7}, {68, 6}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {138, 10}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {91, 5}, {73, 5}, {119, 7}, {67, 5}, {101, 7}, {83, 7}, {160, 5}, {65, 5}, {95, 7}, {77, 7}, {132, 10}, {71, 7}, {114, 10}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {126, 10}, {69, 7}, {108, 10}, {90, 10}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {156, 5}, {70, 6}, {152, 5}, {148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {155, 4}, {68, 6}, {151, 4}, {147, 4}, {161, 6}, {66, 6}, {150, 3}, {146, 3}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {169, 10}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, 
{0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {56, 9}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {52, 9}, {83, 7}, {160, 5}, {6, 8}, {44, 9}, {28, 9}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {50, 9}, {82, 6}, {160, 5}, {5, 8}, {42, 9}, {26, 9}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {38, 9}, {22, 9}, {122, 8}, {14, 9}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {139, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {133, 9}, {72, 8}, {115, 9}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {49, 9}, {82, 6}, {160, 5}, {4, 7}, {41, 9}, {25, 9}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {37, 9}, {21, 9}, {121, 7}, {13, 9}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {137, 9}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {35, 9}, {19, 9}, {119, 7}, {11, 9}, {101, 7}, {83, 7}, {160, 5}, {7, 9}, {95, 7}, {77, 7}, {131, 9}, {71, 7}, {113, 9}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {125, 9}, {69, 7}, {107, 9}, {89, 9}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {91, 5}, {73, 5}, {118, 6}, {67, 5}, {100, 6}, {82, 6}, {160, 5}, {65, 5}, {94, 6}, {76, 6}, {129, 9}, {70, 6}, {111, 9}, 
{148, 5}, {165, 6}, {64, 4}, {92, 6}, {74, 6}, {123, 9}, {68, 6}, {105, 9}, {87, 9}, {161, 6}, {66, 6}, {99, 9}, {81, 9}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {155, 4}, {67, 5}, {151, 4}, {147, 4}, {160, 5}, {65, 5}, {150, 3}, {146, 3}, {156, 5}, {145, 2}, {152, 5}, {148, 5}, {168, 9}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {164, 9}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {32, 7}, {16, 7}, {118, 6}, {8, 7}, {48, 8}, {82, 6}, {160, 5}, {4, 7}, {40, 8}, {24, 8}, {127, 7}, {70, 6}, {109, 7}, {148, 5}, {165, 6}, {2, 7}, {36, 8}, {20, 8}, {121, 7}, {12, 8}, {103, 7}, {85, 7}, {161, 6}, {66, 6}, {97, 7}, {79, 7}, {136, 8}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {1, 7}, {34, 8}, {18, 8}, {119, 7}, {10, 8}, {101, 7}, {83, 7}, {160, 5}, {6, 8}, {95, 7}, {77, 7}, {130, 8}, {71, 7}, {112, 8}, {148, 5}, {166, 7}, {64, 4}, {93, 7}, {75, 7}, {124, 8}, {69, 7}, {106, 8}, {88, 8}, {162, 7}, {145, 2}, {150, 3}, {146, 3}, {158, 7}, {145, 2}, {154, 7}, {0, 0}, {0, 0}, {0, 6}, {33, 8}, {17, 8}, {118, 6}, {9, 8}, {100, 6}, {82, 6}, {160, 5}, {5, 8}, {94, 6}, {76, 6}, {128, 8}, {70, 6}, {110, 8}, {148, 5}, {165, 6}, {3, 8}, {92, 6}, {74, 6}, {122, 8}, {68, 6}, {104, 8}, {86, 8}, {161, 6}, {66, 6}, {98, 8}, {80, 8}, {157, 6}, {145, 2}, {153, 6}, {149, 6}, {0, 0}, {64, 4}, {91, 5}, {73, 5}, {120, 8}, {67, 5}, {102, 8}, {84, 8}, {160, 5}, {65, 5}, {96, 8}, {78, 8}, {156, 5}, {72, 8}, {152, 5}, {148, 5}, {167, 8}, {64, 4}, {150, 3}, {146, 3}, {155, 4}, {145, 2}, {151, 4}, {147, 4}, {163, 8}, {145, 2}, {150, 3}, {146, 3}, {159, 8}, {0, 2}, {0, 0}, {0, 0}, {0, 0}, {0, 6}, {0, 7}, {0, 7}, {0, 6}, {0, 7}, {0, 6}, {0, 6}, {0, 5}, {0, 7}, {0, 6}, {0, 6}, {0, 7}, {0, 6}, {0, 7}, {0, 5}, {0, 6}, {0, 7}, {0, 6}, {0, 6}, {0, 7}, {0, 6}, {0, 7}, {0, 7}, {0, 6}, {0, 6}, {0, 7}, {0, 7}, {0, 6}, {0, 2}, {0, 6}, {0, 6}, {0, 0}, {0, 7}, {0, 5}, {0, 5}, {0, 7}, {0, 5}, {0, 7}, {0, 7}, {0, 5}, {0, 5}, {0, 7}, 
{0, 7}, {0, 5}, {0, 7}, {0, 5}, {0, 5}, {0, 7}, {0, 4}, {0, 7}, {0, 7}, {0, 4}, {0, 7}, {0, 4}, {0, 4}, {0, 7}, {0, 2}, {0, 3}, {0, 3}, {0, 7}, {0, 2}, {0, 7}, {0, 0}, {0, 0}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 5}, {0, 6}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 6}, {0, 5}, {0, 6}, {0, 5}, {0, 5}, {0, 6}, {0, 4}, {0, 6}, {0, 6}, {0, 4}, {0, 6}, {0, 4}, {0, 4}, {0, 6}, {0, 6}, {0, 3}, {0, 3}, {0, 6}, {0, 2}, {0, 6}, {0, 6}, {0, 0}, {0, 4}, {0, 5}, {0, 5}, {0, 4}, {0, 5}, {0, 4}, {0, 4}, {0, 5}, {0, 5}, {0, 3}, {0, 3}, {0, 5}, {0, 2}, {0, 5}, {0, 5}, {0, 0}, {0, 4}, {0, 3}, {0, 3}, {0, 4}, {0, 2}, {0, 4}, {0, 4}, {0, 0}, {0, 2}, {0, 3}, {0, 3}, {0, 0}, {0, 2}, {0, 0}, {0, 0}, {0, 0} }; static const int8_t vectorsrawbytes[] ALIGNED(0x1000) = { 0, -1, 4, -1, 1, -1, 5, -1, 2, -1, -1, -1, 3, -1, -1, -1, // 0 0, -1, 4, -1, 1, -1, 5, 6, 2, -1, -1, -1, 3, -1, -1, -1, // 1 0, -1, 4, 5, 1, -1, 6, -1, 2, -1, -1, -1, 3, -1, -1, -1, // 2 0, -1, 4, 5, 1, -1, 6, 7, 2, -1, -1, -1, 3, -1, -1, -1, // 3 0, -1, 5, -1, 1, -1, 6, -1, 2, -1, -1, -1, 3, 4, -1, -1, // 4 0, -1, 5, -1, 1, -1, 6, 7, 2, -1, -1, -1, 3, 4, -1, -1, // 5 0, -1, 5, 6, 1, -1, 7, -1, 2, -1, -1, -1, 3, 4, -1, -1, // 6 0, -1, 5, 6, 1, -1, 7, 8, 2, -1, -1, -1, 3, 4, -1, -1, // 7 0, -1, 5, -1, 1, -1, 6, -1, 2, 3, -1, -1, 4, -1, -1, -1, // 8 0, -1, 5, -1, 1, -1, 6, 7, 2, 3, -1, -1, 4, -1, -1, -1, // 9 0, -1, 5, 6, 1, -1, 7, -1, 2, 3, -1, -1, 4, -1, -1, -1, // 10 0, -1, 5, 6, 1, -1, 7, 8, 2, 3, -1, -1, 4, -1, -1, -1, // 11 0, -1, 6, -1, 1, -1, 7, -1, 2, 3, -1, -1, 4, 5, -1, -1, // 12 0, -1, 6, -1, 1, -1, 7, 8, 2, 3, -1, -1, 4, 5, -1, -1, // 13 0, -1, 6, 7, 1, -1, 8, -1, 2, 3, -1, -1, 4, 5, -1, -1, // 14 0, -1, 6, 7, 1, -1, 8, 9, 2, 3, -1, -1, 4, 5, -1, -1, // 15 0, -1, 5, -1, 1, 2, 6, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 16 0, -1, 5, -1, 1, 2, 6, 7, 3, -1, -1, -1, 4, -1, -1, -1, // 17 0, -1, 5, 6, 1, 2, 7, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 18 0, -1, 5, 6, 1, 2, 7, 8, 3, -1, -1, -1, 4, -1, -1, -1, // 19 0, -1, 6, -1, 1, 2, 
7, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 20 0, -1, 6, -1, 1, 2, 7, 8, 3, -1, -1, -1, 4, 5, -1, -1, // 21 0, -1, 6, 7, 1, 2, 8, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 22 0, -1, 6, 7, 1, 2, 8, 9, 3, -1, -1, -1, 4, 5, -1, -1, // 23 0, -1, 6, -1, 1, 2, 7, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 24 0, -1, 6, -1, 1, 2, 7, 8, 3, 4, -1, -1, 5, -1, -1, -1, // 25 0, -1, 6, 7, 1, 2, 8, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 26 0, -1, 6, 7, 1, 2, 8, 9, 3, 4, -1, -1, 5, -1, -1, -1, // 27 0, -1, 7, -1, 1, 2, 8, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 28 0, -1, 7, -1, 1, 2, 8, 9, 3, 4, -1, -1, 5, 6, -1, -1, // 29 0, -1, 7, 8, 1, 2, 9, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 30 0, -1, 7, 8, 1, 2, 9, 10, 3, 4, -1, -1, 5, 6, -1, -1, // 31 0, 1, 5, -1, 2, -1, 6, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 32 0, 1, 5, -1, 2, -1, 6, 7, 3, -1, -1, -1, 4, -1, -1, -1, // 33 0, 1, 5, 6, 2, -1, 7, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 34 0, 1, 5, 6, 2, -1, 7, 8, 3, -1, -1, -1, 4, -1, -1, -1, // 35 0, 1, 6, -1, 2, -1, 7, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 36 0, 1, 6, -1, 2, -1, 7, 8, 3, -1, -1, -1, 4, 5, -1, -1, // 37 0, 1, 6, 7, 2, -1, 8, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 38 0, 1, 6, 7, 2, -1, 8, 9, 3, -1, -1, -1, 4, 5, -1, -1, // 39 0, 1, 6, -1, 2, -1, 7, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 40 0, 1, 6, -1, 2, -1, 7, 8, 3, 4, -1, -1, 5, -1, -1, -1, // 41 0, 1, 6, 7, 2, -1, 8, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 42 0, 1, 6, 7, 2, -1, 8, 9, 3, 4, -1, -1, 5, -1, -1, -1, // 43 0, 1, 7, -1, 2, -1, 8, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 44 0, 1, 7, -1, 2, -1, 8, 9, 3, 4, -1, -1, 5, 6, -1, -1, // 45 0, 1, 7, 8, 2, -1, 9, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 46 0, 1, 7, 8, 2, -1, 9, 10, 3, 4, -1, -1, 5, 6, -1, -1, // 47 0, 1, 6, -1, 2, 3, 7, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 48 0, 1, 6, -1, 2, 3, 7, 8, 4, -1, -1, -1, 5, -1, -1, -1, // 49 0, 1, 6, 7, 2, 3, 8, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 50 0, 1, 6, 7, 2, 3, 8, 9, 4, -1, -1, -1, 5, -1, -1, -1, // 51 0, 1, 7, -1, 2, 3, 8, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 52 0, 1, 7, 
-1, 2, 3, 8, 9, 4, -1, -1, -1, 5, 6, -1, -1, // 53 0, 1, 7, 8, 2, 3, 9, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 54 0, 1, 7, 8, 2, 3, 9, 10, 4, -1, -1, -1, 5, 6, -1, -1, // 55 0, 1, 7, -1, 2, 3, 8, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 56 0, 1, 7, -1, 2, 3, 8, 9, 4, 5, -1, -1, 6, -1, -1, -1, // 57 0, 1, 7, 8, 2, 3, 9, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 58 0, 1, 7, 8, 2, 3, 9, 10, 4, 5, -1, -1, 6, -1, -1, -1, // 59 0, 1, 8, -1, 2, 3, 9, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 60 0, 1, 8, -1, 2, 3, 9, 10, 4, 5, -1, -1, 6, 7, -1, -1, // 61 0, 1, 8, 9, 2, 3, 10, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 62 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, -1, -1, 6, 7, -1, -1, // 63 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, // 64 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, // 65 0, -1, -1, -1, 1, -1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, // 66 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, // 67 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, // 68 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, // 69 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, // 70 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, // 71 0, -1, -1, -1, 1, -1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, // 72 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 73 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 74 0, -1, -1, -1, 1, 2, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1, // 75 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 76 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 77 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1, // 78 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1, // 79 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1, // 80 0, -1, -1, -1, 1, 2, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1, // 81 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 82 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 83 0, -1, -1, -1, 1, 2, 3, -1, 4, -1, -1, -1, 5, 6, 7, -1, // 84 0, -1, -1, -1, 
1, 2, 3, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 85 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 86 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, -1, -1, 6, 7, 8, -1, // 87 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, -1, -1, -1, // 88 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, -1, -1, // 89 0, -1, -1, -1, 1, 2, 3, -1, 4, 5, 6, -1, 7, 8, 9, -1, // 90 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, -1, -1, -1, // 91 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, -1, -1, // 92 0, 1, -1, -1, 2, -1, -1, -1, 3, -1, -1, -1, 4, 5, 6, -1, // 93 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, -1, -1, -1, // 94 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, -1, -1, // 95 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, -1, -1, 5, 6, 7, -1, // 96 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, -1, -1, -1, // 97 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, -1, -1, // 98 0, 1, -1, -1, 2, -1, -1, -1, 3, 4, 5, -1, 6, 7, 8, -1, // 99 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 100 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 101 0, 1, -1, -1, 2, 3, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1, // 102 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 103 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 104 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1, // 105 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1, // 106 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1, // 107 0, 1, -1, -1, 2, 3, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1, // 108 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, -1, -1, -1, // 109 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, -1, -1, // 110 0, 1, -1, -1, 2, 3, 4, -1, 5, -1, -1, -1, 6, 7, 8, -1, // 111 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, -1, -1, -1, // 112 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, -1, -1, // 113 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, -1, -1, 7, 8, 9, -1, // 114 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, -1, -1, -1, // 115 0, 1, -1, -1, 2, 3, 4, -1, 5, 6, 7, -1, 8, 9, -1, -1, // 116 0, 1, -1, -1, 
2, 3, 4, -1, 5, 6, 7, -1, 8, 9, 10, -1, // 117 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, -1, -1, -1, // 118 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, -1, -1, // 119 0, 1, 2, -1, 3, -1, -1, -1, 4, -1, -1, -1, 5, 6, 7, -1, // 120 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, -1, -1, -1, // 121 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, -1, -1, // 122 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, -1, -1, 6, 7, 8, -1, // 123 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, -1, -1, -1, // 124 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, -1, -1, // 125 0, 1, 2, -1, 3, -1, -1, -1, 4, 5, 6, -1, 7, 8, 9, -1, // 126 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, -1, -1, -1, // 127 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, -1, -1, // 128 0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1, -1, 6, 7, 8, -1, // 129 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, -1, -1, -1, // 130 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, -1, -1, // 131 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, -1, -1, 7, 8, 9, -1, // 132 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, -1, -1, -1, // 133 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, -1, -1, // 134 0, 1, 2, -1, 3, 4, -1, -1, 5, 6, 7, -1, 8, 9, 10, -1, // 135 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, -1, -1, -1, // 136 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, -1, -1, // 137 0, 1, 2, -1, 3, 4, 5, -1, 6, -1, -1, -1, 7, 8, 9, -1, // 138 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, -1, -1, -1, // 139 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, -1, -1, // 140 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, -1, -1, 8, 9, 10, -1, // 141 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, -1, -1, -1, // 142 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, -1, -1, // 143 0, 1, 2, -1, 3, 4, 5, -1, 6, 7, 8, -1, 9, 10, 11, -1, // 144 -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 1, // 145 -1, -1, -1, -1, -1, -1, -1, 0, 2, -1, -1, -1, -1, -1, -1, 1, // 146 -1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, -1, -1, -1, 1, // 147 -1, -1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, 4, -1, -1, 1, // 148 -1, 
-1, -1, -1, -1, -1, -1, 0, 2, -1, 3, -1, 4, -1, 5, 1, // 149
    1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 2, // 150
    1, -1, -1, -1, -1, -1, -1, 0, 3, -1, -1, -1, -1, -1, -1, 2, // 151
    1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, -1, -1, -1, 2, // 152
    1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, 5, -1, -1, 2, // 153
    1, -1, -1, -1, -1, -1, -1, 0, 3, -1, 4, -1, 5, -1, 6, 2, // 154
    1, -1, 2, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 3, // 155
    1, -1, 2, -1, -1, -1, -1, 0, 4, -1, -1, -1, -1, -1, -1, 3, // 156
    1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, -1, -1, -1, 3, // 157
    1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, 6, -1, -1, 3, // 158
    1, -1, 2, -1, -1, -1, -1, 0, 4, -1, 5, -1, 6, -1, 7, 3, // 159
    1, -1, 2, -1, 3, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 4, // 160
    1, -1, 2, -1, 3, -1, -1, 0, 5, -1, -1, -1, -1, -1, -1, 4, // 161
    1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, -1, -1, -1, 4, // 162
    1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, 7, -1, -1, 4, // 163
    1, -1, 2, -1, 3, -1, -1, 0, 5, -1, 6, -1, 7, -1, 8, 4, // 164
    1, -1, 2, -1, 3, -1, 4, 0, -1, -1, -1, -1, -1, -1, -1, 5, // 165
    1, -1, 2, -1, 3, -1, 4, 0, 6, -1, -1, -1, -1, -1, -1, 5, // 166
    1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, -1, -1, -1, 5, // 167
    1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, 8, -1, -1, 5, // 168
    1, -1, 2, -1, 3, -1, 4, 0, 6, -1, 7, -1, 8, -1, 9, 5, // 169
};

// The raw byte table viewed as 16-byte vectors: vectors[i] is the row of
// vectorsrawbytes annotated with the number i, used as a byte-shuffle mask.
static const __m128i* vectors = (const __m128i*)vectorsrawbytes;

// Scalar decoder for one variable-byte integer.
//
// Each input byte contributes its low 7 bits, least-significant group first;
// a byte below 128 (high bit clear) ends the integer. At most five bytes are
// read, and the high bit of the fifth byte is not examined.
//
//   in  : encoded bytes, starting at the integer to decode
//   out : receives the decoded value
// Returns the number of bytes consumed (1 to 5).
static int read_int(const uint8_t* in, uint32_t* out) {
    *out = in[0] & 0x7F;
    if (in[0] < 128) {
        return 1;
    }
    *out = ((in[1] & 0x7FU) << 7) | *out;
    if (in[1] < 128) {
        return 2;
    }
    *out = ((in[2] & 0x7FU) << 14) | *out;
    if (in[2] < 128) {
        return 3;
    }
    *out = ((in[3] & 0x7FU) << 21) | *out;
    if (in[3] < 128) {
        return 4;
    }
    // Fifth byte: only the bits that fit in 32 bits survive the shift.
    *out = ((in[4] & 0x7FU) << 28) | *out;
    return 5;
}

// Same byte format as read_int, but the decoded value is treated as a delta:
// it is added to *prev, and the running total is stored in both *prev and
// *out. Returns the number of bytes consumed (1 to 5).
static inline int read_int_delta(const uint8_t* in, uint32_t* out, uint32_t* prev) {
    *out = in[0] & 0x7F;
    if (in[0] < 128) {
        *prev += *out;
        *out = *prev;
        return 1;
    }
    *out = ((in[1] & 0x7FU) << 7) | *out;
    if
(in[1] < 128) {
        *prev += *out;
        *out = *prev;
        return 2;
    }
    *out = ((in[2] & 0x7FU) << 14) | *out;
    if (in[2] < 128) {
        *prev += *out;
        *out = *prev;
        return 3;
    }
    *out = ((in[3] & 0x7FU) << 21) | *out;
    if (in[3] < 128) {
        *prev += *out;
        *out = *prev;
        return 4;
    }
    // Fifth byte: its high bit is not examined.
    *out = ((in[4] & 0x7FU) << 28) | *out;
    *prev += *out;
    *out = *prev;
    return 5;
}

// Decodes one group of integers from the 16 bytes at `in` with SIMD.
//
//   in        : encoded bytes; 16 bytes are loaded unconditionally
//   out       : destination; 16, 6, 4 or 2 integers are stored depending on
//               the path taken
//   mask      : the callers pass the movemask of the input bytes, i.e. bit i
//               is set when in[i] has its high bit set; only the low 16 bits
//               are inspected here
//   ints_read : receives the number of integers produced
// Returns the number of input bytes consumed: 16 on the all-one-byte path,
// otherwise the bytes_consumed field of the combined_lookup entry selected by
// the low 12 mask bits.
static uint64_t masked_vbyte_read_group(const uint8_t* in, uint32_t* out, uint64_t mask, uint64_t* ints_read) {
    __m128i initial = _mm_lddqu_si128((const __m128i *) (in));
    __m128i * mout = (__m128i *) out;

    // No high bit set in any of the 16 bytes: every byte is a complete
    // integer, so widen four bytes at a time to 32-bit lanes.
    if (!(mask & 0xFFFF)) {
        __m128i result = _mm_cvtepi8_epi32(initial);
        _mm_storeu_si128(mout, result);
        initial = _mm_srli_si128(initial, 4);
        result = _mm_cvtepi8_epi32(initial);
        _mm_storeu_si128(mout + 1, result);
        initial = _mm_srli_si128(initial, 4);
        result = _mm_cvtepi8_epi32(initial);
        _mm_storeu_si128(mout + 2, result);
        initial = _mm_srli_si128(initial, 4);
        result = _mm_cvtepi8_epi32(initial);
        _mm_storeu_si128(mout + 3, result);
        *ints_read = 16;
        return 16;
    }

    uint32_t low_12_bits = mask & 0xFFF;
    // combine index and bytes consumed into a single lookup
    index_bytes_consumed combined = combined_lookup[low_12_bits];
    uint64_t consumed = combined.bytes_consumed;
    uint8_t index = combined.index;

    __m128i shuffle_vector = vectors[index];

    // Shuffle rows 0..63: six integers, each held in one 16-bit lane
    // (low byte plus optional second byte).
    if (index < 64) {
        *ints_read = 6;
        __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
        __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x007F));
        __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x7F00));
        // Shift the second 7-bit group down by one to close the gap left by
        // the removed high bit of the first byte.
        __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
        __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
        // Low 16 bits of each 32-bit lane: first four integers.
        __m128i unpacked_result_a = _mm_and_si128(packed_result, _mm_set1_epi32(0x0000FFFF));
        _mm_storeu_si128(mout, unpacked_result_a);
        // High 16 bits of each 32-bit lane: only the low two lanes are stored.
        __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
        _mm_storel_epi64(mout+1, unpacked_result_b);
        //_mm_storeu_si128(mout + 1, unpacked_result_b); // maybe faster to write 16 bytes?
        return consumed;
    }
    // Shuffle rows 64..144: four integers, one per 32-bit lane, built from up
    // to three 7-bit groups.
    if (index < 145) {
        *ints_read = 4;
        __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
        __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x0000007F));
        __m128i middle_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x00007F00));
        __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x007F0000));
        __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
        __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
        __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
        __m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
        _mm_storeu_si128(mout, result);
        return consumed;
    }

    // Remaining shuffle rows (145 and up): two integers, one per 64-bit half.
    // The high bits are cleared first; the 16-bit multiply by 128/64/32/16
    // acts as a per-lane left shift by 7/6/5/4.
    *ints_read = 2;
    __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
    __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
    __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
    __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
    __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
    __m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
    __m128i result_evens = _mm_or_si128(recombined, low_byte);
    // Keep the even-numbered bytes, giving two 32-bit results in the low half.
    __m128i result = _mm_shuffle_epi8(result_evens, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1));
    _mm_storel_epi64(mout, result);
    //_mm_storeu_si128(mout, result); // maybe faster to write 16 bytes?
return consumed;
}

// Inclusive prefix sum over the four 32-bit lanes of `curr`, carried on from
// `prev`: the top lane of `prev` (P) is added to every output lane.
// With curr = [A B C D] the result is [P+A, P+A+B, P+A+B+C, P+A+B+C+D].
static inline __m128i PrefixSum(__m128i curr, __m128i prev) {
    __m128i Add = _mm_slli_si128(curr, 4); // Cycle 1: [- A B C] (already done)
    prev = _mm_shuffle_epi32(prev, 0xff);  // Cycle 2: [P P P P]
    curr = _mm_add_epi32(curr, Add);       // Cycle 2: [A AB BC CD]
    Add = _mm_slli_si128(curr, 8);         // Cycle 3: [- - A AB]
    curr = _mm_add_epi32(curr, prev);      // Cycle 3: [PA PAB PBC PCD]
    curr = _mm_add_epi32(curr, Add);       // Cycle 4: [PA PAB PABC PABCD]
    return curr;
}

// Prefix sum for a vector in which only the first two ints of `curr` are
// meaningful; the rest is garbage to be ignored. The second sum is replicated
// into the upper lanes, so the top lane of the result can serve as the carry
// for the next call.
static inline __m128i PrefixSum2ints(__m128i curr, __m128i prev) {
    __m128i Add = _mm_slli_si128(curr, 4); // Cycle 1: [- A B G] (already done)
    prev = _mm_shuffle_epi32(prev, 0xff);  // Cycle 2: [P P P P]
    curr = _mm_add_epi32(curr, Add);       // Cycle 2: [A AB BG GG]
    curr = _mm_shuffle_epi32(curr, 0x54);  // Cycle 3: [A AB AB AB]
    curr = _mm_add_epi32(curr, prev);      // Cycle 4: [PA PAB PAB PAB]
    return curr;
}

// Delta variant of masked_vbyte_read_group: decodes one group of integers
// from the 16 bytes at `in`, then replaces each decoded value by the running
// sum carried in *prev before storing it.
//
//   in        : encoded bytes; 16 bytes are loaded unconditionally
//   out       : destination for the summed integers
//   mask      : high-bit mask of the input bytes (low 16 bits inspected)
//   ints_read : receives the number of integers produced (16, 6, 4 or 2)
//   prev      : in/out running-sum vector; its top lane is the carry
// Returns the number of input bytes consumed.
static uint64_t masked_vbyte_read_group_delta(const uint8_t* in, uint32_t* out, uint64_t mask, uint64_t* ints_read, __m128i * prev) {
    __m128i initial = _mm_lddqu_si128((const __m128i *) (in));
    __m128i * mout = (__m128i *) out;

    // All 16 bytes are single-byte integers.
    if (!(mask & 0xFFFF)) {
        __m128i result = _mm_cvtepi8_epi32(initial);
        *prev = PrefixSum(result, *prev);
        _mm_storeu_si128(mout, *prev);
        initial = _mm_srli_si128(initial, 4);
        result = _mm_cvtepi8_epi32(initial);
        *prev = PrefixSum(result, *prev);
        _mm_storeu_si128(mout + 1, *prev);
        initial = _mm_srli_si128(initial, 4);
        result = _mm_cvtepi8_epi32(initial);
        *prev = PrefixSum(result, *prev);
        _mm_storeu_si128(mout + 2, *prev);
        initial = _mm_srli_si128(initial, 4);
        result = _mm_cvtepi8_epi32(initial);
        *prev = PrefixSum(result, *prev);
        _mm_storeu_si128(mout + 3, *prev);
        *ints_read = 16;
        return 16;
    }

    uint32_t low_12_bits = mask & 0xFFF;
    // combine index and bytes consumed into a single lookup
    index_bytes_consumed combined = combined_lookup[low_12_bits];
    uint64_t consumed = combined.bytes_consumed;
    uint8_t index = combined.index;

    __m128i shuffle_vector = vectors[index];

    // Six integers, each in a 16-bit lane.
    if (index < 64) {
        *ints_read = 6;
        __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
        __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x007F));
        __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x7F00));
        __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
        __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
        __m128i unpacked_result_a = _mm_and_si128(packed_result, _mm_set1_epi32(0x0000FFFF));
        *prev = PrefixSum(unpacked_result_a, *prev);
        _mm_storeu_si128(mout, *prev);
        __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
        // Only two lanes are valid here, so only the low 8 bytes are stored.
        *prev = PrefixSum2ints(unpacked_result_b, *prev);
        _mm_storel_epi64(mout + 1, *prev);
        return consumed;
    }
    // Four integers, one per 32-bit lane.
    if (index < 145) {
        *ints_read = 4;
        __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
        __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x0000007F));
        __m128i middle_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x00007F00));
        __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x007F0000));
        __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
        __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
        __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
        __m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
        *prev = PrefixSum(result, *prev);
        _mm_storeu_si128(mout, *prev);
        return consumed;
    }

    // Two integers, one per 64-bit half.
    *ints_read = 2;
    __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
    __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
    __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
    __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
    __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
    __m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
    __m128i result_evens = _mm_or_si128(recombined, low_byte);
    __m128i result =
_mm_shuffle_epi8(result_evens, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1));
    // Only two lanes are valid, so only the low 8 bytes are stored.
    *prev = PrefixSum2ints(result, *prev);
    _mm_storel_epi64(mout, *prev);
    return consumed;
}

// Decodes one group of integers from the 16 bytes at `in`, computing the
// high-bit mask itself instead of receiving it from the caller.
//
//   in        : encoded bytes; 16 bytes are loaded unconditionally
//   out       : destination; 16, 6, 4 or 2 integers are stored
//   ints_read : receives the number of integers produced
// Returns the number of input bytes consumed.
static int read_int_group(const uint8_t* in, uint32_t* out, int* ints_read) {
    __m128i initial = _mm_lddqu_si128((const __m128i *) in);
    __m128i * const mout = (__m128i *) out;

    // One bit per input byte, set when that byte has its high bit set.
    int mask = _mm_movemask_epi8(initial);
    if (mask == 0) {
        // Sixteen single-byte integers: widen four bytes at a time.
        __m128i result;
        result = _mm_cvtepi8_epi32(initial);
        initial = _mm_srli_si128(initial, 4);
        _mm_storeu_si128(mout, result);
        result = _mm_cvtepi8_epi32(initial);
        initial = _mm_srli_si128(initial, 4);
        _mm_storeu_si128(mout + 1, result);
        result = _mm_cvtepi8_epi32(initial);
        initial = _mm_srli_si128(initial, 4);
        _mm_storeu_si128(mout + 2, result);
        result = _mm_cvtepi8_epi32(initial);
        _mm_storeu_si128(mout + 3, result);
        *ints_read = 16;
        return 16;
    }

    // The low 12 mask bits select the shuffle row and the byte count.
    int mask2 = mask & 0xFFF;
    index_bytes_consumed combined = combined_lookup[mask2];

    int index = combined.index;
    __m128i shuffle_vector = vectors[index];
    int consumed = combined.bytes_consumed;

    // Six integers, each in a 16-bit lane.
    if (index < 64) {
        *ints_read = 6;
        __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
        __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x007F));
        __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x7F00));
        __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1);
        __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted);
        __m128i unpacked_result_a = _mm_and_si128(packed_result, _mm_set1_epi32(0x0000FFFF));
        _mm_storeu_si128(mout, unpacked_result_a);
        __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16);
        _mm_storel_epi64(mout + 1, unpacked_result_b);
        return consumed;
    }
    // Four integers, one per 32-bit lane.
    if (index < 145) {
        *ints_read = 4;
        __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector);
        __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x0000007F));
        __m128i middle_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x00007F00));
        __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x007F0000));
        __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1);
        __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2);
        __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted);
        __m128i result = _mm_or_si128(low_middle, high_bytes_shifted);
        _mm_storeu_si128(mout, result);
        return consumed;
    }

    // Two integers, one per 64-bit half.
    *ints_read = 2;
    __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F));
    __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector);
    __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16));
    __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8);
    __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes);
    __m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56);
    __m128i result_evens = _mm_or_si128(recombined, low_byte);
    __m128i result = _mm_shuffle_epi8(result_evens, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1));
    _mm_storel_epi64(mout, result);
    return consumed;
}

// Decodes `length` integers from `in` into `out`.
//
//   in     : encoded input
//   out    : destination for the decoded integers
//   length : number of ints we want to decode
// Returns the number of input bytes read. An early `return 0` also exists in
// the main loop; the source describes it as a fake check.
//
// The high-bit masks of the input are computed 48 bytes at a time, ahead of
// the decoding position, and accumulated in `sig`.
size_t masked_vbyte_decode(const uint8_t* in, uint32_t* out, uint64_t length) {
    size_t consumed = 0; // number of bytes read
    uint64_t count = 0; // how many integers we have read so far

    uint64_t sig = 0;
    int availablebytes = 0;
    if (96 < length) {
        size_t scanned = 0;

        // Mask of the first 32 input bytes.
#ifdef __AVX2__
        __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
        uint32_t lowSig = _mm256_movemask_epi8(low);
#else
        __m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
        uint32_t lowSig1 = _mm_movemask_epi8(low1);
        __m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
        uint32_t lowSig2 = _mm_movemask_epi8(low2);
        uint32_t lowSig = lowSig2 << 16;
        lowSig |= lowSig1;
#endif

        // excess verbosity to avoid problems with sign extension on conversions
        // better to think about what's happening and make it clearer
        __m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
        uint32_t highSig =
_mm_movemask_epi8(high);
        // nextSig: 48-bit mask for input bytes [scanned, scanned + 48).
        uint64_t nextSig = highSig;
        nextSig <<= 32;
        nextSig |= lowSig;
        scanned += 48;

        do {
            uint64_t thisSig = nextSig;

            // Compute the mask of the following 48 bytes before decoding the
            // current ones.
#ifdef __AVX2__
            low = _mm256_loadu_si256((__m256i *)(in + scanned));
            lowSig = _mm256_movemask_epi8(low);
#else
            low1 = _mm_loadu_si128((__m128i *) (in + scanned));
            lowSig1 = _mm_movemask_epi8(low1);
            low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
            lowSig2 = _mm_movemask_epi8(low2);
            lowSig = lowSig2 << 16;
            lowSig |= lowSig1;
#endif

            high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
            highSig = _mm_movemask_epi8(high);
            nextSig = highSig;
            nextSig <<= 32;
            nextSig |= lowSig;

            // Bits still pending in sig from earlier blocks; thisSig is
            // appended above them.
            uint64_t remaining = scanned - (consumed + 48);
            sig = (thisSig << remaining) | sig;

            uint64_t reload = scanned - 16;
            scanned += 48;

            // need to reload when less than 16 scanned bytes remain in sig
            while (consumed < reload) {
                uint64_t ints_read;
                uint64_t bytes = masked_vbyte_read_group(in + consumed, out + count, sig, &ints_read);
                sig >>= bytes;

                // seems like this might force the compiler to prioritize shifting sig >>= bytes
                if (sig == 0xFFFFFFFFFFFFFFFF)
                    return 0; // fake check to force earliest evaluation

                consumed += bytes;
                count += ints_read;
            }
        } while (count + 112 < length); // 112 == 48 + 48 ahead for scanning + up to 16 remaining in sig
        // Fold in the mask that was scanned ahead but not yet merged.
        sig = (nextSig << (scanned - consumed - 48)) | sig;
        availablebytes = scanned - consumed;
    }
    // Tail: keep decoding groups while mask bits are available, refilling
    // sig 32 or 16 bytes at a time.
    while (availablebytes + count < length) {
        if (availablebytes < 16) {
            if (availablebytes + count + 31 < length) {
#ifdef __AVX2__
                uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
                sig |= (newsigavx << availablebytes);
#else
                uint64_t newsig = _mm_movemask_epi8(
                        _mm_lddqu_si128(
                                (const __m128i *) (in + availablebytes + consumed)));
                uint64_t newsig2 = _mm_movemask_epi8(
                        _mm_lddqu_si128(
                                (const __m128i *) (in + availablebytes + 16 + consumed)));
                sig |= (newsig << availablebytes) | (newsig2 << (availablebytes + 16));
#endif
                availablebytes += 32;
            } else if
(availablebytes + count + 15 < length) {
                // availablebytes < 16 here, so the shifted 16-bit mask still
                // fits in a non-negative int.
                int newsig = _mm_movemask_epi8(
                        _mm_lddqu_si128(
                                (const __m128i *) (in + availablebytes + consumed)));
                sig |= newsig << availablebytes;
                availablebytes += 16;
            } else {
                break;
            }
        }
        uint64_t ints_read;
        uint64_t eaten = masked_vbyte_read_group(in + consumed, out + count, sig, &ints_read);
        consumed += eaten;
        availablebytes -= eaten;
        sig >>= eaten;
        count += ints_read;
    }
    // Whatever is left is decoded one integer at a time.
    for (; count < length; count++) {
        consumed += read_int(in + consumed, out + count);
    }
    return consumed;
}

// Decodes every integer contained in the first `inputsize` bytes of `in`.
//
//   in        : encoded input
//   out       : destination for the decoded integers
//   inputsize : number of input bytes we want to decode
// Returns the number of written ints. An early `return 0` also exists in the
// main loop; the source describes it as a fake check.
size_t masked_vbyte_decode_fromcompressedsize(const uint8_t* in, uint32_t* out, size_t inputsize) {
    size_t consumed = 0; // number of bytes read
    uint32_t * initout = out;

    uint64_t sig = 0;
    int availablebytes = 0;
    if (96 < inputsize) {
        size_t scanned = 0;

        // Mask of the first 32 input bytes.
#ifdef __AVX2__
        __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned));
        uint32_t lowSig = _mm256_movemask_epi8(low);
#else
        __m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned));
        uint32_t lowSig1 = _mm_movemask_epi8(low1);
        __m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
        uint32_t lowSig2 = _mm_movemask_epi8(low2);
        uint32_t lowSig = lowSig2 << 16;
        lowSig |= lowSig1;
#endif

        // excess verbosity to avoid problems with sign extension on conversions
        // better to think about what's happening and make it clearer
        __m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
        uint32_t highSig = _mm_movemask_epi8(high);
        // nextSig: 48-bit mask for input bytes [scanned, scanned + 48).
        uint64_t nextSig = highSig;
        nextSig <<= 32;
        nextSig |= lowSig;
        scanned += 48;

        do {
            uint64_t thisSig = nextSig;

            // Compute the mask of the following 48 bytes before decoding the
            // current ones.
#ifdef __AVX2__
            low = _mm256_loadu_si256((__m256i *)(in + scanned));
            lowSig = _mm256_movemask_epi8(low);
#else
            low1 = _mm_loadu_si128((__m128i *) (in + scanned));
            lowSig1 = _mm_movemask_epi8(low1);
            low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16));
            lowSig2 = _mm_movemask_epi8(low2);
            lowSig = lowSig2 << 16;
            lowSig |= lowSig1;
#endif

            high = _mm_loadu_si128((__m128i *) (in + scanned + 32));
            highSig = _mm_movemask_epi8(high);
            nextSig = highSig;
            nextSig <<= 32;
            nextSig |= lowSig;

            // Bits still pending in sig from earlier blocks; thisSig is
            // appended above them.
            uint64_t remaining = scanned - (consumed + 48);
            sig = (thisSig << remaining) | sig;

            uint64_t reload = scanned - 16;
            scanned += 48;

            // need to reload when less than 16 scanned bytes remain in sig
            while (consumed < reload) {
                uint64_t ints_read;
                uint64_t bytes = masked_vbyte_read_group(in + consumed, out, sig, &ints_read);
                sig >>= bytes;

                // seems like this might force the compiler to prioritize shifting sig >>= bytes
                if (sig == 0xFFFFFFFFFFFFFFFF)
                    return 0; // fake check to force earliest evaluation

                consumed += bytes;
                out += ints_read;
            }
        } while (scanned + 112 < inputsize); // 112 == 48 + 48 ahead for scanning + up to 16 remaining in sig
        // Fold in the mask that was scanned ahead but not yet merged.
        sig = (nextSig << (scanned - consumed - 48)) | sig;
        availablebytes = scanned - consumed;
    }
    // Tail: refill sig 32 or 16 bytes at a time while that many input bytes
    // remain, and stop once fewer than 16 mask bits are left and no further
    // full load fits inside inputsize.
    while (1) {
        if (availablebytes < 16) {
            if (availablebytes + consumed + 31 < inputsize) {
#ifdef __AVX2__
                uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed)));
                sig |= (newsigavx << availablebytes);
#else
                uint64_t newsig = _mm_movemask_epi8(
                        _mm_lddqu_si128(
                                (const __m128i *) (in + availablebytes + consumed)));
                uint64_t newsig2 = _mm_movemask_epi8(
                        _mm_lddqu_si128(
                                (const __m128i *) (in + availablebytes + 16 + consumed)));
                sig |= (newsig << availablebytes) | (newsig2 << (availablebytes + 16));
#endif
                availablebytes += 32;
            } else if(availablebytes + consumed + 15 < inputsize ) {
                // availablebytes < 16 here, so the shifted 16-bit mask still
                // fits in a non-negative int.
                int newsig = _mm_movemask_epi8(
                        _mm_lddqu_si128(
                                (const __m128i *) (in + availablebytes + consumed)));
                sig |= newsig << availablebytes;
                availablebytes += 16;
            } else {
                break;
            }
        }
        uint64_t ints_read;
        uint64_t bytes = masked_vbyte_read_group(in + consumed, out, sig, &ints_read);
        consumed += bytes;
        availablebytes -= bytes;
        sig >>= bytes;
        out += ints_read;
    }
    // Scalar decoding of the remaining bytes. An integer whose terminating
    // byte (high bit clear) does not appear before inputsize is not written.
    while (consumed < inputsize) {
        unsigned int shift = 0;
        for (uint32_t v = 0; consumed < inputsize; shift += 7) {
            uint8_t c = in[consumed++];
            if ((c & 128) ==
0) { out[0] = v + (c << shift); ++out; break; } else { v += (c & 127) << shift; } } } return out - initout; } /* read_ints: decode `length` integers from `in` into `out`. While at least 16 integers are still wanted, read_int_group decodes a whole group and reports through ints_read how many integers it produced; the remaining ones are decoded one at a time with read_int. Returns the sum of the values returned by those helpers (accumulated in `consumed`). */ size_t read_ints(const uint8_t* in, uint32_t* out, int length) { size_t consumed = 0; int count; for (count = 0; count + 15 < length;) { int ints_read; consumed += read_int_group(in + consumed, out + count, &ints_read); count += ints_read; } for (; count < length; count++) { consumed += read_int(in + consumed, out + count); } return consumed; } /* read_int_group_delta: load 16 bytes from `in`, decode one group of delta-coded integers, add the running sum carried in *prev (through PrefixSum / PrefixSum2ints, defined elsewhere) and store the results to `out`. *ints_read is set to 16, 6, 4 or 2 depending on the path taken. Returns 16 on the all-single-byte path, otherwise the bytes_consumed field looked up in combined_lookup. *prev is left holding the last prefix-sum vector. */ static int read_int_group_delta(const uint8_t* in, uint32_t* out, int* ints_read, __m128i * prev) { __m128i initial = _mm_lddqu_si128((const __m128i *) in); __m128i * const mout = (__m128i *) out; int mask = _mm_movemask_epi8(initial); /* mask == 0: none of the 16 bytes has its top bit set, so every byte is widened to its own 32-bit value, four at a time */ if (mask == 0) { __m128i result; result = _mm_cvtepi8_epi32(initial); initial = _mm_srli_si128(initial, 4); *prev = PrefixSum(result, *prev); _mm_storeu_si128(mout, *prev); result = _mm_cvtepi8_epi32(initial); initial = _mm_srli_si128(initial, 4); *prev = PrefixSum(result, *prev); _mm_storeu_si128(mout + 1, *prev); result = _mm_cvtepi8_epi32(initial); initial = _mm_srli_si128(initial, 4); *prev = PrefixSum(result, *prev); _mm_storeu_si128(mout + 2, *prev); result = _mm_cvtepi8_epi32(initial); *prev = PrefixSum(result, *prev); _mm_storeu_si128(mout + 3, *prev); *ints_read = 16; return 16; } /* otherwise the low 12 mask bits pick both the shuffle vector index and the number of bytes used */ int mask2 = mask & 0xFFF; index_bytes_consumed combined = combined_lookup[mask2]; int index = combined.index; __m128i shuffle_vector = vectors[index]; int consumed = combined.bytes_consumed; /* index < 64: six values, each built from two 7-bit payloads inside a 16-bit lane */ if (index < 64) { *ints_read = 6; __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x007F)); __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x7F00)); __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1); __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted); __m128i unpacked_result_a = _mm_and_si128(packed_result, _mm_set1_epi32(0x0000FFFF)); *prev = PrefixSum(unpacked_result_a, *prev); 
_mm_storeu_si128(mout, *prev); /* the upper 16-bit halves hold the remaining two of the six values */ __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16); *prev = PrefixSum2ints(unpacked_result_b, *prev); _mm_storeu_si128(mout + 1, *prev); return consumed; } /* index < 145: four values, each built from three 7-bit payloads inside a 32-bit lane */ if (index < 145) { *ints_read = 4; __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x0000007F)); __m128i middle_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x00007F00)); __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x007F0000)); __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1); __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2); __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted); __m128i result = _mm_or_si128(low_middle, high_bytes_shifted); *prev = PrefixSum(result, *prev); _mm_storeu_si128(mout, *prev); return consumed; } /* remaining indices: two values per group; the continuation bits are cleared first, then each 16-bit lane is multiplied by 128/64/32/16 (a left shift by 7/6/5/4) before the pieces are OR-ed together */ *ints_read = 2; __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F)); __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector); __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16)); __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8); __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes); __m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56); __m128i result_evens = _mm_or_si128(recombined, low_byte); __m128i result = _mm_shuffle_epi8(result_evens, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1)); *prev = PrefixSum2ints(result, *prev); _mm_storeu_si128(mout, *prev); return consumed; } /* masked_vbyte_decode_delta: decode `length` delta-coded integers from `in` into `out`, with the running sum starting at `prev`. Returns the number of input bytes consumed. When length > 96 a bulk loop decodes groups with masked_vbyte_read_group_delta; whatever is left is decoded one value at a time with read_int_delta. */ // length : number of ints we want to decode size_t masked_vbyte_decode_delta(const uint8_t* in, uint32_t* out, uint64_t length, uint32_t prev) { //uint64_t length = (uint64_t) len_signed; // number of ints we want to decode size_t consumed = 0; // number of bytes read __m128i mprev = _mm_set1_epi32(prev); uint64_t count = 0; // how many integers we have read so far uint64_t sig = 0; int 
availablebytes = 0; /* bulk path: `sig` is a 64-bit window holding the top bit of each not-yet-consumed input byte; signatures are gathered 48 bytes (32 + 16) at a time, one chunk ahead of the decoder */ if (96 < length) { size_t scanned = 0; #ifdef __AVX2__ __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned)); uint32_t lowSig = _mm256_movemask_epi8(low); #else __m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned)); uint32_t lowSig1 = _mm_movemask_epi8(low1); __m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); uint32_t lowSig2 = _mm_movemask_epi8(low2); uint32_t lowSig = lowSig2 << 16; lowSig |= lowSig1; #endif // excess verbosity to avoid problems with sign extension on conversions // better to think about what's happening and make it clearer __m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); uint32_t highSig = _mm_movemask_epi8(high); uint64_t nextSig = highSig; nextSig <<= 32; nextSig |= lowSig; scanned += 48; do { uint64_t thisSig = nextSig; #ifdef __AVX2__ low = _mm256_loadu_si256((__m256i *)(in + scanned)); lowSig = _mm256_movemask_epi8(low); #else low1 = _mm_loadu_si128((__m128i *) (in + scanned)); lowSig1 = _mm_movemask_epi8(low1); low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); lowSig2 = _mm_movemask_epi8(low2); lowSig = lowSig2 << 16; lowSig |= lowSig1; #endif high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); highSig = _mm_movemask_epi8(high); nextSig = highSig; nextSig <<= 32; nextSig |= lowSig; uint64_t remaining = scanned - (consumed + 48); sig = (thisSig << remaining) | sig; uint64_t reload = scanned - 16; scanned += 48; // need to reload when less than 16 scanned bytes remain in sig while (consumed < reload) { uint64_t ints_read; uint64_t bytes = masked_vbyte_read_group_delta(in + consumed, out + count, sig, &ints_read, &mprev); sig >>= bytes; // seems like this might force the compiler to prioritize shifting sig >>= bytes if (sig == 0xFFFFFFFFFFFFFFFF) return 0; // fake check to force earliest evaluation consumed += bytes; count += ints_read; } } while (count + 112 < length); // 112 == 48 + 48 ahead for scanning + up to 16 remaining in sig sig = (nextSig << (scanned - consumed 
- 48)) | sig; availablebytes = scanned - consumed; } /* drain the already-scanned window; when length <= 96 availablebytes is still 0 here, so the loop exits at once and everything is decoded by the scalar loop below */ while (availablebytes + count < length) { /* NOTE(review): this break makes the reload block guarded by the identical test right after it unreachable */ if (availablebytes < 16) break; if (availablebytes < 16) { if (availablebytes + count + 31 < length) { #ifdef __AVX2__ uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed))); sig |= (newsigavx << availablebytes); #else uint64_t newsig = _mm_movemask_epi8( _mm_lddqu_si128( (const __m128i *) (in + availablebytes + consumed))); uint64_t newsig2 = _mm_movemask_epi8( _mm_lddqu_si128( (const __m128i *) (in + availablebytes + 16 + consumed))); sig |= (newsig << availablebytes) | (newsig2 << (availablebytes + 16)); #endif availablebytes += 32; } else if (availablebytes + count + 15 < length) { int newsig = _mm_movemask_epi8( _mm_lddqu_si128( (const __m128i *) (in + availablebytes + consumed))); sig |= newsig << availablebytes; availablebytes += 16; } else { break; } } uint64_t ints_read; uint64_t eaten = masked_vbyte_read_group_delta(in + consumed, out + count, sig, &ints_read, &mprev); consumed += eaten; availablebytes -= eaten; sig >>= eaten; count += ints_read; } /* lane 3 of mprev seeds the scalar tail with the running sum */ prev = _mm_extract_epi32(mprev, 3); for (; count < length; count++) { consumed += read_int_delta(in + consumed, out + count, &prev); } return consumed; } /* read_ints_delta: delta-coded counterpart of read_ints. Decodes `length` integers from `in` into `out`, starting the running sum at `prev`: whole groups through read_int_group_delta while at least 16 integers are still wanted, then single values through read_int_delta seeded from lane 3 of the vector state. Returns the sum of the values returned by those helpers. */ size_t read_ints_delta(const uint8_t* in, uint32_t* out, int length, uint32_t prev) { __m128i mprev = _mm_set1_epi32(prev); size_t consumed = 0; int count; for (count = 0; count + 15 < length;) { int ints_read; consumed += read_int_group_delta(in + consumed, out + count, &ints_read, &mprev); count += ints_read; } prev = _mm_extract_epi32(mprev, 3); for (; count < length; count++) { consumed += read_int_delta(in + consumed, out + count, &prev); } return consumed; } /* masked_vbyte_decode_fromcompressedsize_delta: decode delta-coded integers until `inputsize` input bytes have been consumed, writing prefix sums (seeded with `prev`) to `out`. */ // inputsize : number of input bytes we want to decode // returns the number of written ints size_t masked_vbyte_decode_fromcompressedsize_delta(const uint8_t* in, uint32_t* out, size_t inputsize, uint32_t prev) { size_t consumed = 0; // number 
of bytes read uint32_t * initout = out; __m128i mprev = _mm_set1_epi32(prev); uint64_t sig = 0; int availablebytes = 0; /* bulk path for inputs longer than 96 bytes: signatures (top bit of every byte) are gathered 48 bytes at a time, one chunk ahead of the decoder */ if (96 < inputsize) { size_t scanned = 0; #ifdef __AVX2__ __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned)); uint32_t lowSig = _mm256_movemask_epi8(low); #else __m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned)); uint32_t lowSig1 = _mm_movemask_epi8(low1); __m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); uint32_t lowSig2 = _mm_movemask_epi8(low2); uint32_t lowSig = lowSig2 << 16; lowSig |= lowSig1; #endif // excess verbosity to avoid problems with sign extension on conversions // better to think about what's happening and make it clearer __m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); uint32_t highSig = _mm_movemask_epi8(high); uint64_t nextSig = highSig; nextSig <<= 32; nextSig |= lowSig; scanned += 48; do { uint64_t thisSig = nextSig; #ifdef __AVX2__ low = _mm256_loadu_si256((__m256i *)(in + scanned)); lowSig = _mm256_movemask_epi8(low); #else low1 = _mm_loadu_si128((__m128i *) (in + scanned)); lowSig1 = _mm_movemask_epi8(low1); low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); lowSig2 = _mm_movemask_epi8(low2); lowSig = lowSig2 << 16; lowSig |= lowSig1; #endif high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); highSig = _mm_movemask_epi8(high); nextSig = highSig; nextSig <<= 32; nextSig |= lowSig; uint64_t remaining = scanned - (consumed + 48); sig = (thisSig << remaining) | sig; uint64_t reload = scanned - 16; scanned += 48; // need to reload when less than 16 scanned bytes remain in sig while (consumed < reload) { uint64_t ints_read; uint64_t bytes = masked_vbyte_read_group_delta(in + consumed, out, sig, &ints_read, &mprev); sig >>= bytes; // seems like this might force the compiler to prioritize shifting sig >>= bytes if (sig == 0xFFFFFFFFFFFFFFFF) return 0; // fake check to force earliest evaluation consumed += bytes; out += ints_read; } } while (scanned + 112 < inputsize); // 
112 == 48 + 48 ahead for scanning + up to 16 remaining in sig sig = (nextSig << (scanned - consumed - 48)) | sig; availablebytes = scanned - consumed; } /* refill loop: top up the signature window with 32 or 16 more bytes whenever fewer than 16 are available, and stop once a full 16-byte load would run past inputsize */ while (1) { if (availablebytes < 16) { if (availablebytes + consumed + 31 < inputsize) { #ifdef __AVX2__ uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed))); sig |= (newsigavx << availablebytes); #else uint64_t newsig = _mm_movemask_epi8( _mm_lddqu_si128( (const __m128i *) (in + availablebytes + consumed))); uint64_t newsig2 = _mm_movemask_epi8( _mm_lddqu_si128( (const __m128i *) (in + availablebytes + 16 + consumed))); sig |= (newsig << availablebytes) | (newsig2 << (availablebytes + 16)); #endif availablebytes += 32; } else if(availablebytes + consumed + 15 < inputsize ) { int newsig = _mm_movemask_epi8( _mm_lddqu_si128( (const __m128i *) (in + availablebytes + consumed))); sig |= newsig << availablebytes; availablebytes += 16; } else { break; } } uint64_t ints_read; uint64_t bytes = masked_vbyte_read_group_delta(in + consumed, out, sig, &ints_read, &mprev); consumed += bytes; availablebytes -= bytes; sig >>= bytes; out += ints_read; } prev = _mm_extract_epi32(mprev, 3); /* scalar tail: decode the remaining bytes one varint at a time, adding each delta to prev. A trailing varint whose last byte still has the top bit set is dropped without output. NOTE(review): `shift` grows by 7 per byte with no upper bound, so an over-long varint would shift by 32 or more */ while (consumed < inputsize) { unsigned int shift = 0; for (uint32_t v = 0; consumed < inputsize; shift += 7) { uint8_t c = in[consumed++]; if ((c & 128) == 0) { uint32_t delta = v + (c << shift); prev += delta; *out++ = prev; break; } else { v += (c & 127) << shift; } } } return out - initout; } /* shuffle_mask_bytes1: sixteen 16-byte pshufb control vectors, read through shuffle_mask by the CHECK_AND_INCREMENT macros. The first four bytes of entry m select the 32-bit lane at the position of the lowest set bit of m, so that lane ends up in lane 0 of the shuffled vector. */ static int8_t shuffle_mask_bytes1[16 * 16 ] ALIGNED(16) = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, }; /* shuffle_mask: the byte table above viewed as an array of 128-bit vectors */ static const __m128i *shuffle_mask = (__m128i *) shuffle_mask_bytes1; /* CHECK_AND_INCREMENT: lower-bound test of |key| against the four lanes of |out|. Relies on `conversion` and `key4` from the enclosing function. If some lane is not below the key, the first such lane is stored in |*presult| and the enclosing function returns i + that lane's offset; otherwise i advances by 4. */ #define CHECK_AND_INCREMENT(i, out, key, presult) \ do { \ __m128i tmpout = _mm_sub_epi32(out, conversion); \ uint32_t mmask = _mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(tmpout, key4))); \ if (mmask != 15) { \ __m128i pp = _mm_shuffle_epi8(out, shuffle_mask[mmask ^ 15]); \ int offset; \ SIMDCOMP_CTZ(offset, mmask ^ 15); \ *presult = _mm_cvtsi128_si32(pp); \ return (i + offset); \ } \ i += 4; \ } while (0) /* CHECK_AND_INCREMENT_2: same test restricted to the two lowest lanes of |out|; on a miss i advances by 2. */ #define CHECK_AND_INCREMENT_2(i, out, key, presult) \ do { \ __m128i tmpout = _mm_sub_epi32(out, conversion); \ uint32_t mmask = 3 & _mm_movemask_ps(_mm_castsi128_ps(_mm_cmplt_epi32(tmpout, key4))); \ if (mmask != 3) { \ __m128i pp = _mm_shuffle_epi8(out, shuffle_mask[mmask ^ 3]); \ int offset; \ SIMDCOMP_CTZ(offset, mmask ^ 3); \ *presult = _mm_cvtsi128_si32(pp); \ return (i + offset); \ } \ i += 2; \ } while (0) /* masked_vbyte_search_group_delta: decode one group (16, 6, 4 or 2 delta-coded values) from `in`, fold it into the running sum *prev and look for the first value that is not below `key`. Both sides are offset by 2^31 so that the signed SSE compare orders them as unsigned numbers. On a hit the value is stored in *presult and the return value is i plus the position inside the group. On a miss the function returns -1 after setting *ints_read and *p (bytes used). NOTE(review): `i` and the return value are int although the caller passes a uint64_t count. */ static int masked_vbyte_search_group_delta(const uint8_t *in, uint64_t *p, uint64_t mask, uint64_t *ints_read, __m128i *prev, int i, uint32_t key, uint32_t *presult) { __m128i initial = _mm_lddqu_si128((const __m128i *) (in)); __m128i conversion = _mm_set1_epi32(2147483648U); __m128i key4 = _mm_set1_epi32(key - 2147483648U); /* low 16 signature bits clear: sixteen one-byte values, checked four at a time */ if (!(mask & 0xFFFF)) { __m128i result = _mm_cvtepi8_epi32(initial); *prev = PrefixSum(result, 
*prev); CHECK_AND_INCREMENT(i, *prev, key, presult); initial = _mm_srli_si128(initial, 4); result = _mm_cvtepi8_epi32(initial); *prev = PrefixSum(result, *prev); CHECK_AND_INCREMENT(i, *prev, key, presult); initial = _mm_srli_si128(initial, 4); result = _mm_cvtepi8_epi32(initial); *prev = PrefixSum(result, *prev); CHECK_AND_INCREMENT(i, *prev, key, presult); initial = _mm_srli_si128(initial, 4); result = _mm_cvtepi8_epi32(initial); *prev = PrefixSum(result, *prev); CHECK_AND_INCREMENT(i, *prev, key, presult); /* no hit among the sixteen values */ *ints_read = 16; *p = 16; return (-1); } uint32_t low_12_bits = mask & 0xFFF; // combine index and bytes consumed into a single lookup index_bytes_consumed combined = combined_lookup[low_12_bits]; uint64_t consumed = combined.bytes_consumed; uint8_t index = combined.index; __m128i shuffle_vector = vectors[index]; // __m128i shuffle_vector = {0, 0}; // speed check: 20% faster at large, less at small if (index < 64) { *ints_read = 6; __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x007F)); __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x7F00)); __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1); __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted); __m128i unpacked_result_a = _mm_and_si128(packed_result, _mm_set1_epi32(0x0000FFFF)); *prev = PrefixSum(unpacked_result_a, *prev); CHECK_AND_INCREMENT(i, *prev, key, presult); __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16); *prev = PrefixSum2ints(unpacked_result_b, *prev); //_mm_storel_epi64(&out, *prev); CHECK_AND_INCREMENT_2(i, *prev, key, presult); *p = consumed; return (-1); } if (index < 145) { *ints_read = 4; __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x0000007F)); __m128i middle_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x00007F00)); __m128i high_bytes = 
_mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x007F0000)); __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1); __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2); __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted); __m128i result = _mm_or_si128(low_middle, high_bytes_shifted); *prev = PrefixSum(result, *prev); CHECK_AND_INCREMENT(i, *prev, key, presult); /* no hit among the four values */ *p = consumed; return (-1); } /* remaining indices: two values per group */ *ints_read = 2; __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F)); __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector); __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16)); __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8); __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes); __m128i low_byte = _mm_srli_epi64(bytes_to_decode, 56); __m128i result_evens = _mm_or_si128(recombined, low_byte); __m128i result = _mm_shuffle_epi8(result_evens, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1)); *prev = PrefixSum2ints(result, *prev); //_mm_storel_epi64(&out, *prev); CHECK_AND_INCREMENT_2(i, *prev, key, presult); *p = consumed; return (-1); } /* masked_vbyte_search_delta: scan `length` delta-coded integers from `in` (running sum seeded with `prev`) for the first value that is not below `key`. On a hit the value is stored in *presult and its index is returned; otherwise *presult is set to key + 1 and `length` is returned. */ // returns the index of the matching key int masked_vbyte_search_delta(const uint8_t *in, uint64_t length, uint32_t prev, uint32_t key, uint32_t *presult) { size_t consumed = 0; // number of bytes read __m128i mprev = _mm_set1_epi32(prev); uint64_t count = 0; // how many integers we have read so far uint64_t sig = 0; int availablebytes = 0; if (96 < length) { size_t scanned = 0; #ifdef __AVX2__ __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned)); uint32_t lowSig = _mm256_movemask_epi8(low); #else __m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned)); uint32_t lowSig1 = _mm_movemask_epi8(low1); __m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); uint32_t lowSig2 = _mm_movemask_epi8(low2); uint32_t lowSig = lowSig2 << 16; lowSig |= lowSig1; #endif // excess 
verbosity to avoid problems with sign extension on conversions // better to think about what's happening and make it clearer __m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); uint32_t highSig = _mm_movemask_epi8(high); uint64_t nextSig = highSig; nextSig <<= 32; nextSig |= lowSig; scanned += 48; /* each pass gathers the signature of the next 48 bytes, then searches groups from the current window */ do { uint64_t thisSig = nextSig; #ifdef __AVX2__ low = _mm256_loadu_si256((__m256i *)(in + scanned)); lowSig = _mm256_movemask_epi8(low); #else low1 = _mm_loadu_si128((__m128i *) (in + scanned)); lowSig1 = _mm_movemask_epi8(low1); low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); lowSig2 = _mm_movemask_epi8(low2); lowSig = lowSig2 << 16; lowSig |= lowSig1; #endif high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); highSig = _mm_movemask_epi8(high); nextSig = highSig; nextSig <<= 32; nextSig |= lowSig; uint64_t remaining = scanned - (consumed + 48); sig = (thisSig << remaining) | sig; uint64_t reload = scanned - 16; scanned += 48; // need to reload when less than 16 scanned bytes remain in sig while (consumed < reload) { uint64_t ints_read = 0, bytes = 0; int ret = masked_vbyte_search_group_delta(in + consumed, &bytes, sig, &ints_read, &mprev, count, key, presult); if (ret >= 0) return (ret); sig >>= bytes; // seems like this might force the compiler to prioritize shifting sig >>= bytes if (sig == 0xFFFFFFFFFFFFFFFF) return 0; // fake check to force earliest evaluation consumed += bytes; count += ints_read; } } while (count + 112 < length); // 112 == 48 + 48 ahead for scanning + up to 16 remaining in sig sig = (nextSig << (scanned - consumed - 48)) | sig; availablebytes = scanned - consumed; } /* keep searching whole groups while at least 16 pre-scanned bytes remain; no reload happens here, the scalar loop finishes the job */ while (availablebytes + count < length) { if (availablebytes < 16) break; uint64_t ints_read = 0, bytes = 0; int ret = masked_vbyte_search_group_delta(in + consumed, &bytes, sig, &ints_read, &mprev, count, key, presult); if (ret >= 0) return (ret); consumed += bytes; availablebytes -= bytes; sig >>= bytes; count += ints_read; } prev = _mm_extract_epi32(mprev, 
3); /* scalar tail: decode one value at a time and stop at the first running sum that is not below key */ for (; count < length; count++) { uint32_t out; consumed += read_int_delta(in + consumed, &out, &prev); if (key <= prev) { *presult = prev; return (count); } } /* not found */ *presult = key + 1; return length; } /* shuffle_mask_bytes2: pshufb control vectors used by branchlessextract; entry i moves 32-bit lane i into lane 0. Only the first four 16-byte entries are initialised explicitly, the rest of the 16 * 16 array is zero-filled. */ static int8_t shuffle_mask_bytes2[16 * 16 ] ALIGNED(16) = { 0,1,2,3,0,0,0,0,0,0,0,0,0,0,0,0, 4,5,6,7,0,0,0,0,0,0,0,0,0,0,0,0, 8,9,10,11,0,0,0,0,0,0,0,0,0,0,0,0, 12,13,14,15,0,0,0,0,0,0,0,0,0,0,0,0, }; static const __m128i *shuffle_mask_extract = (__m128i *) shuffle_mask_bytes2; /* branchlessextract: return 32-bit lane `i` of `out` by shuffling it into lane 0, using shuffle_mask_extract[i] as the control vector */ static uint32_t branchlessextract (__m128i out, int i) { return _mm_cvtsi128_si32(_mm_shuffle_epi8(out,shuffle_mask_extract[i])); } /* CHECK_SELECT: advance i by 4; if `slot` lies among the four values just decoded, store that lane in *presult and return 1 from the enclosing function. NOTE(review): expands to two statements and is not wrapped in do { } while (0). */ #define CHECK_SELECT(i, out, slot, presult) \ i += 4; \ if (i > slot) { \ *presult = branchlessextract (out, slot - (i - 4)); \ return (1); \ } /* CHECK_SELECT_2: same as CHECK_SELECT for a vector that carries two new values */ #define CHECK_SELECT_2(i, out, slot, presult) \ i += 2; \ if (i > slot) { \ *presult = branchlessextract (out, slot - (i - 2)); \ return (1); \ } /* masked_vbyte_select_group_delta: decode one group (16, 6, 4 or 2 delta-coded values) from `in`, fold it into the running sum *prev and, if position `slot` (counted from the start of this group) falls inside it, store that value in *presult and return 1. Otherwise return 0 after setting *ints_read and *p (bytes used). */ static int masked_vbyte_select_group_delta(const uint8_t *in, uint64_t *p, uint64_t mask, uint64_t *ints_read, __m128i *prev, int slot, uint32_t *presult) { __m128i initial = _mm_lddqu_si128((const __m128i *) (in)); int i = 0; /* low 16 signature bits clear: sixteen one-byte values */ if (!(mask & 0xFFFF)) { __m128i result = _mm_cvtepi8_epi32(initial); *prev = PrefixSum(result, *prev); CHECK_SELECT(i, *prev, slot, presult); initial = _mm_srli_si128(initial, 4); result = _mm_cvtepi8_epi32(initial); *prev = PrefixSum(result, *prev); CHECK_SELECT(i, *prev, slot, presult); initial = _mm_srli_si128(initial, 4); result = _mm_cvtepi8_epi32(initial); *prev = PrefixSum(result, *prev); CHECK_SELECT(i, *prev, slot, presult); initial = _mm_srli_si128(initial, 4); result = _mm_cvtepi8_epi32(initial); *prev = PrefixSum(result, *prev); CHECK_SELECT(i, *prev, slot, presult); *ints_read = 16; *p = 16; return (0); } uint32_t low_12_bits = mask & 0xFFF; // combine index and bytes consumed into a single lookup index_bytes_consumed combined = combined_lookup[low_12_bits]; uint64_t consumed = combined.bytes_consumed; uint8_t index = combined.index; __m128i 
shuffle_vector = vectors[index]; // __m128i shuffle_vector = {0, 0}; // speed check: 20% faster at large, less at small if (index < 64) { *ints_read = 6; __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x007F)); __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi16(0x7F00)); __m128i high_bytes_shifted = _mm_srli_epi16(high_bytes, 1); __m128i packed_result = _mm_or_si128(low_bytes, high_bytes_shifted); __m128i unpacked_result_a = _mm_and_si128(packed_result, _mm_set1_epi32(0x0000FFFF)); *prev = PrefixSum(unpacked_result_a, *prev); CHECK_SELECT(i, *prev, slot, presult); __m128i unpacked_result_b = _mm_srli_epi32(packed_result, 16); *prev = PrefixSum2ints(unpacked_result_b, *prev); //_mm_storel_epi64(&out, *prev); CHECK_SELECT_2(i, *prev, slot, presult); *p = consumed; return (0); } /* index < 145: four values, each built from three 7-bit payloads */ if (index < 145) { *ints_read = 4; __m128i bytes_to_decode = _mm_shuffle_epi8(initial, shuffle_vector); __m128i low_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x0000007F)); __m128i middle_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x00007F00)); __m128i high_bytes = _mm_and_si128(bytes_to_decode, _mm_set1_epi32(0x007F0000)); __m128i middle_bytes_shifted = _mm_srli_epi32(middle_bytes, 1); __m128i high_bytes_shifted = _mm_srli_epi32(high_bytes, 2); __m128i low_middle = _mm_or_si128(low_bytes, middle_bytes_shifted); __m128i result = _mm_or_si128(low_middle, high_bytes_shifted); *prev = PrefixSum(result, *prev); CHECK_SELECT(i, *prev, slot, presult); *p = consumed; return (0); } /* remaining indices: two values per group */ *ints_read = 2; __m128i data_bits = _mm_and_si128(initial, _mm_set1_epi8(0x7F)); __m128i bytes_to_decode = _mm_shuffle_epi8(data_bits, shuffle_vector); __m128i split_bytes = _mm_mullo_epi16(bytes_to_decode, _mm_setr_epi16(128, 64, 32, 16, 128, 64, 32, 16)); __m128i shifted_split_bytes = _mm_slli_epi64(split_bytes, 8); __m128i recombined = _mm_or_si128(split_bytes, shifted_split_bytes); __m128i 
low_byte = _mm_srli_epi64(bytes_to_decode, 56); __m128i result_evens = _mm_or_si128(recombined, low_byte); __m128i result = _mm_shuffle_epi8(result_evens, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1)); *prev = PrefixSum2ints(result, *prev); //_mm_storel_epi64(&out, *prev); CHECK_SELECT_2(i, *prev, slot, presult); *p = consumed; return (0); } /* masked_vbyte_select_delta: return the value at index `slot` of the delta-coded sequence stored at `in` (running sum seeded with `prev`). `length` only steers how far the SIMD group decoder may run; values not reached by it are decoded one at a time with read_int_delta. */ uint32_t masked_vbyte_select_delta(const uint8_t *in, uint64_t length, uint32_t prev, size_t slot) { size_t consumed = 0; // number of bytes read __m128i mprev = _mm_set1_epi32(prev); uint64_t count = 0; // how many integers we have read so far uint64_t sig = 0; int availablebytes = 0; if (96 < length) { size_t scanned = 0; #ifdef __AVX2__ __m256i low = _mm256_loadu_si256((__m256i *)(in + scanned)); uint32_t lowSig = _mm256_movemask_epi8(low); #else __m128i low1 = _mm_loadu_si128((__m128i *) (in + scanned)); uint32_t lowSig1 = _mm_movemask_epi8(low1); __m128i low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); uint32_t lowSig2 = _mm_movemask_epi8(low2); uint32_t lowSig = lowSig2 << 16; lowSig |= lowSig1; #endif // excess verbosity to avoid problems with sign extension on conversions // better to think about what's happening and make it clearer __m128i high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); uint32_t highSig = _mm_movemask_epi8(high); uint64_t nextSig = highSig; nextSig <<= 32; nextSig |= lowSig; scanned += 48; do { uint64_t thisSig = nextSig; #ifdef __AVX2__ low = _mm256_loadu_si256((__m256i *)(in + scanned)); lowSig = _mm256_movemask_epi8(low); #else low1 = _mm_loadu_si128((__m128i *) (in + scanned)); lowSig1 = _mm_movemask_epi8(low1); low2 = _mm_loadu_si128((__m128i *) (in + scanned + 16)); lowSig2 = _mm_movemask_epi8(low2); lowSig = lowSig2 << 16; lowSig |= lowSig1; #endif high = _mm_loadu_si128((__m128i *) (in + scanned + 32)); highSig = _mm_movemask_epi8(high); nextSig = highSig; nextSig <<= 32; nextSig |= lowSig; uint64_t remaining = scanned - (consumed + 48); sig = 
(thisSig << remaining) | sig; uint64_t reload = scanned - 16; scanned += 48; // need to reload when less than 16 scanned bytes remain in sig while (consumed < reload) { uint32_t result; uint64_t ints_read, bytes; if (masked_vbyte_select_group_delta(in + consumed, &bytes, sig, &ints_read, &mprev, slot - count, &result)) { return (result); } sig >>= bytes; // seems like this might force the compiler to prioritize shifting sig >>= bytes if (sig == 0xFFFFFFFFFFFFFFFF) return 0; // fake check to force earliest evaluation consumed += bytes; count += ints_read; } } while (count + 112 < length); // 112 == 48 + 48 ahead for scanning + up to 16 remaining in sig sig = (nextSig << (scanned - consumed - 48)) | sig; availablebytes = scanned - consumed; } while (availablebytes + count < length) { /* NOTE(review): this break makes the reload block guarded by the identical test right after it unreachable */ if (availablebytes < 16) break; if (availablebytes < 16) { if (availablebytes + count + 31 < length) { #ifdef __AVX2__ uint64_t newsigavx = (uint32_t) _mm256_movemask_epi8(_mm256_loadu_si256((__m256i *)(in + availablebytes + consumed))); sig |= (newsigavx << availablebytes); #else uint64_t newsig = _mm_movemask_epi8( _mm_lddqu_si128( (const __m128i *) (in + availablebytes + consumed))); uint64_t newsig2 = _mm_movemask_epi8( _mm_lddqu_si128( (const __m128i *) (in + availablebytes + 16 + consumed))); sig |= (newsig << availablebytes) | (newsig2 << (availablebytes + 16)); #endif availablebytes += 32; } else if (availablebytes + count + 15 < length) { int newsig = _mm_movemask_epi8( _mm_lddqu_si128( (const __m128i *) (in + availablebytes + consumed))); sig |= newsig << availablebytes; availablebytes += 16; } else { break; } } uint32_t result; uint64_t ints_read, bytes; if (masked_vbyte_select_group_delta(in + consumed, &bytes, sig, &ints_read, &mprev, slot - count, &result)) { return (result); } consumed += bytes; availablebytes -= bytes; sig >>= bytes; count += ints_read; } prev = _mm_extract_epi32(mprev, 3); /* scalar tail: keep decoding until value number `slot` has been folded into prev. NOTE(review): the bound is slot + 1 and is never compared with length */ for (; count < slot + 1; count++) { uint32_t out; consumed += read_int_delta(in + 
consumed, &out, &prev); } /* prev is the running sum after the last value decoded */ return prev; }