# notscared / notscared2-main / benchmarking / convert_file.py
import sys

import zarr
# from numcodecs import Zlib
# from numcodecs import GZip
# from numcodecs import LZMA
# from numcodecs import BZ2
import os


import notscared.converters as converters

if __name__ == '__main__':
    # Script entry point: convert the 'traces' table of the database file
    # named on the command line to Zarr, using the compressor chosen below.

    # Exactly one positional argument (the database file) is expected.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python convert_file.py <db_file>")
        sys.exit(1)

    db_converter = converters.Converter()

    # Resolve the database path against the current working directory
    # (os.path.join leaves an absolute argument unchanged).
    source_db = os.path.join(os.getcwd(), cli_args[0])

    # Compression level shared by every compressor option below.
    # Pick it by weighing the compression ratio you need against the
    # CPU overhead you can tolerate. Values from 1 to 9 are typical:
    # 1 is the lowest (fastest) level, 9 the highest (slowest).
    level = 3

    # Compressor under benchmark. To measure a different one, comment this
    # assignment out and uncomment exactly one of the alternatives below.
    codec = zarr.Blosc(
        cname='zstd',
        clevel=level,
        shuffle=zarr.Blosc.SHUFFLE,
    )

    # (suggested by Dr. Immler)
    # codec = zarr.Blosc(cname='lz4', clevel=level, shuffle=zarr.Blosc.SHUFFLE)
    # codec = zarr.Blosc(cname='blosclz', clevel=level, shuffle=zarr.Blosc.SHUFFLE)
    # codec = zarr.Blosc(cname='snappy', clevel=level, shuffle=zarr.Blosc.SHUFFLE)
    # The numcodecs options below also need their matching import
    # (commented out at the top of this file) to be re-enabled.
    # codec = Zlib(level=level)
    # codec = GZip(level=level)
    # codec = LZMA(preset=level)
    # codec = BZ2(level=level)

    # Columns of the table to export.
    wanted_columns = ['samples', 'ptxt']

    db_converter.db_to_zarr(
        db_path=source_db,
        table_name='traces',
        output_columns=wanted_columns,
        chunk_size=10000,
        compressor=codec,
    )