import sys
import os
from notscared import snr
from notscared import cpa
from notscared.data import trace_handler as trace_handler
from notscared.converters import Converter
import zarr
import time
import matplotlib.pyplot as plt
import numpy as np
Test compression levels 1-9 and compare each level to the resulting file size.
# Accumulators filled by the conversion loop: one entry per compression level.
file_sizes, file_paths, clevels = [], [], []

converter = Converter()

# The source database is expected to sit in the current working directory.
cwd = os.getcwd()
db_file = os.path.join(cwd, '1x1x100000_r1_singlerail5_sr_ise.db')
print(db_file)
c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise.db
# Convert the database to Zarr once per compression level (1-9), rename each
# result so the levels do not overwrite one another, and record its size.
# NOTE(review): assumes db_to_zarr writes its output next to the database with
# the '.db' part of the path swapped for '.zarr' -- confirm against Converter.
default_store = db_file.replace('.db', '.zarr')
store_prefix = db_file[:-3]  # path with the trailing '.db' removed

for clevel in range(1, 10):
    compressor = zarr.Blosc(
        cname='zstd',
        clevel=clevel,
        shuffle=zarr.Blosc.SHUFFLE,
    )
    converter.db_to_zarr(
        db_file,
        'traces',
        output_columns=['samples', 'ptxt', 'tile_x', 'tile_y'],
        chunk_size=10000,
        compressor=compressor,
    )

    # Move the freshly written store to a level-specific name.
    level_store = f'{store_prefix}_zstd_{clevel}.zarr'
    os.replace(default_store, level_store)

    file_sizes.append(Converter.get_directory_size(level_store))
    file_paths.append(level_store)
    clevels.append(clevel)
Converting database to Zarr: 100%|██████████| 10/10 [01:06<00:00, 6.69s/it]
Converting database to Zarr: 100%|██████████| 10/10 [01:47<00:00, 10.77s/it]
Converting database to Zarr: 100%|██████████| 10/10 [01:58<00:00, 11.89s/it]
Converting database to Zarr: 100%|██████████| 10/10 [02:40<00:00, 16.09s/it]
Converting database to Zarr: 100%|██████████| 10/10 [03:26<00:00, 20.69s/it]
Converting database to Zarr: 100%|██████████| 10/10 [03:47<00:00, 22.76s/it]
Converting database to Zarr: 100%|██████████| 10/10 [04:02<00:00, 24.26s/it]
Converting database to Zarr: 100%|██████████| 10/10 [04:44<00:00, 28.45s/it]
Converting database to Zarr: 100%|██████████| 10/10 [08:09<00:00, 48.93s/it]
# Plot store size against compression level.
# NOTE(review): the axis label says GB; confirm that
# Converter.get_directory_size actually reports gigabytes.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(clevels, file_sizes)
ax.set_xlabel("Compression Level")
ax.set_ylabel("File Size (GB)")
plt.show()
# Benchmark: wall-clock time of an SNR run over all 16 bytes, once per
# compression level. Only snrresults.run() is inside the timed region;
# building the TraceHandler and SNR objects is not.
file_times = []
clevels = []
proj_root = os.getcwd()

for clevel in range(1, 10):
    filename = os.path.join(
        proj_root, f'1x1x100000_r1_singlerail5_sr_ise_zstd_{clevel}.zarr'
    )
    th = trace_handler.TraceHandler(
        fileName=filename,
        batchSize=5000,
        # in '1x1x100000_r1_singlerail5_sr_ise.zarr' you should use a batchStart of 75001
        batchStart=75001,
        tiles_coordinates=[],
    )
    snrresults = snr.SNR(Tracehandler=th, Bytes=list(range(16)))

    start = time.perf_counter()
    snrresults.run()
    elapsed = time.perf_counter() - start

    file_times.append(elapsed)
    clevels.append(clevel)
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_1.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_2.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_3.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_4.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_5.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_6.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_7.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_8.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_9.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
# Plot SNR run time against compression level.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(clevels, file_times)
ax.set_xlabel("Compression Level")
ax.set_ylabel("SNR Compute Time (s)")
plt.show()
# Benchmark: same SNR timing as the full run, but the TraceHandler is also
# given time_slice=[0, 10000] and trace_step=5.
# NOTE(review): presumably these restrict the sample window and subsample the
# traces -- confirm against TraceHandler. Only snrresults.run() is timed.
file_times = []
clevels = []
proj_root = os.getcwd()

for clevel in range(1, 10):
    filename = os.path.join(
        proj_root, f'1x1x100000_r1_singlerail5_sr_ise_zstd_{clevel}.zarr'
    )
    th = trace_handler.TraceHandler(
        fileName=filename,
        batchSize=5000,
        # in '1x1x100000_r1_singlerail5_sr_ise.zarr' you should use a batchStart of 75001
        batchStart=75001,
        tiles_coordinates=[],
        time_slice=[0, 10000],
        trace_step=5,
    )
    snrresults = snr.SNR(Tracehandler=th, Bytes=list(range(16)))

    start = time.perf_counter()
    snrresults.run()
    elapsed = time.perf_counter() - start

    file_times.append(elapsed)
    clevels.append(clevel)
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_1.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_2.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_3.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_4.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_5.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_6.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_7.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_8.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_9.zarr
Computing SNR: Tiles: False Bytes: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
# Plot SNR run time (sliced configuration) against compression level.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(clevels, file_times)
ax.set_xlabel("Compression Level")
ax.set_ylabel("SNR Compute Time (s)")
plt.show()
# Benchmark: wall-clock time of a CPA run over bytes 0 and 1, once per
# compression level. Only cparesults.run() is inside the timed region;
# building the TraceHandler and CPA objects is not.
file_times = []
clevels = []
proj_root = os.getcwd()

for clevel in range(1, 10):
    filename = os.path.join(
        proj_root, f'1x1x100000_r1_singlerail5_sr_ise_zstd_{clevel}.zarr'
    )
    th = trace_handler.TraceHandler(
        fileName=filename,
        batchSize=5000,
        # in '1x1x100000_r1_singlerail5_sr_ise.zarr' you should use a batchStart of 75001
        batchStart=75001,
        tiles_coordinates=[],
    )
    cparesults = cpa.CPA(Tracehandler=th, Bytes=[0, 1])

    start = time.perf_counter()
    cparesults.run()
    elapsed = time.perf_counter() - start

    file_times.append(elapsed)
    clevels.append(clevel)
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_1.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_2.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_3.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_4.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_5.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_6.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_7.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_8.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_9.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
# Plot CPA run time against compression level.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(clevels, file_times)
ax.set_xlabel("Compression Level")
ax.set_ylabel("CPA Compute Time (s)")
plt.show()
# Benchmark: same CPA timing as the full run, but the TraceHandler is also
# given time_slice=[0, 10000] and trace_step=5.
# NOTE(review): presumably these restrict the sample window and subsample the
# traces -- confirm against TraceHandler. Only cparesults.run() is timed.
file_times = []
clevels = []
proj_root = os.getcwd()

for clevel in range(1, 10):
    filename = os.path.join(
        proj_root, f'1x1x100000_r1_singlerail5_sr_ise_zstd_{clevel}.zarr'
    )
    th = trace_handler.TraceHandler(
        fileName=filename,
        batchSize=5000,
        # in '1x1x100000_r1_singlerail5_sr_ise.zarr' you should use a batchStart of 75001
        batchStart=75001,
        tiles_coordinates=[],
        time_slice=[0, 10000],
        trace_step=5,
    )
    cparesults = cpa.CPA(Tracehandler=th, Bytes=[0, 1])

    start = time.perf_counter()
    cparesults.run()
    elapsed = time.perf_counter() - start

    file_times.append(elapsed)
    clevels.append(clevel)
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_1.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_2.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_3.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_4.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_5.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_6.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_7.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_8.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
opened zarr file c:\Users\mjmey\OneDrive\Documents\Capstone\notscared2\benchmarking\1x1x100000_r1_singlerail5_sr_ise_zstd_9.zarr
Computing CPA: Tiles: False Bytes: [0, 1]
# Plot CPA run time (sliced configuration) against compression level.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(clevels, file_times)
ax.set_xlabel("Compression Level")
ax.set_ylabel("CPA Compute Time (s)")
plt.show()