# Source: emflow-artifact / artifact / download.py
import argparse
from pathlib import Path
import kagglehub
from kagglehub import KaggleDatasetAdapter
import kaggle

def parse_args() -> argparse.Namespace:
    """Build the command line interface and parse the process arguments.

    Returns:
        The parsed namespace. Its ``program`` attribute is a list holding
        one or more of the accepted program names.
    """
    program_choices = [
        "checksum",
        "syringe-arduino",
        "syringe-pico",
        "servo-arduino",
        "servo-pico",
        "soldering",
        "home-iot",
        "distance",
        "all",
    ]

    cli = argparse.ArgumentParser(
        description="Downloads the data for the specified program(s)."
    )

    # ``nargs="+"`` lets a single invocation request several programs.
    cli.add_argument(
        "-c",
        "--program",
        type=str,
        nargs="+",
        choices=program_choices,
        required=True,
        help="Which program to download data",
    )

    namespace = cli.parse_args()
    return namespace

def download(slug: str) -> None:
    """Download the Kaggle dataset identified by ``slug`` into ``data/``.

    Args:
        slug: Kaggle dataset identifier in ``owner/dataset`` form,
            e.g. ``"emflow2026/em-data"``.
    """
    target_dir = "data/"
    # Ask the Kaggle client to unzip the download and to keep its
    # output enabled (``quiet=False``).
    kaggle.api.dataset_download_files(
        slug,
        path=target_dir,
        unzip=True,
        quiet=False,
    )

# Kaggle dataset slug for each individual ``--program`` choice. The order
# here is the order in which ``all`` downloads the datasets.
_DATASET_SLUGS = {
    "checksum": "emflow2026/em-data",
    "home-iot": "emflow2026/em-data-coffee",
    "syringe-arduino": "emflow2026/em-data-syringe-arduino",
    "syringe-pico": "emflow2026/em-data-syringe-pico",
    "servo-arduino": "emflow2026/em-data-servo-arduino",
    "servo-pico": "emflow2026/em-data-servo-pico",
    "soldering": "emflow2026/em-data-soldering",
    "distance": "emflow2026/em-data-distance",
}


def _resolve_slugs(programs: list[str]) -> list[str]:
    """Expand program names into an ordered, duplicate-free list of slugs.

    ``"all"`` expands to every dataset in ``_DATASET_SLUGS``. Names that
    are not in the table are ignored.
    """
    # A dict is used as an ordered set so each dataset is fetched once,
    # even when selections overlap (e.g. ``all`` together with ``checksum``).
    selected: dict[str, None] = {}
    for program in programs:
        names = _DATASET_SLUGS if program == "all" else (program,)
        for name in names:
            slug = _DATASET_SLUGS.get(name)
            if slug is not None:
                selected[slug] = None
    return list(selected)


def main():
    """Download the dataset(s) for every program named on the command line.

    ``all`` fetches the dataset of every known program, not just a subset,
    and a dataset requested more than once is downloaded only once.
    """
    args = parse_args()

    for slug in _resolve_slugs(args.program):
        download(slug)
                

# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()