# PAC-tree / docker / spark-worker-1 / Dockerfile
# Base image: the bitnami/spark image, pinned to the 3.4.0 tag.
FROM bitnami/spark:3.4.0

# Install the system Python interpreter and pip.
# apt-get needs root, so switch user first. --no-install-recommends keeps apt
# from pulling in optional "recommended" packages, and the package lists are
# removed in the same layer so they never add to the image size.
USER root
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Python libraries added on top of the base image (listed alphabetically).
# --no-cache-dir stops pip from keeping its download cache inside the layer.
# NOTE(review): only pyspark is version-pinned; numpy, pandas and
# psycopg2-binary resolve to whatever is current at build time.
RUN pip3 install --no-cache-dir \
        numpy \
        pandas \
        psycopg2-binary \
        pyspark==3.4.0

# Drop root: later instructions and the running container use UID 1001.
# NOTE(review): presumably the base image's default non-root user — confirm.
USER 1001

# Runtime configuration baked into the image as environment variables:
# worker mode, the master URL to connect to, the worker's memory and core
# budget, and the RPC-authentication / RPC-encryption / local-storage-encryption
# / SSL switches, all of which are set to "no".
# NOTE(review): presumably read by the base image's startup scripts — confirm
# against the base image's documentation.
ENV SPARK_MODE=worker \
    SPARK_MASTER_URL=spark://master:7077 \
    SPARK_WORKER_MEMORY=1G \
    SPARK_WORKER_CORES=1 \
    SPARK_RPC_AUTHENTICATION_ENABLED=no \
    SPARK_RPC_ENCRYPTION_ENABLED=no \
    SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no \
    SPARK_SSL_ENABLED=no

# Declare the container port this image listens on (TCP).
# EXPOSE is metadata only; it does not publish the port to the host.
# NOTE(review): 8081 is presumably the Spark worker web UI port — confirm.
EXPOSE 8081/tcp