docker-analytics / spark / Dockerfile
FROM python:3.6-alpine3.9

# The Spark distribution tarball must already be present in the build context.
COPY spark-2.4.2-bin-hadoop2.7.tgz /usr/local/spark-2.4.2-bin-hadoop2.7.tgz

# Install the runtime packages (bash, OpenJDK 8, ZeroMQ, tini), unpack the
# Spark distribution, then remove the tarball and the build-only tar package.
RUN apk add --no-cache \
        bash \
        openjdk8 \
        zeromq \
        tini \
    && apk add --no-cache --virtual .fetch-deps \
        tar \
    && cd /usr/local \
    && tar -xzf spark-2.4.2-bin-hadoop2.7.tgz \
    && mv spark-2.4.2-bin-hadoop2.7 spark \
    && rm spark-2.4.2-bin-hadoop2.7.tgz \
    && apk del .fetch-deps

COPY start-master.sh /usr/local/bin/start-master.sh
COPY start-worker.sh /usr/local/bin/start-worker.sh

ENV SPARK_HOME /usr/local/spark

# Spark doesn't seem to respond to SIGTERM on its own: the container exits
# with status 137 (SIGKILL) after a pause, presumably because docker-compose
# down times out and kills it. Running under tini gives an immediate exit
# with status 143 (SIGTERM).
ENTRYPOINT ["/sbin/tini", "--"]

CMD ["bash"]