# Base image: Python 3.6 on Alpine Linux 3.9 (as given by the image tag).
# Spark is layered on top of this image further down.
FROM python:3.6-alpine3.9
# Runtime packages:
#   bash    - default CMD of this image
#   openjdk8 - JVM for Spark
#   tini    - init process used as the ENTRYPOINT
#   zeromq  - NOTE(review): presumably needed by application code; confirm
# Installed before Spark is added so this layer stays cached when only the
# Spark archive changes.
RUN apk add --no-cache \
        bash \
        openjdk8 \
        tini \
        zeromq

# ADD auto-extracts a local tar archive, so the .tgz itself is never stored in
# an image layer. (A COPY followed by `rm` in a later RUN leaves the full
# archive in the earlier layer, and also needs a tar binary to unpack it.)
ADD spark-2.4.2-bin-hadoop2.7.tgz /usr/local/

# Expose the installation at the version-independent path /usr/local/spark.
# A symlink is used instead of `mv` because moving the tree in a separate
# layer would store every Spark file a second time.
RUN ln -s /usr/local/spark-2.4.2-bin-hadoop2.7 /usr/local/spark
# Helper scripts from the build context, placed in /usr/local/bin.
COPY start-master.sh start-worker.sh /usr/local/bin/

# Location of the Spark installation; key=value form (the space-separated
# `ENV key value` syntax is deprecated).
ENV SPARK_HOME=/usr/local/spark
# Run every command under tini. Without it, Spark does not seem to respond to
# SIGTERM directly: the container exits only after a pause with status 137
# (SIGKILL), presumably because `docker-compose down` times out and kills it.
# With tini as the entrypoint the container exits immediately with status 143
# (SIGTERM).
ENTRYPOINT ["/sbin/tini", "--"]
# Default command is a bash shell, passed to tini as the program to run.
# NOTE(review): presumably overridden at run time with start-master.sh or
# start-worker.sh - confirm against the compose file.
CMD ["bash"]