FROM python:3.6-alpine3.9

ENV SPARK_VERSION 2.4.3
ENV SPARK_INSTALL /usr/local
ENV HADOOP_VERSION 2.7

RUN apk add --no-cache \
    bash \
    openjdk8 \
    tini \
    zeromq

RUN apk add --no-cache --virtual .fetch-deps \
    curl \
    tar

RUN curl -s https://www-us.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz | tar -xz -C $SPARK_INSTALL && \
    cd $SPARK_INSTALL && ln -s spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION spark

RUN apk del .fetch-deps

COPY start-master.sh /usr/local/bin/start-master.sh
COPY start-worker.sh /usr/local/bin/start-worker.sh

ENV SPARK_HOME $SPARK_INSTALL/spark
ENV SPARK_LOCAL_IP 127.0.0.1
ENV SPARK_MASTER_PORT 7077
ENV SPARK_MASTER_WEBUI_PORT 8080
ENV PYSPARK_PYTHON /usr/local/bin/python3

# Spark doesn't seem to respond directly to SIGTERM: the exit status is the
# one for SIGKILL (137), after a pause, presumably because docker-compose down
# times out. Using tini gives an immediate exit with status 143 (SIGTERM).
ENTRYPOINT ["/sbin/tini", "--"]

CMD ["/usr/local/bin/start-master.sh"]
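
The start-master.sh and start-worker.sh scripts copied into the image are not shown here. As a rough sketch only (not the scripts actually used above), each one would launch the corresponding Spark standalone daemon in the foreground via spark-class, so the JVM stays attached to the container and tini can forward SIGTERM to it. The SPARK_MASTER_HOST variable and its spark-master default in the worker sketch are illustrative assumptions, e.g. a docker-compose service name.

#!/bin/bash
# start-master.sh (sketch): run the standalone master in the foreground so
# tini (PID 1) forwards signals straight to the Spark JVM.
exec "$SPARK_HOME/bin/spark-class" org.apache.spark.deploy.master.Master \
    --host "$SPARK_LOCAL_IP" \
    --port "$SPARK_MASTER_PORT" \
    --webui-port "$SPARK_MASTER_WEBUI_PORT"

#!/bin/bash
# start-worker.sh (sketch): run a standalone worker in the foreground,
# pointing it at the master URL. SPARK_MASTER_HOST and its "spark-master"
# default are assumptions for illustration.
exec "$SPARK_HOME/bin/spark-class" org.apache.spark.deploy.worker.Worker \
    "spark://${SPARK_MASTER_HOST:-spark-master}:$SPARK_MASTER_PORT"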