# Kafka broker image layered on top of the project's Spark base image.
#
# NOTE(review): the base image is referenced without a tag, so every build
# tracks whatever "latest" currently resolves to. Pin a tag or digest once
# one is available to make builds reproducible.
FROM analytics/spark

# Kafka release coordinates and the prefix it is unpacked under. These are
# ENV (not ARG) so they stay available to the container at runtime.
ENV KAFKA_VERSION="2.2.0" \
    SCALA_VERSION="2.12" \
    KAFKA_INSTALL="/usr/local"

# Runtime dependencies (one package per line, sorted, for clean diffs).
RUN apk add --no-cache \
        bash \
        openjdk8 \
        tini \
        zeromq

# Fetch and unpack Kafka in a single layer:
#  - curl/tar are installed as a virtual package and removed in the SAME
#    layer; deleting them in a later RUN would not shrink the image.
#  - The tarball is downloaded to a file with `curl -f` instead of being
#    piped into tar, so an HTTP error or truncated download fails the build
#    rather than being masked by the pipeline's exit status.
#  - Superseded Kafka releases are served from archive.apache.org; the
#    regular dist mirrors only carry current releases.
#  - The symlink target is relative, giving a stable $KAFKA_INSTALL/kafka
#    path that does not embed the version.
# NOTE(review): the download is not checksum-verified — consider checking it
# against the release's published .sha512.
RUN apk add --no-cache --virtual .fetch-deps \
        curl \
        tar \
    && curl -fsSL -o /tmp/kafka.tgz \
        "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" \
    && tar -xzf /tmp/kafka.tgz -C "${KAFKA_INSTALL}" \
    && rm -f /tmp/kafka.tgz \
    && ln -s "kafka_${SCALA_VERSION}-${KAFKA_VERSION}" "${KAFKA_INSTALL}/kafka" \
    && apk del .fetch-deps

# Version-independent Kafka home (the symlink created above).
ENV KAFKA_HOME="$KAFKA_INSTALL/kafka"

# Launcher script and broker configuration supplied by the build context.
COPY start-kafka.sh /usr/local/bin/start-kafka.sh
COPY server.properties $KAFKA_HOME/config/server.properties

# Run under tini so SIGTERM is forwarded to the start script. This mirrors the
# Spark image, where without an init the container only exited with the
# SIGKILL status (137) after a pause (presumably docker-compose down timing
# out), while tini gave an immediate exit with status 143 (SIGTERM).
# NOTE(review): no USER is set, so the broker runs as root — confirm which
# paths start-kafka.sh and server.properties write to before dropping
# privileges.
ENTRYPOINT ["/sbin/tini", "--"]
CMD ["/usr/local/bin/start-kafka.sh"]