# Kafka image layered on top of the nstanger/spark base image.
# NOTE(review): the base image is untagged and therefore resolves to :latest;
# pin a specific tag or digest for reproducible builds.
FROM nstanger/spark

# Kafka release coordinates and the prefix Kafka is installed under.
ENV KAFKA_VERSION="2.2.2" \
    SCALA_VERSION="2.11" \
    APACHE_MIRROR="https://archive.apache.org/dist" \
    KAFKA_INSTALL="/usr/local"

# Download, unpack, and symlink Kafka, then remove the download tooling and
# package caches in the same layer so none of it persists in the image.
#
# * Every step is chained with && so that a failed install, download, or
#   unpack aborts the build instead of silently producing an image without
#   Kafka.
# * Kafka is unpacked under $KAFKA_INSTALL so that the resulting "kafka"
#   symlink is exactly the $KAFKA_HOME that the ENV and COPY instructions
#   below rely on.
# * `cd` is used rather than WORKDIR so the image's runtime working directory
#   is left untouched.
# * wget's progress output goes to stderr; 2>&1 folds it into the build log.
# NOTE(review): the tarball is not checksum-verified; consider checking it
# against the published .sha512 for this release.
RUN apt-get update && \
    apt-get install -y --no-install-recommends wget && \
    cd "${KAFKA_INSTALL}" && \
    wget -q --show-progress --progress=bar:force:noscroll \
        "${APACHE_MIRROR}/kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" 2>&1 && \
    tar xzf "kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" && \
    ln -s "kafka_${SCALA_VERSION}-${KAFKA_VERSION}" kafka && \
    rm -f "kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" && \
    apt-get remove -y --purge wget && \
    apt-get -y autoremove && \
    apt-get clean -y && \
    rm -rf /var/lib/apt/lists/* && \
    rm -rf /tmp/* && \
    rm -rf /var/cache/* && \
    rm -rf /root/.cache

# Declared in its own instruction because it expands KAFKA_INSTALL, which is
# only visible to instructions that follow the ENV that defines it.
ENV KAFKA_HOME="$KAFKA_INSTALL/kafka"

# Startup script and broker configuration.
COPY start-kafka.sh /usr/local/bin/
COPY server.properties $KAFKA_HOME/config/

# Run the start script under tini so that SIGTERM stops the container
# immediately (exit status 143). This was originally observed with Spark,
# which did not respond to SIGTERM directly and was SIGKILLed (137) after a
# pause, presumably when docker-compose down timed out.
# NOTE(review): assumes the base image provides /sbin/tini - confirm.
ENTRYPOINT ["/sbin/tini", "--"]
CMD ["/usr/local/bin/start-kafka.sh"]