# docker-analytics / kafka / Dockerfile
# Base image. No tag is given, so Docker resolves it to :latest.
# NOTE(review): consider pinning a specific tag or digest for reproducible builds.
FROM nstanger/spark

# Kafka build settings:
#   APACHE_MIRROR  - base URL the release tarball is fetched from
#   KAFKA_INSTALL  - prefix from which KAFKA_HOME is derived
#   KAFKA_VERSION  - Kafka release to fetch
#   SCALA_VERSION  - Scala build of that release (part of the tarball name)
ENV APACHE_MIRROR=https://archive.apache.org/dist \
    KAFKA_INSTALL=/usr/local \
    KAFKA_VERSION=2.2.2 \
    SCALA_VERSION=2.11

# Install Kafka under $KAFKA_INSTALL in a single layer. wget is installed only
# for the download and purged again afterwards. Every step is chained with &&
# so that a failed package install or download fails the build instead of
# silently producing an image without Kafka.
RUN apt-get update && \
    apt-get install -y --no-install-recommends wget && \
    # Download the release tarball to /tmp, unpack it under $KAFKA_INSTALL (the
    # prefix KAFKA_HOME is derived from), and add a relative "kafka" symlink
    # pointing at the versioned directory.
    wget -q --show-progress --progress=bar:force:noscroll \
        -O "/tmp/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz" \
        "$APACHE_MIRROR/kafka/$KAFKA_VERSION/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz" 2>&1 && \
    tar xzf "/tmp/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz" -C "$KAFKA_INSTALL" && \
    ln -s "kafka_$SCALA_VERSION-$KAFKA_VERSION" "$KAFKA_INSTALL/kafka" && \
    rm -f "/tmp/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz" && \
    # Remove the download tool and all package/temporary caches in this same
    # layer so they do not add to the image size.
    apt-get remove -y --purge wget && \
    apt-get autoremove -y && \
    apt-get clean -y && \
    rm -rf /var/lib/apt/lists/* && \
    rm -rf /tmp/* && \
    rm -rf /var/cache/* && \
    rm -rf /root/.cache

# Disabled COPY steps for Spark start scripts, kept for reference:
# COPY start-master.sh /usr/local/bin/start-master.sh
# COPY start-worker.sh /usr/local/bin/start-worker.sh

# KAFKA_HOME is the "kafka" entry under the install prefix.
ENV KAFKA_HOME=${KAFKA_INSTALL}/kafka
# Disabled Spark-related settings, kept for reference:
#     SPARK_HOSTNAME="127.0.0.1" \
#     SPARK_MASTER_PORT="7077" \
#     SPARK_MASTER_WEBUI_PORT="8080" \
#     PYSPARK_PYTHON="/usr/local/bin/python3"

# Add the container start script and place server.properties in Kafka's
# config directory.
COPY start-kafka.sh /usr/local/bin/start-kafka.sh
COPY server.properties ${KAFKA_HOME}/config/server.properties

# Run the start script under tini (expected at /sbin/tini; it is not installed
# in this file, so presumably the base image provides it -- TODO confirm).
#
# Rationale as originally recorded: without an init process the service does
# not seem to respond directly to SIGTERM; the container exits with status 137
# (SIGKILL) after a pause, presumably because `docker-compose down` times out.
# With tini the container exits immediately with status 143 (SIGTERM).
# NOTE(review): the original note names Spark rather than Kafka; presumably the
# same behaviour applies to this image -- confirm.
ENTRYPOINT ["/sbin/tini", "--"]

# Default command, passed to tini as the process to run.
CMD ["/usr/local/bin/start-kafka.sh"]