# Base image for this Kafka image.
# NOTE(review): no tag is given, so this resolves to :latest; consider pinning
# a specific tag or digest once the intended version is known.
FROM analytics/spark

# Kafka release to install (Kafka version and the Scala version it was built
# for) and the directory under which the release is unpacked.
ENV KAFKA_VERSION="2.2.0" \
    SCALA_VERSION="2.12" \
    KAFKA_INSTALL="/usr/local"
# Packages kept in the final image. tini is the init process used by the
# ENTRYPOINT of this image.
# NOTE(review): bash and openjdk8 are presumably needed by the Kafka scripts
# and the JVM; the consumer of zeromq is not visible in this file — confirm.
RUN apk add --no-cache \
        bash \
        openjdk8 \
        tini \
        zeromq
# Download and unpack the Kafka release under $KAFKA_INSTALL, then expose it at
# the version-independent path $KAFKA_INSTALL/kafka through a relative symlink.
#
# Everything happens in ONE RUN instruction on purpose: the fetch-only tools
# (curl, tar) are installed, used and removed within the same layer. Deleting
# them in a later instruction would leave them stored in an earlier layer and
# would not make the image any smaller.
#
# The tarball is saved to a file before extraction instead of being piped into
# tar: with the default /bin/sh a pipeline reports only the last command's
# status, which would hide a failed download. curl -f turns HTTP errors into a
# non-zero exit, and -L follows redirects.
#
# Old releases such as this one are served from archive.apache.org; the former
# www-us.apache.org mirror is no longer a valid download location.
# TODO: verify the tarball against the SHA-512 checksum published next to it.
RUN apk add --no-cache --virtual .fetch-deps \
        curl \
        tar \
    && curl -fsSL -o /tmp/kafka.tgz \
        "https://archive.apache.org/dist/kafka/${KAFKA_VERSION}/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" \
    && tar -xzf /tmp/kafka.tgz -C "${KAFKA_INSTALL}" \
    && rm /tmp/kafka.tgz \
    && ln -s "kafka_${SCALA_VERSION}-${KAFKA_VERSION}" "${KAFKA_INSTALL}/kafka" \
    && apk del .fetch-deps
# NOTE(review): the commented-out start-master/start-worker COPY lines and the
# SPARK_* settings in this section look like leftovers from a Spark Dockerfile;
# confirm they are unused before deleting them.
# COPY start-master.sh /usr/local/bin/start-master.sh
# COPY start-worker.sh /usr/local/bin/start-worker.sh

# Version-independent Kafka directory, derived from KAFKA_INSTALL.
ENV KAFKA_HOME="${KAFKA_INSTALL}/kafka"
# SPARK_LOCAL_IP="127.0.0.1" \
# SPARK_MASTER_PORT="7077" \
# SPARK_MASTER_WEBUI_PORT="8080" \
# PYSPARK_PYTHON="/usr/local/bin/python3"

# Start script (the default CMD of this image) and the broker configuration.
COPY start-kafka.sh /usr/local/bin/start-kafka.sh
COPY server.properties ${KAFKA_HOME}/config/server.properties
# Run the start script as a child of tini rather than directly as PID 1.
# Original observation (written about Spark): the service doesn't seem to
# respond directly to SIGTERM, as the exit status is that of SIGKILL (137)
# after a pause; presumably `docker-compose down` times out. Using tini gives
# an immediate exit with status 143 (SIGTERM).
# NOTE(review): this note refers to Spark, while this image starts Kafka;
# confirm the same behavior applies here.
ENTRYPOINT ["/sbin/tini", "--"]
CMD ["/usr/local/bin/start-kafka.sh"]