nigel.stanger / docker-analytics
Changed image name to Docker Hub
commit a98b2d15c700402d991362c41d98f786c546263f (1 parent: f93eab3)
Branches: master, spark3
Nigel Stanger authored on 19 Sep 2020
Showing 3 changed files:
Makefile
kafka/Dockerfile
pyspark/Dockerfile
Makefile
Version using the nstanger/* (Docker Hub) image names:

all: spark pyspark kafka

spark: spark/Dockerfile spark/start-master.sh spark/start-worker.sh
	docker build $(BUILD_OPTS) -t nstanger/spark:latest -f $< $@

# Rebuild both the following if spark changes!
pyspark: pyspark/Dockerfile pyspark/kernel.json pyspark/pyspark-kernel.sh
	docker build $(BUILD_OPTS) -t nstanger/pyspark:latest -f $< $@

kafka: kafka/Dockerfile kafka/start-kafka.sh
	docker build $(BUILD_OPTS) -t nstanger/kafka:latest -f $< $@

Version using the analytics/* image names:

all: spark pyspark kafka

spark: spark/Dockerfile spark/start-master.sh spark/start-worker.sh
	docker build $(BUILD_OPTS) -t analytics/spark:latest -f $< $@

# Rebuild both the following if spark changes!
pyspark: pyspark/Dockerfile pyspark/kernel.json pyspark/pyspark-kernel.sh
	docker build $(BUILD_OPTS) -t analytics/pyspark:latest -f $< $@

kafka: kafka/Dockerfile kafka/start-kafka.sh
	docker build $(BUILD_OPTS) -t analytics/kafka:latest -f $< $@
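For reference, the targets above are thin wrappers around docker build, and BUILD_OPTS is passed straight through to it. A minimal invocation might look like the following sketch (the --no-cache flag is only one example of what BUILD_OPTS could carry; it is not mandated anywhere in the Makefile):

$ make                                  # build the spark, pyspark, and kafka images
$ make BUILD_OPTS="--no-cache" spark    # force a clean rebuild of just the spark image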
kafka/Dockerfile
Version using the nstanger/* (Docker Hub) image names:

FROM nstanger/spark

ENV KAFKA_VERSION="2.2.0" \
    SCALA_VERSION="2.11" \
    KAFKA_INSTALL="/usr/local"

RUN apk add --no-cache \
    bash \
    openjdk8 \
    tini \
    zeromq

RUN apk add --no-cache --virtual .fetch-deps \
    curl \
    tar

RUN curl -s https://www-us.apache.org/dist/kafka/$KAFKA_VERSION/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz | tar -xz -C $KAFKA_INSTALL && \
    cd $KAFKA_INSTALL && ln -s kafka_$SCALA_VERSION-$KAFKA_VERSION kafka

RUN apk del .fetch-deps

# COPY start-master.sh /usr/local/bin/start-master.sh
# COPY start-worker.sh /usr/local/bin/start-worker.sh

ENV KAFKA_HOME="$KAFKA_INSTALL/kafka"
    # SPARK_HOSTNAME="127.0.0.1" \
    # SPARK_MASTER_PORT="7077" \
    # SPARK_MASTER_WEBUI_PORT="8080" \
    # PYSPARK_PYTHON="/usr/local/bin/python3"

COPY start-kafka.sh /usr/local/bin/
COPY server.properties $KAFKA_HOME/config/

# Spark doesn't seem to respond directly to SIGTERM as the exit status is
# for SIGKILL (137), after a pause. Presumably docker-compose down times out.
# Using tini gives immediate exit with status 143 (SIGTERM).
ENTRYPOINT ["/sbin/tini", "--"]
CMD ["/usr/local/bin/start-kafka.sh"]

Version using the analytics/* image names:

FROM analytics/spark

ENV KAFKA_VERSION="2.2.0" \
    SCALA_VERSION="2.11" \
    KAFKA_INSTALL="/usr/local"

RUN apk add --no-cache \
    bash \
    openjdk8 \
    tini \
    zeromq

RUN apk add --no-cache --virtual .fetch-deps \
    curl \
    tar

RUN curl -s https://www-us.apache.org/dist/kafka/$KAFKA_VERSION/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz | tar -xz -C $KAFKA_INSTALL && \
    cd $KAFKA_INSTALL && ln -s kafka_$SCALA_VERSION-$KAFKA_VERSION kafka

RUN apk del .fetch-deps

# COPY start-master.sh /usr/local/bin/start-master.sh
# COPY start-worker.sh /usr/local/bin/start-worker.sh

ENV KAFKA_HOME="$KAFKA_INSTALL/kafka"
    # SPARK_HOSTNAME="127.0.0.1" \
    # SPARK_MASTER_PORT="7077" \
    # SPARK_MASTER_WEBUI_PORT="8080" \
    # PYSPARK_PYTHON="/usr/local/bin/python3"

COPY start-kafka.sh /usr/local/bin/
COPY server.properties $KAFKA_HOME/config/

# Spark doesn't seem to respond directly to SIGTERM as the exit status is
# for SIGKILL (137), after a pause. Presumably docker-compose down times out.
# Using tini gives immediate exit with status 143 (SIGTERM).
ENTRYPOINT ["/sbin/tini", "--"]
CMD ["/usr/local/bin/start-kafka.sh"]
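The tini/SIGTERM behaviour described in the Dockerfile comment can be checked from the shell. This is only a rough sketch, not part of the repository: it assumes the nstanger/kafka image has already been built, that the bundled server.properties (not shown here) keeps Kafka's default listener port 9092, and that start-kafka.sh brings up whatever the broker needs; in the repository the containers appear to be run under docker-compose rather than standalone.

$ docker run -d --name kafka-test -p 9092:9092 nstanger/kafka
$ docker stop kafka-test
$ docker inspect --format '{{.State.ExitCode}}' kafka-test   # expect 143 (SIGTERM via tini) rather than 137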
pyspark/Dockerfile
Version using the nstanger/* (Docker Hub) image names:

# Don't use the official jupyter/pyspark-notebook image because it's massive!
FROM nstanger/spark

ARG NB_USER="pyspark"
ARG NB_UID="1000"
ARG NB_GID="1000"

USER root

RUN adduser -D -u $NB_UID $NB_USER

RUN apk add --no-cache --virtual .build-deps \
    build-base \
    zeromq-dev \
    python3-dev \
    py3-zmq

RUN pip install --upgrade \
    pip \
    && pip install \
    # Tornado 6 breaks sparkmonitor
    tornado==5.1 \
    jupyter \
    kafka-python \
    pyspark \
    sparkmonitor \
    tini

RUN apk del .build-deps

ENV PYSPARK_PYTHON="/usr/local/bin/python3"

ENV KERNEL_NAME="PySpark" \
    KERNELS_TEMPLATE_PATH="/tmp" \
    KERNELS_DIR_PATH="/home/$NB_USER/.local/share/jupyter/kernels" \
    PYTHONPATH="$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip" \
    PYSPARK_DRIVER_PYTHON="$PYSPARK_PYTHON" \
    PYSPARK_SUBMIT_ARGS="--master $SPARK_MASTER pyspark-shell" \
    PYTHONSTARTUP="$SPARK_HOME/python/pyspark/shell.py"

# ENV SPARKMONITOR_UI_PORT 8080

USER $NB_UID

RUN ipython kernel install --user --name $KERNEL_NAME

# kernel.json is based on the template from <https://github.com/Anchormen/pyspark-jupyter-kernels>.
COPY kernel.json pyspark-kernel.sh $KERNELS_DIR_PATH/$KERNEL_NAME/

RUN jupyter nbextension install sparkmonitor --py --user --symlink \
    && jupyter nbextension enable sparkmonitor --py --user \
    && jupyter serverextension enable --py --user sparkmonitor \
    && ipython profile create \
    && echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py

RUN mkdir -p /home/$NB_USER/work
WORKDIR /home/$NB_USER/work

ENTRYPOINT ["/sbin/tini", "--"]

# pyspark-kernel.sh will automatically set up the PySpark context when
# the kernel is loaded.
CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888"]

# debugging
# CMD ["bash"]

Version using the analytics/* image names:

# Don't use the official jupyter/pyspark-notebook image because it's massive!
FROM analytics/spark

ARG NB_USER="pyspark"
ARG NB_UID="1000"
ARG NB_GID="1000"

USER root

RUN adduser -D -u $NB_UID $NB_USER

RUN apk add --no-cache --virtual .build-deps \
    build-base \
    zeromq-dev \
    python3-dev \
    py3-zmq

RUN pip install --upgrade \
    pip \
    && pip install \
    # Tornado 6 breaks sparkmonitor
    tornado==5.1 \
    jupyter \
    kafka-python \
    pyspark \
    sparkmonitor \
    tini

RUN apk del .build-deps

ENV PYSPARK_PYTHON="/usr/local/bin/python3"

ENV KERNEL_NAME="PySpark" \
    KERNELS_TEMPLATE_PATH="/tmp" \
    KERNELS_DIR_PATH="/home/$NB_USER/.local/share/jupyter/kernels" \
    PYTHONPATH="$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip" \
    PYSPARK_DRIVER_PYTHON="$PYSPARK_PYTHON" \
    PYSPARK_SUBMIT_ARGS="--master $SPARK_MASTER pyspark-shell" \
    PYTHONSTARTUP="$SPARK_HOME/python/pyspark/shell.py"

# ENV SPARKMONITOR_UI_PORT 8080

USER $NB_UID

RUN ipython kernel install --user --name $KERNEL_NAME

# kernel.json is based on the template from <https://github.com/Anchormen/pyspark-jupyter-kernels>.
COPY kernel.json pyspark-kernel.sh $KERNELS_DIR_PATH/$KERNEL_NAME/

RUN jupyter nbextension install sparkmonitor --py --user --symlink \
    && jupyter nbextension enable sparkmonitor --py --user \
    && jupyter serverextension enable --py --user sparkmonitor \
    && ipython profile create \
    && echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py

RUN mkdir -p /home/$NB_USER/work
WORKDIR /home/$NB_USER/work

ENTRYPOINT ["/sbin/tini", "--"]

# pyspark-kernel.sh will automatically set up the PySpark context when
# the kernel is loaded.
CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888"]

# debugging
# CMD ["bash"]
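As a rough usage sketch (not from the repository): the CMD above starts a Jupyter notebook server on port 8888, and PYSPARK_SUBMIT_ARGS points the PySpark shell at $SPARK_MASTER, which is presumably defined in the spark base image. The spark://spark-master:7077 value below is just a placeholder for wherever the Spark master actually runs.

$ docker run --rm -p 8888:8888 \
      -e SPARK_MASTER=spark://spark-master:7077 \
      nstanger/pyspark
# then browse to http://localhost:8888, using the token printed in the container log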