nigel.stanger / docker-analytics
Changed SPARK_LOCAL_IP to SPARK_HOSTNAME
Branches: master, spark3
Parent: bc420cb
Commit: 3bb431596bc6627376e25ad1a34872e5e70388fb
Nigel Stanger authored on 14 May 2019
Showing 3 changed files:
- docker-compose.yml
- kafka/Dockerfile
- spark/Dockerfile
docker-compose.yml
version: "3.3" services: spark-master: image: analytics/spark:latest container_name: spark-master hostname: spark-master ports: - "8080:8080" - "7077:7077" networks: - spark-network environment: - "SPARK_HOSTNAME=spark-master" - "SPARK_MASTER=spark://spark-master:7077" command: "start-master.sh" volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata spark-worker: image: analytics/spark:latest depends_on: - spark-master ports: - 8080 networks: - spark-network environment: - "SPARK_MASTER=spark://spark-master:7077" - "SPARK_WORKER_WEBUI_PORT=8080" - "SPARKMONITOR_UI_PORT=8080" command: "start-worker.sh" volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata pyspark: image: analytics/pyspark:latest depends_on: - spark-master ports: - "8888:8888" networks: - spark-network environment: - "SPARK_MASTER=spark://spark-master:7077" - "SPARK_MASTER_WEBUI_PORT=8080" - "PYSPARK_SUBMIT_ARGS=--master spark://spark-master:7077 --packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.3 pyspark-shell" working_dir: /home/pyspark/work volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata kafka: image: analytics/kafka:latest hostname: kafka ports: - 9092 networks: - spark-network volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata networks: spark-network: driver: bridge ipam: driver: default
version: "3.3" services: spark-master: image: analytics/spark:latest container_name: spark-master hostname: spark-master ports: - "8080:8080" - "7077:7077" networks: - spark-network environment: - "SPARK_LOCAL_IP=spark-master" - "SPARK_MASTER=spark://spark-master:7077" command: "start-master.sh" volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata spark-worker: image: analytics/spark:latest depends_on: - spark-master ports: - 8080 networks: - spark-network environment: - "SPARK_MASTER=spark://spark-master:7077" - "SPARK_WORKER_WEBUI_PORT=8080" - "SPARKMONITOR_UI_PORT=8080" command: "start-worker.sh" volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata pyspark: image: analytics/pyspark:latest depends_on: - spark-master ports: - "8888:8888" networks: - spark-network environment: - "SPARK_MASTER=spark://spark-master:7077" - "SPARK_MASTER_WEBUI_PORT=8080" - "PYSPARK_SUBMIT_ARGS=--master spark://spark-master:7077 --packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.3 pyspark-shell" working_dir: /home/pyspark/work volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata kafka: image: analytics/kafka:latest hostname: kafka ports: - 9092 networks: - spark-network volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata networks: spark-network: driver: bridge ipam: driver: default
kafka/Dockerfile
```diff
 FROM analytics/spark
 
 ENV KAFKA_VERSION="2.2.0" \
     SCALA_VERSION="2.11" \
     KAFKA_INSTALL="/usr/local"
 
 RUN apk add --no-cache \
     bash \
     openjdk8 \
     tini \
     zeromq
 
 RUN apk add --no-cache --virtual .fetch-deps \
     curl \
     tar
 
 RUN curl -s https://www-us.apache.org/dist/kafka/$KAFKA_VERSION/kafka_$SCALA_VERSION-$KAFKA_VERSION.tgz | tar -xz -C $KAFKA_INSTALL && \
     cd $KAFKA_INSTALL && ln -s kafka_$SCALA_VERSION-$KAFKA_VERSION kafka
 
 RUN apk del .fetch-deps
 
 # COPY start-master.sh /usr/local/bin/start-master.sh
 # COPY start-worker.sh /usr/local/bin/start-worker.sh
 
 ENV KAFKA_HOME="$KAFKA_INSTALL/kafka"
-#   SPARK_LOCAL_IP="127.0.0.1" \
+#   SPARK_HOSTNAME="127.0.0.1" \
 #   SPARK_MASTER_PORT="7077" \
 #   SPARK_MASTER_WEBUI_PORT="8080" \
 #   PYSPARK_PYTHON="/usr/local/bin/python3"
 
 COPY start-kafka.sh /usr/local/bin/
 COPY server.properties $KAFKA_HOME/config/
 
 # Spark doesn't seem to respond directly to SIGTERM as the exit status is
 # for SIGKILL (137), after a pause. Presumably docker-compose down times out.
 # Using tini gives immediate exit with status 143 (SIGTERM).
 ENTRYPOINT ["/sbin/tini", "--"]
 CMD ["/usr/local/bin/start-kafka.sh"]
```
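The contents of start-kafka.sh are not part of this diff; a minimal sketch of what such a script might look like for Kafka 2.2.0 (which still requires ZooKeeper), assuming the bundled single-node ZooKeeper config is used:

```sh
#!/bin/bash
# Hypothetical sketch only; the real start-kafka.sh is not shown in this commit.
# Run the bundled single-node ZooKeeper in the background...
$KAFKA_HOME/bin/zookeeper-server-start.sh -daemon \
    $KAFKA_HOME/config/zookeeper.properties
# ...then exec the broker in the foreground so that tini can deliver
# SIGTERM straight to the JVM (see the comment above the ENTRYPOINT).
exec $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties
```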
spark/Dockerfile
```diff
 FROM python:3.6-alpine
 
 ENV SPARK_VERSION="2.4.3" \
     HADOOP_VERSION="2.7" \
     SPARK_INSTALL="/usr/local"
 
 RUN apk add --no-cache \
     bash \
     openjdk8 \
     tini \
     zeromq
 
 RUN apk add --no-cache --virtual .fetch-deps \
     curl \
     tar
 
 RUN curl -s https://www-us.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz | tar -xz -C $SPARK_INSTALL && \
     cd $SPARK_INSTALL && ln -s spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION spark
 
 RUN apk del .fetch-deps
 
 COPY start-master.sh start-worker.sh /usr/local/bin/
 
 ENV SPARK_HOME="$SPARK_INSTALL/spark" \
-    SPARK_LOCAL_IP="localhost" \
+    SPARK_HOSTNAME="localhost" \
     SPARK_MASTER_PORT="7077" \
     SPARK_MASTER_WEBUI_PORT="8080"
 
 COPY spark-defaults.conf $SPARK_HOME/conf
 
-ENV SPARK_MASTER="spark://$SPARK_LOCAL_IP:$SPARK_MASTER_PORT"
+ENV SPARK_MASTER="spark://$SPARK_HOSTNAME:$SPARK_MASTER_PORT"
 
 # Spark doesn't seem to respond directly to SIGTERM as the exit status is
 # for SIGKILL (137), after a pause. Presumably docker-compose down times out.
 # Using tini gives immediate exit with status 143 (SIGTERM).
 ENTRYPOINT ["/sbin/tini", "--"]
 CMD ["/usr/local/bin/start-master.sh"]
```
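The start scripts copied above are likewise not shown in this diff. A sketch of how they would plausibly use the renamed SPARK_HOSTNAME variable with Spark's standard standalone deploy classes (exec keeps the JVM in the foreground for tini):

```sh
#!/bin/bash
# start-master.sh (hypothetical sketch; the real script is not in this diff).
# SPARK_HOSTNAME, SPARK_MASTER_PORT and SPARK_MASTER_WEBUI_PORT are set in
# the Dockerfile and overridden per-service in docker-compose.yml.
exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.master.Master \
    --host "$SPARK_HOSTNAME" \
    --port "$SPARK_MASTER_PORT" \
    --webui-port "$SPARK_MASTER_WEBUI_PORT"
```

```sh
#!/bin/bash
# start-worker.sh (hypothetical sketch). SPARK_MASTER and
# SPARK_WORKER_WEBUI_PORT come from docker-compose.yml.
exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.worker.Worker \
    --webui-port "$SPARK_WORKER_WEBUI_PORT" \
    "$SPARK_MASTER"
```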