nigel.stanger / docker-analytics
Constructed new, more portable spark Dockerfile
Branches: master, spark3
Parent: ee4600b
Commit: 557586ca0904842dc7ecfb2fc1ef79f09d741156
Nigel Stanger authored on 13 May 2019
Showing 3 changed files: Makefile, docker-compose.yml, spark/Dockerfile
Makefile
Before:

all: spark pyspark

spark: spark/Dockerfile
	docker build -t analytics/spark:latest -f $< .

pyspark: pyspark/Dockerfile
	docker build -t analytics/pyspark:latest -f $< .

After:

all: spark pyspark

spark: spark/Dockerfile
	docker build -t analytics/spark:latest -f $< $@

pyspark: pyspark/Dockerfile
	docker build -t analytics/pyspark:latest -f $< $@
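Here $< expands to the first prerequisite (the Dockerfile) and $@ to the target name, so the build context narrows from the repository root to the target directory. This works because the new Dockerfile no longer copies a pre-downloaded tarball out of the repository root. A minimal sketch of what make spark now runs, assuming the start-*.sh scripts sit alongside spark/Dockerfile:

# "make spark" expands the new rule to:
docker build -t analytics/spark:latest -f spark/Dockerfile spark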
docker-compose.yml
version: "3.3" services: spark-master: image: analytics/spark:latest container_name: spark-master hostname: spark-master ports: - "8080:8080" - "7077:7077" networks: - spark-network environment: - "SPARK_LOCAL_IP=spark-master" - "SPARK_MASTER_PORT=7077" - "SPARK_MASTER_WEBUI_PORT=8080" - "PYSPARK_PYTHON=/usr/local/bin/python3" command: "start-master.sh" volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata spark-worker: image: analytics/spark:latest depends_on: - spark-master ports: - 8080 networks: - spark-network environment: - "SPARK_MASTER=spark://spark-master:7077" - "SPARK_WORKER_WEBUI_PORT=8080" - "SPARKMONITOR_UI_PORT=8080" - "PYSPARK_PYTHON=/usr/local/bin/python3" command: "start-worker.sh" volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata pyspark: image: comp101/pyspark:latest ports: - "8888:8888" networks: - spark-network environment: - "PYSPARK_PYTHON=/usr/local/bin/python3" working_dir: /mnt/sparkdata volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata networks: spark-network: driver: bridge ipam: driver: default
version: "3.3" services: spark-master: image: comp101/spark:latest container_name: spark-master hostname: spark-master ports: - "8080:8080" - "7077:7077" networks: - spark-network environment: - "SPARK_LOCAL_IP=spark-master" - "SPARK_MASTER_PORT=7077" - "SPARK_MASTER_WEBUI_PORT=8080" - "PYSPARK_PYTHON=/usr/local/bin/python3" command: "start-master.sh" volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata spark-worker: image: comp101/spark:latest depends_on: - spark-master ports: - 8080 networks: - spark-network environment: - "SPARK_MASTER=spark://spark-master:7077" - "SPARK_WORKER_WEBUI_PORT=8080" - "SPARKMONITOR_UI_PORT=8080" - "PYSPARK_PYTHON=/usr/local/bin/python3" command: "start-worker.sh" volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata pyspark: image: comp101/pyspark:latest ports: - "8888:8888" networks: - spark-network environment: - "PYSPARK_PYTHON=/usr/local/bin/python3" working_dir: /mnt/sparkdata volumes: - ${HOME}/tmp/sparkdata:/mnt/sparkdata networks: spark-network: driver: bridge ipam: driver: default
spark/Dockerfile
Before (built from a pre-downloaded local tarball):

FROM python:3.6-alpine3.9

COPY spark-2.4.2-bin-hadoop2.7.tgz /usr/local/spark-2.4.2-bin-hadoop2.7.tgz

RUN apk add --no-cache \
        bash \
        openjdk8 \
        zeromq \
        tini \
    && apk add --no-cache --virtual .fetch-deps \
        tar \
    && cd /usr/local \
    && tar -xzf spark-2.4.2-bin-hadoop2.7.tgz \
    && mv spark-2.4.2-bin-hadoop2.7 spark \
    && rm spark-2.4.2-bin-hadoop2.7.tgz \
    && apk del .fetch-deps

COPY start-master.sh /usr/local/bin/start-master.sh
COPY start-worker.sh /usr/local/bin/start-worker.sh

ENV SPARK_HOME /usr/local/spark

# Spark doesn't seem to respond directly to SIGTERM as the exit status is
# for SIGKILL (137), after a pause. Presumably docker-compose down times out.
# Using tini gives immediate exit with status 143 (SIGTERM).
ENTRYPOINT ["/sbin/tini", "--"]
CMD ["bash"]

After (Spark fetched from the Apache mirror at build time, versions parameterised):

FROM python:3.6-alpine3.9

ENV SPARK_VERSION 2.4.3
ENV SPARK_INSTALL /usr/local
ENV HADOOP_VERSION 2.7

RUN apk add --no-cache \
        bash \
        openjdk8 \
        tini \
        zeromq

RUN apk add --no-cache --virtual .fetch-deps \
        curl \
        tar

RUN curl -s https://www-us.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz | tar -xz -C $SPARK_INSTALL && \
    cd $SPARK_INSTALL && ln -s spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION spark

RUN apk del .fetch-deps

COPY start-master.sh /usr/local/bin/start-master.sh
COPY start-worker.sh /usr/local/bin/start-worker.sh

ENV SPARK_HOME $SPARK_INSTALL/spark
ENV SPARK_LOCAL_IP 127.0.0.1
ENV SPARK_MASTER_PORT 7077
ENV SPARK_MASTER_WEBUI_PORT 8080
ENV PYSPARK_PYTHON /usr/local/bin/python3

# Spark doesn't seem to respond directly to SIGTERM as the exit status is
# for SIGKILL (137), after a pause. Presumably docker-compose down times out.
# Using tini gives immediate exit with status 143 (SIGTERM).
ENTRYPOINT ["/sbin/tini", "--"]
CMD ["/usr/local/bin/start-master.sh"]
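The SIGTERM comment is checkable from the host: with tini as PID 1 forwarding signals, docker stop should yield a prompt exit with status 143 (128 + SIGTERM) rather than a delayed 137 (128 + SIGKILL). A small sketch, assuming the image has been built and tagged analytics/spark:latest per the Makefile; the container name is arbitrary:

# start a throwaway master
docker run -d --name spark-sigtest analytics/spark:latest

# stop it: tini forwards SIGTERM to the Spark process
docker stop spark-sigtest

# expect 143 (SIGTERM), not 137 (SIGKILL after the default 10 s timeout)
docker inspect --format '{{.State.ExitCode}}' spark-sigtest
docker rm spark-sigtest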