# docker-analytics / docker-compose.yml
# Compose file format version. Kept quoted so it is read as the string "3.3"
# rather than as a floating-point number.
version: "3.3"

# Service definitions; each one below joins spark-network.
services:
  # Spark master node. The other Spark services address it as
  # spark://spark-master:7077; ports 8080 and 7077 are also published on the
  # host under the same numbers.
  spark-master:
    image: 'nstanger/spark:latest'
    container_name: spark-master
    hostname: spark-master
    command: start-master.sh
    networks:
      - spark-network
    ports:
      - '8080:8080'
      - '7077:7077'
    environment:
      SPARK_HOSTNAME: 'spark-master'
      SPARK_MASTER: 'spark://spark-master:7077'
      SPARK_WORKER_MEMORY: '2g'
    volumes:
      # Host scratch directory, also mounted by spark-worker, pyspark and
      # kafka at the same container path.
      - '${HOME}/tmp/sparkdata:/mnt/sparkdata'
  # Spark worker node, started after spark-master. It has no fixed
  # container_name and lists only the container port, so Docker assigns the
  # host port - presumably so the service can be scaled to several replicas
  # (TODO confirm).
  spark-worker:
    image: 'nstanger/spark:latest'
    depends_on:
      - spark-master
    command: start-worker.sh
    networks:
      - spark-network
    ports:
      # Container port only; the host side is chosen by Docker.
      - '8080'
    environment:
      SPARK_MASTER: 'spark://spark-master:7077'
      SPARK_WORKER_WEBUI_PORT: '8080'
      SPARKMONITOR_UI_PORT: '8080'
      SPARK_WORKER_MEMORY: '2g'
    volumes:
      - '${HOME}/tmp/sparkdata:/mnt/sparkdata'
  # PySpark client container, started after spark-master. Publishes port 8888
  # on the host and starts in /home/pyspark/work.
  pyspark:
    image: 'nstanger/pyspark:latest'
    depends_on:
      - spark-master
    working_dir: /home/pyspark/work
    networks:
      - spark-network
    ports:
      - '8888:8888'
    environment:
      SPARK_MASTER: 'spark://spark-master:7077'
      SPARK_MASTER_WEBUI_PORT: '8080'
      # Folded scalar: the three lines below are joined with single spaces
      # into one argument string. The --packages list must stay on one line
      # because its entries are comma-separated without spaces.
      PYSPARK_SUBMIT_ARGS: >-
        --master spark://spark-master:7077
        --packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.3,graphframes:graphframes:0.8.1-spark2.4-s_2.11
        pyspark-shell
    volumes:
      - '${HOME}/tmp/sparkdata:/mnt/sparkdata'
  # ZooKeeper instance that the kafka service connects to at zookeeper:2181.
  # Its /bitnami directory lives in the named volume zookeeper_data.
  zookeeper:
    image: 'bitnami/zookeeper:3'
    hostname: zookeeper
    networks:
      - spark-network
    ports:
      - '2181:2181'
    environment:
      # Quoted so the value stays the string "yes" instead of a YAML boolean.
      ALLOW_ANONYMOUS_LOGIN: 'yes'
    volumes:
      - 'zookeeper_data:/bitnami'
  # Single Kafka broker, started after zookeeper. Port 9092 is published on
  # the host; /bitnami lives in the named volume kafka_data.
  kafka:
    image: 'bitnami/kafka:2'
    hostname: kafka
    depends_on:
      - zookeeper
    ports:
      - '9092:9092'
    networks:
      - spark-network
    environment:
      - 'ALLOW_PLAINTEXT_LISTENER=yes'
      # Broker properties are passed with the image's KAFKA_CFG_ prefix. All
      # three settings now use it consistently; previously the two listener
      # settings used the un-prefixed spelling while ZOOKEEPER_CONNECT did not.
      - 'KAFKA_CFG_LISTENERS=PLAINTEXT://0.0.0.0:9092'
      # NOTE(review): the broker advertises localhost:9092, which matches the
      # port published on the host. Clients inside spark-network (e.g. the
      # pyspark service) would be directed to their own localhost - confirm
      # whether a separate internal listener is needed.
      - 'KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092'
      - 'KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181'
    volumes:
      - '${HOME}/tmp/sparkdata:/mnt/sparkdata'
      - 'kafka_data:/bitnami'

# Named volumes mounted at /bitnami by the kafka and zookeeper services.
volumes:
  kafka_data:
    driver: local
  zookeeper_data:
    driver: local

# Bridge network joined by every service in this file.
networks:
  spark-network:
    ipam:
      driver: default
    driver: bridge