# docker-analytics / docker-compose.yml
# Spark analytics stack: a standalone Spark master, one worker, a
# PySpark client container, and a Kafka broker, all joined to a
# single dedicated bridge network.
version: "3.3"
services:
  # Spark standalone master: web UI on 8080, cluster RPC on 7077.
  spark-master:
    image: analytics/spark:latest
    container_name: spark-master
    # Fixed hostname so workers/clients can resolve spark://spark-master:7077.
    hostname: spark-master
    ports:
      - "8080:8080"   # master web UI
      - "7077:7077"   # master RPC endpoint
    networks:
      - spark-network
    environment:
      - "SPARK_HOSTNAME=spark-master"
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_WORKER_MEMORY=2g"
    command: "start-master.sh"
    volumes:
      # Shared host scratch/data directory mounted into every service.
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata
  # Spark worker; no container_name/hostname so it can be scaled out.
  spark-worker:
    image: analytics/spark:latest
    depends_on:
      - spark-master
    ports:
      # Container port only (quoted so YAML keeps it a plain string,
      # matching the quoting style of every other port entry in this
      # file); Docker publishes it on an ephemeral host port, avoiding
      # a clash with the master's fixed 8080 mapping.
      - "8080"
    networks:
      - spark-network
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_WORKER_WEBUI_PORT=8080"
      - "SPARKMONITOR_UI_PORT=8080"
      - "SPARK_WORKER_MEMORY=2g"
    command: "start-worker.sh"
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata
  # Jupyter/PySpark client that submits jobs to the master.
  pyspark:
    image: analytics/pyspark:latest
    depends_on:
      - spark-master
      # NOTE(review): PYSPARK_SUBMIT_ARGS below pulls the Kafka SQL
      # connector, so jobs likely read from the kafka service; consider
      # adding kafka here if start order matters — confirm with the
      # notebooks before changing.
    ports:
      - "8888:8888"   # Jupyter notebook UI
    networks:
      - spark-network
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_MASTER_WEBUI_PORT=8080"
      # Fetches the Kafka connector matching Spark 2.4.3 / Scala 2.11.
      - "PYSPARK_SUBMIT_ARGS=--master spark://spark-master:7077 --packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.3 pyspark-shell"
    working_dir: /home/pyspark/work
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata
  # Single Kafka broker; 9092 published on an ephemeral host port
  # (quoted for the same string-consistency reason as the worker port).
  kafka:
    image: analytics/kafka:latest
    hostname: kafka
    ports:
      - "9092"
    networks:
      - spark-network
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata
networks:
  spark-network:
    driver: bridge
    ipam:
      driver: default