---
# Docker Compose stack: a Spark master, Spark worker(s), a PySpark client
# container, and a ZooKeeper + Kafka pair, all attached to one bridge network.
version: "3.3"

services:
  # Spark master. Ports 8080 and 7077 are published on the same host ports.
  spark-master:
    image: "nstanger/spark:latest"
    container_name: spark-master
    hostname: spark-master
    ports:
      - "8080:8080"
      - "7077:7077"
    networks:
      - spark-network
    environment:
      - "SPARK_HOSTNAME=spark-master"
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_WORKER_MEMORY=2g"
    command: "start-master.sh"
    volumes:
      - "${HOME}/tmp/sparkdata:/mnt/sparkdata"

  # Spark worker. Only the container port is listed below, so no fixed host
  # port is pinned for it.
  spark-worker:
    image: "nstanger/spark:latest"
    depends_on:
      - spark-master
    ports:
      - "8080"
    networks:
      - spark-network
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_WORKER_WEBUI_PORT=8080"
      - "SPARKMONITOR_UI_PORT=8080"
      - "SPARK_WORKER_MEMORY=2g"
    command: "start-worker.sh"
    volumes:
      - "${HOME}/tmp/sparkdata:/mnt/sparkdata"

  # PySpark client container. Port 8888 is published on the host
  # (presumably a notebook server -- confirm against the image).
  pyspark:
    image: "nstanger/pyspark:latest"
    depends_on:
      - spark-master
    ports:
      - "8888:8888"
    networks:
      - spark-network
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_MASTER_WEBUI_PORT=8080"
      # Kept on one line: the value is passed through verbatim and the
      # --packages list must not contain whitespace.
      - "PYSPARK_SUBMIT_ARGS=--master spark://spark-master:7077 --packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.3,graphframes:graphframes:0.8.1-spark2.4-s_2.11 pyspark-shell"
    working_dir: /home/pyspark/work
    volumes:
      - "${HOME}/tmp/sparkdata:/mnt/sparkdata"

  # ZooKeeper, referenced by the kafka service as zookeeper:2181.
  zookeeper:
    image: "bitnami/zookeeper:3"
    hostname: zookeeper
    ports:
      - "2181:2181"
    networks:
      - spark-network
    volumes:
      - "zookeeper_data:/bitnami"
    environment:
      - "ALLOW_ANONYMOUS_LOGIN=yes"

  # Kafka broker with a single PLAINTEXT listener on 9092.
  kafka:
    image: "bitnami/kafka:2"
    hostname: kafka
    depends_on:
      - zookeeper
    ports:
      - "9092:9092"
    networks:
      - spark-network
    environment:
      - "ALLOW_PLAINTEXT_LISTENER=yes"
      # NOTE(review): these two names lack the KAFKA_CFG_ prefix used by
      # KAFKA_CFG_ZOOKEEPER_CONNECT below -- confirm the image honours them.
      - "KAFKA_LISTENERS=PLAINTEXT://0.0.0.0:9092"
      # NOTE(review): the broker advertises localhost:9092, so clients in
      # other containers (e.g. pyspark) would presumably be redirected to
      # their own localhost -- confirm only host-side clients are intended.
      - "KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092"
      - "KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181"
    volumes:
      - "${HOME}/tmp/sparkdata:/mnt/sparkdata"
      - "kafka_data:/bitnami"

# Named volumes mounted at /bitnami by the zookeeper and kafka services.
volumes:
  zookeeper_data:
    driver: local
  kafka_data:
    driver: local

# Single bridge network shared by every service above.
networks:
  spark-network:
    driver: bridge
    ipam:
      driver: default