GitBucket
4.21.2
Toggle navigation
Snippets
Sign in
Files
Branches
2
Releases
Issues
3
Pull requests
Labels
Priorities
Milestones
Wiki
Forks
nigel.stanger
/
docker-analytics
Browse code
Increased spark worker memory
(to no useful effect 😖)
master
spark3
1 parent
3bb4315
commit
ff62864a587c16622e06ab6485995e87e9e5ab0f
Nigel Stanger
authored
on 15 May 2019
Patch
Showing
2 changed files
docker-compose.yml
spark/spark-defaults.conf
Ignore Space
Show notes
View
docker-compose.yml
# Compose stack: one Spark master, Spark worker(s), a PySpark notebook
# container and a Kafka broker, all attached to the "spark-network" bridge
# network and all mounting the same host directory at /mnt/sparkdata.
version: "3.3"

services:
  spark-master:
    image: analytics/spark:latest
    container_name: spark-master
    hostname: spark-master
    ports:
      # HOST:CONTAINER mappings, quoted so they are always read as strings.
      - "8080:8080"
      - "7077:7077"
    networks:
      - spark-network
    environment:
      - "SPARK_HOSTNAME=spark-master"
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_WORKER_MEMORY=2g"
    command: "start-master.sh"
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata

  spark-worker:
    image: analytics/spark:latest
    depends_on:
      - spark-master
    ports:
      # Container port only (no host port given); quoted per Compose guidance.
      - "8080"
    networks:
      - spark-network
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_WORKER_WEBUI_PORT=8080"
      - "SPARKMONITOR_UI_PORT=8080"
      - "SPARK_WORKER_MEMORY=2g"
    command: "start-worker.sh"
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata

  pyspark:
    image: analytics/pyspark:latest
    depends_on:
      - spark-master
    ports:
      - "8888:8888"
    networks:
      - spark-network
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_MASTER_WEBUI_PORT=8080"
      # Kept on one line: the value is passed through verbatim as a single string.
      - "PYSPARK_SUBMIT_ARGS=--master spark://spark-master:7077 --packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.3 pyspark-shell"
    working_dir: /home/pyspark/work
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata

  kafka:
    image: analytics/kafka:latest
    hostname: kafka
    ports:
      # Container port only (no host port given); quoted per Compose guidance.
      - "9092"
    networks:
      - spark-network
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata

networks:
  spark-network:
    driver: bridge
    ipam:
      driver: default
# Compose stack: one Spark master, Spark worker(s), a PySpark notebook
# container and a Kafka broker, all attached to the "spark-network" bridge
# network and all mounting the same host directory at /mnt/sparkdata.
version: "3.3"

services:
  spark-master:
    image: analytics/spark:latest
    container_name: spark-master
    hostname: spark-master
    ports:
      # HOST:CONTAINER mappings, quoted so they are always read as strings.
      - "8080:8080"
      - "7077:7077"
    networks:
      - spark-network
    environment:
      - "SPARK_HOSTNAME=spark-master"
      - "SPARK_MASTER=spark://spark-master:7077"
    command: "start-master.sh"
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata

  spark-worker:
    image: analytics/spark:latest
    depends_on:
      - spark-master
    ports:
      # Container port only (no host port given); quoted per Compose guidance.
      - "8080"
    networks:
      - spark-network
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_WORKER_WEBUI_PORT=8080"
      - "SPARKMONITOR_UI_PORT=8080"
    command: "start-worker.sh"
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata

  pyspark:
    image: analytics/pyspark:latest
    depends_on:
      - spark-master
    ports:
      - "8888:8888"
    networks:
      - spark-network
    environment:
      - "SPARK_MASTER=spark://spark-master:7077"
      - "SPARK_MASTER_WEBUI_PORT=8080"
      # Kept on one line: the value is passed through verbatim as a single string.
      - "PYSPARK_SUBMIT_ARGS=--master spark://spark-master:7077 --packages org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.3 pyspark-shell"
    working_dir: /home/pyspark/work
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata

  kafka:
    image: analytics/kafka:latest
    hostname: kafka
    ports:
      # Container port only (no host port given); quoted per Compose guidance.
      - "9092"
    networks:
      - spark-network
    volumes:
      - ${HOME}/tmp/sparkdata:/mnt/sparkdata

networks:
  spark-network:
    driver: bridge
    ipam:
      driver: default
Ignore Space
Show notes
View
spark/spark-defaults.conf
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Example:
# spark.master                     spark://master:7077
# spark.eventLog.enabled           true
# spark.eventLog.dir               hdfs://namenode:8021/directory
# spark.serializer                 org.apache.spark.serializer.KryoSerializer
# spark.driver.memory              5g
# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"

# Active settings: one property per line, name and value separated by whitespace.
spark.deploy.defaultCores          2
spark.cores.max                    2

# Memory settings, currently commented out (not in effect).
# spark.executor.memory            2g
# spark.driver.memory              4g
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Example:
# spark.master                     spark://master:7077
# spark.eventLog.enabled           true
# spark.eventLog.dir               hdfs://namenode:8021/directory
# spark.serializer                 org.apache.spark.serializer.KryoSerializer
# spark.driver.memory              5g
# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"

# Active settings: one property per line, name and value separated by whitespace.
spark.deploy.defaultCores          2
spark.cores.max                    2
Show line notes below