# Patch against pyspark/Dockerfile (index b328fc9 -> 6d9d214), three hunks:
#   1. Top of file, after `FROM analytics/spark`: removes one line with no
#      visible content (apparently a blank line) ahead of the ARG block.
#   2. Package-install command chain: rewrites the two `pip install` commands
#      so that each package sits on its own backslash-continued line. The
#      package set (pip upgrade; tornado==5.1, pyspark, jupyter, sparkmonitor)
#      is unchanged, and the "Tornado 6 breaks sparkmonitor" comment now sits
#      directly above the tornado pin it explains.
#   3. WORKDIR: switches from /mnt/sparkdata to /home/$NB_USER/work and drops
#      the commented-out copy of that same path.
# NOTE(review): the diff below is stored on a single line, so the original line
# breaks and indentation are not recoverable from this text; presumably it has
# to be re-exported from the repository before it can be applied -- confirm.
diff --git a/pyspark/Dockerfile b/pyspark/Dockerfile index b328fc9..6d9d214 100644 --- a/pyspark/Dockerfile +++ b/pyspark/Dockerfile @@ -1,7 +1,6 @@ # Don't use the official jupyter/pyspark-notebook image because it's massive! FROM analytics/spark - ARG NB_USER="pyspark" ARG NB_UID="1000" ARG NB_GID="1000" @@ -15,9 +14,14 @@ zeromq-dev \ python3-dev \ py3-zmq \ - && pip install --upgrade pip \ - # Tornado 6 breaks sparkmonitor - && pip install tornado==5.1 pyspark jupyter sparkmonitor \ + && pip install --upgrade \ + pip \ + && pip install \ + # Tornado 6 breaks sparkmonitor + tornado==5.1 \ + pyspark \ + jupyter \ + sparkmonitor \ && apk del .build-deps ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip @@ -33,8 +37,7 @@ && ipython profile create \ && echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py -# WORKDIR /home/$NB_USER/work -WORKDIR /mnt/sparkdata +WORKDIR /home/$NB_USER/work # CMD ["pyspark"] # PySpark doesn't seem to load the sparkmonitor extension, so let's just