diff --git a/pyspark/Dockerfile b/pyspark/Dockerfile
index a496741..071bf2a 100644
--- a/pyspark/Dockerfile
+++ b/pyspark/Dockerfile
@@ -35,17 +35,12 @@ PYSPARK_SUBMIT_ARGS="--master $SPARK_MASTER pyspark-shell" \
 PYTHONSTARTUP="$SPARK_HOME/python/pyspark/shell.py"
 
-
-# RUN mkdir -p $KERNELS_DIR_PATH/$KERNEL_NAME \
-# && pip install pyhocon \
-# && cat $KERNELS_TEMPLATE_PATH/kernel.json | pyhocon -f json >> $KERNELS_DIR_PATH/$KERNEL_NAME/kernel.json \
-# && pip uninstall -y pyhocon pyparsing
-
 # ENV SPARKMONITOR_UI_PORT 8080
 
 USER $NB_UID
 
 RUN ipython kernel install --user --name $KERNEL_NAME
 
+# kernel.json is based on the template from .
 COPY kernel.json pyspark-kernel.sh $KERNELS_DIR_PATH/$KERNEL_NAME/
 
 RUN jupyter nbextension install sparkmonitor --py --user --symlink \
@@ -57,9 +52,8 @@
 RUN mkdir -p /home/$NB_USER/work
 WORKDIR /home/$NB_USER/work
 
-# CMD ["pyspark"]
-# PySpark doesn't seem to load the sparkmonitor extension, so let's just
-# go with Jupyter and manually create contexts and sessions as required.
+# pyspark-kernel.sh will automatically set up the PySpark context when
+# the kernel is loaded.
 CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser"]
 
 # debugging