diff --git a/pyspark/Dockerfile b/pyspark/Dockerfile
index 5827bb7..3ad2140 100644
--- a/pyspark/Dockerfile
+++ b/pyspark/Dockerfile
@@ -1,13 +1,15 @@
 # Don't use the official jupyter/pyspark-notebook image because it's massive!
 FROM nstanger/spark
 
-ARG NB_USER="pyspark"
-ARG NB_UID="1000"
-ARG NB_GID="1000"
+# Ideally we should run as a non-root user, but it's problematic to set
+# up shared files, especially if running Docker within, say, VirtualBox.
+# ARG NB_USER="pyspark"
+# ARG NB_UID="1000"
+# ARG NB_GID="1000"
 
 USER root
 
-RUN adduser -D -u $NB_UID $NB_USER
+# RUN adduser -D -G root $NB_USER
 
 RUN apk add --no-cache --virtual .build-deps \
         build-base \
@@ -41,26 +43,26 @@
 # ENV SPARKMONITOR_UI_PORT 8080
 
-USER $NB_UID
+# USER $NB_USER
 
-RUN ipython kernel install --user --name $KERNEL_NAME
+RUN ipython kernel install --name $KERNEL_NAME
 # kernel.json is based on the template from .
 COPY kernel.json pyspark-kernel.sh $KERNELS_DIR_PATH/$KERNEL_NAME/
 
-RUN jupyter nbextension install sparkmonitor --py --user --symlink \
-    && jupyter nbextension enable sparkmonitor --py --user \
-    && jupyter serverextension enable --py --user sparkmonitor \
+RUN jupyter nbextension install sparkmonitor --py --symlink \
+    && jupyter nbextension enable sparkmonitor --py \
+    && jupyter serverextension enable --py sparkmonitor \
     && ipython profile create \
     && echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py
 
-RUN mkdir -p /home/$NB_USER/work
-WORKDIR /home/$NB_USER/work
+# RUN mkdir -p /home/$NB_USER/work
+# WORKDIR /home/$NB_USER/work
 
 ENTRYPOINT ["/sbin/tini", "--"]
 # pyspark-kernel.sh will automatically set up the PySpark context when
 # the kernel is loaded.
-CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888"]
+CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--allow-root"]
 
 # debugging
 # CMD ["bash"]
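
For reference, a minimal sketch of building and running the image after this change. The image tag, host port, and bind-mount path below are assumptions for illustration, not taken from the diff. Because the container now runs as root, the notebook server needs the --allow-root flag, which the new CMD already supplies:

    # Build the image from the pyspark/ directory (tag is hypothetical):
    docker build -t pyspark-notebook pyspark/
    # Run it, publishing the notebook port. The bind mount is an assumed
    # example of the host file sharing (e.g. via VirtualBox) that motivated
    # running as root in the first place:
    docker run --rm -p 8888:8888 -v "$PWD/work:/work" pyspark-notebook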