diff --git a/spark-pyspark-kafka/Dockerfile b/spark-pyspark-kafka/Dockerfile index 1ee2e73..1091294 100644 --- a/spark-pyspark-kafka/Dockerfile +++ b/spark-pyspark-kafka/Dockerfile @@ -18,6 +18,13 @@ # we don’t have time to test that at the moment. Which is why this is still # using Python 3.6: if it ain't broke... +# Ideally we should run as a non-root user, but it's problematic to set +# up shared files, especially if running Docker within, say, VirtualBox. +# See https://vsupalov.com/docker-shared-permissions/ +# ARG NB_USER="pyspark" +# ARG NB_UID="1000" +# ARG NB_GID="1000" + ENV \ # download mirrors SPARK_MIRROR="https://dlcdn.apache.org" \