From 7b73229d5ac05e68eb932336cf0966f6fe807aa0 Mon Sep 17 00:00:00 2001 From: Mark Veidemanis Date: Tue, 4 Oct 2022 20:17:04 +0100 Subject: [PATCH] Add Apache Superset and fix Druid resource usage --- docker-compose.yml | 137 +++++++++++++++++++------ docker/.env-non-dev | 46 +++++++++ docker/docker-compose.prod.yml | 120 +++++++++++++++++----- docker/superset/docker-bootstrap.sh | 50 +++++++++ docker/superset/docker-init.sh | 78 ++++++++++++++ docker/superset/requirements-local.txt | 1 + environment | 54 ++++++++-- 7 files changed, 418 insertions(+), 68 deletions(-) create mode 100644 docker/.env-non-dev create mode 100755 docker/superset/docker-bootstrap.sh create mode 100755 docker/superset/docker-init.sh create mode 100644 docker/superset/requirements-local.txt diff --git a/docker-compose.yml b/docker-compose.yml index f68b2c3..621b96d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,13 +1,13 @@ version: "2.2" -volumes: - metadata_data: {} - middle_var: {} - historical_var: {} - broker_var: {} - coordinator_var: {} - router_var: {} - druid_shared: {} +x-superset-image: &superset-image apache/superset:${TAG:-latest-dev} +x-superset-depends-on: &superset-depends-on + - db + - redis_superset +x-superset-volumes: &superset-volumes + # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container + - ./docker/superset:/app/docker + - superset_home:/app/superset_home services: app: @@ -66,35 +66,89 @@ services: # DEV: 1 # restart: always + # turnilo: + # container_name: turnilo + # image: uchhatre/turnilo:latest + # ports: + # - 9093:9090 + # environment: + # - DRUID_BROKER_URL=http://broker:8082 + # - CONFIG_FILE=/config.yaml + # volumes: + # - ${PORTAINER_GIT_DIR}/docker/turnilo.yaml:/config.yaml + # depends_on: + # - broker - turnilo: - container_name: turnilo - image: uchhatre/turnilo:latest - ports: - - 9093:9090 - environment: - - DRUID_BROKER_URL=http://broker:8082 - - CONFIG_FILE=/config.yaml + # metabase: + # container_name: 
metabase + # image: metabase/metabase:latest + # ports: + # - 3096:3000 + # environment: + # JAVA_OPTS: -Xmx1g + # MB_DB_TYPE: postgres + # MB_DB_DBNAME: metabase + # MB_DB_PORT: 5432 + # MB_DB_USER: druid + # MB_DB_PASS: FoolishPassword + # MB_DB_HOST: postgres + # depends_on: + # - broker + + redis_superset: + image: redis:latest + container_name: superset_cache + restart: unless-stopped volumes: - - ${PORTAINER_GIT_DIR}/docker/turnilo.yaml:/config.yaml - depends_on: - - broker + - redis:/data + db: + env_file: docker/.env-non-dev + image: postgres:10 + container_name: superset_db + restart: unless-stopped + volumes: + - db_home:/var/lib/postgresql/data - metabase: - container_name: metabase - image: metabase/metabase:latest + superset: + env_file: docker/.env-non-dev + image: *superset-image + container_name: superset_app + command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"] + user: "root" + restart: unless-stopped ports: - - 3096:3000 - environment: - JAVA_OPTS: -Xmx1g - MB_DB_TYPE: postgres - MB_DB_DBNAME: metabase - MB_DB_PORT: 5432 - MB_DB_USER: druid - MB_DB_PASS: FoolishPassword - MB_DB_HOST: postgres - depends_on: - - broker + - 8088:8088 + depends_on: *superset-depends-on + volumes: *superset-volumes + + superset-init: + image: *superset-image + container_name: superset_init + command: ["/app/docker/docker-init.sh"] + env_file: docker/.env-non-dev + depends_on: *superset-depends-on + user: "root" + volumes: *superset-volumes + + superset-worker: + image: *superset-image + container_name: superset_worker + command: ["/app/docker/docker-bootstrap.sh", "worker"] + env_file: docker/.env-non-dev + restart: unless-stopped + depends_on: *superset-depends-on + user: "root" + volumes: *superset-volumes + + superset-worker-beat: + image: *superset-image + container_name: superset_worker_beat + command: ["/app/docker/docker-bootstrap.sh", "beat"] + env_file: docker/.env-non-dev + restart: unless-stopped + depends_on: *superset-depends-on + user: "root" + 
volumes: *superset-volumes postgres: container_name: postgres @@ -277,4 +331,19 @@ services: networks: default: external: - name: pathogen \ No newline at end of file + name: pathogen + +volumes: + superset_home: + external: false + db_home: + external: false + redis: + external: false + metadata_data: {} + middle_var: {} + historical_var: {} + broker_var: {} + coordinator_var: {} + router_var: {} + druid_shared: {} \ No newline at end of file diff --git a/docker/.env-non-dev b/docker/.env-non-dev new file mode 100644 index 0000000..1cb5d30 --- /dev/null +++ b/docker/.env-non-dev @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +COMPOSE_PROJECT_NAME=superset + +# database configurations (do not modify) +DATABASE_DB=superset +DATABASE_HOST=db +DATABASE_PASSWORD=superset +DATABASE_USER=superset + +# database engine specific environment variables +# change the below if you prefer another database engine +DATABASE_PORT=5432 +DATABASE_DIALECT=postgresql +POSTGRES_DB=superset +POSTGRES_USER=superset +POSTGRES_PASSWORD=superset +#MYSQL_DATABASE=superset +#MYSQL_USER=superset +#MYSQL_PASSWORD=superset +#MYSQL_RANDOM_ROOT_PASSWORD=yes + +# Add the mapped in /app/pythonpath_docker which allows devs to override stuff +PYTHONPATH=/app/pythonpath:/app/docker/pythonpath_dev +REDIS_HOST=redis_superset +REDIS_PORT=6379 + +FLASK_ENV=production +SUPERSET_ENV=production +SUPERSET_LOAD_EXAMPLES=yes +CYPRESS_CONFIG=false +SUPERSET_PORT=8088 diff --git a/docker/docker-compose.prod.yml b/docker/docker-compose.prod.yml index 59c57e8..4b0d476 100644 --- a/docker/docker-compose.prod.yml +++ b/docker/docker-compose.prod.yml @@ -9,6 +9,15 @@ version: "2.2" # router_var: {} # druid_shared: {} +x-superset-image: &superset-image apache/superset:${TAG:-latest-dev} +x-superset-depends-on: &superset-depends-on + - db + - redis_superset +x-superset-volumes: &superset-volumes + # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container + - ./docker/superset:/app/docker + - superset_home:/app/superset_home + services: app: image: pathogen/monolith:latest @@ -64,31 +73,86 @@ services: # restart: always - turnilo: - container_name: turnilo - image: uchhatre/turnilo:latest - ports: - - 9093:9090 - environment: - - DRUID_BROKER_URL=http://broker:8082 - depends_on: - - broker + # turnilo: + # container_name: turnilo + # image: uchhatre/turnilo:latest + # ports: + # - 9093:9090 + # environment: + # - DRUID_BROKER_URL=http://broker:8082 + # depends_on: + # - broker - metabase: - container_name: metabase - image: metabase/metabase:latest + # metabase: + # container_name: metabase + # image: 
metabase/metabase:latest + # ports: + # - 3096:3000 + # environment: + # JAVA_OPTS: -Xmx1g + # MB_DB_TYPE: postgres + # MB_DB_DBNAME: metabase + # MB_DB_PORT: 5432 + # MB_DB_USER: druid + # MB_DB_PASS: FoolishPassword + # MB_DB_HOST: postgres + # depends_on: + # - broker + + redis_superset: + image: redis:latest + container_name: superset_cache + restart: unless-stopped + volumes: + - redis:/data + db: + env_file: docker/.env-non-dev + image: postgres:10 + container_name: superset_db + restart: unless-stopped + volumes: + - db_home:/var/lib/postgresql/data + + superset: + env_file: docker/.env-non-dev + image: *superset-image + container_name: superset_app + command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"] + user: "root" + restart: unless-stopped ports: - - 3096:3000 - environment: - JAVA_OPTS: -Xmx1g - MB_DB_TYPE: postgres - MB_DB_DBNAME: metabase - MB_DB_PORT: 5432 - MB_DB_USER: druid - MB_DB_PASS: FoolishPassword - MB_DB_HOST: postgres - depends_on: - - broker + - 8088:8088 + depends_on: *superset-depends-on + volumes: *superset-volumes + + superset-init: + image: *superset-image + container_name: superset_init + command: ["/app/docker/docker-init.sh"] + env_file: docker/.env-non-dev + depends_on: *superset-depends-on + user: "root" + volumes: *superset-volumes + + superset-worker: + image: *superset-image + container_name: superset_worker + command: ["/app/docker/docker-bootstrap.sh", "worker"] + env_file: docker/.env-non-dev + restart: unless-stopped + depends_on: *superset-depends-on + user: "root" + volumes: *superset-volumes + + superset-worker-beat: + image: *superset-image + container_name: superset_worker_beat + command: ["/app/docker/docker-bootstrap.sh", "beat"] + env_file: docker/.env-non-dev + restart: unless-stopped + depends_on: *superset-depends-on + user: "root" + volumes: *superset-volumes postgres: container_name: postgres @@ -271,4 +335,12 @@ services: networks: default: external: - name: pathogen \ No newline at end of file + 
name: pathogen + +volumes: + superset_home: + external: false + db_home: + external: false + redis: + external: false \ No newline at end of file diff --git a/docker/superset/docker-bootstrap.sh b/docker/superset/docker-bootstrap.sh new file mode 100755 index 0000000..150f351 --- /dev/null +++ b/docker/superset/docker-bootstrap.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -eo pipefail + +REQUIREMENTS_LOCAL="/app/docker/requirements-local.txt" +# If Cypress run – overwrite the password for admin and export env variables +if [ "$CYPRESS_CONFIG" == "true" ]; then + export SUPERSET_CONFIG=tests.integration_tests.superset_test_config + export SUPERSET_TESTENV=true + export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset +fi +# +# Make sure we have dev requirements installed +# +if [ -f "${REQUIREMENTS_LOCAL}" ]; then + echo "Installing local overrides at ${REQUIREMENTS_LOCAL}" + pip install -r "${REQUIREMENTS_LOCAL}" +else + echo "Skipping local overrides" +fi + +if [[ "${1}" == "worker" ]]; then + echo "Starting Celery worker..." 
+ celery --app=superset.tasks.celery_app:app worker -Ofair -l INFO +elif [[ "${1}" == "beat" ]]; then + echo "Starting Celery beat..." + celery --app=superset.tasks.celery_app:app beat --pidfile /tmp/celerybeat.pid -l INFO -s "${SUPERSET_HOME}"/celerybeat-schedule +elif [[ "${1}" == "app" ]]; then + echo "Starting web app..." + flask run -p 8088 --with-threads --reload --debugger --host=0.0.0.0 +elif [[ "${1}" == "app-gunicorn" ]]; then + echo "Starting web app..." + /usr/bin/run-server.sh +fi diff --git a/docker/superset/docker-init.sh b/docker/superset/docker-init.sh new file mode 100755 index 0000000..c98f498 --- /dev/null +++ b/docker/superset/docker-init.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +set -e + +# +# Always install local overrides first +# +/app/docker/docker-bootstrap.sh + +STEP_CNT=4 + +echo_step() { +cat < +# Java tuning +#DRUID_XMX=1g +#DRUID_XMS=1g +#DRUID_MAXNEWSIZE=250m +#DRUID_NEWSIZE=250m +#DRUID_MAXDIRECTMEMORYSIZE=6172m +DRUID_SINGLE_NODE_CONF=micro-quickstart druid_emitter_logging_logLevel=debug @@ -39,15 +74,14 @@ druid_metadata_storage_connector_password=FoolishPassword druid_coordinator_balancer_strategy=cachingCost druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"] -druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB -druid_processing_buffer_sizeBytes=134217728 # 128MiB +druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB druid_storage_type=local druid_storage_storageDirectory=/opt/shared/segments druid_indexer_logs_type=file druid_indexer_logs_directory=/opt/shared/indexing-logs -druid_processing_numThreads=1 -druid_processing_numMergeBuffers=1 +druid_processing_numThreads=2 +druid_processing_numMergeBuffers=2 -DRUID_LOG4J= +DRUID_LOG4J= \ No newline at end of file