Add Apache Superset and fix Druid resource usage

This commit is contained in:
Mark Veidemanis 2022-10-04 20:17:04 +01:00
parent 5ad6cd0354
commit 464c831686
7 changed files with 418 additions and 68 deletions

View File

@ -1,13 +1,13 @@
version: "2.2" version: "2.2"
volumes: x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
metadata_data: {} x-superset-depends-on: &superset-depends-on
middle_var: {} - db
historical_var: {} - redis_superset
broker_var: {} x-superset-volumes: &superset-volumes
coordinator_var: {} # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
router_var: {} - ./docker/superset:/app/docker
druid_shared: {} - superset_home:/app/superset_home
services: services:
app: app:
@ -66,35 +66,89 @@ services:
# DEV: 1 # DEV: 1
# restart: always # restart: always
# turnilo:
# container_name: turnilo
# image: uchhatre/turnilo:latest
# ports:
# - 9093:9090
# environment:
# - DRUID_BROKER_URL=http://broker:8082
# - CONFIG_FILE=/config.yaml
# volumes:
# - ${PORTAINER_GIT_DIR}/docker/turnilo.yaml:/config.yaml
# depends_on:
# - broker
turnilo: # metabase:
container_name: turnilo # container_name: metabase
image: uchhatre/turnilo:latest # image: metabase/metabase:latest
ports: # ports:
- 9093:9090 # - 3096:3000
environment: # environment:
- DRUID_BROKER_URL=http://broker:8082 # JAVA_OPTS: -Xmx1g
- CONFIG_FILE=/config.yaml # MB_DB_TYPE: postgres
# MB_DB_DBNAME: metabase
# MB_DB_PORT: 5432
# MB_DB_USER: druid
# MB_DB_PASS: FoolishPassword
# MB_DB_HOST: postgres
# depends_on:
# - broker
redis_superset:
image: redis:latest
container_name: superset_cache
restart: unless-stopped
volumes: volumes:
- ${PORTAINER_GIT_DIR}/docker/turnilo.yaml:/config.yaml - redis:/data
depends_on: db:
- broker env_file: docker/.env-non-dev
image: postgres:10
container_name: superset_db
restart: unless-stopped
volumes:
- db_home:/var/lib/postgresql/data
metabase: superset:
container_name: metabase env_file: docker/.env-non-dev
image: metabase/metabase:latest image: *superset-image
container_name: superset_app
command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
user: "root"
restart: unless-stopped
ports: ports:
- 3096:3000 - 8088:8088
environment: depends_on: *superset-depends-on
JAVA_OPTS: -Xmx1g volumes: *superset-volumes
MB_DB_TYPE: postgres
MB_DB_DBNAME: metabase superset-init:
MB_DB_PORT: 5432 image: *superset-image
MB_DB_USER: druid container_name: superset_init
MB_DB_PASS: FoolishPassword command: ["/app/docker/docker-init.sh"]
MB_DB_HOST: postgres env_file: docker/.env-non-dev
depends_on: depends_on: *superset-depends-on
- broker user: "root"
volumes: *superset-volumes
superset-worker:
image: *superset-image
container_name: superset_worker
command: ["/app/docker/docker-bootstrap.sh", "worker"]
env_file: docker/.env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker-beat:
image: *superset-image
container_name: superset_worker_beat
command: ["/app/docker/docker-bootstrap.sh", "beat"]
env_file: docker/.env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
postgres: postgres:
container_name: postgres container_name: postgres
@ -277,4 +331,19 @@ services:
networks: networks:
default: default:
external: external:
name: pathogen name: pathogen
volumes:
superset_home:
external: false
db_home:
external: false
redis:
external: false
metadata_data: {}
middle_var: {}
historical_var: {}
broker_var: {}
coordinator_var: {}
router_var: {}
druid_shared: {}

46
docker/.env-non-dev Normal file
View File

@ -0,0 +1,46 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
COMPOSE_PROJECT_NAME=superset
# database configurations (do not modify)
DATABASE_DB=superset
DATABASE_HOST=db
DATABASE_PASSWORD=superset
DATABASE_USER=superset
# database engine specific environment variables
# change the below if you prefers another database engine
DATABASE_PORT=5432
DATABASE_DIALECT=postgresql
POSTGRES_DB=superset
POSTGRES_USER=superset
POSTGRES_PASSWORD=superset
#MYSQL_DATABASE=superset
#MYSQL_USER=superset
#MYSQL_PASSWORD=superset
#MYSQL_RANDOM_ROOT_PASSWORD=yes
# Add the mapped in /app/pythonpath_docker which allows devs to override stuff
PYTHONPATH=/app/pythonpath:/app/docker/pythonpath_dev
REDIS_HOST=redis
REDIS_PORT=6379
FLASK_ENV=production
SUPERSET_ENV=production
SUPERSET_LOAD_EXAMPLES=yes
CYPRESS_CONFIG=false
SUPERSET_PORT=8088

View File

@ -9,6 +9,15 @@ version: "2.2"
# router_var: {} # router_var: {}
# druid_shared: {} # druid_shared: {}
x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
- db
- redis_superset
x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ./docker/superset:/app/docker
- superset_home:/app/superset_home
services: services:
app: app:
image: pathogen/monolith:latest image: pathogen/monolith:latest
@ -64,31 +73,86 @@ services:
# restart: always # restart: always
turnilo: # turnilo:
container_name: turnilo # container_name: turnilo
image: uchhatre/turnilo:latest # image: uchhatre/turnilo:latest
ports: # ports:
- 9093:9090 # - 9093:9090
environment: # environment:
- DRUID_BROKER_URL=http://broker:8082 # - DRUID_BROKER_URL=http://broker:8082
depends_on: # depends_on:
- broker # - broker
metabase: # metabase:
container_name: metabase # container_name: metabase
image: metabase/metabase:latest # image: metabase/metabase:latest
# ports:
# - 3096:3000
# environment:
# JAVA_OPTS: -Xmx1g
# MB_DB_TYPE: postgres
# MB_DB_DBNAME: metabase
# MB_DB_PORT: 5432
# MB_DB_USER: druid
# MB_DB_PASS: FoolishPassword
# MB_DB_HOST: postgres
# depends_on:
# - broker
redis_superset:
image: redis:latest
container_name: superset_cache
restart: unless-stopped
volumes:
- redis:/data
db:
env_file: docker/.env-non-dev
image: postgres:10
container_name: superset_db
restart: unless-stopped
volumes:
- db_home:/var/lib/postgresql/data
superset:
env_file: docker/.env-non-dev
image: *superset-image
container_name: superset_app
command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
user: "root"
restart: unless-stopped
ports: ports:
- 3096:3000 - 8088:8088
environment: depends_on: *superset-depends-on
JAVA_OPTS: -Xmx1g volumes: *superset-volumes
MB_DB_TYPE: postgres
MB_DB_DBNAME: metabase superset-init:
MB_DB_PORT: 5432 image: *superset-image
MB_DB_USER: druid container_name: superset_init
MB_DB_PASS: FoolishPassword command: ["/app/docker/docker-init.sh"]
MB_DB_HOST: postgres env_file: docker/.env-non-dev
depends_on: depends_on: *superset-depends-on
- broker user: "root"
volumes: *superset-volumes
superset-worker:
image: *superset-image
container_name: superset_worker
command: ["/app/docker/docker-bootstrap.sh", "worker"]
env_file: docker/.env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker-beat:
image: *superset-image
container_name: superset_worker_beat
command: ["/app/docker/docker-bootstrap.sh", "beat"]
env_file: docker/.env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
postgres: postgres:
container_name: postgres container_name: postgres
@ -271,4 +335,12 @@ services:
networks: networks:
default: default:
external: external:
name: pathogen name: pathogen
volumes:
superset_home:
external: false
db_home:
external: false
redis:
external: false

View File

@ -0,0 +1,50 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set -eo pipefail
REQUIREMENTS_LOCAL="/app/docker/requirements-local.txt"
# If Cypress run overwrite the password for admin and export env variables
if [ "$CYPRESS_CONFIG" == "true" ]; then
export SUPERSET_CONFIG=tests.integration_tests.superset_test_config
export SUPERSET_TESTENV=true
export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset
fi
#
# Make sure we have dev requirements installed
#
if [ -f "${REQUIREMENTS_LOCAL}" ]; then
echo "Installing local overrides at ${REQUIREMENTS_LOCAL}"
pip install -r "${REQUIREMENTS_LOCAL}"
else
echo "Skipping local overrides"
fi
if [[ "${1}" == "worker" ]]; then
echo "Starting Celery worker..."
celery --app=superset.tasks.celery_app:app worker -Ofair -l INFO
elif [[ "${1}" == "beat" ]]; then
echo "Starting Celery beat..."
celery --app=superset.tasks.celery_app:app beat --pidfile /tmp/celerybeat.pid -l INFO -s "${SUPERSET_HOME}"/celerybeat-schedule
elif [[ "${1}" == "app" ]]; then
echo "Starting web app..."
flask run -p 8088 --with-threads --reload --debugger --host=0.0.0.0
elif [[ "${1}" == "app-gunicorn" ]]; then
echo "Starting web app..."
/usr/bin/run-server.sh
fi

78
docker/superset/docker-init.sh Executable file
View File

@ -0,0 +1,78 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set -e
#
# Always install local overrides first
#
/app/docker/docker-bootstrap.sh
STEP_CNT=4
echo_step() {
cat <<EOF
######################################################################
Init Step ${1}/${STEP_CNT} [${2}] -- ${3}
######################################################################
EOF
}
ADMIN_PASSWORD="admin"
# If Cypress run overwrite the password for admin and export env variables
if [ "$CYPRESS_CONFIG" == "true" ]; then
ADMIN_PASSWORD="general"
export SUPERSET_CONFIG=tests.integration_tests.superset_test_config
export SUPERSET_TESTENV=true
export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset
fi
# Initialize the database
echo_step "1" "Starting" "Applying DB migrations"
superset db upgrade
echo_step "1" "Complete" "Applying DB migrations"
# Create an admin user
echo_step "2" "Starting" "Setting up admin user ( admin / $ADMIN_PASSWORD )"
superset fab create-admin \
--username admin \
--firstname Superset \
--lastname Admin \
--email admin@superset.com \
--password $ADMIN_PASSWORD
echo_step "2" "Complete" "Setting up admin user"
# Create default roles and permissions
echo_step "3" "Starting" "Setting up roles and perms"
superset init
echo_step "3" "Complete" "Setting up roles and perms"
if [ "$SUPERSET_LOAD_EXAMPLES" = "yes" ]; then
# Load some data to play with
echo_step "4" "Starting" "Loading examples"
# If Cypress run which consumes superset_test_config load required data for tests
if [ "$CYPRESS_CONFIG" == "true" ]; then
superset load_test_users
superset load_examples --load-test-data
else
superset load_examples
fi
echo_step "4" "Complete" "Loading examples"
fi

View File

@ -0,0 +1 @@
pydruid

View File

@ -18,11 +18,46 @@
# #
# Java tuning # Java tuning
DRUID_XMX=1g #DRUID_XMX=1g
DRUID_XMS=1g #DRUID_XMS=1g
DRUID_MAXNEWSIZE=250m #DRUID_MAXNEWSIZE=250m
DRUID_NEWSIZE=250m #DRUID_NEWSIZE=250m
DRUID_MAXDIRECTMEMORYSIZE=500m #DRUID_MAXDIRECTMEMORYSIZE=1g
#druid_emitter_logging_logLevel=debug
#druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
#druid_zk_service_host=zookeeper
#druid_metadata_storage_host=
#druid_metadata_storage_type=postgresql
#druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
#druid_metadata_storage_connector_user=druid
#druid_metadata_storage_connector_password=FoolishPassword
#druid_coordinator_balancer_strategy=cachingCost
#druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
#druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB
#druid_processing_buffer_sizeBytes=268435456 # 256MiB
#druid_storage_type=local
#druid_storage_storageDirectory=/opt/shared/segments
#druid_indexer_logs_type=file
#druid_indexer_logs_directory=/opt/shared/indexing-logs
#druid_processing_numThreads=1
#druid_processing_numMergeBuffers=1
#DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=6172m
DRUID_SINGLE_NODE_CONF=micro-quickstart
druid_emitter_logging_logLevel=debug druid_emitter_logging_logLevel=debug
@ -39,15 +74,14 @@ druid_metadata_storage_connector_password=FoolishPassword
druid_coordinator_balancer_strategy=cachingCost druid_coordinator_balancer_strategy=cachingCost
druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"] druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB
druid_processing_buffer_sizeBytes=134217728 # 128MiB
druid_storage_type=local druid_storage_type=local
druid_storage_storageDirectory=/opt/shared/segments druid_storage_storageDirectory=/opt/shared/segments
druid_indexer_logs_type=file druid_indexer_logs_type=file
druid_indexer_logs_directory=/opt/shared/indexing-logs druid_indexer_logs_directory=/opt/shared/indexing-logs
druid_processing_numThreads=1 druid_processing_numThreads=2
druid_processing_numMergeBuffers=1 druid_processing_numMergeBuffers=2
DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration> DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>