From 7336340afda036086ce2d1f78c10c63c6bab673a Mon Sep 17 00:00:00 2001 From: Puckel_ Date: Wed, 7 Aug 2019 10:35:49 +0200 Subject: [PATCH] Bump to Airflow 1.10.4 and Python 3.7 --- .circleci/config.yml | 10 ++-- Dockerfile | 6 +-- README.md | 2 +- config/airflow.cfg | 77 ++++++++++++++++++++++++------- docker-compose-CeleryExecutor.yml | 10 ++-- docker-compose-LocalExecutor.yml | 2 +- script/entrypoint.sh | 4 +- 7 files changed, 81 insertions(+), 30 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3e28437e..00e0f9dc 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9,14 +9,18 @@ jobs: - checkout - setup_remote_docker: docker_layer_caching: true - - run: + - run: name: Build docker image command: | docker build -t puckel/docker-airflow . - - run: + - run: + name: Test Python version + command: | + docker run puckel/docker-airflow python -V | grep '3.7' + - run: name: Test docker image command: | - docker run puckel/docker-airflow version |grep '1.10.3' + docker run puckel/docker-airflow version |grep '1.10.4' workflows: version: 2 build_and_test: diff --git a/Dockerfile b/Dockerfile index f44a6dc6..f1bf0330 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,10 @@ -# VERSION 1.10.3 +# VERSION 1.10.4 # AUTHOR: Matthieu "Puckel_" Roisil # DESCRIPTION: Basic Airflow container # BUILD: docker build --rm -t puckel/docker-airflow . # SOURCE: https://github.com/puckel/docker-airflow -FROM python:3.6-slim-stretch +FROM python:3.7-slim-stretch LABEL maintainer="Puckel_" # Never prompts the user for choices on installation/configuration of packages @@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive ENV TERM linux # Airflow -ARG AIRFLOW_VERSION=1.10.3 +ARG AIRFLOW_VERSION=1.10.4 ARG AIRFLOW_USER_HOME=/usr/local/airflow ARG AIRFLOW_DEPS="" ARG PYTHON_DEPS="" diff --git a/README.md b/README.md index 4a4329fd..03b5a3f0 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ This repository contains **Dockerfile** of [apache-airflow](https://github.com/a ## Informations -* Based on Python (3.6-slim-stretch) official Image [python:3.6-slim-stretch](https://hub.docker.com/_/python/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue +* Based on Python (3.7-slim-stretch) official Image [python:3.7-slim-stretch](https://hub.docker.com/_/python/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [Redis](https://hub.docker.com/_/redis/) as queue * Install [Docker](https://www.docker.com/) * Install [Docker Compose](https://docs.docker.com/compose/install/) * Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/apache-airflow) diff --git a/config/airflow.cfg b/config/airflow.cfg index c5ca80c5..3e037424 100644 --- a/config/airflow.cfg +++ b/config/airflow.cfg @@ -28,7 +28,11 @@ fab_logging_level = WARN logging_config_class = # Log format -# we need to escape the curly braces by adding an additional curly brace +# Colour the logs when the controlling terminal is a TTY. 
+colored_console_log = True +colored_log_format = [%%(blue)s%%(asctime)s%%(reset)s] {{%%(blue)s%%(filename)s:%%(reset)s%%(lineno)d}} %%(log_color)s%%(levelname)s%%(reset)s - %%(log_color)s%%(message)s%%(reset)s +colored_formatter_class = airflow.utils.log.colored_log.CustomTTYColoredFormatter + log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s @@ -54,16 +58,26 @@ executor = SequentialExecutor # their website # sql_alchemy_conn = sqlite:////tmp/airflow.db -# If SqlAlchemy should pool database connections. -sql_alchemy_pool_enabled = True - # The encoding for the databases sql_engine_encoding = utf-8 +# If SqlAlchemy should pool database connections. +sql_alchemy_pool_enabled = True + # The SqlAlchemy pool size is the maximum number of database connections # in the pool. 0 indicates no limit. sql_alchemy_pool_size = 5 +# The maximum overflow size of the pool. +# When the number of checked-out connections reaches the size set in pool_size, +# additional connections will be returned up to this limit. +# When those additional connections are returned to the pool, they are disconnected and discarded. +# It follows then that the total number of simultaneous connections the pool will allow is pool_size + max_overflow, +# and the total number of "sleeping" connections the pool will allow is pool_size. +# max_overflow can be set to -1 to indicate no overflow limit; +# no limit will be placed on the total number of concurrent connections. Defaults to 10. +sql_alchemy_max_overflow = 10 + # The SqlAlchemy pool recycle is the number of seconds a connection # can be idle in the pool before it is invalidated. This config does # not apply to sqlite. If the number of DB connections is ever exceeded, @@ -182,7 +196,7 @@ password = [operators] # The default owner assigned to each new operator, unless # provided explicitly or passed via `default_args` -default_owner = Airflow +default_owner = airflow default_cpus = 1 default_ram = 512 default_disk = 512 @@ -191,9 +205,6 @@ default_gpus = 0 [hive] # Default mapreduce queue for HiveOperator tasks default_hive_mapred_queue = -# Template for mapred_job_name in HiveOperator, supports the following named parameters: -# hostname, dag_id, task_id, execution_date -mapred_job_name_template = Airflow HiveOperator task for {hostname}.{dag_id}.{task_id}.{execution_date} [webserver] # The base url of your website as airflow cannot guess what domain or @@ -301,6 +312,9 @@ cookie_secure = False # Set samesite policy on session cookie cookie_samesite = +# Default setting for wrap toggle on DAG code and TI log views. +default_wrap = False + [email] email_backend = airflow.utils.email.send_email_smtp @@ -391,6 +405,13 @@ ssl_key = ssl_cert = ssl_cacert = +# Celery Pool implementation. +# Choices include: prefork (default), eventlet, gevent or solo. +# See: +# https://docs.celeryproject.org/en/latest/userguide/workers.html#concurrency +# https://docs.celeryproject.org/en/latest/userguide/concurrency/eventlet.html +pool = prefork + [celery_broker_transport_options] # This section is for specifying options which can be passed to the # underlying celery broker transport. See: @@ -505,8 +526,8 @@ basedn = dc=example,dc=com cacert = /etc/ca/ldap_ca.crt search_scope = LEVEL -# This setting allows the use of LDAP servers that either return a -# broken schema, or do not return a schema. 
+# This setting allows the use of LDAP servers that either return a +# broken schema, or do not return a schema. ignore_malformed_schema = False [mesos] @@ -567,10 +588,22 @@ api_rev = v3 hide_sensitive_variable_fields = True [elasticsearch] -elasticsearch_host = -# we need to escape the curly braces by adding an additional curly brace -elasticsearch_log_id_template = {dag_id}-{task_id}-{execution_date}-{try_number} -elasticsearch_end_of_log_mark = end_of_log +# Elasticsearch host +host = +# Format of the log_id, which is used to query for a given tasks logs +log_id_template = {{dag_id}}-{{task_id}}-{{execution_date}}-{{try_number}} +# Used to mark the end of a log stream for a task +end_of_log_mark = end_of_log +# Qualified URL for an elasticsearch frontend (like Kibana) with a template argument for log_id +# Code will construct log_id using the log_id template from the argument above. +# NOTE: The code will prefix the https:// automatically, don't include that here. +frontend = +# Write the task logs to the stdout of the worker, rather than the default files +write_stdout = False +# Instead of the default log formatter, write the log lines as JSON +json_format = False +# Log fields to also attach to the json output, if enabled +json_fields = asctime, filename, lineno, levelname, message [kubernetes] # The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run @@ -606,7 +639,6 @@ logs_volume_subpath = # A shared volume claim for the logs logs_volume_claim = - # For DAGs mounted via a hostPath volume (mutually exclusive with volume claim and git-sync) # Useful in local environment, discouraged in production dags_volume_host = @@ -634,7 +666,7 @@ git_password = git_sync_root = /git git_sync_dest = repo # Mount point of the volume if git-sync is being used. -# i.e. /root/airflow/dags +# i.e. {AIRFLOW_HOME}/dags git_dags_folder_mount_point = # To get Git-sync SSH authentication set up follow this format @@ -705,6 +737,13 @@ affinity = # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.12/#toleration-v1-core tolerations = +# **kwargs parameters to pass while calling a kubernetes client core_v1_api methods from Kubernetes Executor +# provided as a single line formatted JSON dictionary string. +# List of supported params in **kwargs are similar for all core_v1_apis, hence a single config variable for all apis +# See: +# https://raw.githubusercontent.com/kubernetes-client/python/master/kubernetes/client/apis/core_v1_api.py +kube_client_request_args = + # Worker pods security context options # See: # https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ @@ -753,3 +792,9 @@ fs_group = # # Additionally you may override worker airflow settings with the AIRFLOW__
__ # formatting as supported by airflow normally. + +[kubernetes_labels] +# The Key-value pairs to be given to worker pods. +# The worker pods will be given these static labels, as well as some additional dynamic labels +# to identify the task. +# Should be supplied in the format: key = value diff --git a/docker-compose-CeleryExecutor.yml b/docker-compose-CeleryExecutor.yml index d949ab43..d2169dd9 100644 --- a/docker-compose-CeleryExecutor.yml +++ b/docker-compose-CeleryExecutor.yml @@ -1,7 +1,7 @@ version: '2.1' services: redis: - image: 'redis:3.2.7' + image: 'redis:5.0.5' # command: redis-server --requirepass redispass postgres: @@ -16,7 +16,7 @@ services: # - ./pgdata:/var/lib/postgresql/data/pgdata webserver: - image: puckel/docker-airflow:1.10.3 + image: puckel/docker-airflow:1.10.4 restart: always depends_on: - postgres @@ -43,7 +43,7 @@ services: retries: 3 flower: - image: puckel/docker-airflow:1.10.3 + image: puckel/docker-airflow:1.10.4 restart: always depends_on: - redis @@ -55,7 +55,7 @@ services: command: flower scheduler: - image: puckel/docker-airflow:1.10.3 + image: puckel/docker-airflow:1.10.4 restart: always depends_on: - webserver @@ -74,7 +74,7 @@ services: command: scheduler worker: - image: puckel/docker-airflow:1.10.3 + image: puckel/docker-airflow:1.10.4 restart: always depends_on: - scheduler diff --git a/docker-compose-LocalExecutor.yml b/docker-compose-LocalExecutor.yml index 5f6d973c..bc75a9b4 100644 --- a/docker-compose-LocalExecutor.yml +++ b/docker-compose-LocalExecutor.yml @@ -8,7 +8,7 @@ services: - POSTGRES_DB=airflow webserver: - image: puckel/docker-airflow:1.10.3 + image: puckel/docker-airflow:1.10.4 restart: always depends_on: - postgres diff --git a/script/entrypoint.sh b/script/entrypoint.sh index fb3f9ad1..525310e1 100755 --- a/script/entrypoint.sh +++ b/script/entrypoint.sh @@ -13,10 +13,12 @@ TRY_LOOP="20" : "${POSTGRES_DB:="airflow"}" # Defaults and back-compat +: "${AIRFLOW_HOME:="/usr/local/airflow"}" : "${AIRFLOW__CORE__FERNET_KEY:=${FERNET_KEY:=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)")}}" : "${AIRFLOW__CORE__EXECUTOR:=${EXECUTOR:-Sequential}Executor}" export \ + AIRFLOW_HOME \ AIRFLOW__CELERY__BROKER_URL \ AIRFLOW__CELERY__RESULT_BACKEND \ AIRFLOW__CORE__EXECUTOR \ @@ -33,7 +35,7 @@ fi # Install custom python package if requirements.txt is present if [ -e "/requirements.txt" ]; then - $(which pip) install --user -r /requirements.txt + $(command -v pip) install --user -r /requirements.txt fi if [ -n "$REDIS_PASSWORD" ]; then
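
For a local sanity check of the bump, the same commands the CircleCI job now runs can be replayed against a fresh build (the puckel/docker-airflow tag follows the BUILD comment in the Dockerfile):

    $ docker build --rm -t puckel/docker-airflow .
    $ docker run puckel/docker-airflow python -V | grep '3.7'
    $ docker run puckel/docker-airflow version | grep '1.10.4'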
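
Because the entrypoint now exports AIRFLOW_HOME alongside the AIRFLOW__* variables, and still derives AIRFLOW__CORE__EXECUTOR from the short EXECUTOR variable (Sequential by default), any airflow.cfg key can be overridden at run time via the usual AIRFLOW__<SECTION>__<KEY> environment-variable form. A minimal sketch with the default SequentialExecutor; the 8080 port mapping and the load_examples key are illustrative and not part of this patch:

    $ docker run -d -p 8080:8080 \
        -e AIRFLOW__CORE__LOAD_EXAMPLES=False \
        puckel/docker-airflow webserver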
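
The entrypoint also pip-installs /requirements.txt when that file exists (now resolved with command -v instead of which), so extra Python dependencies can be layered on without rebuilding the image. A sketch, assuming a requirements.txt sits in the current directory:

    $ docker run -d -v $(pwd)/requirements.txt:/requirements.txt \
        puckel/docker-airflow webserver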