Update to Airflow 1.10

Updates the image building to Airflow 1.10, including necessary
configuration changes and a general update of dependencies.

Airflow 1.10 includes many enhancements and bugfixes since 1.9[0].

This change introduces many "unused" configuration parameters to satisfy
Airflow's expectations[1].  It also brings an ugly, but likely harmless,
change to the log output: interleaved newline characters from Airflow steps[2].

Changes to the chart and other dependencies have also been introduced
to match this update.

[0] https://github.com/apache/incubator-airflow/blob/master/CHANGELOG.txt
[1] https://issues.apache.org/jira/browse/AIRFLOW-3099
[2] https://issues.apache.org/jira/browse/AIRFLOW-1917

Change-Id: I179dcf1f0369650b8c4519f704abb7fb495f4248
This commit is contained in:
Bryan Strassner 2018-09-19 18:29:19 -05:00
parent 7fa3136470
commit 44c526af96
9 changed files with 187 additions and 75 deletions

View File

@ -33,10 +33,6 @@
{{- tuple "postgresql_airflow_celery_db" "internal" "user" "postgresql" . | include "helm-toolkit.endpoints.authenticated_endpoint_uri_lookup" | set .Values.conf.airflow.celery "result_backend" | quote | trunc 0 -}}
{{- end -}}
{{- if empty .Values.conf.airflow.celery.celery_result_backend -}}
{{- tuple "postgresql_airflow_celery_db" "internal" "user" "postgresql" . | include "helm-toolkit.endpoints.authenticated_endpoint_uri_lookup" | set .Values.conf.airflow.celery "celery_result_backend" | quote | trunc 0 -}}
{{- end -}}
# Add endpoint URI lookup for RabbitMQ Connection
{{- if empty .Values.conf.airflow.celery.broker_url -}}
{{- tuple "oslo_messaging" "internal" "user" "amqp" . | include "helm-toolkit.endpoints.authenticated_endpoint_uri_lookup" | set .Values.conf.airflow.celery "broker_url" | quote | trunc 0 -}}

View File

@ -423,21 +423,44 @@ conf:
airflow_config_file:
path: /usr/local/airflow/airflow.cfg
airflow:
# NOTE: Airflow 1.10 introduces a need to declare all config options:
# https://issues.apache.org/jira/browse/AIRFLOW-3099
core:
airflow_home: /usr/local/airflow
dags_folder: /usr/local/airflow/dags
base_log_folder: /usr/local/airflow/logs
remote_logging: "False"
remote_log_conn_id:
remote_log_conn_id: ""
remote_base_log_folder: ""
encrypt_s3_logs: "False"
logging_level: "INFO"
log_format: "[%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s"
fab_logging_level: "WARN"
# TODO(bryan-strassner) Use this for custom log formatting!
logging_config_class: ""
# NOTE: Airflow 1.10 introduces extra newline characters between log
# records. Version 1.10.1 should resolve this issue
# https://issues.apache.org/jira/browse/AIRFLOW-1917
#
# NOTE: The log format ends up repeated for each log record that we log
# in our custom operators, once for the logging_mixin class of
# Airflow itself, and once again for the message we want to log.
# E.g.:
# 2018-09-21 19:38:48,950 INFO logging_mixin(95) write - 2018-09-21 19:38:48,950 INFO deployment_configuration_operator(135) get_doc - Deckhand Client acquired
#
# NOTE: Updated from default to match Shipyard logging as much as
# possible without more aggressive techniques
#
log_format: "%%(asctime)s %%(levelname)-8s %%(module)s(%%(lineno)d) %%(funcName)s - %%(message)s"
simple_log_format: "%%(asctime)s %%(levelname)s - %%(message)s"
log_filename_template: "{{ ti.dag_id }}/{{ ti.task_id }}/{{ execution_date.strftime('%%Y-%%m-%%dT%%H:%%M:%%S') }}/{{ try_number }}.log"
log_processor_filename_template: "{{ filename }}.log"
hostname_callable: "socket:getfqdn"
default_timezone: "utc"
executor: "CeleryExecutor"
# sql_alchemy_conn is extracted from endpoints by the configmap template
sql_alchemy_pool_enabled: "True"
sql_alchemy_pool_size: 5
sql_alchemy_pool_recycle: 3600
sql_alchemy_pool_recycle: 1800
sql_alchemy_reconnect_timeout: 30
parallelism: 32
dag_concurrency: 16
@ -449,28 +472,47 @@ conf:
fernet_key: fKp7omMJ4QlTxfZzVBSiyXVgeCK-6epRjGgMpEIsjvs=
donot_pickle: "False"
dagbag_import_timeout: 30
# NOTE: Versions after 1.10 will change this to StandardTaskRunner
task_runner: "BashTaskRunner"
default_impersonation:
security:
default_impersonation: ""
security: ""
secure_mode: "True"
unit_test_mode: "False"
task_log_reader: "task"
enable_xcom_pickling: "False"
killed_task_cleanup_time: 60
dag_run_conf_overrides_params: "False"
cli:
api_client: airflow.api.client.local_client
# endpoint_url is extracted from endpoints by the configmap template
api:
auth_backend: airflow.api.auth.backend.default
lineage:
# Shipyard is not using this
backend: ""
atlas:
# Shipyard is not using this
sasl_enabled: "False"
host: ""
port: 21000
username: ""
password: ""
operators:
default_owner: "Airflow"
default_cpus: 1
default_ram: 512
default_disk: 512
default_igpus: 0
default_gpus: 0
hive:
# Shipyard is not using this
default_hive_mapred_queue: ""
webserver:
# base_url is extracted from endpoints by the configmap template
web_server_host: 0.0.0.0
web_server_port: 8080
web_server_ssl_cert:
web_server_ssl_key:
web_server_ssl_cert: ""
web_server_ssl_key: ""
web_server_master_timeout: 120
web_server_worker_timeout: 120
worker_refresh_batch_size: 1
worker_refresh_interval: 30
@ -486,62 +528,130 @@ conf:
dag_default_view: "tree"
dag_orientation: "LR"
demo_mode: "False"
log_fetch_timeout_sec: 5
log_fetch_timeout_sec: 10
hide_paused_dags_by_default: "False"
page_size: 100
rbac: "False"
navbar_color: "#007A87"
default_dag_run_display_number: 25
email:
# Shipyard is not using this
email_backend: airflow.utils.send_email_smtp
smtp:
# Shipyard is not using this
smtp_host: "localhost"
smtp_starttls: "True"
smtp_ssl: "False"
smtp_user: "airflow"
smtp_port: 25
smtp_password: "airflow"
smtp_port: 25
smtp_mail_from: airflow@airflow.local
celery:
celery_app_name: airflow.executors.celery_executor
worker_concurrency: 16
worker_log_server_port: 8793
# broker_url is extracted from endpoints by the configmap template
# result_backend is extracted from endpoints by the configmap template
flower_host: 0.0.0.0
flower_url_prefix:
flower_port: 5555
default_queue: "default"
celery_config_options: airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG
# TODO: Enable this for security
ssl_active: "False"
ssl_key: ""
ssl_cert: ""
ssl_cacert: ""
celery_broker_transport_options:
visibility_timeout: 21600
ssl_active: "False"
ssl_key:
ssl_cert:
ssl_cacert:
dask:
  # Shipyard is not using this: the executor configured in this chart is
  # CeleryExecutor, not DaskExecutor. The section is declared only because
  # Airflow 1.10 needs all config options to be present:
  # https://issues.apache.org/jira/browse/AIRFLOW-3099
  #
  # NOTE: the key must be spelled `cluster_address` to match Airflow's
  #       [dask] option; a misspelled key renders an unknown option into
  #       airflow.cfg and leaves the real one undeclared.
  cluster_address: "127.0.0.1:8786"
  # TLS settings for a secured Dask scheduler; left empty (unused).
  tls_ca: ""
  tls_cert: ""
  tls_key: ""
scheduler:
job_heartbeat_sec: 5
scheduler_heartbeat_sec: 5
run_duration: -1
min_file_process_interval: 0
min_file_parsing_loop_time: 1
dag_dir_list_interval: 300
print_stats_interval: 30
print_stats_interval: 120
child_process_log_directory: /usr/local/airflow/logs/scheduler
scheduler_zombie_task_threshold: 300
catchup_by_default: "True"
max_tis_per_query: 0
max_tis_per_query: 512
statsd_on: "False"
statsd_host: "localhost"
statsd_port: 8125
statsd_prefix: "airflow"
max_threads: 2
authenticate: "False"
ldap:
uri:
user_filter:
user_name_attr:
group_member_attr:
superuser_filter:
data_profiler_filter:
bind_user:
bind_password:
basedn:
cacert:
search_scope:
# Shipyard is not using this
uri: ""
user_filter: "objectClass=*"
user_name_attr: "uid"
group_member_attr: "memberOf"
superuser_filter: ""
data_profiler_filter: ""
bind_user: "cn=Manager,dc=example,dc=com"
bind_password: "insecure"
basedn: "dc=example,dc=com"
cacert: "/etc/ca/ldap_ca.crt"
search_scope: "LEVEL"
mesos:
# Shipyard is not using this
master: ""
framework_name: ""
task_cpu: ""
task_memory: ""
checkpoint: ""
authenticate: ""
kerberos:
# Shipyard is not using this
ccache: ""
principal: ""
reinit_frequency: ""
kinit_path: ""
keytab: ""
github_enterprise:
# Shipyard is not using this
api_rev: v3
admin:
hide_sensitive_variable_fields: "True"
elasticsearch:
# Shipyard is not using this
elasticsearch_host: ""
elasticsearch_log_id_template: ""
elasticsearch_end_of_log_mark: ""
kubernetes:
# Shipyard is not using this (maybe future for spawning own workers)
worker_container_repository: ""
worker_container_tag: ""
delete_worker_pods: "True"
namespace: "default"
airflow_configmap: ""
dags_volume_subpath: ""
dags_volume_claim: ""
logs_volume_subpath: ""
logs_volume_claim: ""
git_repo: ""
git_branch: ""
git_user: ""
git_password: ""
git_subpath: ""
git_sync_container_repository: ""
git_sync_container_tag: ""
git_sync_init_container_name: ""
worker_service_account_name: ""
image_pull_secrets: ""
gcp_service_account_keys: ""
in_cluster: ""
kubernetes_secrets:
# Shipyard is not using this
# End of Airflow config options
pod:
mounts:
dag_path: /home/ubuntu/workbench/dags

View File

@ -24,6 +24,7 @@ ENV container docker
ENV WEB_PORT 8080
ENV FLOWER_PORT 5555
ENV WORKER_PORT 8793
ENV SLUGIFY_USES_TEXT_UNIDECODE yes
# Expose port for applications
EXPOSE $WEB_PORT
@ -36,7 +37,7 @@ ARG DEBIAN_FRONTEND=noninteractive
ARG ctx_base=src/bin
# Kubectl version
ARG KUBECTL_VERSION=1.8.6
ARG KUBECTL_VERSION=1.10.2
RUN set -ex && \
apt-get -qq update && \

View File

@ -12,18 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
pytz==2017.2
pyOpenSSL==17.3.0
ndg-httpsclient==0.4.3
pyasn1==0.3.6
psycopg2==2.7.3.1
docker-py==1.6.0
apache-airflow[crypto,celery,postgres,hive,hdfs,jdbc]==1.9.0
python-openstackclient==3.11.0
pytz==2018.5
pyOpenSSL==18.0.0
ndg-httpsclient==0.5.1
pyasn1==0.4.4
psycopg2==2.7.5
docker==3.5.0
apache-airflow[crypto,celery,postgres,hive,hdfs,jdbc]==1.10.0
python-openstackclient==3.16.1
kubernetes>=6.0.0
# Dependencies for other Airship components
git+https://git.openstack.org/openstack/airship-deckhand@177675e96fffcda9799c68bbce831424c1167020#egg=deckhand
git+https://github.com/openstack/airship-drydock.git@8af92eaf29ca0dd6a129748c132ea7f6593eae83#egg=drydock_provisioner
git+https://github.com/openstack/airship-armada.git@7a2ba22ab12a3f1f180b6af4085972ba44853377#egg=armada
# Dependencies for other UCP components
git+https://git.openstack.org/openstack/airship-deckhand@d1701774266069dda6eab9e70c7851da7e97919a#egg=deckhand
git+https://git.openstack.org/openstack/airship-drydock.git@b1d24ad254c04cdbb4dc4e06f2bfe92c266aad70#egg=drydock_provisioner&subdirectory=python
git+https://git.openstack.org/openstack/airship-armada.git@90618f549c1f6d7741b11dc5c4898f3c6d536895#egg=armada

View File

@ -14,27 +14,27 @@
# API requirements
alembic==0.9.5
arrow==0.10.0
alembic==1.0.0
arrow==0.12.1
configparser==3.5.0
cryptography==2.3
falcon==1.2.0
falcon==1.4.1
jsonschema==2.6.0
keystoneauth1==3.4.0
keystonemiddleware==4.21.0
keystoneauth1==3.11.0
keystonemiddleware==5.2.0
networkx==2.1 # common/deployment_group
oslo.config==5.2.0
oslo.policy==1.33.1
oslo.config==6.4.0
oslo.policy==1.38.1
PasteDeploy==1.5.2
psycopg2==2.7.3.1
python-dateutil==2.6.1
python-memcached==1.58
requests==2.18.4
setuptools==39.0.1
SQLAlchemy==1.2.8
psycopg2==2.7.4
python-dateutil==2.7.3
python-memcached==1.59
requests==2.19.1
setuptools==40.4.1
SQLAlchemy==1.2.12
ulid==1.1
uwsgi==2.0.15
uwsgi==2.0.17
# Dependencies for other Airship components
git+https://git.openstack.org/openstack/airship-deckhand@177675e96fffcda9799c68bbce831424c1167020#egg=deckhand
git+https://github.com/openstack/airship-drydock.git@8af92eaf29ca0dd6a129748c132ea7f6593eae83#egg=drydock_provisioner
# Dependencies for other UCP components
git+https://git.openstack.org/openstack/airship-deckhand@d1701774266069dda6eab9e70c7851da7e97919a#egg=deckhand
git+https://git.openstack.org/openstack/airship-drydock.git@b1d24ad254c04cdbb4dc4e06f2bfe92c266aad70#egg=drydock_provisioner&subdirectory=python

View File

@ -3,15 +3,15 @@ pytest==3.4
pytest-cov==2.5.1
responses==0.8.1
testfixtures==5.1.1
apache-airflow[crypto,celery,postgres,hive,hdfs,jdbc]==1.9.0
apache-airflow[crypto,celery,postgres,hive,hdfs,jdbc]==1.10.0
# Testing - Client libraries for Airship components
git+https://github.com/openstack/airship-deckhand.git@3cdf3d2d896d43c6e3bc26170522c3eee0d7158f#egg=deckhand
git+https://github.com/openstack/airship-drydock.git@8af92eaf29ca0dd6a129748c132ea7f6593eae83#egg=drydock_provisioner
git+https://github.com/openstack/airship-armada.git@7a2ba22ab12a3f1f180b6af4085972ba44853377#egg=armada
# Testing - Client libraries for UCP components
git+https://git.openstack.org/openstack/airship-deckhand@d1701774266069dda6eab9e70c7851da7e97919a#egg=deckhand
git+https://git.openstack.org/openstack/airship-drydock.git@b1d24ad254c04cdbb4dc4e06f2bfe92c266aad70#egg=drydock_provisioner&subdirectory=python
git+https://git.openstack.org/openstack/airship-armada.git@90618f549c1f6d7741b11dc5c4898f3c6d536895#egg=armada
# TODO(bryan-strassner) Pin to version for airflow when added to the
# requirements.txt in the airflow images directory
git+https://github.com/openstack/airship-promenade.git@master#egg=promenade
git+https://git.openstack.org/openstack/airship-promenade.git@master#egg=promenade
# Linting

View File

@ -89,7 +89,7 @@ def test_get_revision_id(ti):
shipyard_conf="shipyard.conf",
task_id="t1")
ti = airflow.models.TaskInstance(task=mock.MagicMock(),
execution_date="no")
execution_date=None)
rid = dco.get_revision_id(ti)
assert rid == 2
@ -101,7 +101,8 @@ def test_get_revision_id_none(ti):
dco = DeploymentConfigurationOperator(main_dag_name="main",
shipyard_conf="shipyard.conf",
task_id="t1")
ti = airflow.models.TaskInstance(task=mock.MagicMock(), execution_date="o")
ti = airflow.models.TaskInstance(task=mock.MagicMock(),
execution_date=None)
with pytest.raises(AirflowException) as expected_exc:
rid = dco.get_revision_id(ti)
assert "Design_revision is not set." in str(expected_exc)

View File

@ -9,6 +9,8 @@ deps = -r{toxinidir}/requirements.txt
[testenv:py35]
skipsdist=True
setenv=
SLUGIFY_USES_TEXT_UNIDECODE=yes
commands =
pytest \
{posargs} \
@ -20,6 +22,8 @@ commands =
[testenv:py36]
skipsdist=True
setenv=
SLUGIFY_USES_TEXT_UNIDECODE=yes
commands =
pytest \
{posargs} \

View File

@ -14,11 +14,11 @@
# CLI/Client requirements
arrow==0.10.0
arrow==0.12.1
click==6.7
click-default-group==1.2
keystoneauth1==3.4.0
requests==2.18.4
setuptools==39.0.1
keystoneauth1==3.11.0
requests==2.19.1
setuptools==40.4.1
PTable==0.9.2
pyyaml==3.12
pyyaml==3.13