From 44c526af963c5637479aa4064cb70b289279c194 Mon Sep 17 00:00:00 2001 From: Bryan Strassner Date: Wed, 19 Sep 2018 18:29:19 -0500 Subject: [PATCH] Update to Airflow 1.10 Updates the image building to Airflow 1.10, including necessary configuration changes and a general update of dependencies. Airflow 1.10 includes many enhancements and bugfixes since 1.9 [0]. This change introduces many "unused" configuration parameters to satisfy Airflow's expectations[1]. It also brings an ugly, but likely harmless, change to the log output: interleaved newline characters from Airflow steps[2]. Changes to the chart and other dependencies have also been introduced to match this update. [0] https://github.com/apache/incubator-airflow/blob/master/CHANGELOG.txt [1] https://issues.apache.org/jira/browse/AIRFLOW-3099 [2] https://issues.apache.org/jira/browse/AIRFLOW-1917 Change-Id: I179dcf1f0369650b8c4519f704abb7fb495f4248 --- .../templates/configmap-airflow-etc.yaml | 4 - charts/shipyard/values.yaml | 166 +++++++++++++++--- images/airflow/Dockerfile | 3 +- images/airflow/requirements.txt | 24 +-- src/bin/shipyard_airflow/requirements.txt | 34 ++-- .../shipyard_airflow/test-requirements.txt | 12 +- .../test_deployment_configuration_operator.py | 5 +- src/bin/shipyard_airflow/tox.ini | 4 + src/bin/shipyard_client/requirements.txt | 10 +- 9 files changed, 187 insertions(+), 75 deletions(-) diff --git a/charts/shipyard/templates/configmap-airflow-etc.yaml b/charts/shipyard/templates/configmap-airflow-etc.yaml index cbdf9bb2..6d37fd43 100644 --- a/charts/shipyard/templates/configmap-airflow-etc.yaml +++ b/charts/shipyard/templates/configmap-airflow-etc.yaml @@ -33,10 +33,6 @@ {{- tuple "postgresql_airflow_celery_db" "internal" "user" "postgresql" . 
| include "helm-toolkit.endpoints.authenticated_endpoint_uri_lookup" | set .Values.conf.airflow.celery "result_backend" | quote | trunc 0 -}} {{- end -}} -{{- if empty .Values.conf.airflow.celery.celery_result_backend -}} -{{- tuple "postgresql_airflow_celery_db" "internal" "user" "postgresql" . | include "helm-toolkit.endpoints.authenticated_endpoint_uri_lookup" | set .Values.conf.airflow.celery "celery_result_backend" | quote | trunc 0 -}} -{{- end -}} - # Add endpoint URI lookup for RabbitMQ Connection {{- if empty .Values.conf.airflow.celery.broker_url -}} {{- tuple "oslo_messaging" "internal" "user" "amqp" . | include "helm-toolkit.endpoints.authenticated_endpoint_uri_lookup" | set .Values.conf.airflow.celery "broker_url" | quote | trunc 0 -}} diff --git a/charts/shipyard/values.yaml b/charts/shipyard/values.yaml index dd15babd..d7cb8130 100644 --- a/charts/shipyard/values.yaml +++ b/charts/shipyard/values.yaml @@ -423,21 +423,44 @@ conf: airflow_config_file: path: /usr/local/airflow/airflow.cfg airflow: + # NOTE: Airflow 1.10 introduces a need to declare all config options: + # https://issues.apache.org/jira/browse/AIRFLOW-3099 core: airflow_home: /usr/local/airflow dags_folder: /usr/local/airflow/dags base_log_folder: /usr/local/airflow/logs remote_logging: "False" - remote_log_conn_id: + remote_log_conn_id: "" + remote_base_log_folder: "" encrypt_s3_logs: "False" logging_level: "INFO" - log_format: "[%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s" + fab_logging_level: "WARN" + # TODO(bryan-strassner) Use this for custom log formatting! + logging_config_class: "" + # NOTE: Airflow 1.10 introduces extra newline characters between log + # records. 
Version 1.10.1 should resolve this issue + # https://issues.apache.org/jira/browse/AIRFLOW-1917 + # + # NOTE: The log format ends up repeated for each log record that we log + # in our custom operators, once for the logging_mixin class of + # Airflow itself, and once again for the message we want to log. + # E.g.: + # 2018-09-21 19:38:48,950 INFO logging_mixin(95) write - 2018-09-21 19:38:48,950 INFO deployment_configuration_operator(135) get_doc - Deckhand Client acquired + # + # NOTE: Updated from default to match Shipyard logging as much as + # possible without more aggressive techniques + # + log_format: "%%(asctime)s %%(levelname)-8s %%(module)s(%%(lineno)d) %%(funcName)s - %%(message)s" simple_log_format: "%%(asctime)s %%(levelname)s - %%(message)s" + log_filename_template: "{{ ti.dag_id }}/{{ ti.task_id }}/{{ execution_date.strftime('%%Y-%%m-%%dT%%H:%%M:%%S') }}/{{ try_number }}.log" + log_processor_filename_template: "{{ filename }}.log" + hostname_callable: "socket:getfqdn" default_timezone: "utc" executor: "CeleryExecutor" + # sql_alchemy_conn is extracted from endpoints by the configmap template sql_alchemy_pool_enabled: "True" sql_alchemy_pool_size: 5 - sql_alchemy_pool_recycle: 3600 + sql_alchemy_pool_recycle: 1800 sql_alchemy_reconnect_timeout: 30 parallelism: 32 dag_concurrency: 16 @@ -449,28 +472,47 @@ conf: fernet_key: fKp7omMJ4QlTxfZzVBSiyXVgeCK-6epRjGgMpEIsjvs= donot_pickle: "False" dagbag_import_timeout: 30 + # NOTE: Versions after 1.10 will change this to StandardTaskRunner task_runner: "BashTaskRunner" - default_impersonation: - security: + default_impersonation: "" + security: "" secure_mode: "True" unit_test_mode: "False" + task_log_reader: "task" enable_xcom_pickling: "False" killed_task_cleanup_time: 60 + dag_run_conf_overrides_params: "False" cli: api_client: airflow.api.client.local_client + # endpoint_url is extracted from endpoints by the configmap template api: auth_backend: airflow.api.auth.backend.default + lineage: + # Shipyard is 
not using this + backend: "" + atlas: + # Shipyard is not using this + sasl_enabled: "False" + host: "" + port: 21000 + username: "" + password: "" operators: default_owner: "Airflow" default_cpus: 1 default_ram: 512 default_disk: 512 - default_igpus: 0 + default_gpus: 0 + hive: + # Shipyard is not using this + default_hive_mapred_queue: "" webserver: + # base_url is extracted from endpoints by the configmap template web_server_host: 0.0.0.0 web_server_port: 8080 - web_server_ssl_cert: - web_server_ssl_key: + web_server_ssl_cert: "" + web_server_ssl_key: "" + web_server_master_timeout: 120 web_server_worker_timeout: 120 worker_refresh_batch_size: 1 worker_refresh_interval: 30 @@ -486,62 +528,130 @@ conf: dag_default_view: "tree" dag_orientation: "LR" demo_mode: "False" - log_fetch_timeout_sec: 5 + log_fetch_timeout_sec: 10 hide_paused_dags_by_default: "False" page_size: 100 + rbac: "False" + navbar_color: "#007A87" + default_dag_run_display_number: 25 email: + # Shipyard is not using this email_backend: airflow.utils.send_email_smtp smtp: + # Shipyard is not using this smtp_host: "localhost" smtp_starttls: "True" smtp_ssl: "False" smtp_user: "airflow" - smtp_port: 25 smtp_password: "airflow" + smtp_port: 25 smtp_mail_from: airflow@airflow.local celery: celery_app_name: airflow.executors.celery_executor worker_concurrency: 16 worker_log_server_port: 8793 + # broker_url is extracted from endpoints by the configmap template + # result_backend is extracted from endpoints by the configmap template flower_host: 0.0.0.0 flower_url_prefix: flower_port: 5555 default_queue: "default" celery_config_options: airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG + # TODO: Enable this for security + ssl_active: "False" + ssl_key: "" + ssl_cert: "" + ssl_cacert: "" celery_broker_transport_options: visibility_timeout: 21600 - ssl_active: "False" - ssl_key: - ssl_cert: - ssl_cacert: + dask: + # Shipyard is not using this + cluster_adresss: "127.0.0.1:8786" + tls_ca: "" + 
tls_cert: "" + tls_key: "" scheduler: job_heartbeat_sec: 5 scheduler_heartbeat_sec: 5 run_duration: -1 min_file_process_interval: 0 + min_file_parsing_loop_time: 1 dag_dir_list_interval: 300 - print_stats_interval: 30 + print_stats_interval: 120 child_process_log_directory: /usr/local/airflow/logs/scheduler scheduler_zombie_task_threshold: 300 catchup_by_default: "True" - max_tis_per_query: 0 + max_tis_per_query: 512 + statsd_on: "False" + statsd_host: "localhost" + statsd_port: 8125 + statsd_prefix: "airflow" max_threads: 2 authenticate: "False" ldap: - uri: - user_filter: - user_name_attr: - group_member_attr: - superuser_filter: - data_profiler_filter: - bind_user: - bind_password: - basedn: - cacert: - search_scope: + # Shipyard is not using this + uri: "" + user_filter: "objectClass=*" + user_name_attr: "uid" + group_member_attr: "memberOf" + superuser_filter: "" + data_profiler_filter: "" + bind_user: "cn=Manager,dc=example,dc=com" + bind_password: "insecure" + basedn: "dc=example,dc=com" + cacert: "/etc/ca/ldap_ca.crt" + search_scope: "LEVEL" + mesos: + # Shipyard is not using this + master: "" + framework_name: "" + task_cpu: "" + task_memory: "" + checkpoint: "" + authenticate: "" + kerberos: + # Shipyard is not using this + ccache: "" + principal: "" + reinit_frequency: "" + kinit_path: "" + keytab: "" + github_enterprise: + # Shipyard is not using this + api_rev: v3 admin: hide_sensitive_variable_fields: "True" - + elasticsearch: + # Shipyard is not using this + elasticsearch_host: "" + elasticsearch_log_id_template: "" + elasticsearch_end_of_log_mark: "" + kubernetes: + # Shipyard is not using this (maybe future for spawning own workers) + worker_container_repository: "" + worker_container_tag: "" + delete_worker_pods: "True" + namespace: "default" + airflow_configmap: "" + dags_volume_subpath: "" + dags_volume_claim: "" + logs_volume_subpath: "" + logs_volume_claim: "" + git_repo: "" + git_branch: "" + git_user: "" + git_password: "" + git_subpath: "" 
+ git_sync_container_repository: "" + git_sync_container_tag: "" + git_sync_init_container_name: "" + worker_service_account_name: "" + image_pull_secrets: "" + gcp_service_account_keys: "" + in_cluster: "" + kubernetes_secrets: + #Shipyard is not using this + # End of Airflow config options pod: mounts: dag_path: /home/ubuntu/workbench/dags diff --git a/images/airflow/Dockerfile b/images/airflow/Dockerfile index 7f1cf1ad..6e495379 100644 --- a/images/airflow/Dockerfile +++ b/images/airflow/Dockerfile @@ -24,6 +24,7 @@ ENV container docker ENV WEB_PORT 8080 ENV FLOWER_PORT 5555 ENV WORKER_PORT 8793 +ENV SLUGIFY_USES_TEXT_UNIDECODE yes # Expose port for applications EXPOSE $WEB_PORT @@ -36,7 +37,7 @@ ARG DEBIAN_FRONTEND=noninteractive ARG ctx_base=src/bin # Kubectl version -ARG KUBECTL_VERSION=1.8.6 +ARG KUBECTL_VERSION=1.10.2 RUN set -ex && \ apt-get -qq update && \ diff --git a/images/airflow/requirements.txt b/images/airflow/requirements.txt index c195d35b..23fa3deb 100644 --- a/images/airflow/requirements.txt +++ b/images/airflow/requirements.txt @@ -12,18 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-pytz==2017.2 -pyOpenSSL==17.3.0 -ndg-httpsclient==0.4.3 -pyasn1==0.3.6 -psycopg2==2.7.3.1 -docker-py==1.6.0 -apache-airflow[crypto,celery,postgres,hive,hdfs,jdbc]==1.9.0 -python-openstackclient==3.11.0 +pytz==2018.5 +pyOpenSSL==18.0.0 +ndg-httpsclient==0.5.1 +pyasn1==0.4.4 +psycopg2==2.7.5 +docker==3.5.0 +apache-airflow[crypto,celery,postgres,hive,hdfs,jdbc]==1.10.0 +python-openstackclient==3.16.1 kubernetes>=6.0.0 -# Dependencies for other Airship components -git+https://git.openstack.org/openstack/airship-deckhand@177675e96fffcda9799c68bbce831424c1167020#egg=deckhand -git+https://github.com/openstack/airship-drydock.git@8af92eaf29ca0dd6a129748c132ea7f6593eae83#egg=drydock_provisioner -git+https://github.com/openstack/airship-armada.git@7a2ba22ab12a3f1f180b6af4085972ba44853377#egg=armada +# Dependencies for other UCP components +git+https://git.openstack.org/openstack/airship-deckhand@d1701774266069dda6eab9e70c7851da7e97919a#egg=deckhand +git+https://git.openstack.org/openstack/airship-drydock.git@b1d24ad254c04cdbb4dc4e06f2bfe92c266aad70#egg=drydock_provisioner&subdirectory=python +git+https://git.openstack.org/openstack/airship-armada.git@90618f549c1f6d7741b11dc5c4898f3c6d536895#egg=armada diff --git a/src/bin/shipyard_airflow/requirements.txt b/src/bin/shipyard_airflow/requirements.txt index 8f3906db..1cc3d9ff 100644 --- a/src/bin/shipyard_airflow/requirements.txt +++ b/src/bin/shipyard_airflow/requirements.txt @@ -14,27 +14,27 @@ # API requirements -alembic==0.9.5 -arrow==0.10.0 +alembic==1.0.0 +arrow==0.12.1 configparser==3.5.0 cryptography==2.3 -falcon==1.2.0 +falcon==1.4.1 jsonschema==2.6.0 -keystoneauth1==3.4.0 -keystonemiddleware==4.21.0 +keystoneauth1==3.11.0 +keystonemiddleware==5.2.0 networkx==2.1 # common/deployment_group -oslo.config==5.2.0 -oslo.policy==1.33.1 +oslo.config==6.4.0 +oslo.policy==1.38.1 PasteDeploy==1.5.2 -psycopg2==2.7.3.1 -python-dateutil==2.6.1 -python-memcached==1.58 -requests==2.18.4 -setuptools==39.0.1 -SQLAlchemy==1.2.8 
+psycopg2==2.7.4 +python-dateutil==2.7.3 +python-memcached==1.59 +requests==2.19.1 +setuptools==40.4.1 +SQLAlchemy==1.2.12 ulid==1.1 -uwsgi==2.0.15 +uwsgi==2.0.17 -# Dependencies for other Airship components -git+https://git.openstack.org/openstack/airship-deckhand@177675e96fffcda9799c68bbce831424c1167020#egg=deckhand -git+https://github.com/openstack/airship-drydock.git@8af92eaf29ca0dd6a129748c132ea7f6593eae83#egg=drydock_provisioner +# Dependencies for other UCP components +git+https://git.openstack.org/openstack/airship-deckhand@d1701774266069dda6eab9e70c7851da7e97919a#egg=deckhand +git+https://git.openstack.org/openstack/airship-drydock.git@b1d24ad254c04cdbb4dc4e06f2bfe92c266aad70#egg=drydock_provisioner&subdirectory=python diff --git a/src/bin/shipyard_airflow/test-requirements.txt b/src/bin/shipyard_airflow/test-requirements.txt index 10c0201c..3d2e6400 100644 --- a/src/bin/shipyard_airflow/test-requirements.txt +++ b/src/bin/shipyard_airflow/test-requirements.txt @@ -3,15 +3,15 @@ pytest==3.4 pytest-cov==2.5.1 responses==0.8.1 testfixtures==5.1.1 -apache-airflow[crypto,celery,postgres,hive,hdfs,jdbc]==1.9.0 +apache-airflow[crypto,celery,postgres,hive,hdfs,jdbc]==1.10.0 -# Testing - Client libraries for Airship components -git+https://github.com/openstack/airship-deckhand.git@3cdf3d2d896d43c6e3bc26170522c3eee0d7158f#egg=deckhand -git+https://github.com/openstack/airship-drydock.git@8af92eaf29ca0dd6a129748c132ea7f6593eae83#egg=drydock_provisioner -git+https://github.com/openstack/airship-armada.git@7a2ba22ab12a3f1f180b6af4085972ba44853377#egg=armada +# Testing - Client libraries for UCP components +git+https://git.openstack.org/openstack/airship-deckhand@d1701774266069dda6eab9e70c7851da7e97919a#egg=deckhand +git+https://git.openstack.org/openstack/airship-drydock.git@b1d24ad254c04cdbb4dc4e06f2bfe92c266aad70#egg=drydock_provisioner&subdirectory=python +git+https://git.openstack.org/openstack/airship-armada.git@90618f549c1f6d7741b11dc5c4898f3c6d536895#egg=armada 
# TODO(bryan-strassner) Pin to version for airflow when added to the # requirements.txt in the airflow images directory -git+https://github.com/openstack/airship-promenade.git@master#egg=promenade +git+https://git.openstack.org/openstack/airship-promenade.git@master#egg=promenade # Linting diff --git a/src/bin/shipyard_airflow/tests/unit/plugins/test_deployment_configuration_operator.py b/src/bin/shipyard_airflow/tests/unit/plugins/test_deployment_configuration_operator.py index 62c67c1e..c0b51d03 100644 --- a/src/bin/shipyard_airflow/tests/unit/plugins/test_deployment_configuration_operator.py +++ b/src/bin/shipyard_airflow/tests/unit/plugins/test_deployment_configuration_operator.py @@ -89,7 +89,7 @@ def test_get_revision_id(ti): shipyard_conf="shipyard.conf", task_id="t1") ti = airflow.models.TaskInstance(task=mock.MagicMock(), - execution_date="no") + execution_date=None) rid = dco.get_revision_id(ti) assert rid == 2 @@ -101,7 +101,8 @@ def test_get_revision_id_none(ti): dco = DeploymentConfigurationOperator(main_dag_name="main", shipyard_conf="shipyard.conf", task_id="t1") - ti = airflow.models.TaskInstance(task=mock.MagicMock(), execution_date="o") + ti = airflow.models.TaskInstance(task=mock.MagicMock(), + execution_date=None) with pytest.raises(AirflowException) as expected_exc: rid = dco.get_revision_id(ti) assert "Design_revision is not set." 
in str(expected_exc) diff --git a/src/bin/shipyard_airflow/tox.ini b/src/bin/shipyard_airflow/tox.ini index 41e2e503..67bdfd8d 100644 --- a/src/bin/shipyard_airflow/tox.ini +++ b/src/bin/shipyard_airflow/tox.ini @@ -9,6 +9,8 @@ deps = -r{toxinidir}/requirements.txt [testenv:py35] skipsdist=True +setenv= + SLUGIFY_USES_TEXT_UNIDECODE=yes commands = pytest \ {posargs} \ @@ -20,6 +22,8 @@ commands = [testenv:py36] skipsdist=True +setenv= + SLUGIFY_USES_TEXT_UNIDECODE=yes commands = pytest \ {posargs} \ diff --git a/src/bin/shipyard_client/requirements.txt b/src/bin/shipyard_client/requirements.txt index 1f6ac18d..e7e6a309 100644 --- a/src/bin/shipyard_client/requirements.txt +++ b/src/bin/shipyard_client/requirements.txt @@ -14,11 +14,11 @@ # CLI/Client requirements -arrow==0.10.0 +arrow==0.12.1 click==6.7 click-default-group==1.2 -keystoneauth1==3.4.0 -requests==2.18.4 -setuptools==39.0.1 +keystoneauth1==3.11.0 +requests==2.19.1 +setuptools==40.4.1 PTable==0.9.2 -pyyaml==3.12 +pyyaml==3.13