From 299b4d883daf44e175b2630d75d7e49d5c7e6bcb Mon Sep 17 00:00:00 2001 From: vshshjn7 Date: Sat, 14 Mar 2020 23:10:44 +0530 Subject: [PATCH] [TWTR] CP from 1.10+twtr (#35) * 99ee04017b27a28a6d74ae0c76ebd112b143b94f: CP from 1.10+twtr * 2e01c242ef7959c06a7541d99082ef186cf2d772: CP from 1.10.4 ([TWTR][AIRFLOW-4939] Fixup use of fallback kwarg in conf.getint) * 00cb4aebfc5aaa3abf796a204874363868b00729: [TWTR][AIRFLOW-XXXX] Cherry-pick d4a83bc and bump version (#21) * CP 51b1aee1ea0e6ce9642cfef22ad98d92eb1249ee: Relax version requiremets (#24) * CP 67a4d1c3542102bb9d40f84926e6504616a6ea62: [CX-16266] Change with reference to 1a4c164 commit in open source (#25) * CP 54bd09549cef4379000ab49a34bf17bf8fd8b3a2: [TWTR][CX-17516] Queue tasks already being handled by the executor (#26) * CP 87fcc1c54e409d1958476182c5289cc1ae7b8003: [TWTR][CX-17516] Requeue tasks in the queued state (#27) * CP 98a1ca97769d51afac559cc9c967939cd0954c58: [AIRFLOW-6625] Explicitly log using utf-8 encoding (#7247) (#31) * fixing models.py and jobs.py file fix after CP * fixing typo and version bump Co-authored-by: Vishesh Jain --- airflow/config_templates/default_airflow.cfg | 9 +- airflow/executors/base_executor.py | 4 +- airflow/jobs/scheduler_job.py | 21 +++- airflow/models/baseoperator.py | 2 +- airflow/models/dag.py | 6 ++ airflow/models/taskinstance.py | 1 + airflow/settings.py | 10 +- airflow/utils/dag_processing.py | 5 + airflow/utils/log/file_processor_handler.py | 2 + airflow/utils/log/file_task_handler.py | 2 +- airflow/version.py | 2 +- setup.py | 2 +- tests/test_sqlalchemy_config.py | 106 +++++++++++++++++++ 13 files changed, 160 insertions(+), 12 deletions(-) create mode 100644 tests/test_sqlalchemy_config.py diff --git a/airflow/config_templates/default_airflow.cfg b/airflow/config_templates/default_airflow.cfg index 156dced7be88a..ef286e6790e6a 100644 --- a/airflow/config_templates/default_airflow.cfg +++ b/airflow/config_templates/default_airflow.cfg @@ -126,6 +126,11 @@ sql_alchemy_reconnect_timeout = 300 # SqlAlchemy supports databases with the concept of multiple schemas. sql_alchemy_schema = +# Import path for connect args in SqlAlchemy. Default to an empty dict. +# This is useful when you want to configure db engine args that SqlAlchemy won't parse in connection string. +# See https://docs.sqlalchemy.org/en/13/core/engines.html#sqlalchemy.create_engine.params.connect_args +# sql_alchemy_connect_args = + # The amount of parallelism as a setting to the executor. This defines # the max number of task instances that should run simultaneously # on this airflow installation @@ -562,8 +567,8 @@ basedn = dc=example,dc=com cacert = /etc/ca/ldap_ca.crt search_scope = LEVEL -# This setting allows the use of LDAP servers that either return a -# broken schema, or do not return a schema. +# This setting allows the use of LDAP servers that either return a +# broken schema, or do not return a schema. ignore_malformed_schema = False [mesos] diff --git a/airflow/executors/base_executor.py b/airflow/executors/base_executor.py index 7c93be08a5a00..4787b9f021684 100644 --- a/airflow/executors/base_executor.py +++ b/airflow/executors/base_executor.py @@ -57,9 +57,9 @@ def queue_command(self, simple_task_instance, command, priority=1, queue=None): key = simple_task_instance.key if key not in self.queued_tasks and key not in self.running: self.log.info("Adding to queue: %s", command) - self.queued_tasks[key] = (command, priority, queue, simple_task_instance) else: - self.log.info("could not queue task %s", key) + self.log.info("Adding to queue even though already queued or running {}".format(command, key)) + self.queued_tasks[key] = (command, priority, queue, simple_task_instance) def queue_task_instance( self, diff --git a/airflow/jobs/scheduler_job.py b/airflow/jobs/scheduler_job.py index f2d0ae355c35f..0b3113fe7ae91 100644 --- a/airflow/jobs/scheduler_job.py +++ b/airflow/jobs/scheduler_job.py @@ -125,7 +125,10 @@ def _run_file_processor(result_channel, stdout = StreamLogWriter(log, logging.INFO) stderr = StreamLogWriter(log, logging.WARN) + log.info("Setting log context for file {}".format(file_path)) + # log file created here set_context(log, file_path) + log.info("Successfully set log context for file {}".format(file_path)) setproctitle("airflow scheduler - DagFileProcessor {}".format(file_path)) try: @@ -145,6 +148,7 @@ def _run_file_processor(result_channel, log.info("Started process (PID=%s) to work on %s", os.getpid(), file_path) scheduler_job = SchedulerJob(dag_ids=dag_id_white_list, log=log) + log.info("Processing file {}".format(file_path)) result = scheduler_job.process_file(file_path, pickle_dags) result_channel.send(result) end_time = time.time() @@ -167,6 +171,7 @@ def start(self): """ Launch the process and start processing the DAG. """ + self.log.info("Launching process to process DAG at {}".format(self.file_path)) self._parent_channel, _child_channel = multiprocessing.Pipe() self._process = multiprocessing.Process( target=type(self)._run_file_processor, @@ -983,10 +988,9 @@ def _find_executable_task_instances(self, simple_dag_bag, states, session=None): if self.executor.has_task(task_instance): self.log.debug( - "Not handling task %s as the executor reports it is running", + "Still handling task %s even though as the executor reports it is running", task_instance.key ) - continue executable_tis.append(task_instance) open_slots -= 1 dag_concurrency_map[dag_id] += 1 @@ -1405,8 +1409,17 @@ def _execute_helper(self): State.UP_FOR_RESCHEDULE], State.NONE) + scheduled_dag_ids = ", ".join(simple_dag_bag.dag_ids) + self.log.info('DAGs to be executed: {}'.format(scheduled_dag_ids)) + + # TODO(CX-17516): State.QUEUED has been added here which is a hack as the Celery + # Executor does not reliably enqueue tasks with the my MySQL broker, and we have + # seen tasks hang after they get queued. The effect of this hack is queued tasks + # will constantly be requeued and resent to the executor (Celery). + # This should be removed when we switch away from the MySQL Celery backend. self._execute_task_instances(simple_dag_bag, - (State.SCHEDULED,)) + (State.SCHEDULED, State.QUEUED)) + except Exception as e: self.log.error("Error queuing tasks") self.log.exception(e) @@ -1453,7 +1466,9 @@ def _execute_helper(self): sleep(sleep_length) # Stop any processors + self.log.info("Terminating DAG processors") self.processor_agent.terminate() + self.log.info("All DAG processors terminated") # Verify that all files were processed, and if so, deactivate DAGs that # haven't been touched by the scheduler as they likely have been diff --git a/airflow/models/baseoperator.py b/airflow/models/baseoperator.py index 76fb605f3e03c..2436621173720 100644 --- a/airflow/models/baseoperator.py +++ b/airflow/models/baseoperator.py @@ -349,7 +349,7 @@ def __init__( ) self._schedule_interval = schedule_interval self.retries = retries if retries is not None else \ - configuration.conf.getint('core', 'default_task_retries', fallback=0) + int(configuration.conf.get('core', 'default_task_retries', fallback=0)) self.queue = queue self.pool = pool self.sla = sla diff --git a/airflow/models/dag.py b/airflow/models/dag.py index a621ab88461cd..d3fd6f86a8a28 100644 --- a/airflow/models/dag.py +++ b/airflow/models/dag.py @@ -1289,6 +1289,8 @@ def sync_to_db(self, owner=None, sync_time=None, session=None): :return: None """ + self.log.info("Attempting to sync DAG {} to DB".format(self._dag_id)) + if owner is None: owner = self.owner if sync_time is None: @@ -1312,8 +1314,12 @@ def sync_to_db(self, owner=None, sync_time=None, session=None): session.merge(orm_dag) session.commit() + self.log.info("Synced DAG %s to DB", self._dag_id) + for subdag in self.subdags: + self.log.info("Syncing SubDAG %s", subdag._dag_id) subdag.sync_to_db(owner=owner, sync_time=sync_time, session=session) + self.log.info("Successfully synced SubDAG %s", subdag._dag_id) @staticmethod @provide_session diff --git a/airflow/models/taskinstance.py b/airflow/models/taskinstance.py index 6dfdc80a3a278..dc5ef238aeb07 100644 --- a/airflow/models/taskinstance.py +++ b/airflow/models/taskinstance.py @@ -941,6 +941,7 @@ def signal_handler(signum, frame): self.refresh_from_db(lock_for_update=True) self.state = State.SUCCESS except AirflowSkipException as e: + # This change is in reference to [AIRFLOW-5653][CX-16266] # log only if exception has any arguments to prevent log flooding if e.args: self.log.info(e) diff --git a/airflow/settings.py b/airflow/settings.py index 3c5eeb4a554dc..8b4574a3b95ad 100644 --- a/airflow/settings.py +++ b/airflow/settings.py @@ -36,6 +36,7 @@ from airflow.configuration import conf, AIRFLOW_HOME, WEBSERVER_CONFIG # NOQA F401 from airflow.contrib.kubernetes.pod import Pod from airflow.logging_config import configure_logging +from airflow.utils.module_loading import import_string from airflow.utils.sqlalchemy import setup_event_handlers log = logging.getLogger(__name__) @@ -222,7 +223,14 @@ def configure_orm(disable_connection_pool=False): # For Python2 we get back a newstr and need a str engine_args['encoding'] = engine_args['encoding'].__str__() - engine = create_engine(SQL_ALCHEMY_CONN, **engine_args) + if conf.has_option('core', 'sql_alchemy_connect_args'): + connect_args = import_string( + conf.get('core', 'sql_alchemy_connect_args') + ) + else: + connect_args = {} + + engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args) reconnect_timeout = conf.getint('core', 'SQL_ALCHEMY_RECONNECT_TIMEOUT') setup_event_handlers(engine, reconnect_timeout) diff --git a/airflow/utils/dag_processing.py b/airflow/utils/dag_processing.py index 061fac2cb0ac1..124636b615fb1 100644 --- a/airflow/utils/dag_processing.py +++ b/airflow/utils/dag_processing.py @@ -1209,10 +1209,15 @@ def heartbeat(self): processor.pid, file_path ) self._processors[file_path] = processor + + self.log.info("Number of active file processors: {}".format(len(self._processors))) # Update heartbeat count. self._run_count[self._heart_beat_key] += 1 + simple_dag_ids = ", ".join([simple_dag.dag_id for simple_dag in simple_dags]) + self.log.info("Processed DAGs: {}".format(simple_dag_ids)) + return simple_dags def _kill_timed_out_processors(self): diff --git a/airflow/utils/log/file_processor_handler.py b/airflow/utils/log/file_processor_handler.py index 098b018d47275..c0adef0a111fe 100644 --- a/airflow/utils/log/file_processor_handler.py +++ b/airflow/utils/log/file_processor_handler.py @@ -138,12 +138,14 @@ def _init_file(self, filename): if not os.path.exists(directory): try: + logging.info("Creating directory {}".format(directory)) os.makedirs(directory) except OSError: if not os.path.isdir(directory): raise if not os.path.exists(full_path): + logging.info("Creating file {}".format(full_path)) open(full_path, "a").close() return full_path diff --git a/airflow/utils/log/file_task_handler.py b/airflow/utils/log/file_task_handler.py index 111f26c279cf0..c1c32658a164a 100644 --- a/airflow/utils/log/file_task_handler.py +++ b/airflow/utils/log/file_task_handler.py @@ -52,7 +52,7 @@ def set_context(self, ti): :param ti: task instance object """ local_loc = self._init_file(ti) - self.handler = logging.FileHandler(local_loc) + self.handler = logging.FileHandler(local_loc, encoding='utf-8') self.handler.setFormatter(self.formatter) self.handler.setLevel(self.level) diff --git a/airflow/version.py b/airflow/version.py index a0a223840b9c2..e107956fb9fd0 100644 --- a/airflow/version.py +++ b/airflow/version.py @@ -18,4 +18,4 @@ # under the License. # -version = '1.10.4+twtr2' +version = '1.10.4+twtr3' diff --git a/setup.py b/setup.py index ed6548fc787ce..c6b690528cf84 100644 --- a/setup.py +++ b/setup.py @@ -339,7 +339,7 @@ def do_setup(): 'flask-login>=0.3, <0.5', 'flask-swagger==0.2.13', 'flask-wtf>=0.14.2, <0.15', - 'funcsigs==1.0.0', + 'funcsigs==1.0.0, <2.0.0', 'future>=0.16.0, <0.17', 'gunicorn>=19.5.0, <20.0', 'iso8601>=0.1.12', diff --git a/tests/test_sqlalchemy_config.py b/tests/test_sqlalchemy_config.py new file mode 100644 index 0000000000000..b908ce2537889 --- /dev/null +++ b/tests/test_sqlalchemy_config.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import unittest + +from sqlalchemy.pool import NullPool + +from airflow import settings +from tests.compat import patch +from tests.test_utils.config import conf_vars + +SQL_ALCHEMY_CONNECT_ARGS = { + 'test': 43503, + 'dict': { + 'is': 1, + 'supported': 'too' + } +} + + +class TestSqlAlchemySettings(unittest.TestCase): + def setUp(self): + self.old_engine = settings.engine + self.old_session = settings.Session + self.old_conn = settings.SQL_ALCHEMY_CONN + settings.SQL_ALCHEMY_CONN = "mysql+foobar://user:pass@host/dbname?inline=param&another=param" + + def tearDown(self): + settings.engine = self.old_engine + settings.Session = self.old_session + settings.SQL_ALCHEMY_CONN = self.old_conn + + @patch('airflow.settings.setup_event_handlers') + @patch('airflow.settings.scoped_session') + @patch('airflow.settings.sessionmaker') + @patch('airflow.settings.create_engine') + def test_configure_orm_with_default_values(self, + mock_create_engine, + mock_sessionmaker, + mock_scoped_session, + mock_setup_event_handlers): + settings.configure_orm() + mock_create_engine.assert_called_once_with( + settings.SQL_ALCHEMY_CONN, + connect_args={}, + encoding='utf-8', + max_overflow=10, + pool_pre_ping=True, + pool_recycle=1800, + pool_size=5 + ) + + @patch('airflow.settings.setup_event_handlers') + @patch('airflow.settings.scoped_session') + @patch('airflow.settings.sessionmaker') + @patch('airflow.settings.create_engine') + def test_sql_alchemy_connect_args(self, + mock_create_engine, + mock_sessionmaker, + mock_scoped_session, + mock_setup_event_handlers): + config = { + ('core', 'sql_alchemy_connect_args'): 'tests.test_sqlalchemy_config.SQL_ALCHEMY_CONNECT_ARGS', + ('core', 'sql_alchemy_pool_enabled'): 'False' + } + with conf_vars(config): + settings.configure_orm() + mock_create_engine.assert_called_once_with( + settings.SQL_ALCHEMY_CONN, + connect_args=SQL_ALCHEMY_CONNECT_ARGS, + poolclass=NullPool, + encoding='utf-8' + ) + + @patch('airflow.settings.setup_event_handlers') + @patch('airflow.settings.scoped_session') + @patch('airflow.settings.sessionmaker') + @patch('airflow.settings.create_engine') + def test_sql_alchemy_invalid_connect_args(self, + mock_create_engine, + mock_sessionmaker, + mock_scoped_session, + mock_setup_event_handlers): + config = { + ('core', 'sql_alchemy_connect_args'): 'does.not.exist', + ('core', 'sql_alchemy_pool_enabled'): 'False' + } + with self.assertRaises(ImportError): + with conf_vars(config): + settings.configure_orm()