Skip to content

Commit

Permalink
[TWTR] CP from 1.10+twtr (twitter-forks#35)
Browse files Browse the repository at this point in the history
* 99ee040: CP from 1.10+twtr

* 2e01c24: CP from 1.10.4 ([TWTR][AIRFLOW-4939] Fixup use of fallback kwarg in conf.getint)

* 00cb4ae: [TWTR][AIRFLOW-XXXX] Cherry-pick d4a83bc and bump version (twitter-forks#21)

* CP 51b1aee: Relax version requiremets (twitter-forks#24)

* CP 67a4d1c: [CX-16266] Change with reference to 1a4c164 commit in open source (twitter-forks#25)

* CP 54bd095: [TWTR][CX-17516] Queue tasks already being handled by the executor (twitter-forks#26)

* CP 87fcc1c: [TWTR][CX-17516] Requeue tasks in the queued state (twitter-forks#27)

* CP 98a1ca9: [AIRFLOW-6625] Explicitly log using utf-8 encoding (apache#7247) (twitter-forks#31)

* fixing models.py and jobs.py file fix after CP

* fixing typo and version bump

Co-authored-by: Vishesh Jain <[email protected]>
  • Loading branch information
vshshjn7 and Vishesh Jain authored Mar 14, 2020
1 parent 4ce8d4c commit 299b4d8
Show file tree
Hide file tree
Showing 13 changed files with 160 additions and 12 deletions.
9 changes: 7 additions & 2 deletions airflow/config_templates/default_airflow.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ sql_alchemy_reconnect_timeout = 300
# SqlAlchemy supports databases with the concept of multiple schemas.
sql_alchemy_schema =

# Import path for connect args in SqlAlchemy. Default to an empty dict.
# This is useful when you want to configure db engine args that SqlAlchemy won't parse in connection string.
# See https://docs.sqlalchemy.org/en/13/core/engines.html#sqlalchemy.create_engine.params.connect_args
# sql_alchemy_connect_args =

# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
Expand Down Expand Up @@ -562,8 +567,8 @@ basedn = dc=example,dc=com
cacert = /etc/ca/ldap_ca.crt
search_scope = LEVEL

# This setting allows the use of LDAP servers that either return a
# broken schema, or do not return a schema.
# This setting allows the use of LDAP servers that either return a
# broken schema, or do not return a schema.
ignore_malformed_schema = False

[mesos]
Expand Down
4 changes: 2 additions & 2 deletions airflow/executors/base_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def queue_command(self, simple_task_instance, command, priority=1, queue=None):
key = simple_task_instance.key
if key not in self.queued_tasks and key not in self.running:
self.log.info("Adding to queue: %s", command)
self.queued_tasks[key] = (command, priority, queue, simple_task_instance)
else:
self.log.info("could not queue task %s", key)
self.log.info("Adding to queue even though already queued or running {}".format(command, key))
self.queued_tasks[key] = (command, priority, queue, simple_task_instance)

def queue_task_instance(
self,
Expand Down
21 changes: 18 additions & 3 deletions airflow/jobs/scheduler_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,10 @@ def _run_file_processor(result_channel,
stdout = StreamLogWriter(log, logging.INFO)
stderr = StreamLogWriter(log, logging.WARN)

log.info("Setting log context for file {}".format(file_path))
# log file created here
set_context(log, file_path)
log.info("Successfully set log context for file {}".format(file_path))
setproctitle("airflow scheduler - DagFileProcessor {}".format(file_path))

try:
Expand All @@ -145,6 +148,7 @@ def _run_file_processor(result_channel,
log.info("Started process (PID=%s) to work on %s",
os.getpid(), file_path)
scheduler_job = SchedulerJob(dag_ids=dag_id_white_list, log=log)
log.info("Processing file {}".format(file_path))
result = scheduler_job.process_file(file_path, pickle_dags)
result_channel.send(result)
end_time = time.time()
Expand All @@ -167,6 +171,7 @@ def start(self):
"""
Launch the process and start processing the DAG.
"""
self.log.info("Launching process to process DAG at {}".format(self.file_path))
self._parent_channel, _child_channel = multiprocessing.Pipe()
self._process = multiprocessing.Process(
target=type(self)._run_file_processor,
Expand Down Expand Up @@ -983,10 +988,9 @@ def _find_executable_task_instances(self, simple_dag_bag, states, session=None):

if self.executor.has_task(task_instance):
self.log.debug(
"Not handling task %s as the executor reports it is running",
"Still handling task %s even though as the executor reports it is running",
task_instance.key
)
continue
executable_tis.append(task_instance)
open_slots -= 1
dag_concurrency_map[dag_id] += 1
Expand Down Expand Up @@ -1405,8 +1409,17 @@ def _execute_helper(self):
State.UP_FOR_RESCHEDULE],
State.NONE)

scheduled_dag_ids = ", ".join(simple_dag_bag.dag_ids)
self.log.info('DAGs to be executed: {}'.format(scheduled_dag_ids))

# TODO(CX-17516): State.QUEUED has been added here which is a hack as the Celery
# Executor does not reliably enqueue tasks with the my MySQL broker, and we have
# seen tasks hang after they get queued. The effect of this hack is queued tasks
# will constantly be requeued and resent to the executor (Celery).
# This should be removed when we switch away from the MySQL Celery backend.
self._execute_task_instances(simple_dag_bag,
(State.SCHEDULED,))
(State.SCHEDULED, State.QUEUED))

except Exception as e:
self.log.error("Error queuing tasks")
self.log.exception(e)
Expand Down Expand Up @@ -1453,7 +1466,9 @@ def _execute_helper(self):
sleep(sleep_length)

# Stop any processors
self.log.info("Terminating DAG processors")
self.processor_agent.terminate()
self.log.info("All DAG processors terminated")

# Verify that all files were processed, and if so, deactivate DAGs that
# haven't been touched by the scheduler as they likely have been
Expand Down
2 changes: 1 addition & 1 deletion airflow/models/baseoperator.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ def __init__(
)
self._schedule_interval = schedule_interval
self.retries = retries if retries is not None else \
configuration.conf.getint('core', 'default_task_retries', fallback=0)
int(configuration.conf.get('core', 'default_task_retries', fallback=0))
self.queue = queue
self.pool = pool
self.sla = sla
Expand Down
6 changes: 6 additions & 0 deletions airflow/models/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -1289,6 +1289,8 @@ def sync_to_db(self, owner=None, sync_time=None, session=None):
:return: None
"""

self.log.info("Attempting to sync DAG {} to DB".format(self._dag_id))

if owner is None:
owner = self.owner
if sync_time is None:
Expand All @@ -1312,8 +1314,12 @@ def sync_to_db(self, owner=None, sync_time=None, session=None):
session.merge(orm_dag)
session.commit()

self.log.info("Synced DAG %s to DB", self._dag_id)

for subdag in self.subdags:
self.log.info("Syncing SubDAG %s", subdag._dag_id)
subdag.sync_to_db(owner=owner, sync_time=sync_time, session=session)
self.log.info("Successfully synced SubDAG %s", subdag._dag_id)

@staticmethod
@provide_session
Expand Down
1 change: 1 addition & 0 deletions airflow/models/taskinstance.py
Original file line number Diff line number Diff line change
Expand Up @@ -941,6 +941,7 @@ def signal_handler(signum, frame):
self.refresh_from_db(lock_for_update=True)
self.state = State.SUCCESS
except AirflowSkipException as e:
# This change is in reference to [AIRFLOW-5653][CX-16266]
# log only if exception has any arguments to prevent log flooding
if e.args:
self.log.info(e)
Expand Down
10 changes: 9 additions & 1 deletion airflow/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from airflow.configuration import conf, AIRFLOW_HOME, WEBSERVER_CONFIG # NOQA F401
from airflow.contrib.kubernetes.pod import Pod
from airflow.logging_config import configure_logging
from airflow.utils.module_loading import import_string
from airflow.utils.sqlalchemy import setup_event_handlers

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -222,7 +223,14 @@ def configure_orm(disable_connection_pool=False):
# For Python2 we get back a newstr and need a str
engine_args['encoding'] = engine_args['encoding'].__str__()

engine = create_engine(SQL_ALCHEMY_CONN, **engine_args)
if conf.has_option('core', 'sql_alchemy_connect_args'):
connect_args = import_string(
conf.get('core', 'sql_alchemy_connect_args')
)
else:
connect_args = {}

engine = create_engine(SQL_ALCHEMY_CONN, connect_args=connect_args, **engine_args)
reconnect_timeout = conf.getint('core', 'SQL_ALCHEMY_RECONNECT_TIMEOUT')
setup_event_handlers(engine, reconnect_timeout)

Expand Down
5 changes: 5 additions & 0 deletions airflow/utils/dag_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1209,10 +1209,15 @@ def heartbeat(self):
processor.pid, file_path
)
self._processors[file_path] = processor

self.log.info("Number of active file processors: {}".format(len(self._processors)))

# Update heartbeat count.
self._run_count[self._heart_beat_key] += 1

simple_dag_ids = ", ".join([simple_dag.dag_id for simple_dag in simple_dags])
self.log.info("Processed DAGs: {}".format(simple_dag_ids))

return simple_dags

def _kill_timed_out_processors(self):
Expand Down
2 changes: 2 additions & 0 deletions airflow/utils/log/file_processor_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,14 @@ def _init_file(self, filename):

if not os.path.exists(directory):
try:
logging.info("Creating directory {}".format(directory))
os.makedirs(directory)
except OSError:
if not os.path.isdir(directory):
raise

if not os.path.exists(full_path):
logging.info("Creating file {}".format(full_path))
open(full_path, "a").close()

return full_path
2 changes: 1 addition & 1 deletion airflow/utils/log/file_task_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def set_context(self, ti):
:param ti: task instance object
"""
local_loc = self._init_file(ti)
self.handler = logging.FileHandler(local_loc)
self.handler = logging.FileHandler(local_loc, encoding='utf-8')
self.handler.setFormatter(self.formatter)
self.handler.setLevel(self.level)

Expand Down
2 changes: 1 addition & 1 deletion airflow/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@
# under the License.
#

version = '1.10.4+twtr2'
version = '1.10.4+twtr3'
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def do_setup():
'flask-login>=0.3, <0.5',
'flask-swagger==0.2.13',
'flask-wtf>=0.14.2, <0.15',
'funcsigs==1.0.0',
'funcsigs==1.0.0, <2.0.0',
'future>=0.16.0, <0.17',
'gunicorn>=19.5.0, <20.0',
'iso8601>=0.1.12',
Expand Down
106 changes: 106 additions & 0 deletions tests/test_sqlalchemy_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import unittest

from sqlalchemy.pool import NullPool

from airflow import settings
from tests.compat import patch
from tests.test_utils.config import conf_vars

SQL_ALCHEMY_CONNECT_ARGS = {
'test': 43503,
'dict': {
'is': 1,
'supported': 'too'
}
}


class TestSqlAlchemySettings(unittest.TestCase):
def setUp(self):
self.old_engine = settings.engine
self.old_session = settings.Session
self.old_conn = settings.SQL_ALCHEMY_CONN
settings.SQL_ALCHEMY_CONN = "mysql+foobar://user:pass@host/dbname?inline=param&another=param"

def tearDown(self):
settings.engine = self.old_engine
settings.Session = self.old_session
settings.SQL_ALCHEMY_CONN = self.old_conn

@patch('airflow.settings.setup_event_handlers')
@patch('airflow.settings.scoped_session')
@patch('airflow.settings.sessionmaker')
@patch('airflow.settings.create_engine')
def test_configure_orm_with_default_values(self,
mock_create_engine,
mock_sessionmaker,
mock_scoped_session,
mock_setup_event_handlers):
settings.configure_orm()
mock_create_engine.assert_called_once_with(
settings.SQL_ALCHEMY_CONN,
connect_args={},
encoding='utf-8',
max_overflow=10,
pool_pre_ping=True,
pool_recycle=1800,
pool_size=5
)

@patch('airflow.settings.setup_event_handlers')
@patch('airflow.settings.scoped_session')
@patch('airflow.settings.sessionmaker')
@patch('airflow.settings.create_engine')
def test_sql_alchemy_connect_args(self,
mock_create_engine,
mock_sessionmaker,
mock_scoped_session,
mock_setup_event_handlers):
config = {
('core', 'sql_alchemy_connect_args'): 'tests.test_sqlalchemy_config.SQL_ALCHEMY_CONNECT_ARGS',
('core', 'sql_alchemy_pool_enabled'): 'False'
}
with conf_vars(config):
settings.configure_orm()
mock_create_engine.assert_called_once_with(
settings.SQL_ALCHEMY_CONN,
connect_args=SQL_ALCHEMY_CONNECT_ARGS,
poolclass=NullPool,
encoding='utf-8'
)

@patch('airflow.settings.setup_event_handlers')
@patch('airflow.settings.scoped_session')
@patch('airflow.settings.sessionmaker')
@patch('airflow.settings.create_engine')
def test_sql_alchemy_invalid_connect_args(self,
mock_create_engine,
mock_sessionmaker,
mock_scoped_session,
mock_setup_event_handlers):
config = {
('core', 'sql_alchemy_connect_args'): 'does.not.exist',
('core', 'sql_alchemy_pool_enabled'): 'False'
}
with self.assertRaises(ImportError):
with conf_vars(config):
settings.configure_orm()

0 comments on commit 299b4d8

Please sign in to comment.