Replace rabbitmq with redis #6034
@@ -5,10 +5,12 @@
 import inspect
 import logging
 import time
+import uuid
 import urllib.parse

 # Django
 from django.conf import settings
+from django.core.cache import cache
 from django.db import connection
 from django.db.models.fields import FieldDoesNotExist
 from django.db.models.fields.related import OneToOneRel
@@ -973,14 +975,19 @@ def post(self, request, *args, **kwargs):
         if hasattr(new_obj, 'admin_role') and request.user not in new_obj.admin_role.members.all():
             new_obj.admin_role.members.add(request.user)
         if sub_objs:
+            # store the copied object dict into memcached, because it's
+            # often too large for postgres' notification bus
+            # (which has a default maximum message size of 8k)
+            key = 'deep-copy-{}'.format(str(uuid.uuid4()))
+            cache.set(key, sub_objs, timeout=3600)
             permission_check_func = None
             if hasattr(type(self), 'deep_copy_permission_check_func'):
                 permission_check_func = (
                     type(self).__module__, type(self).__name__, 'deep_copy_permission_check_func'
                 )
             trigger_delayed_deep_copy(
                 self.model.__module__, self.model.__name__,
-                obj.pk, new_obj.pk, request.user.pk, sub_objs,
+                obj.pk, new_obj.pk, request.user.pk, key,
                 permission_check_func=permission_check_func
             )
         serializer = self._get_copy_return_serializer(new_obj)

Review comment:
Is that not a problem for other stuff, like job events? And how does this work on a cluster where a different instance may be performing the deep copy?

Reply:
This limitation is specific to only dispatcher tasks (which use postgres listen/notify). Job events use redis.

Reply:
See: https://github.com/ansible/awx/pull/6034/files#diff-9d4ea1dd908b35fb92eaede4bd10bb46R2856 We made it so that this task is routed to dispatchers on the same instance as uwsgi (they don't get distributed to other nodes).
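The shape of this workaround is: park the oversized payload in the shared cache, and pass only a short key through the dispatcher's postgres NOTIFY channel. A minimal sketch of the pattern follows; the stash side mirrors the diff above, while fetch_payload is a hypothetical stand-in for what the dispatcher task would do when it picks the copy back up.

import uuid

from django.core.cache import cache


def stash_payload(sub_objs, timeout=3600):
    # Producer (web request): the serialized sub-object graph can exceed
    # postgres' ~8k NOTIFY payload limit, so only this short key is sent
    # through the dispatcher.
    key = 'deep-copy-{}'.format(uuid.uuid4())
    cache.set(key, sub_objs, timeout=timeout)
    return key


def fetch_payload(key):
    # Consumer (dispatcher task, hypothetical stand-in): resolve the key
    # back into the payload before performing the deep copy.
    sub_objs = cache.get(key)
    cache.delete(key)  # one-shot handoff; don't leave stale blobs behind
    return sub_objs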
New file (+166 lines):
@@ -0,0 +1,166 @@
import datetime
import asyncio
import logging
import aioredis
import redis
import re

from prometheus_client import (
    generate_latest,
    Gauge,
    Counter,
    Enum,
    CollectorRegistry,
)

from django.conf import settings


BROADCAST_WEBSOCKET_REDIS_KEY_NAME = 'broadcast_websocket_stats'


logger = logging.getLogger('awx.main.analytics.broadcast_websocket')


def dt_to_seconds(dt):
    return int((dt - datetime.datetime(1970, 1, 1)).total_seconds())


def now_seconds():
    return dt_to_seconds(datetime.datetime.now())
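A quick check of the helper above:

# One day after the epoch is exactly 86400 seconds.
assert dt_to_seconds(datetime.datetime(1970, 1, 2)) == 86400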
# Second granularity; Per-minute
class FixedSlidingWindow():
    def __init__(self, start_time=None):
        self.buckets = dict()
        self.start_time = start_time or now_seconds()

    def cleanup(self, now_bucket=None):
        now_bucket = now_bucket or now_seconds()
        if self.start_time + 60 <= now_bucket:
            # Slide the window start so it covers only the most recent minute
            self.start_time = now_bucket - 60 + 1

            # Delete old entries
            for k in list(self.buckets.keys()):
                if k < self.start_time:
                    del self.buckets[k]

    def record(self, ts=None):
        ts = ts or datetime.datetime.now()
        now_bucket = int((ts - datetime.datetime(1970, 1, 1)).total_seconds())

        val = self.buckets.get(now_bucket, 0)
        self.buckets[now_bucket] = val + 1

        self.cleanup(now_bucket)

    def render(self):
        self.cleanup()
        return sum(self.buckets.values()) or 0
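As a quick illustration of the per-minute window (assuming the class above is importable): recording three messages and rendering immediately counts all three; once their second-granularity buckets age past 60 seconds, cleanup() drops them and the count decays back to 0.

win = FixedSlidingWindow()

for _ in range(3):
    win.record()     # three messages arrive "now"

print(win.render())  # -> 3 (all buckets fall inside the last minute)
# Re-rendering more than 60 seconds later would print 0.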
class BroadcastWebsocketStatsManager():
    def __init__(self, event_loop, local_hostname):
        self._local_hostname = local_hostname
        self._event_loop = event_loop
        self._stats = dict()
        self._redis_key = BROADCAST_WEBSOCKET_REDIS_KEY_NAME

    def new_remote_host_stats(self, remote_hostname):
        self._stats[remote_hostname] = BroadcastWebsocketStats(self._local_hostname,
                                                               remote_hostname)
        return self._stats[remote_hostname]

    def delete_remote_host_stats(self, remote_hostname):
        del self._stats[remote_hostname]

    async def run_loop(self):
        try:
            redis_conn = await aioredis.create_redis_pool(settings.BROKER_URL)
            while True:
                stats_data_str = ''.join(stat.serialize() for stat in self._stats.values())
                await redis_conn.set(self._redis_key, stats_data_str)

                await asyncio.sleep(settings.BROADCAST_WEBSOCKET_STATS_POLL_RATE_SECONDS)
        except Exception as e:
            logger.warning(e)
            await asyncio.sleep(settings.BROADCAST_WEBSOCKET_STATS_POLL_RATE_SECONDS)
            self.start()

    def start(self):
        self.async_task = self._event_loop.create_task(self.run_loop())
        return self.async_task

    @classmethod
    def get_stats_sync(cls):
        '''
        Stringified version of all the stats
        '''
        redis_conn = redis.Redis.from_url(settings.BROKER_URL)
        return redis_conn.get(BROADCAST_WEBSOCKET_REDIS_KEY_NAME)
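run_loop() publishes the concatenated Prometheus text for every tracked remote host into a single redis key on each poll interval, and get_stats_sync() is the blocking counterpart for reading it back. A minimal consumer sketch, assuming the Django settings referenced above are configured:

# Read the latest stats blob that run_loop() wrote into redis,
# e.g. from a synchronous metrics endpoint. Returns bytes or None.
stats_blob = BroadcastWebsocketStatsManager.get_stats_sync()
if stats_blob is not None:
    print(stats_blob.decode('utf-8'))  # Prometheus text exposition format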
class BroadcastWebsocketStats():
    def __init__(self, local_hostname, remote_hostname):
        self._local_hostname = local_hostname
        self._remote_hostname = remote_hostname
        self._registry = CollectorRegistry()
        self._connection_established_ts = None

        # TODO: More robust replacement
        self.name = self.safe_name(self._local_hostname)
        self.remote_name = self.safe_name(self._remote_hostname)

        self._messages_received_total = Counter(f'awx_{self.remote_name}_messages_received_total',
                                                'Number of messages received, to be forwarded, by the broadcast websocket system',
                                                registry=self._registry)
        self._messages_received = Gauge(f'awx_{self.remote_name}_messages_received',
                                        'Number of forwarded messages received by the broadcast websocket system, for the duration of the current connection',
                                        registry=self._registry)
        self._connection = Enum(f'awx_{self.remote_name}_connection',
                                'Websocket broadcast connection',
                                states=['disconnected', 'connected'],
                                registry=self._registry)
        self._connection_start = Gauge(f'awx_{self.remote_name}_connection_start',
                                       'Time the connection was established',
                                       registry=self._registry)

        self._messages_received_per_minute = Gauge(f'awx_{self.remote_name}_messages_received_per_minute',
                                                   'Messages received per minute',
                                                   registry=self._registry)
        self._internal_messages_received_per_minute = FixedSlidingWindow()

    def safe_name(self, s):
        # Replace all non alpha-numeric characters with _
        return re.sub('[^0-9a-zA-Z]+', '_', s)

    def unregister(self):
        # CollectorRegistry.unregister() takes the collector object, not its name
        self._registry.unregister(self._messages_received)
        self._registry.unregister(self._connection)

    def record_message_received(self):
        self._internal_messages_received_per_minute.record()
        self._messages_received.inc()
        self._messages_received_total.inc()

    def record_connection_established(self):
        self._connection.state('connected')
        self._connection_start.set_to_current_time()
        # Remember when the connection came up so duration can be computed
        self._connection_established_ts = datetime.datetime.now()
        self._messages_received.set(0)

    def record_connection_lost(self):
        self._connection.state('disconnected')

    def get_connection_duration(self):
        return (datetime.datetime.now() - self._connection_established_ts).total_seconds()

    def render(self):
        msgs_per_min = self._internal_messages_received_per_minute.render()
        self._messages_received_per_minute.set(msgs_per_min)

    def serialize(self):
        self.render()

        registry_data = generate_latest(self._registry).decode('UTF-8')
        return registry_data
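Tying the per-host pieces together: each BroadcastWebsocketStats owns its own CollectorRegistry, and its serialize() output is what run_loop() above concatenates into redis. A rough usage sketch (the hostnames are made up):

stats = BroadcastWebsocketStats('awx-1.example.org', 'awx-2.example.org')

stats.record_connection_established()
stats.record_message_received()
stats.record_message_received()

# Emits Prometheus text format; expect a sample along the lines of
# awx_awx_2_example_org_messages_received_total 2.0
print(stats.serialize())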
Review comment:
Did you make sure nobody on the eng team is still using this target? I wouldn't think so, given that we're now running everything in supervisord, which is much more flexible.

Reply:
Hey, I still use this occasionally :(