Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MA-157 Move context back into properties. #6172

Merged
merged 1 commit into from
Dec 10, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 23 additions & 19 deletions common/djangoapps/track/views/segmentio.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
ERROR_MISSING_USER_ID = 'Required user_id missing from context'
ERROR_USER_NOT_EXIST = 'Specified user does not exist'
ERROR_INVALID_USER_ID = 'Unable to parse userId as an integer'
ERROR_MISSING_DATA = 'The data field must be specified in the properties dictionary'
ERROR_MISSING_NAME = 'The name field must be specified in the properties dictionary'
ERROR_MISSING_TIMESTAMP = 'Required timestamp field not found'
ERROR_MISSING_RECEIVED_AT = 'Required receivedAt field not found'
Expand Down Expand Up @@ -119,7 +120,7 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements
full_segment_event = request.json

# We mostly care about the properties
segment_event = full_segment_event.get('properties', {})
segment_properties = full_segment_event.get('properties', {})

# Start with the context provided by segment.io in the "client" field if it exists
# We should tightly control which fields actually get included in the event emitted.
Expand All @@ -136,32 +137,38 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements
else:
context['event_source'] = event_source

# Ignore types that are unsupported
if 'name' not in segment_properties:
raise EventValidationError(ERROR_MISSING_NAME)

if 'data' not in segment_properties:
raise EventValidationError(ERROR_MISSING_DATA)

# Ignore event types and names that are unsupported
segment_event_type = full_segment_event.get('type')
segment_event_name = segment_properties['name']
allowed_types = [a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_ALLOWED_TYPES', [])]
if not segment_event_type or segment_event_type.lower() not in allowed_types:
disallowed_substring_names = [
a.lower() for a in getattr(settings, 'TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES', [])
]
if (
not segment_event_type or
(segment_event_type.lower() not in allowed_types) or
any(disallowed_subs_name in segment_event_name.lower() for disallowed_subs_name in disallowed_substring_names)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer we had a different error message for this since I see the wrong type and a filtered name as two very different problems.

In fact, we may not want to log any warnings for an ignored name since that will likely cause a lot of log noise.

We don't expect to receive events with an unhandled type very often.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mulby There will also be many events with types other than "track". In particular, all "screen" and "identity" event types are ignored and are listed in the logs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So should we not raise an error for disallowed Types as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mulby As discussed, we'll keep this as an error message for now, in case it will be helpful to debug any webhook issues on prod. We will remove this error message a few weeks after enabling on prod.

):
raise EventValidationError(WARNING_IGNORED_TYPE)

if segment_context:
# copy required fields from segment's context dict to our custom context dict
for context_field_name, segment_context_field_name in [
('course_id', 'course_id'),
('open_in_browser_url', 'open_in_browser_url'),
('agent', 'userAgent')
]:
if segment_context_field_name in segment_context:
context[context_field_name] = segment_context[segment_context_field_name]

# copy the entire segment's context dict as a sub-field of our custom context dict
context['client'] = dict(segment_context)
context['agent'] = segment_context.get('userAgent', '')

# remove duplicate and unnecessary fields from our copy
for field in ('traits', 'integrations', 'userAgent', 'course_id', 'open_in_browser_url'):
for field in ('traits', 'integrations', 'userAgent'):
if field in context['client']:
del context['client'][field]

# Overlay any context provided in the properties
context.update(segment_event.get('context', {}))
context.update(segment_properties.get('context', {}))

user_id = full_segment_event.get('userId')
if not user_id:
Expand Down Expand Up @@ -203,13 +210,10 @@ def track_segmentio_event(request): # pylint: disable=too-many-statements
else:
raise EventValidationError(ERROR_MISSING_RECEIVED_AT)

if 'name' not in segment_event:
raise EventValidationError(ERROR_MISSING_NAME)

context['ip'] = segment_event.get('context', {}).get('ip', '')
context['ip'] = segment_properties.get('context', {}).get('ip', '')

with tracker.get_tracker().context('edx.segmentio', context):
tracker.emit(segment_event['name'], segment_event.get('data', {}))
tracker.emit(segment_event_name, segment_properties.get('data', {}))


def parse_iso8601_timestamp(timestamp):
Expand Down
39 changes: 28 additions & 11 deletions common/djangoapps/track/views/tests/test_segmentio.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def test_decorated(self, *args, **kwargs):
TRACKING_SEGMENTIO_WEBHOOK_SECRET=SECRET,
TRACKING_IGNORE_URL_PATTERNS=[ENDPOINT],
TRACKING_SEGMENTIO_ALLOWED_TYPES=['track'],
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES=['.bi.'],
TRACKING_SEGMENTIO_SOURCE_MAP={'test-app': 'mobile'},
EVENT_TRACKING_PROCESSORS=MOBILE_SHIM_PROCESSOR,
)
Expand Down Expand Up @@ -97,6 +98,11 @@ def create_request(self, key=None, **kwargs):
def test_segmentio_ignore_actions(self, action):
self.post_segmentio_event(action=action)

@data('edx.bi.some_name', 'EDX.BI.CAPITAL_NAME')
@expect_failure_with_message(segmentio.WARNING_IGNORED_TYPE)
def test_segmentio_ignore_names(self, name):
self.post_segmentio_event(name=name)

def post_segmentio_event(self, **kwargs):
"""Post a fake segment.io event to the view that processes it"""
request = self.create_request(
Expand All @@ -114,6 +120,9 @@ def create_segmentio_event(self, **kwargs):
"properties": {
'name': kwargs.get('name', str(sentinel.name)),
'data': kwargs.get('data', {}),
'context': {
'course_id': kwargs.get('course_id') or '',
}
},
"channel": 'server',
"context": {
Expand All @@ -122,7 +131,6 @@ def create_segmentio_event(self, **kwargs):
"version": "unknown"
},
'userAgent': str(sentinel.user_agent),
'course_id': kwargs.get('course_id') or '',
},
"receivedAt": "2014-08-27T16:33:39.100Z",
"timestamp": "2014-08-27T16:33:39.215Z",
Expand All @@ -139,10 +147,7 @@ def create_segmentio_event(self, **kwargs):
}

if 'context' in kwargs:
sample_event['context'].update(kwargs['context'])

if 'open_in_browser_url' in kwargs:
sample_event['context']['open_in_browser_url'] = kwargs['open_in_browser_url']
sample_event['properties']['context'].update(kwargs['context'])

return sample_event

Expand Down Expand Up @@ -231,6 +236,18 @@ def test_missing_name(self):

segmentio.track_segmentio_event(request)

@expect_failure_with_message(segmentio.ERROR_MISSING_DATA)
def test_missing_data(self):
sample_event_raw = self.create_segmentio_event()
del sample_event_raw['properties']['data']
request = self.create_request(
data=json.dumps(sample_event_raw),
content_type='application/json'
)
User.objects.create(pk=USER_ID, username=str(sentinel.username))

segmentio.track_segmentio_event(request)

@expect_failure_with_message(segmentio.ERROR_MISSING_TIMESTAMP)
def test_missing_timestamp(self):
sample_event_raw = self.create_event_without_fields('timestamp')
Expand Down Expand Up @@ -305,8 +322,8 @@ def test_video_event(self, name, event_type):
data=self.create_segmentio_event_json(
name=name,
data=input_payload,
open_in_browser_url='https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity/2',
context={
'open_in_browser_url': 'https://testserver/courses/foo/bar/baz/courseware/Week_1/Activity/2',
'course_id': course_id,
'application': {
'name': 'edx.mobileapp.android',
Expand Down Expand Up @@ -344,11 +361,11 @@ def test_video_event(self, name, event_type):
'name': 'test-app',
'version': 'unknown'
},
'application': {
'name': 'edx.mobileapp.android',
'version': '29',
'component': 'videoplayer'
}
},
'application': {
'name': 'edx.mobileapp.android',
'version': '29',
'component': 'videoplayer'
},
'received_at': datetime.strptime("2014-08-27T16:33:39.100Z", "%Y-%m-%dT%H:%M:%S.%fZ"),
},
Expand Down
9 changes: 8 additions & 1 deletion lms/envs/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,8 +397,15 @@
# Event tracking
TRACKING_BACKENDS.update(AUTH_TOKENS.get("TRACKING_BACKENDS", {}))
EVENT_TRACKING_BACKENDS.update(AUTH_TOKENS.get("EVENT_TRACKING_BACKENDS", {}))
TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get("TRACKING_SEGMENTIO_WEBHOOK_SECRET", TRACKING_SEGMENTIO_WEBHOOK_SECRET)
TRACKING_SEGMENTIO_WEBHOOK_SECRET = AUTH_TOKENS.get(
"TRACKING_SEGMENTIO_WEBHOOK_SECRET",
TRACKING_SEGMENTIO_WEBHOOK_SECRET
)
TRACKING_SEGMENTIO_ALLOWED_TYPES = ENV_TOKENS.get("TRACKING_SEGMENTIO_ALLOWED_TYPES", TRACKING_SEGMENTIO_ALLOWED_TYPES)
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES = ENV_TOKENS.get(
"TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES",
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES
)
TRACKING_SEGMENTIO_SOURCE_MAP = ENV_TOKENS.get("TRACKING_SEGMENTIO_SOURCE_MAP", TRACKING_SEGMENTIO_SOURCE_MAP)

# Student identity verification settings
Expand Down
1 change: 1 addition & 0 deletions lms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,7 @@

TRACKING_SEGMENTIO_WEBHOOK_SECRET = None
TRACKING_SEGMENTIO_ALLOWED_TYPES = ['track']
TRACKING_SEGMENTIO_DISALLOWED_SUBSTRING_NAMES = ['.bi.']
TRACKING_SEGMENTIO_SOURCE_MAP = {
'analytics-android': 'mobile',
'analytics-ios': 'mobile',
Expand Down