Skip to content

Commit

Permalink
GDPR: Anonymize IP Address in tracking logs (openedx#160)
Browse files Browse the repository at this point in the history
identical to PR openedx#113 

* Adds support for obfuscating the last two octets of the IP address in tracking logs

* Cleaning up change

* Fixing a couple edge case events

* live oauth support (openedx#115)

* Added settings import to fix tests

* Fixes tests

* Remove vscode folder

* Fixing quality errors

* simpler obfuscation method

* cleanup2 simpler pr template (openedx#130)

* simpler pr template

* more accurate phrasing

* more specific phrasing
  • Loading branch information
sdolenc authored Dec 9, 2017
1 parent d164af1 commit fb215f5
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 13 deletions.
6 changes: 1 addition & 5 deletions common/djangoapps/track/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,11 +202,7 @@ def get_username(self, request):

def get_request_ip_address(self, request):
    """
    Gets the IP address of the request.

    Delegates to ``views.get_request_ip`` so the tracking context records
    the same address that the tracking views record for the request.

    Returns an empty string when no address is available.
    """
    ip_address = views.get_request_ip(request)
    # This method has always returned '' rather than None when no address
    # could be determined; keep that guarantee for the tracking context.
    return ip_address if ip_address is not None else ''

def process_response(self, _request, response):
"""Exit the context if it exists."""
Expand Down
18 changes: 18 additions & 0 deletions common/djangoapps/track/tests/test_middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,24 @@ def test_default_request_context(self):
'client_id': None,
})

@override_settings(FEATURES={'SQUELCH_PII_IN_LOGS': True})
def test_default_request_context_without_personal_data(self):
    """With SQUELCH_PII_IN_LOGS enabled, the context IP is reduced to two octets."""
    context = self.get_context_for_path('/courses/')
    # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(context, {
        'accept_language': '',
        'referer': '',
        'user_id': '',
        'session': '',
        'username': '',
        'ip': '127.0.x.x',
        'host': 'testserver',
        'agent': '',
        'path': '/courses/',
        'org_id': '',
        'course_id': '',
        'client_id': None,
    })

def test_no_forward_for_header_ip_context(self):
request = self.request_factory.get('/courses/')
remote_addr = '127.0.0.1'
Expand Down
53 changes: 46 additions & 7 deletions common/djangoapps/track/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pytz

from django.conf import settings
from django.contrib.auth.decorators import login_required
from django.http import HttpResponse
from django.shortcuts import redirect
Expand Down Expand Up @@ -32,12 +33,50 @@ def _get_request_header(request, header_name, default=''):
return default


def get_request_ip(request):
    """Return the IP for ``request`` via ``_get_request_ip`` with its default fallback."""
    request_ip = _get_request_ip(request)
    return request_ip


def _get_request_ip(request, default=''):
"""Helper method to get IP from a request's META dict, if present."""
if request is not None and hasattr(request, 'META'):
return get_ip(request)
else:
return default
"""
Helper method to get IP from a request's META dict, if present.
If SQUELCH_PII_IN_LOGS is True:
Anonymize the ip address to the first two octets.
This gives enough data to be useful for Analysis
without explicitly identifying user
e.g. 127.0.0.1 => 127.0.X.X
"""
if request is None:
return default

def _anonymize_if_needed(ip_address_str):
if settings.FEATURES.get('SQUELCH_PII_IN_LOGS', False):
return _get_anonymous_ip(ip_address_str)
else:
return ip_address_str

if hasattr(request, 'META'):
ip_address = get_ip(request)
request_ip = _anonymize_if_needed(ip_address)
elif request.get('ip'):
request_ip = _anonymize_if_needed(request.get('ip'))
else:
request_ip = default

return request_ip


def _get_anonymous_ip(ip_address_str):
"""Helper method to obbuscate the last two octets of an ip address"""
try:
ip_comps = ip_address_str.split('.')
first = '.'.join(ip_comps[0:2])
ip_address = '{}.x.x'.format(first)
except:
ip_address = 'unknown'

return ip_address


def _get_request_value(request, value_name, default=''):
Expand Down Expand Up @@ -99,7 +138,7 @@ def server_track(request, event_type, event, page=None):
# define output:
event = {
"username": username,
"ip": _get_request_ip(request),
"ip": get_request_ip(request),
"referer": _get_request_header(request, 'HTTP_REFERER'),
"accept_language": _get_request_header(request, 'HTTP_ACCEPT_LANGUAGE'),
"event_source": "server",
Expand Down Expand Up @@ -149,7 +188,7 @@ def task_track(request_info, task_info, event_type, event, page=None):
with eventtracker.get_tracker().context('edx.course.task', contexts.course_context_from_url(page)):
event = {
"username": request_info.get('username', 'unknown'),
"ip": request_info.get('ip', 'unknown'),
"ip": _get_request_ip(request_info, 'unknown'),
"event_source": "task",
"event_type": event_type,
"event": full_event,
Expand Down
59 changes: 58 additions & 1 deletion common/djangoapps/track/views/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from django.contrib.auth.models import User
from django.test.client import RequestFactory
from django.test.utils import override_settings
from django.test.utils import modify_settings, override_settings

from track import views
from track.middleware import TrackMiddleware
Expand Down Expand Up @@ -171,6 +171,27 @@ def test_server_track(self):
}
self.assert_mock_tracker_call_matches(expected_event)

@override_settings(FEATURES={'SQUELCH_PII_IN_LOGS': True})
def test_server_track_without_personal_data(self):
    """server_track emits an event whose IP is anonymized when PII squelching is on."""
    course_request = self.request_factory.get(self.path_with_course)
    event_type = str(sentinel.event_type)

    views.server_track(course_request, event_type, '{}')

    self.assert_mock_tracker_call_matches({
        'username': 'anonymous',
        'ip': '127.0.x.x',
        'host': 'testserver',
        'agent': '',
        'referer': '',
        'accept_language': '',
        'event_source': 'server',
        'event_type': event_type,
        'event': '{}',
        'page': None,
        'time': FROZEN_TIME,
        'context': {},
    })

def assert_mock_tracker_call_matches(self, expected_event):
self.assertEqual(len(self.mock_tracker.send.mock_calls), 1)
actual_event = self.mock_tracker.send.mock_calls[0][1][0]
Expand Down Expand Up @@ -297,3 +318,39 @@ def test_task_track(self):
},
}
self.assert_mock_tracker_call_matches(expected_event)

@override_settings(FEATURES={'SQUELCH_PII_IN_LOGS': True})
def test_task_track_without_personal_data(self):
    """task_track anonymizes the IP taken from request_info when PII squelching is on."""
    # Named distinctly from the existing test_task_track: reusing that name
    # would rebind it and the original test would silently stop running.
    request_info = {
        'accept_language': '',
        'referer': '',
        'username': 'anonymous',
        'ip': '127.0.0.1',
        'agent': 'agent',
        'host': 'testserver',
    }

    task_info = {
        sentinel.task_key: sentinel.task_value
    }
    expected_event_data = dict(task_info)
    expected_event_data.update(self.event)

    views.task_track(request_info, task_info, str(sentinel.event_type), self.event)

    expected_event = {
        'username': 'anonymous',
        'ip': '127.0.x.x',
        'event_source': 'task',
        'event_type': str(sentinel.event_type),
        'event': expected_event_data,
        'agent': 'agent',
        'page': None,
        'time': FROZEN_TIME,
        'host': 'testserver',
        'context': {
            'course_id': '',
            'org_id': ''
        },
    }
    self.assert_mock_tracker_call_matches(expected_event)

0 comments on commit fb215f5

Please sign in to comment.