-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4131 from davidfischer/server-side-analytics
Server side analytics
- Loading branch information
Showing
9 changed files
with
2,649 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
"""App init""" | ||
|
||
default_app_config = 'readthedocs.analytics.apps.AnalyticsAppConfig' # noqa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
"""Django app config for the analytics app.""" | ||
|
||
from __future__ import absolute_import | ||
from django.apps import AppConfig | ||
|
||
|
||
class AnalyticsAppConfig(AppConfig):

    """Django application configuration for the analytics app."""

    # Human-readable label for the app
    verbose_name = 'Analytics'
    # Dotted Python path of the application this config belongs to
    name = 'readthedocs.analytics'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
"""Tasks for Read the Docs' analytics""" | ||
|
||
from __future__ import absolute_import | ||
|
||
from django.conf import settings | ||
|
||
from readthedocs import get_version | ||
from readthedocs.worker import app | ||
|
||
from .utils import send_to_analytics | ||
|
||
|
||
# Fields merged into every hit that the tasks in this module send to
# Google Analytics. Task-specific fields are set first, these defaults are
# applied on top, and caller-supplied keyword arguments are applied last.
DEFAULT_PARAMETERS = {
    # Protocol settings
    'v': '1',  # analytics protocol version (always 1)
    'aip': '1',  # ask GA to anonymize the IP
    'tid': settings.GLOBAL_ANALYTICS_CODE,  # analytics code from settings

    # User data (None unless the caller supplies a value)
    'uip': None,  # user IP address
    'ua': None,  # user agent

    # Application info
    'an': 'Read the Docs',  # app name
    'av': get_version(),  # app version
}
|
||
|
||
@app.task(queue='web')
def analytics_pageview(url, title=None, **kwargs):
    """
    Record a pageview hit in Google Analytics.

    The hit is assembled from the pageview fields, then
    ``DEFAULT_PARAMETERS``, then ``kwargs``; later sources override
    earlier ones on key collisions.

    :see: https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters

    :param url: the URL of the pageview
    :param title: the title of the page being viewed
    :param kwargs: extra pageview parameters to send to GA
    """
    payload = dict(
        t='pageview',
        dl=url,  # URL of the pageview (required)
        dt=title,  # Title of the page
    )
    for overrides in (DEFAULT_PARAMETERS, kwargs):
        payload.update(overrides)
    send_to_analytics(payload)
|
||
|
||
@app.task(queue='web')
def analytics_event(event_category, event_action, event_label=None, event_value=None, **kwargs):
    """
    Record an event hit in Google Analytics.

    The hit is assembled from the event fields, then
    ``DEFAULT_PARAMETERS``, then ``kwargs``; later sources override
    earlier ones on key collisions.

    :see: https://developers.google.com/analytics/devguides/collection/protocol/v1/devguide#event

    :param event_category: the category of the event
    :param event_action: the action of the event (use action words like "click")
    :param event_label: an optional string to differentiate the event
    :param event_value: an optional numeric value for the event
    :param kwargs: extra event parameters to send to GA
    """
    payload = dict(
        t='event',  # GA event - don't change
        ec=event_category,  # Event category (required)
        ea=event_action,  # Event action (required)
        el=event_label,  # Event label
        ev=event_value,  # Event value (numeric)
    )
    for overrides in (DEFAULT_PARAMETERS, kwargs):
        payload.update(overrides)
    send_to_analytics(payload)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from __future__ import absolute_import, unicode_literals | ||
|
||
from django.test import TestCase | ||
|
||
from .utils import anonymize_ip_address, anonymize_user_agent | ||
|
||
|
||
class UtilsTests(TestCase):

    """Tests for the IP address and user agent anonymization helpers."""

    def test_anonymize_ip(self):
        # (raw address, expected address with the last 2 bytes zeroed)
        cases = (
            ('127.0.0.1', '127.0.0.0'),
            ('127.127.127.127', '127.127.0.0'),
            (
                '3ffe:1900:4545:3:200:f8ff:fe21:67cf',
                '3ffe:1900:4545:3:200:f8ff:fe21:0',
            ),
            ('fe80::200:f8ff:fe21:67cf', 'fe80::200:f8ff:fe21:0'),
        )
        for raw_ip, expected in cases:
            self.assertEqual(anonymize_ip_address(raw_ip), expected)

    def test_anonymize_ua(self):
        # A recognized browser/OS pair is passed through untouched
        common_ua = (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/66.0.3359.181 Safari/537.36'
        )
        self.assertEqual(anonymize_user_agent(common_ua), common_ua)

        # An unrecognized user agent is replaced by a generic label
        rare_ua = 'Some rare user agent'
        self.assertEqual(anonymize_user_agent(rare_ua), 'Rare user agent')
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
"""Utilities related to analytics""" | ||
|
||
from __future__ import absolute_import, unicode_literals | ||
import hashlib | ||
import logging | ||
|
||
from django.conf import settings | ||
from django.utils.encoding import force_text, force_bytes | ||
from django.utils.crypto import get_random_string | ||
import requests | ||
from user_agents import parse | ||
|
||
try: | ||
# Python 3.3+ only | ||
import ipaddress | ||
except ImportError: | ||
from .vendor import ipaddress | ||
|
||
log = logging.getLogger(__name__) # noqa | ||
|
||
|
||
def get_client_ip(request):
    """
    Get the client IP address for a request.

    The first entry of the ``X-Forwarded-For`` header is preferred
    (eg. "X-Forwarded-For: client, proxy1, proxy2"). When that header is
    missing, or its first entry is blank, ``REMOTE_ADDR`` is used instead.

    :param request: an object whose ``META`` mapping holds the request headers
    :returns: the IP address string, or ``None`` if neither value is present
    """
    # ``or ''`` also covers a header that is present but set to None
    forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') or ''

    # Strip surrounding whitespace so a blank first entry (eg. " , proxy1")
    # is not mistaken for an address
    original_ip = forwarded_for.split(',')[0].strip()
    if original_ip:
        return original_ip

    return request.META.get('REMOTE_ADDR')
|
||
|
||
def anonymize_ip_address(ip_address):
    """
    Anonymize an IP address by zeroing its last 2 bytes.

    :param ip_address: an IPv4 or IPv6 address (converted with ``force_text``)
    :returns: the anonymized address in compressed string form,
        or ``None`` if the input is not a valid IP address
    """
    # Clears the low 16 bits. The mask is 128 bits wide so it covers IPv6;
    # its extra high bits have no effect on a 32-bit IPv4 value.
    ip_mask = int('0xFFFFFFFFFFFFFFFFFFFFFFFFFFFF0000', 16)

    try:
        ip_obj = ipaddress.ip_address(force_text(ip_address))
    except ValueError:
        return None

    # Rebuild with the parsed address's own class. ``ipaddress.ip_address``
    # treats any integer below 2**32 as IPv4, which would turn a low IPv6
    # address such as "::1" into the IPv4 address "0.0.0.0".
    anonymized_ip = ip_obj.__class__(int(ip_obj) & ip_mask)
    return anonymized_ip.compressed
|
||
|
||
def anonymize_user_agent(user_agent):
    """
    Replace rare user agents with a generic label.

    A user agent counts as rare when the parser reports either its browser
    family or its OS family as ``'Other'``.

    :param user_agent: the raw user agent string
    :returns: ``user_agent`` unchanged, or ``'Rare user agent'`` if it is rare
    """
    details = parse(user_agent)
    if details.browser.family != 'Other' and details.os.family != 'Other':
        # Both families were recognized, so the string is common enough to keep
        return user_agent

    return 'Rare user agent'
|
||
|
||
def send_to_analytics(data):
    """
    Send data to Google Analytics.

    ``data`` is modified in place before sending: a ``uid`` is added when
    both ``uip`` and ``ua`` are set, and ``uip``/``ua`` are replaced by
    their anonymized forms when present. Timeouts and error responses are
    logged as warnings rather than raised.

    :param data: dict of analytics parameters to POST
    """
    # Derive the ID from the raw values, before they are anonymized below
    if data.get('uip') and data.get('ua'):
        data['uid'] = generate_client_id(data['uip'], data['ua'])

    if 'uip' in data:
        # Anonymize IP address if applicable
        data['uip'] = anonymize_ip_address(data['uip'])

    if 'ua' in data:
        # Anonymize user agent if it is rare
        data['ua'] = anonymize_user_agent(data['ua'])

    log.debug('Sending data to analytics: %s', data)
    try:
        resp = requests.post(
            'https://www.google-analytics.com/collect',
            data=data,
            timeout=3,  # seconds
        )
    except requests.Timeout:
        log.warning('Timeout sending to Google Analytics')
        return

    # Check ``ok`` explicitly: a Response is falsy for 4xx/5xx statuses, so
    # a truthiness test on the response itself would hide these failures.
    if not resp.ok:
        log.warning('Unknown error sending to Google Analytics')
|
||
|
||
def generate_client_id(ip_address, user_agent):
    """
    Create an advertising ID

    The ID is a SHA-256 hex digest of the secret key, a fixed salt and
    whichever of ``ip_address``/``user_agent`` are set, so a user with the
    same IP and the same UA is treated as the same user for analytics
    purposes.

    :param ip_address: the user's IP address (may be empty)
    :param user_agent: the user's user agent (may be empty)
    :returns: the hex digest string
    """
    digest = hashlib.sha256(force_bytes(settings.SECRET_KEY))
    digest.update(b'advertising-client-id')  # salt

    known_parts = [part for part in (ip_address, user_agent) if part]
    for part in known_parts:
        digest.update(force_bytes(part))

    if not known_parts:
        # With neither an IP nor a UA there is no way to distinguish
        # sessions, so mix in a random string to treat every user differently
        digest.update(force_bytes(get_random_string()))

    return digest.hexdigest()
Empty file.
Oops, something went wrong.