Skip to content

Commit

Permalink
Merge pull request #4131 from davidfischer/server-side-analytics
Browse files Browse the repository at this point in the history
Server side analytics
  • Loading branch information
ericholscher authored Jun 7, 2018
2 parents ba901b8 + b425ce9 commit 732b37c
Show file tree
Hide file tree
Showing 9 changed files with 2,649 additions and 0 deletions.
3 changes: 3 additions & 0 deletions readthedocs/analytics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""App init"""

# Dotted path to the AppConfig subclass that configures this app
# (the class is defined in ``readthedocs/analytics/apps.py``).
default_app_config = 'readthedocs.analytics.apps.AnalyticsAppConfig'  # noqa
12 changes: 12 additions & 0 deletions readthedocs/analytics/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""Django app config for the analytics app."""

from __future__ import absolute_import
from django.apps import AppConfig


class AnalyticsAppConfig(AppConfig):

    """Django application configuration for the analytics app."""

    # Human readable label for the app
    verbose_name = 'Analytics'

    # Full Python path of the application package
    name = 'readthedocs.analytics'
69 changes: 69 additions & 0 deletions readthedocs/analytics/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Tasks for Read the Docs' analytics"""

from __future__ import absolute_import

from django.conf import settings

from readthedocs import get_version
from readthedocs.worker import app

from .utils import send_to_analytics


# Parameters merged into every hit built by the analytics tasks below.
# The tasks apply their own ``**kwargs`` after these defaults, so callers
# can override any entry (typically ``uip`` and ``ua``).
DEFAULT_PARAMETERS = {
    'v': '1',  # analytics version (always 1)
    'aip': '1',  # anonymize IP
    'tid': settings.GLOBAL_ANALYTICS_CODE,  # taken from the Django settings

    # User data (empty by default; expected to be supplied per hit)
    'uip': None,  # User IP address
    'ua': None,  # User agent

    # Application info
    'an': 'Read the Docs',  # App name
    'av': get_version(),  # App version
}


@app.task(queue='web')
def analytics_pageview(url, title=None, **kwargs):
    """
    Report a single pageview hit to Google Analytics.

    The hit is built from the pageview fields, then ``DEFAULT_PARAMETERS``,
    then ``kwargs`` (later sources win on key conflicts).

    :see: https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters

    :param url: the URL of the pageview
    :param title: the title of the page being viewed
    :param kwargs: extra pageview parameters to send to GA
    """
    payload = {}
    payload['t'] = 'pageview'
    payload['dl'] = url  # URL of the pageview (required)
    payload['dt'] = title  # Title of the page

    # Defaults first, then caller overrides
    for extra_params in (DEFAULT_PARAMETERS, kwargs):
        payload.update(extra_params)

    send_to_analytics(payload)


@app.task(queue='web')
def analytics_event(event_category, event_action, event_label=None, event_value=None, **kwargs):
    """
    Report a single event hit to Google Analytics.

    The hit is built from the event fields, then ``DEFAULT_PARAMETERS``,
    then ``kwargs`` (later sources win on key conflicts).

    :see: https://developers.google.com/analytics/devguides/collection/protocol/v1/devguide#event

    :param event_category: the category of the event
    :param event_action: the action of the event (use action words like "click")
    :param event_label: an optional string to differentiate the event
    :param event_value: an optional numeric value for the event
    :param kwargs: extra event parameters to send to GA
    """
    payload = {}
    payload['t'] = 'event'  # GA event - don't change
    payload['ec'] = event_category  # Event category (required)
    payload['ea'] = event_action  # Event action (required)
    payload['el'] = event_label  # Event label
    payload['ev'] = event_value  # Event value (numeric)

    # Defaults first, then caller overrides
    for extra_params in (DEFAULT_PARAMETERS, kwargs):
        payload.update(extra_params)

    send_to_analytics(payload)
32 changes: 32 additions & 0 deletions readthedocs/analytics/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from __future__ import absolute_import, unicode_literals

from django.test import TestCase

from .utils import anonymize_ip_address, anonymize_user_agent


class UtilsTests(TestCase):

    """Tests for the anonymization helpers in ``utils``."""

    def test_anonymize_ip(self):
        """IPv4 and IPv6 addresses lose their last two bytes."""
        expectations = (
            ('127.0.0.1', '127.0.0.0'),
            ('127.127.127.127', '127.127.0.0'),
            (
                '3ffe:1900:4545:3:200:f8ff:fe21:67cf',
                '3ffe:1900:4545:3:200:f8ff:fe21:0',
            ),
            ('fe80::200:f8ff:fe21:67cf', 'fe80::200:f8ff:fe21:0'),
        )
        for raw_ip, anonymized_ip in expectations:
            self.assertEqual(anonymize_ip_address(raw_ip), anonymized_ip)

    def test_anonymize_ua(self):
        """Recognized user agents pass through; unknown ones are masked."""
        common_ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'  # noqa
        expectations = (
            (common_ua, common_ua),
            ('Some rare user agent', 'Rare user agent'),
        )
        for raw_ua, anonymized_ua in expectations:
            self.assertEqual(anonymize_user_agent(raw_ua), anonymized_ua)

109 changes: 109 additions & 0 deletions readthedocs/analytics/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""Utilities related to analytics"""

from __future__ import absolute_import, unicode_literals
import hashlib
import logging

from django.conf import settings
from django.utils.encoding import force_text, force_bytes
from django.utils.crypto import get_random_string
import requests
from user_agents import parse

try:
# Python 3.3+ only
import ipaddress
except ImportError:
from .vendor import ipaddress

log = logging.getLogger(__name__) # noqa


def get_client_ip(request):
    """
    Get the real client IP address based on a request object.

    The first entry of the ``X-Forwarded-For`` header is preferred when it
    is present and non-blank; otherwise ``REMOTE_ADDR`` is used.

    :param request: object exposing a ``META`` mapping of request headers
    :returns: the IP address as a string, or ``None`` when neither
        ``HTTP_X_FORWARDED_FOR`` nor ``REMOTE_ADDR`` is available
    """
    # Get the original IP address (eg. "X-Forwarded-For: client, proxy1, proxy2").
    # ``or ''`` guards against the header being present but set to None.
    forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR') or ''

    # Entries may be padded with whitespace around the commas; strip it so
    # the value parses as an IP and a blank header does not shadow REMOTE_ADDR.
    # NOTE(review): this header is supplied by the client/proxies, so the
    # value is only as trustworthy as the proxy chain in front of the app.
    forwarded_ip = forwarded_for.split(',')[0].strip()
    if forwarded_ip:
        return forwarded_ip

    return request.META.get('REMOTE_ADDR')


def anonymize_ip_address(ip_address):
    """
    Anonymize an IP address by zeroing the last 2 bytes.

    Works for both IPv4 and IPv6 addresses; the result keeps the IP version
    of the input.

    :param ip_address: the IP address to anonymize (text or bytes)
    :returns: the anonymized address in compressed string form, or ``None``
        if ``ip_address`` cannot be parsed as an IP address
    """
    try:
        ip_obj = ipaddress.ip_address(force_text(ip_address))
    except ValueError:
        return None

    # Clear the low 16 bits (the last 2 bytes) of the address
    anonymized_int = int(ip_obj) & ~0xFFFF

    # Rebuild with the same class as the parsed address.
    # ``ipaddress.ip_address(<int>)`` treats any integer below 2**32 as IPv4,
    # which would turn a small IPv6 address such as "::1" into "0.0.0.0".
    anonymized_ip = ip_obj.__class__(anonymized_int)
    return anonymized_ip.compressed


def anonymize_user_agent(user_agent):
    """
    Replace rare user agents with a generic placeholder.

    A user agent is considered rare when the parser reports either its
    browser family or its OS family as ``'Other'``.

    :param user_agent: the raw user agent string
    :returns: ``user_agent`` unchanged, or ``'Rare user agent'`` if it is rare
    """
    parsed = parse(user_agent)
    is_rare = parsed.browser.family == 'Other' or parsed.os.family == 'Other'
    return 'Rare user agent' if is_rare else user_agent


def send_to_analytics(data):
    """
    Send data to Google Analytics.

    ``data`` is modified in place before sending: a ``uid`` is added when
    both the IP and the user agent are present, and the ``uip`` and ``ua``
    entries are replaced with their anonymized forms.

    A timeout or an error response is logged as a warning and is not raised.

    :param data: dict of parameters to POST to Google Analytics
    """
    if data.get('uip') and data.get('ua'):
        # The ID must be derived before the IP and UA are anonymized below.
        # NOTE(review): the helper generates a *client* id but it is sent as
        # ``uid`` -- confirm against the Measurement Protocol whether ``cid``
        # was intended.
        data['uid'] = generate_client_id(data['uip'], data['ua'])

    if 'uip' in data:
        # Anonymize IP address if applicable
        data['uip'] = anonymize_ip_address(data['uip'])

    if 'ua' in data:
        # Anonymize user agent if it is rare
        data['ua'] = anonymize_user_agent(data['ua'])

    resp = None
    log.debug('Sending data to analytics: %s', data)
    try:
        resp = requests.post(
            'https://www.google-analytics.com/collect',
            data=data,
            timeout=3,  # seconds
        )
    except requests.Timeout:
        log.warning('Timeout sending to Google Analytics')

    # Compare against None explicitly: a ``requests.Response`` is falsy when
    # its status is an error, so ``if resp and not resp.ok`` can never be true.
    if resp is not None and not resp.ok:
        log.warning('Unknown error sending to Google Analytics')


def generate_client_id(ip_address, user_agent):
    """
    Build an advertising ID from an IP address and a user agent.

    The ID is the hex SHA-256 digest of the project ``SECRET_KEY``, a fixed
    salt and whichever of ``ip_address`` / ``user_agent`` are non-empty.
    This simplifies things: a user with the same IP and the same UA is
    treated as the same user for analytics purposes.

    :param ip_address: the client IP address (may be empty)
    :param user_agent: the client user agent (may be empty)
    :returns: the ID as a hexadecimal string
    """
    digest = hashlib.sha256(force_bytes(settings.SECRET_KEY))
    digest.update(b'advertising-client-id')  # salt

    identifying_parts = [part for part in (ip_address, user_agent) if part]
    if not identifying_parts:
        # With no IP and no UA there is no way to distinguish sessions,
        # so a random value makes every such user different.
        identifying_parts.append(get_random_string())

    for part in identifying_parts:
        digest.update(force_bytes(part))

    return digest.hexdigest()
Empty file.
Loading

0 comments on commit 732b37c

Please sign in to comment.