From 0ca8f053583d4a5a519ded9a5543274c0ee0f37d Mon Sep 17 00:00:00 2001 From: David Fischer Date: Mon, 10 Sep 2018 14:41:52 -0700 Subject: [PATCH] Do not track support --- .envs/sample.env | 2 + adserver/templates/adserver/dnt-policy.txt | 218 +++++++++++++++++++++ adserver/tests.py | 51 ++++- adserver/urls.py | 9 +- adserver/views.py | 35 ++++ config/settings/base.py | 6 + config/settings/production.py | 4 + 7 files changed, 321 insertions(+), 4 deletions(-) create mode 100644 adserver/templates/adserver/dnt-policy.txt diff --git a/.envs/sample.env b/.envs/sample.env index 02d72e25..1b552885 100644 --- a/.envs/sample.env +++ b/.envs/sample.env @@ -8,6 +8,8 @@ WEB_CONCURRENCY=4 # Ad server settings ADSERVER_HTTPS=False ADSERVER_ADMIN_URL=admin +ADSERVER_DO_NOT_TRACK=True +ADSERVER_PRIVACY_POLICY_URL= # Using Sqlite is fine for a local test #DATABASE_URL=sqlite:///db.sqlite3 diff --git a/adserver/templates/adserver/dnt-policy.txt b/adserver/templates/adserver/dnt-policy.txt new file mode 100644 index 00000000..ad946d1f --- /dev/null +++ b/adserver/templates/adserver/dnt-policy.txt @@ -0,0 +1,218 @@ +Do Not Track Compliance Policy + +Version 1.0 + +This domain complies with user opt-outs from tracking via the "Do Not Track" +or "DNT" header [http://www.w3.org/TR/tracking-dnt/]. This file will always +be posted via HTTPS at https://example-domain.com/.well-known/dnt-policy.txt +to indicate this fact. + +SCOPE + +This policy document allows an operator of a Fully Qualified Domain Name +("domain") to declare that it respects Do Not Track as a meaningful privacy +opt-out of tracking, so that privacy-protecting software can better determine +whether to block or anonymize communications with this domain. This policy is +intended first and foremost to be posted on domains that publish ads, widgets, +images, scripts and other third-party embedded hypertext (for instance on +widgets.example.com), but it can be posted on any domain, including those users +visit directly (such as www.example.com). The policy may be applied to some +domains used by a company, site, or service, and not to others. Do Not Track +may be sent by any client that uses the HTTP protocol, including websites, +mobile apps, and smart devices like TVs. Do Not Track also works with all +protocols able to read HTTP headers, including SPDY. + +NOTE: This policy contains both Requirements and Exceptions. Where possible +terms are defined in the text, but a few additional definitions are included +at the end. + +REQUIREMENTS + +When this domain receives Web requests from a user who enables DNT by actively +choosing an opt-out setting in their browser or by installing software that is +primarily designed to protect privacy ("DNT User"), we will take the following +measures with respect to those users' data, subject to the Exceptions, also +listed below: + +1. END USER IDENTIFIERS: + + a. If a DNT User has logged in to our service, all user identifiers, such as + unique or nearly unique cookies, "supercookies" and fingerprints are + discarded as soon as the HTTP(S) response is issued. + + Data structures which associate user identifiers with accounts may be + employed to recognize logged in users per Exception 4 below, but may not + be associated with records of the user's activities unless otherwise + excepted. + + b. If a DNT User is not logged in to our service, we will take steps to ensure + that no user identifiers are transmitted to us at all. + +2. LOG RETENTION: + + a. Logs with DNT Users' identifiers removed (but including IP addresses and + User Agent strings) may be retained for a period of 10 days or less, + unless an Exception (below) applies. This period of time balances privacy + concerns with the need to ensure that log processing systems have time to + operate; that operations engineers have time to monitor and fix technical + and performance problems; and that security and data aggregation systems + have time to operate. + + b. These logs will not be used for any other purposes. + +3. OTHER DOMAINS: + + a. If this domain transfers identifiable user data about DNT Users to + contractors, affiliates or other parties, or embeds from or posts data to + other domains, we will either: + + b. ensure that the operators of those domains abide by this policy overall + by posting it at /.well-known/dnt-policy.txt via HTTPS on the domains in + question, + + OR + + ensure that the recipient's policies and practices require the recipient + to respect the policy for our DNT Users' data. + + OR + + obtain a contractual commitment from the recipient to respect this policy + for our DNT Users' data. + + NOTE: if an “Other Domain” does not receive identifiable user information + from the domain because such information has been removed, because the + Other Domain does not log that information, or for some other reason, these + requirements do not apply. + + c. "Identifiable" means any records which are not Anonymized or otherwise + covered by the Exceptions below. + +4. PERIODIC REASSERTION OF COMPLIANCE: + + At least once every 12 months, we will take reasonable steps commensurate + with the size of our organization and the nature of our service to confirm + our ongoing compliance with this document, and we will publicly reassert our + compliance. + +5. USER NOTIFICATION: + + a. If we are required by law to retain or disclose user identifiers, we will + attempt to provide the users with notice (unless we are prohibited or it + would be futile) that a request for their information has been made in + order to give the users an opportunity to object to the retention or + disclosure. + + b. We will attempt to provide this notice by email, if the users have given + us an email address, and by postal mail if the users have provided a + postal address. + + c. If the users do not challenge the disclosure request, we may be legally + required to turn over their information. + + d. We may delay notice if we, in good faith, believe that an emergency + involving danger of death or serious physical injury to any person + requires disclosure without delay of information relating to the + emergency. + +EXCEPTIONS + +Data from DNT Users collected by this domain may be logged or retained only in +the following specific situations: + +1. CONSENT / "OPT BACK IN" + + a. DNT Users are opting out from tracking across the Web. It is possible + that for some feature or functionality, we will need to ask a DNT User to + "opt back in" to be tracked by us across the entire Web. + + b. If we do that, we will take reasonable steps to verify that the users who + select this option have genuinely intended to opt back in to tracking. + One way to do this is by performing scientifically reasonable user + studies with a representative sample of our users, but smaller + organizations can satisfy this requirement by other means. + + c. Where we believe that we have opt back in consent, our server will + send a tracking value status header "Tk: C" as described in section 6.2 + of the W3C Tracking Preference Expression draft: + + http://www.w3.org/TR/tracking-dnt/#tracking-status-value + +2. TRANSACTIONS + + If a DNT User actively and knowingly enters a transaction with our + services (for instance, clicking on a clearly-labeled advertisement, + posting content to a widget, or purchasing an item), we will retain + necessary data for as long as required to perform the transaction. This + may for example include keeping auditing information for clicks on + advertising links; keeping a copy of posted content and the name of the + posting user; keeping server-side session IDs to recognize logged in + users; or keeping a copy of the physical address to which a purchased + item will be shipped. By their nature, some transactions will require data + to be retained indefinitely. + +3. TECHNICAL AND SECURITY LOGGING: + + a. If, during the processing of the initial request (for unique identifiers) + or during the subsequent 10 days (for IP addresses and User Agent strings), + we obtain specific information that causes our employees or systems to + believe that a request is, or is likely to be, part of a security attack, + spam submission, or fraudulent transaction, then logs of those requests + are not subject to this policy. + + b. If we encounter technical problems with our site, then, in rare + circumstances, we may retain logs for longer than 10 days, if that is + necessary to diagnose and fix those problems, but this practice will not be + routinized and we will strive to delete such logs as soon as possible. + +4. AGGREGATION: + + a. We may retain and share anonymized datasets, such as aggregate records of + readership patterns; statistical models of user behavior; graphs of system + variables; data structures to count active users on monthly or yearly + bases; database tables mapping authentication cookies to logged in + accounts; non-unique data structures constructed within browsers for tasks + such as ad frequency capping or conversion tracking; or logs with truncated + and/or encrypted IP addresses and simplified User Agent strings. + + b. "Anonymized" means we have conducted risk mitigation to ensure + that the dataset, plus any additional information that is in our + possession or likely to be available to us, does not allow the + reconstruction of reading habits, online or offline activity of groups of + fewer than 5000 individuals or devices. + + c. If we generate anonymized datasets under this exception we will publicly + document our anonymization methods in sufficient detail to allow outside + experts to evaluate the effectiveness of those methods. + +5. ERRORS: + +From time to time, there may be errors by which user data is temporarily +logged or retained in violation of this policy. If such errors are +inadvertent, rare, and made in good faith, they do not constitute a breach +of this policy. We will delete such data as soon as practicable after we +become aware of any error and take steps to ensure that it is deleted by any +third-party who may have had access to the data. + +ADDITIONAL DEFINITIONS + +"Fully Qualified Domain Name" means a domain name that addresses a computer +connected to the Internet. For instance, example1.com; www.example1.com; +ads.example1.com; and widgets.example2.com are all distinct FQDNs. + +"Supercookie" means any technology other than an HTTP Cookie which can be used +by a server to associate identifiers with the clients that visit it. Examples +of supercookies include Flash LSO cookies, DOM storage, HTML5 storage, or +tricks to store information in caches or etags. + +"Risk mitigation" means an engineering process that evaluates the possibility +and likelihood of various adverse outcomes, considers the available methods of +making those adverse outcomes less likely, and deploys sufficient mitigations +to bring the probability and harm from adverse outcomes below an acceptable +threshold. + +"Reading habits" includes amongst other things lists of visited DNS names, if +those domains pertain to specific topics or activities, but records of visited +DNS names are not reading habits if those domain names serve content of a very +diverse and general nature, thereby revealing minimal information about the +opinions, interests or activities of the user. diff --git a/adserver/tests.py b/adserver/tests.py index 9a30df3b..5978b65d 100644 --- a/adserver/tests.py +++ b/adserver/tests.py @@ -1,3 +1,50 @@ -from django.test import TestCase # noqa +import hashlib +import json -# Create your tests here. +from django.test import TestCase, override_settings +from django.urls import reverse + + +class DoNotTrackTest(TestCase): + def setUp(self): + self.dnt_status_url = reverse("adserver:dnt-status") + self.dnt_policy_url = reverse("adserver:dnt-policy") + + @override_settings(ADSERVER_DO_NOT_TRACK=False) + def test_dnt_disabled(self): + for url in (self.dnt_status_url, self.dnt_policy_url): + resp = self.client.get(url) + self.assertEqual(resp.status_code, 404) + + @override_settings(ADSERVER_DO_NOT_TRACK=True) + def test_dnt_status(self): + resp = self.client.get(self.dnt_status_url) + self.assertEqual(resp.status_code, 200) + self.assertEqual(resp["Content-Type"], "application/tracking-status+json") + + # Can't use response.json() because the content-type is non-standard + data = json.loads(resp.content) + self.assertEqual(data["tracking"], "T") + self.assertFalse("policy" in data) + + resp = self.client.get(self.dnt_status_url, HTTP_DNT="1") + data = json.loads(resp.content) + self.assertEqual(data["tracking"], "N") + + privacy_policy_url = "http://example.com/policy.txt" + with override_settings(ADSERVER_PRIVACY_POLICY_URL=privacy_policy_url): + resp = self.client.get(self.dnt_status_url, HTTP_DNT="1") + data = json.loads(resp.content) + self.assertEqual(data["policy"], privacy_policy_url) + + @override_settings(ADSERVER_DO_NOT_TRACK=True) + def test_dnt_policy(self): + resp = self.client.get(self.dnt_policy_url) + self.assertEqual(resp.status_code, 200) + + # Verify the hashes match + # https://github.com/EFForg/dnt-guide#12-how-to-assert-dnt-compliance + # https://github.com/EFForg/dnt-policy/blob/master/dnt-policies.json + shasum = hashlib.new("sha1") + shasum.update(resp.content) + self.assertEqual(shasum.hexdigest(), "a18e8dba6848d3fc241b03b88291cb75a3cfec3b") diff --git a/adserver/urls.py b/adserver/urls.py index 21463279..98a689cc 100644 --- a/adserver/urls.py +++ b/adserver/urls.py @@ -1,6 +1,11 @@ from django.conf.urls import url -from .views import dashboard +from .views import dashboard, do_not_track, do_not_track_policy -urlpatterns = [url("^$", dashboard)] +urlpatterns = [ + url("^$", dashboard), + # Do not Track + url(r"^\.well-known/dnt/$", do_not_track, name="dnt-status"), + url(r"^\.well-known/dnt-policy.txt$", do_not_track_policy, name="dnt-policy"), +] diff --git a/adserver/views.py b/adserver/views.py index b8c1a1d9..20e455c0 100644 --- a/adserver/views.py +++ b/adserver/views.py @@ -1,6 +1,41 @@ from django.conf import settings from django.contrib.auth.decorators import login_required from django.shortcuts import render +from django.http import JsonResponse, Http404 + + +def do_not_track(request): + """ + Returns the Do Not Track status for the user + + https://w3c.github.io/dnt/drafts/tracking-dnt.html#status-representation + + :raises: Http404 if ``settings.ADSERVER_DO_NOT_TRACK`` is ``False`` + """ + if not settings.ADSERVER_DO_NOT_TRACK: + raise Http404 + + dnt_header = request.META.get("HTTP_DNT") + + data = {"tracking": "N" if dnt_header == "1" else "T"} + if settings.ADSERVER_PRIVACY_POLICY_URL: + data["policy"] = settings.ADSERVER_PRIVACY_POLICY_URL + + return JsonResponse(data, content_type="application/tracking-status+json") + + +def do_not_track_policy(request): + """ + Returns the Do Not Track policy + + https://github.com/EFForg/dnt-guide#12-how-to-assert-dnt-compliance + + :raises: Http404 if ``settings.ADSERVER_DO_NOT_TRACK`` is ``False`` + """ + if not settings.ADSERVER_DO_NOT_TRACK: + raise Http404 + + return render(request, "adserver/dnt-policy.txt", content_type="text/plain") @login_required diff --git a/config/settings/base.py b/config/settings/base.py index 4880887e..0c1605a2 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -202,7 +202,13 @@ # Ad server specific settings ############################################################################ +# The URL where the Django admin is served ADSERVER_ADMIN_URL = "admin" +# Whether Do Not Track is enabled for the ad server +ADSERVER_DO_NOT_TRACK = False + +ADSERVER_PRIVACY_POLICY_URL = None + with open(os.path.join(BASE_DIR, "package.json")) as fd: ADSERVER_VERSION = json.load(fd)["version"] diff --git a/config/settings/production.py b/config/settings/production.py index b1780018..393f4722 100644 --- a/config/settings/production.py +++ b/config/settings/production.py @@ -11,6 +11,8 @@ # Ad server settings ADSERVER_HTTPS=(bool, False), ADSERVER_ADMIN_URL=(str, ""), + ADSERVER_DO_NOT_TRACK=(bool, False), + ADSERVER_PRIVACY_POLICY_URL=(str, None), ) # @@ -46,3 +48,5 @@ # Ad server settings ADSERVER_ADMIN_URL = env("ADSERVER_ADMIN_URL") +ADSERVER_DO_NOT_TRACK = env("ADSERVER_DO_NOT_TRACK") +ADSERVER_PRIVACY_POLICY_URL = env("ADSERVER_PRIVACY_POLICY_URL")