From 975c95ae3803dafc2ee7957311f8d205b4767c21 Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati Date: Fri, 18 Oct 2019 09:21:36 -0400 Subject: [PATCH 1/7] removing pages that are no longer in use on transition --- .../management/data/transition_pages.json | 161 ------------------ 1 file changed, 161 deletions(-) diff --git a/fec/search/management/data/transition_pages.json b/fec/search/management/data/transition_pages.json index 496496b176..dcca0a7226 100644 --- a/fec/search/management/data/transition_pages.json +++ b/fec/search/management/data/transition_pages.json @@ -1,18 +1,4 @@ [ - { - "document_id": "transition-1", - "path": "http://transition.fec.gov/pubrec/publicrecordsoffice.shtml#using", - "created": "04/01/2017", - "language": "en", - "title": "Public Records Office" - }, - { - "document_id": "transition-2", - "path": "http://transition.fec.gov/info/hearings.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Public Hearings" - }, { "document_id": "transition-3", "path": "http://transition.fec.gov/af/af.shtml", @@ -20,13 +6,6 @@ "language": "en", "title": "FEC Administrative Fine Program" }, - { - "document_id": "transition-4", - "path": "http://transition.fec.gov/af/AFPRegulations.shtml", - "created": "04/01/2017", - "language": "en", - "title": "AFP Regulations" - }, { "document_id": "transition-5", "path": "http://transition.fec.gov/af/af_calc.shtml", @@ -69,13 +48,6 @@ "language": "en", "title": "Help Complying with the Federal Campaign Finance Law (FECA)" }, - { - "document_id": "transition-11", - "path": "http://transition.fec.gov/elecfil/electron.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Electronic Filing" - }, { "document_id": "transition-12", "path": "http://transition.fec.gov/elecfil/electron.shtml", @@ -83,34 +55,6 @@ "language": "en", "title": "Electronic Filing" }, - { - "document_id": "transition-13", - "path": "http://transition.fec.gov/info/forms.shtml", - "created": "04/01/2017", - "language": 
"en", - "title": "FEC Reporting Forms" - }, - { - "document_id": "transition-14", - "path": "http://transition.fec.gov/info/ElectionDate/", - "created": "04/01/2017", - "language": "en", - "title": "Federal Election Compliance Information" - }, - { - "document_id": "transition-15", - "path": "http://transition.fec.gov/info/publications.shtml", - "created": "04/01/2017", - "language": "en", - "title": "FEC Publications" - }, - { - "document_id": "transition-16", - "path": "http://transition.fec.gov/info/publications.shtml", - "created": "04/01/2017", - "language": "en", - "title": "FEC Publications" - }, { "document_id": "transition-17", "path": "http://transition.fec.gov/pages/brochures/brochures.shtml", @@ -118,48 +62,6 @@ "language": "en", "title": "FEC Brochures" }, - { - "document_id": "transition-18", - "path": "http://transition.fec.gov/pages/bcra/bcra_update.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Bipartisan Campaign Reform Act of 2002" - }, - { - "document_id": "transition-19", - "path": "http://transition.fec.gov/info/outreach.shtml", - "created": "04/01/2017", - "language": "en", - "title": "FEC Educational Outreach" - }, - { - "document_id": "transition-20", - "path": "http://transition.fec.gov/info/elearning.shtml", - "created": "04/01/2017", - "language": "en", - "title": "FEC Educational Outreach" - }, - { - "document_id": "transition-21", - "path": "http://transition.fec.gov/info/outreach.shtml#conferences", - "created": "04/01/2017", - "language": "en", - "title": "FEC Educational Outreach" - }, - { - "document_id": "transition-22", - "path": "http://transition.fec.gov/info/outreach.shtml#roundtables", - "created": "04/01/2017", - "language": "en", - "title": "FEC Educational Outreach" - }, - { - "document_id": "transition-23", - "path": "http://transition.fec.gov/info/outreach.shtml#appearances", - "created": "04/01/2017", - "language": "en", - "title": "FEC Educational Outreach" - }, { "document_id": "transition-24", 
"path": "http://transition.fec.gov/rad/index.shtml", @@ -222,68 +124,5 @@ "created": "04/01/2017", "language": "en", "title": "Federal Election Commission Home Page" - }, - { - "document_id": "transition-33", - "path": "http://transition.fec.gov/ans/answers.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Quick Answers" - }, - { - "document_id": "transition-34", - "path": "http://transition.fec.gov/ans/answers_general.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Quick Answers-General Questions" - }, - { - "document_id": "transition-35", - "path": "http://transition.fec.gov/ans/answers_disclosure.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Quick Answers - Disclosure" - }, - { - "document_id": "transition-36", - "path": "http://transition.fec.gov/ans/answers_compliance.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Quick Answers - Compliance" - }, - { - "document_id": "transition-37", - "path": "http://transition.fec.gov/ans/answers_filing.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Quick Answers - Filing" - }, - { - "document_id": "transition-38", - "path": "http://transition.fec.gov/ans/answers_candidate.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Quick Answers - Candidate" - }, - { - "document_id": "transition-39", - "path": "http://transition.fec.gov/ans/answers_pac.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Quick Answers - PAC" - }, - { - "document_id": "transition-40", - "path": "http://transition.fec.gov/ans/answers_party.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Quick Answers - Party" - }, - { - "document_id": "transition-41", - "path": "http://transition.fec.gov/ans/answers_public_funding.shtml", - "created": "04/01/2017", - "language": "en", - "title": "Quick Answers: Public Funding" } ] From d103f6d3f5a55f3c4c4882853b69b031c170d6d9 Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati 
Date: Fri, 18 Oct 2019 09:22:19 -0400 Subject: [PATCH 2/7] changing web app scrape URLs from beta to www --- fec/search/management/data/web_app_pages.json | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/fec/search/management/data/web_app_pages.json b/fec/search/management/data/web_app_pages.json index d4cc441b22..3068714009 100644 --- a/fec/search/management/data/web_app_pages.json +++ b/fec/search/management/data/web_app_pages.json @@ -2,7 +2,7 @@ { "document_id": "app-1", "title": "Campaign finance data home", - "path": "https://beta.fec.gov/data/", + "path": "https://www.fec.gov/data/", "created": "2017-04-01", "language": "en", "promote": true, @@ -12,7 +12,7 @@ { "document_id": "app-2", "title": "Advanced data", - "path": "https://beta.fec.gov/data/advanced/", + "path": "https://www.fec.gov/data/advanced/", "created": "2017-04-01", "language": "en", "promote": true, @@ -22,7 +22,7 @@ { "document_id": "app-3", "title": "Browse receipts", - "path": "https://beta.fec.gov/data/receipts/", + "path": "https://www.fec.gov/data/receipts/", "created": "2017-04-01", "language": "en", "promote": true, @@ -32,7 +32,7 @@ { "document_id": "app-4", "title": "Browse individual contributions", - "path": "https://beta.fec.gov/data/receipts/individual-contributions", + "path": "https://www.fec.gov/data/receipts/individual-contributions", "created": "2017-04-01", "language": "en", "promote": true, @@ -42,7 +42,7 @@ { "document_id": "app-5", "title": "Browse disbursements", - "path": "https://beta.fec.gov/data/disbursements", + "path": "https://www.fec.gov/data/disbursements", "created": "2017-04-01", "language": "en", "promote": true, @@ -52,7 +52,7 @@ { "document_id": "app-6", "title": "Browse independent expenditures", - "path": "https://beta.fec.gov/data/independent-expenditures/", + "path": "https://www.fec.gov/data/independent-expenditures/", "created": "2017-04-01", "language": "en", "promote": true, @@ -62,7 +62,7 @@ { "document_id": 
"app-7", "title": "Browse party coordinated expenditures", - "path": "https://beta.fec.gov/data/party-coordinated-expenditures/", + "path": "https://www.fec.gov/data/party-coordinated-expenditures/", "created": "2017-04-01", "language": "en", "promote": true, @@ -72,7 +72,7 @@ { "document_id": "app-8", "title": "Browse electioneering communications", - "path": "https://beta.fec.gov/data/electioneering-communications/", + "path": "https://www.fec.gov/data/electioneering-communications/", "created": "2017-04-01", "language": "en", "promote": true, @@ -82,7 +82,7 @@ { "document_id": "app-9", "title": "Browse communication costs", - "path": "https://beta.fec.gov/data/communication-costs/", + "path": "https://www.fec.gov/data/communication-costs/", "created": "2017-04-01", "language": "en", "promote": true, @@ -92,7 +92,7 @@ { "document_id": "app-10", "title": "Browse loans", - "path": "https://beta.fec.gov/data/loans/", + "path": "https://www.fec.gov/data/loans/", "created": "2017-04-01", "language": "en", "promote": true, @@ -102,7 +102,7 @@ { "document_id": "app-12", "title": "Browse all candidates", - "path": "https://beta.fec.gov/data/candidates/", + "path": "https://www.fec.gov/data/candidates/", "created": "2017-04-01", "language": "en", "promote": true, @@ -112,7 +112,7 @@ { "document_id": "app-13", "title": "Browse candidates for president", - "path": "https://beta.fec.gov/data/candidates/president/", + "path": "https://www.fec.gov/data/candidates/president/", "created": "2017-04-01", "language": "en", "promote": true, @@ -122,7 +122,7 @@ { "document_id": "app-14", "title": "Browse candidates for Senate", - "path": "https://beta.fec.gov/data/candidates/senate/", + "path": "https://www.fec.gov/data/candidates/senate/", "created": "2017-04-01", "language": "en", "promote": true, @@ -132,7 +132,7 @@ { "document_id": "app-15", "title": "Browse candidates for House of Representatives", - "path": "https://beta.fec.gov/data/candidates/house/", + "path": 
"https://www.fec.gov/data/candidates/house/", "created": "2017-04-01", "language": "en", "promote": true, @@ -142,7 +142,7 @@ { "document_id": "app-16", "title": "Browse all committees", - "path": "https://beta.fec.gov/data/committees/", + "path": "https://www.fec.gov/data/committees/", "created": "2017-04-01", "language": "en", "promote": true, @@ -152,7 +152,7 @@ { "document_id": "app-17", "title": "Browse all filings", - "path": "https://beta.fec.gov/data/filings/", + "path": "https://www.fec.gov/data/filings/", "created": "2017-04-01", "language": "en", "promote": true, @@ -162,7 +162,7 @@ { "document_id": "app-18", "title": "Browse presidential committee reports", - "path": "https://beta.fec.gov/data/reports/presidential/", + "path": "https://www.fec.gov/data/reports/presidential/", "created": "2017-04-01", "language": "en", "promote": true, @@ -172,7 +172,7 @@ { "document_id": "app-19", "title": "Browse House and Senate committee reports", - "path": "https://beta.fec.gov/data/reports/house-senate/", + "path": "https://www.fec.gov/data/reports/house-senate/", "created": "2017-04-01", "language": "en", "promote": true, @@ -182,7 +182,7 @@ { "document_id": "app-20", "title": "Browse PAC and Party committee reports", - "path": "https://beta.fec.gov/data/reports/pac-party/", + "path": "https://www.fec.gov/data/reports/pac-party/", "created": "2017-04-01", "language": "en", "promote": true, @@ -192,7 +192,7 @@ { "document_id": "app-21", "title": "Raising breakdown", - "path": "https://beta.fec.gov/data/raising/", + "path": "https://www.fec.gov/data/raising/", "created": "2017-04-01", "language": "en", "promote": true, @@ -202,7 +202,7 @@ { "document_id": "app-22", "title": "Spending breakdown", - "path": "https://beta.fec.gov/data/spending/", + "path": "https://www.fec.gov/data/spending/", "created": "2017-04-01", "language": "en", "promote": true, @@ -212,7 +212,7 @@ { "document_id": "app-23", "title": "Find candidates and elections by location", - "path": 
"https://beta.fec.gov/data/elections/", + "path": "https://www.fec.gov/data/elections/", "created": "2017-04-01", "language": "en", "promote": true, From e3fbf0cd603478111cec2184a8eca7e7e7f2f48a Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati Date: Fri, 18 Oct 2019 10:26:34 -0400 Subject: [PATCH 3/7] sprucing up documentation, removed references to non-existent drawer transition --- fec/search/management/instructions.md | 28 ++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/fec/search/management/instructions.md b/fec/search/management/instructions.md index 88d4ab7f23..f7347dd970 100644 --- a/fec/search/management/instructions.md +++ b/fec/search/management/instructions.md @@ -1,19 +1,18 @@ # DigitalGov search index instructions -The site-wide search of fec.gov uses the General Service Administration's [DigitalGov Search tool](search.digitalgov.gov) for the search engine. We use the [i14y API](https://search.digitalgov.gov/developer/i14y.html) for maintaining the search index, which feeds into ElasticSearch. +The site-wide search of fec.gov uses the General Service Administration's [Search.gov tool](https://search.gov/) for the search engine. We use the [i14y API](https://search.gov/developer/i14y.html) for maintaining the search index, which feeds into ElasticSearch. For more information about i14Y, you can read the [technical documentation here](http://gsa.github.io/slate/). These instructions are for explaining how to manually update the index. ## Getting set up The DigitalGov Search "site" we use is called `betafec_api` (though that can change). If you're trying to access the admin panel, you will need someone to add you as a contributor to that site. This is where all the admin panel controls and analytics live. -The i14y search works by setting up one or more "drawers", which are basically collections of pages for the index. All drawers serve the same search. 
We have two drawers set up: `main`, which includes all CMS and web app pages, and `transition` which includes all transition.fec.gov pages. +The i14y search works by setting up one or more "drawers", which are basically collections of pages for the index. All drawers serve the same search. We have one drawer set up: `main`, which includes all CMS, web app pages, and transition.fec.gov pages. -Each drawer has its own key, so to push updates to them you will need to add the drawers' keys to your local env: +The main drawer has its own key, so to push updates to it you will need to add the drawer key to your local env: ``` export DIGITALGOV_DRAWER_KEY_MAIN=
-export DIGTIALGOV_DRAWER_KEY_TRANSITION: ``` i14y accepts HTTP requests (POST / PUT / DELETE) with data structured like: @@ -33,9 +32,9 @@ i14y accepts HTTP requests (POST / PUT / DELETE) with data structured like: } ``` -You must pass either a `description` or `content` when adding a new document. The search engine searches the `title`, `description`, `content` and `tags`, but only matches in the `content` or `title` are actually displayed in the search results. +It is recommended to pass `description` or `content` when adding a new document. The search engine searches the `title`, `description`, `content` and `tags`, but only matches in the `content` or `title` are actually displayed in the search results. -You can push updates manually with cURL, but for convenience we've set up a few Django management commands that make things a little simpler, which are detailed below. +You can push updates manually with `cURL`, but for convenience we've set up a few Django management commands that make things a little simpler, which are detailed below. ## Adding CMS pages to the index The search indexes are not automatically updated when content changes on the site, so we need to run a manual script to update the indexes. @@ -63,7 +62,7 @@ The search indexes are not automatically updated when content changes on the sit 3. **Push the indexes to i14y** Run `fec/manage.py index_pages`. This will take each item in `output.json` and attempt a POST request to i14Y. If there is not already a page in the index with the same `document_id`, it will add it. If a page with the same `document_id` is already there, it will update it with whatever data is in this version. -Once `index_pages` has run, you can log in to search.digitalgov.gov and see the new pages under "Content" > "i14Y Drawers" > "Main". +Once `index_pages` has run, you can log in to search.gov and see the new pages under "Content" > "i14Y Drawers" > click "show" for the Main drawer. 
Once the data is there, it will work when running a search on the site. @@ -101,9 +100,16 @@ Similar to adding web app pages, transition pages need to be manually identified Optionally, you can add `description`, `tags`, or `promoted` fields. 2. **Scrape the content:** Run `fec/manage.py scrape_transition_pages`. This script will read `data/transition_pages.json` and call each URL and scrape the content in the `#fec_mainContent` or `#fec_mainContentWide` `
`s. Optionally, you could pass in a different path to a JSON file with the optional `--path_to_json` argument. This script will output the results to `output.json`. It's generally a good idea to read over this file and make sure things look right. -3. **Index the pages:** Run `fec/manage.py index_pages -transition`. This command works the same as it does for adding CMS pages, but with the `-transition` flag it will put them in the transition drawer. +3. **Index the pages:** Run `fec/manage.py index_pages`. This command works the same as it does for adding CMS pages. -## Additional DigitalGov configuration -**Best bets:** of the really great features of DigitalGov Search is what's called "Best bets". These are basically search suggestions that you can manually add (or add in bulk by uploading a spreadsheet) which map a URL to a specific set of keywords. Any Best Bet will be returned at the top of the search results. +## Additional Search.gov configuration +**Best bets:** One of the really great features of Search.gov is what's called "Best bets". These are basically search suggestions that you can manually add (or add in bulk by uploading a spreadsheet) which map a URL to a specific set of keywords. Any Best Bet will be returned at the top of the search results. -**Deleting pages:** To remove pages from the index, you'll need to make a DELETE request with the `document_id` you want to delete. [More info in the docs](http://gsa.github.io/slate/#delete-a-document). +**Deleting pages:** To remove pages from the index, you'll need to make a DELETE request with the `document_id` you want to delete. The `document_id` can be found within the following files: output.json, web_app_pages.json, and transition_pages.json. The IDs within each json file are unique based on type. 
+ +This is done through a curl: + +```curl "https://i14y.usa.gov/api/v1/documents/{document_id}" -XDELETE -u main:$DIGITALGOV_DRAWER_KEY_MAIN +``` + +[More info in the docs](http://gsa.github.io/slate/#delete-a-document). From 990739bb0b39fac386520997100bb816aa4bc727 Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati Date: Fri, 18 Oct 2019 10:27:15 -0400 Subject: [PATCH 4/7] removed transition drawer credentials option since it's never used or needed --- fec/search/management/commands/index_pages.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fec/search/management/commands/index_pages.py b/fec/search/management/commands/index_pages.py index fb730e064a..54bbaea7dd 100644 --- a/fec/search/management/commands/index_pages.py +++ b/fec/search/management/commands/index_pages.py @@ -32,11 +32,6 @@ def add_arguments(self, parser): def handle(self, *args, **options): self.stdout.write(self.style.WARNING('Indexing pages')) - # If we're putting in the transition drawer, use those creds - if options['transition']: - drawer = 'transition' - key = DIGITALGOV_DRAWER_KEY_TRANSITION - if options['json_file_path']: file_name = options['json_file_path'] else: From 97953e0fe64e963db3930b9c6e22dc2bcd43ea07 Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati Date: Fri, 18 Oct 2019 11:24:28 -0400 Subject: [PATCH 5/7] removing transition drawer argument as it's not used --- fec/search/management/commands/index_pages.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fec/search/management/commands/index_pages.py b/fec/search/management/commands/index_pages.py index 54bbaea7dd..badd814bf4 100644 --- a/fec/search/management/commands/index_pages.py +++ b/fec/search/management/commands/index_pages.py @@ -23,12 +23,6 @@ def add_arguments(self, parser): help='Path to JSON file to load' ) - parser.add_argument( - '-transition', - action='store_true', - help="Add this flag to add to the transition drawer" - ) - def handle(self, *args, **options): 
self.stdout.write(self.style.WARNING('Indexing pages')) From 15ed52a8e6893921be9232cb26154c501953b523 Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati Date: Fri, 18 Oct 2019 11:26:40 -0400 Subject: [PATCH 6/7] removing transition drawer var --- fec/search/management/commands/index_pages.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/fec/search/management/commands/index_pages.py b/fec/search/management/commands/index_pages.py index badd814bf4..f6b64aee4d 100644 --- a/fec/search/management/commands/index_pages.py +++ b/fec/search/management/commands/index_pages.py @@ -8,8 +8,6 @@ from home.models import Page - -DIGITALGOV_DRAWER_KEY_TRANSITION = settings.FEC_DIGITALGOV_DRAWER_KEY_TRANSITION drawer = settings.DIGITALGOV_DRAWER_HANDLE key = settings.FEC_DIGITALGOV_DRAWER_KEY_MAIN From c5742ee16db0cd503076b95a0467649cf9fc4db8 Mon Sep 17 00:00:00 2001 From: Pat Phongsvirajati Date: Fri, 18 Oct 2019 15:04:46 -0400 Subject: [PATCH 7/7] add /introduction-campaign-finance/ and its descendants to the CMS scraper --- fec/fec/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fec/fec/constants.py b/fec/fec/constants.py index 452b605c1e..8674b1bc87 100644 --- a/fec/fec/constants.py +++ b/fec/fec/constants.py @@ -213,7 +213,8 @@ SEARCH_DESCENDANTS_OF = [ '/home/legal-resources/', '/home/help-candidates-and-committees/', - '/home/press/' + '/home/press/', + '/home/introduction-campaign-finance/' ] # These are the parent pages for which we want *only* direct children