Skip to content

Commit

Permalink
Improved auto-population script (#1679)
Browse files Browse the repository at this point in the history
* Faster data loading scripts

- Group related database updates into commits
- Create quick-populate script to handle loading a basic database

* Add secondary devcontainer

- allow prebuilt codespace to have loaded data
  • Loading branch information
ajparsons authored Mar 9, 2023
1 parent 1bef4cb commit 32882e6
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 2 deletions.
21 changes: 21 additions & 0 deletions .devcontainer/auto-quick-setup/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"name": "theyworkforyou",

"dockerComposeFile": "../../docker-compose.yml",
"service": "twfy",
"workspaceFolder": "/twfy",
"initializeCommand": [
".devcontainer/initializeCommand"
],
"onCreateCommand": "scripts/quick-populate",
"portsAttributes": {"8000": {"label": "TWFY"}},
"forwardPorts": [8000],

"extensions": [
"ms-vscode.test-adapter-converter",
"ms-azuretools.vscode-docker",
"bmewburn.vscode-intelephense-client",
"felixfbecker.php-debug",
"recca0120.vscode-phpunit"
]
}
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ Start a new codespace on Github by selecting the Code dropdown (top right), and

This will set up the Docker container and environment. Once finished, the link to the site should be available in the Ports tab of the terminal panel.

To populate with a minimal amount of data, run `scripts/quick-populate` (about 1 hour).

### DEPRECATED: Developing with Vagrant

Please note that we are not currently supporting the Vagrant environment, and may remove it
Expand All @@ -66,6 +68,7 @@ You will need the latest versions of VirtualBox and Vagrant, then:

See INSTALL.md for instructions on downloading and importing Parlparse data (members, debates, votes, etc).


#### Compiling Static Assets

If you're working on a page which uses the redesign, you will need to compile
Expand Down
3 changes: 3 additions & 0 deletions classes/PartyCohort.php
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ public static function getHashforPerson($person_id){
public static function calculatePositions($quiet=true){
// get current hashes available
$cohorts = PartyCohort::getCohorts();
$db = new \ParlDB;
$policies = new Policies;
$n_cohorts = count($cohorts);

Expand All @@ -448,9 +449,11 @@ public static function calculatePositions($quiet=true){

$cohort_count++;

$db->conn->beginTransaction();
foreach ( $positions as $position ) {
$cohort->cache_position( $position );
}
$db->conn->commit();
if (!$quiet) {
print("$cohort_count/$n_cohorts\n");
}
Expand Down
23 changes: 22 additions & 1 deletion scripts/json2db.pl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@

my $parldata = mySociety::Config::get('RAWDATA');

# Enable progress output when the caller passes --verbose on the command line.
# grep returns the number of matching arguments; normalise to exactly 0 or 1.
my $verbose = ( grep { $_ eq '--verbose' } @ARGV ) ? 1 : 0;

use DBI;
use File::Slurp::Unicode;
use JSON::XS;
Expand Down Expand Up @@ -67,12 +75,18 @@

$policy_count++;

if ($verbose){
print("processing motions for $dreamid\n");
}
process_motions($policy, $dreamid);
}

# And recently changed ones
my $policy_file = $motionsdir . "recently-changed-divisions.json";
if (-f $policy_file) {
if ($verbose){
print("processing recently changed divisions\n");
}
my $policy_json = read_file($policy_file);
my $policy = $json->decode($policy_json);
process_motions($policy);
Expand All @@ -82,9 +96,13 @@

sub process_motions {
my ($policy, $dreamid) = @_;

# Set AutoCommit off
$dbh->{AutoCommit} = 0;
for my $motion ( @{ $policy->{aspects} } ) {
$motion_count++;
if ($verbose && $motion_count % 10 == 0){
print("$motion_count\n");
};
my ($motion_num) = $motion->{motion}->{id} =~ /pw-\d+-\d+-\d+-(\d+)/;
my ($house) = $motion->{motion}->{organization_id} =~ /uk\.parliament\.(\w+)/;

Expand Down Expand Up @@ -249,4 +267,7 @@ sub process_motions {
}

}
$dbh->commit();
# Set AutoCommit on
$dbh->{AutoCommit} = 1;
}
11 changes: 10 additions & 1 deletion scripts/load-people
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ verbose("End");

# ---

my ($dbh, $memberadd, $memberexist, $membercheck, $nameadd, $nameupdate, $namefetch, $namedelete, $gradd, $grdelete);
my ($dbh, $memberadd, $memberexist, $membercheck, $nameadd, $nameupdate, $namefetch, $namedelete, $gradd, $grdelete, $start_transaction, $query_commit, );

sub db_connect {
#DBI->trace(1);
Expand Down Expand Up @@ -139,13 +139,18 @@ sub load_constituencies {
}

sub load_people {

# disable autocommit to load multiple people and memberships at once
$dbh->{AutoCommit} = 0;

my $j = decode_json(read_file($pwmembers . 'people.json'));
foreach (@{$j->{organizations}}) {
$organizations{$_->{id}} = $_;
}
foreach (@{$j->{posts}}) {
$posts{$_->{id}} = $_;
}
$dbh->commit();
foreach (@{$j->{memberships}}) {
if ($_->{redirect}) {
$gradd->execute($_->{id}, $_->{redirect});
Expand All @@ -160,6 +165,10 @@ sub load_people {
load_names($_);
}
}
$dbh->commit();

$dbh->{AutoCommit} = 1;

}

my %member_ids = ();
Expand Down
11 changes: 11 additions & 0 deletions scripts/mpinfoin.pl
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@
my $personinfoupdate = $dbh->prepare('update personinfo set data_value=? where person_id=? and data_key=?');
my $consinfoadd = $dbh->prepare("insert into consinfo (constituency, data_key, data_value) values (?, ?, ?) on duplicate key update data_value=?");


# Set AutoCommit off
$dbh->{AutoCommit} = 0;

# Write to database - members
foreach my $mp_id (keys %$memberinfohash) {
(my $mp_id_num = $mp_id) =~ s#uk.org.publicwhip/(member|lord)/##;
Expand All @@ -172,6 +176,7 @@
}
}
}
$dbh->commit();

# Write to database - people
foreach my $person_id (keys %$personinfohash) {
Expand All @@ -187,6 +192,7 @@
}
}
}
$dbh->commit();

# Write to database - cons
foreach my $constituency (keys %$consinfohash) {
Expand All @@ -196,6 +202,11 @@
$consinfoadd->execute($constituency, $key, $value, $value);
}
}
$dbh->commit();


# Set AutoCommit on
$dbh->{AutoCommit} = 1;

# just temporary to check cron working
# print "mpinfoin done\n";
Expand Down
32 changes: 32 additions & 0 deletions scripts/quick-populate
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#
# quick-populate: load a minimal dataset into a fresh TheyWorkForYou install.
#
# Clones parlparse, downloads one month (Jan 2022) of debates and divisions
# plus the people data, loads it all into the database, imports voting-record
# info, generates party positions, and builds the search index.
# Intended for dev environments (e.g. the auto-quick-setup devcontainer);
# takes roughly an hour end to end.

# Abort on the first failed command (and on unset variables) so we never
# continue on to load a partial or corrupt dataset.
set -eu

echo 'Cloning parlparse'
# Skip the clone if a previous (possibly interrupted) run already created the
# checkout, so the script can be re-run safely.
if [ ! -d data/parlparse ]; then
    git clone https://github.com/mysociety/parlparse data/parlparse --depth 1
fi

# See http://parser.theyworkforyou.com/hansard.html for details on downloading specific ranges
echo 'Downloading minimal data'
rsync -az --progress --exclude '.svn' --exclude 'tmp/' --relative data.theyworkforyou.com::parldata/scrapedxml/debates/debates2022-01* data/pwdata
rsync -az --progress --exclude '.svn' --exclude 'tmp/' --relative data.theyworkforyou.com::parldata/scrapedjson data/pwdata
rsync -az --progress --exclude '.svn' --exclude 'tmp/' --relative data.theyworkforyou.com::parldata/people.json data/pwdata
rsync -az --progress --exclude '.svn' --exclude 'tmp/' --relative data.theyworkforyou.com::parldata/scrapedxml/divisionsonly/divisions2022-01* data/pwdata
rsync -az --progress --exclude '.svn' --exclude 'tmp/' --relative data.theyworkforyou.com::parldata/scrapedxml/regmem data/pwdata

echo 'Load people into database'
scripts/load-people

echo 'Load divisions/policies from json'
scripts/json2db.pl --verbose

echo 'Load Jan 2022 debates and divisions'
scripts/xml2db.pl --debates --all

# this only imports public whip, run with different arguments to pull in members expenses
echo 'Importing MP extra info for voting comparison'
scripts/mpinfoin.pl publicwhip

echo 'Generate party positions on issues'
php scripts/generate_party_positions.php

echo 'Generate search index'
search/index.pl all

0 comments on commit 32882e6

Please sign in to comment.