diff --git a/feeder/feeder.py b/feeder/feeder.py
index 4df32af3d7..dab1b5ec0a 100644
--- a/feeder/feeder.py
+++ b/feeder/feeder.py
@@ -177,6 +177,29 @@
 # Set up requests
 session = requests.session()
 
+# Keys that validate_production requires in obj['production']
+REQUIRED_PRODUCTION_FIELDS = [
+    'coal'
+]
+def validate_production(obj, country_code):
+    """Raise an Exception if a production object fails validation.
+
+    `obj` must have a 'datetime' that is not in the future, a 'countryCode'
+    equal to `country_code`, and a 'production' mapping holding every key
+    listed in REQUIRED_PRODUCTION_FIELDS.
+    """
+    if 'datetime' not in obj:
+        raise Exception('datetime was not returned for %s' % country_code)
+    if obj.get('countryCode', None) != country_code:
+        raise Exception("Country codes %s and %s don't match" % (obj.get('countryCode', None), country_code))
+    if arrow.get(obj['datetime']) > arrow.now():
+        raise Exception("Data from %s can't be in the future" % country_code)
+    # A missing or None 'production' is treated as an empty mapping
+    production = obj.get('production') or {}
+    for key in REQUIRED_PRODUCTION_FIELDS:
+        if key not in production:
+            raise Exception("Production key %s is required for %s" % (key, country_code))
+
 def db_upsert(col, obj, database_key):
     try:
         createdAt = arrow.now().datetime
@@ -209,12 +232,7 @@ def fetch_productions():
             with statsd.StatsdTimer('fetch_one_production'):
                 obj = parser(country_code, session)
                 if not obj: continue
-                if not 'datetime' in obj:
-                    raise Exception('datetime was not returned for %s' % country_code)
-                if obj.get('countryCode', None) != country_code:
-                    raise Exception("Country codes %s and %s don't match" % (obj.get('countryCode', None), country_code))
-                if arrow.get(obj['datetime']) > arrow.now():
-                    raise Exception("Data from %s can't be in the future" % country_code)
+                validate_production(obj, country_code)
                 # Data quality check
                 for k, v in obj['production'].iteritems():
                     if v is None: continue
diff --git a/feeder/migrate_db.py b/feeder/migrate_db.py
index c57f5f6f4c..8b9851baf8 100644
--- a/feeder/migrate_db.py
+++ b/feeder/migrate_db.py
@@ -1,4 +1,5 @@
 import pymongo
+from feeder import validate_production
 
 def migrate(db):
     print 'Starting data migration..'
@@ -28,4 +29,12 @@ def migrate(db):
         except pymongo.errors.DuplicateKeyError: pass
         # Delete in old collection
         col_old.remove({'_id': row['_id']})
+    # ** Validate production data
+    for row in col_production.find():
+        try:
+            validate_production(row, row.get('countryCode', None))
+        except Exception as e:
+            # Report the failure reason and keep checking the remaining rows
+            print 'Warning: row %s did not pass validation: %s' % (row['_id'], e)
+            print row
     print 'Migration done.'