From a9490f676f78166776f27f82e6e3e92d6a16e549 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Thu, 1 Feb 2024 06:21:16 -0800 Subject: [PATCH 1/2] Multiprocessing fork bug fix --- scripts/us_usda/quickstats/process.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/us_usda/quickstats/process.py b/scripts/us_usda/quickstats/process.py index 23479bca61..0681f9c576 100644 --- a/scripts/us_usda/quickstats/process.py +++ b/scripts/us_usda/quickstats/process.py @@ -28,6 +28,7 @@ from itertools import repeat import json import multiprocessing +from multiprocessing import get_context import os import sys @@ -93,7 +94,7 @@ def process_survey_data(year, svs, out_dir, usda_api_key): pool_size = max(2, multiprocessing.cpu_count() - 1) - with multiprocessing.Pool(pool_size) as pool: + with get_context("spawn").Pool(pool_size) as pool: pool.starmap( fetch_and_write, zip(county_names, repeat(year), repeat(svs), repeat(out_dir), @@ -201,8 +202,8 @@ def get_survey_county_data(year, county, out_dir, usda_api_key): print('# api records for', county, '=', len(response['data'])) return response - -@limits(calls=10, period=60) +# TODO: determine if the rate limiter needs to be re-enabled. +# @limits(calls=10, period=60) def get_data(params): return requests.get(f'{API_BASE}/api_GET', params=params).json() From 5b566936e04a2b6cc44ab7494361754627a798e1 Mon Sep 17 00:00:00 2001 From: Jehangir Amjad Date: Thu, 1 Feb 2024 07:28:35 -0800 Subject: [PATCH 2/2] lint fixes --- scripts/us_usda/quickstats/process.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/us_usda/quickstats/process.py b/scripts/us_usda/quickstats/process.py index 0681f9c576..e8312452ca 100644 --- a/scripts/us_usda/quickstats/process.py +++ b/scripts/us_usda/quickstats/process.py @@ -202,6 +202,7 @@ def get_survey_county_data(year, county, out_dir, usda_api_key): print('# api records for', county, '=', len(response['data'])) return response + # TODO: determine if the rate limiter needs to be re-enabled. # @limits(calls=10, period=60) def get_data(params):