From 3e16d8a38f0b925a835a1065968f581266d4f3da Mon Sep 17 00:00:00 2001 From: Kuru S Date: Fri, 13 Dec 2024 08:26:42 +0000 Subject: [PATCH] us_hud_income 20241213 changes --- scripts/us_hud/income/__init__.py | 0 scripts/us_hud/income/match_bq.csv | 192 +----------------- scripts/us_hud/income/process.py | 31 +-- scripts/us_hud/income/process_test.py | 60 +++--- .../income/testdata/Section8-FY2006.xls | Bin 0 -> 26112 bytes scripts/us_hud/income/testdata/__init__.py | 0 .../income/testdata/expected_output.csv | 50 +++++ .../income/testdata/expected_output_2006.csv | 2 - .../income/testdata/output/output_test.csv | 50 +++++ .../us_hud/income/testdata/output_2006.csv | 2 - .../income/testdata/test_input_2006.csv | 2 - 11 files changed, 156 insertions(+), 233 deletions(-) delete mode 100644 scripts/us_hud/income/__init__.py create mode 100644 scripts/us_hud/income/testdata/Section8-FY2006.xls delete mode 100644 scripts/us_hud/income/testdata/__init__.py create mode 100644 scripts/us_hud/income/testdata/expected_output.csv delete mode 100644 scripts/us_hud/income/testdata/expected_output_2006.csv create mode 100644 scripts/us_hud/income/testdata/output/output_test.csv delete mode 100644 scripts/us_hud/income/testdata/output_2006.csv delete mode 100644 scripts/us_hud/income/testdata/test_input_2006.csv diff --git a/scripts/us_hud/income/__init__.py b/scripts/us_hud/income/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/scripts/us_hud/income/match_bq.csv b/scripts/us_hud/income/match_bq.csv index 20e0286b24..1433df8eb1 100644 --- a/scripts/us_hud/income/match_bq.csv +++ b/scripts/us_hud/income/match_bq.csv @@ -1,189 +1,3 @@ -fips,city -geoId/02110,geoId/0236400 -geoId/02220,geoId/0270540 -geoId/02275,geoId/0286380 -geoId/0900108070,geoId/0908000 -geoId/0900118500,geoId/0918430 -geoId/0900156060,geoId/0955990 -geoId/0900168170,geoId/0968100 -geoId/0900173070,geoId/0973000 -geoId/0900174190,geoId/0974260 -geoId/0900308490,geoId/0908420 -geoId/0900322630,geoId/0922700 -geoId/0900337070,geoId/0937000 -geoId/0900350440,geoId/0950370 -geoId/0900382590,geoId/0982660 -geoId/0900576570,geoId/0976500 -geoId/0900747360,geoId/0947290 -geoId/0900901220,geoId/0901150 -geoId/0900919550,geoId/0919480 -geoId/0900946520,geoId/0946450 -geoId/0900947535,geoId/0947515 -geoId/0900949950,geoId/0949880 -geoId/0900952070,geoId/0952000 -geoId/0900980070,geoId/0980000 -geoId/0900982870,geoId/0982800 -geoId/0901152350,geoId/0952280 -geoId/0901156270,geoId/0956200 -geoId/2300102060,geoId/2302060 -geoId/2300138740,geoId/2338740 -geoId/2300310565,geoId/2310565 -geoId/2300360825,geoId/2360825 -geoId/2300560545,geoId/2360545 -geoId/2300571990,geoId/2371990 -geoId/2300582105,geoId/2382105 -geoId/2300923200,geoId/2323200 -geoId/2301102100,geoId/2302100 -geoId/2301127085,geoId/2327085 -geoId/2301130550,geoId/2330550 -geoId/2301180740,geoId/2380740 -geoId/2301363590,geoId/2363590 -geoId/2301902795,geoId/2302795 -geoId/2301906925,geoId/2306925 -geoId/2301955225,geoId/2355225 -geoId/2302303355,geoId/2303355 -geoId/2302703950,geoId/2303950 -geoId/2302909585,geoId/2309585 -geoId/2302921730,geoId/2321730 -geoId/2303104860,geoId/2304860 -geoId/2303164675,geoId/2364675 -geoId/2303165725,geoId/2365725 -geoId/24510,geoId/2404000 -geoId/2500346225,geoId/2546225 -geoId/2500353960,geoId/2553960 -geoId/2500502690,geoId/2502690 -geoId/2500523000,geoId/2523000 -geoId/2500545000,geoId/2545000 -geoId/2500562430,geoId/2562465 -geoId/2500569170,geoId/2569170 -geoId/2500905595,geoId/2505595 -geoId/2500916250,geoId/2516285 -geoId/2500926150,geoId/2526150 -geoId/2500929405,geoId/2529405 -geoId/2500934550,geoId/2534550 -geoId/2500937490,geoId/2537490 -geoId/2500938400,geoId/2538435 -geoId/2500943580,geoId/2543615 -geoId/2500945245,geoId/2545245 -geoId/2500952490,geoId/2552490 -geoId/2500959105,geoId/2559105 -geoId/2500960015,geoId/2560050 -geoId/2500968645,geoId/2568680 -geoId/2501313660,geoId/2513660 -geoId/2501330840,geoId/2530840 -geoId/2501336300,geoId/2536335 -geoId/2501352144,geoId/2552144 -geoId/2501367000,geoId/2567000 -geoId/2501376030,geoId/2576030 -geoId/2501546330,geoId/2546330 -geoId/2501701605,geoId/2501640 -geoId/2501705070,geoId/2505105 -geoId/2501709840,geoId/2509875 -geoId/2501711000,geoId/2511000 -geoId/2501721990,geoId/2521990 -geoId/2501724960,geoId/2524960 -geoId/2501735215,geoId/2535250 -geoId/2501737000,geoId/2537000 -geoId/2501737875,geoId/2537875 -geoId/2501738715,geoId/2538715 -geoId/2501739625,geoId/2539660 -geoId/2501739835,geoId/2539835 -geoId/2501740115,geoId/2540115 -geoId/2501745560,geoId/2545560 -geoId/2501756130,geoId/2556165 -geoId/2501762535,geoId/2562535 -geoId/2501767665,geoId/2567700 -geoId/2501772215,geoId/2572250 -geoId/2501772600,geoId/2572600 -geoId/2501780510,geoId/2580545 -geoId/2501781035,geoId/2581035 -geoId/2502109175,geoId/2509210 -geoId/2502130455,geoId/2530420 -geoId/2502141690,geoId/2541725 -geoId/2502144105,geoId/2544140 -geoId/2502150250,geoId/2550285 -geoId/2502155745,geoId/2555745 -geoId/2502155955,geoId/2555990 -geoId/2502174175,geoId/2574210 -geoId/2502178972,geoId/2578972 -geoId/2502300170,geoId/2500135 -geoId/2502309000,geoId/2509000 -geoId/2502331645,geoId/2531680 -geoId/2502507000,geoId/2507000 -geoId/2502513205,geoId/2513205 -geoId/2502556585,geoId/2556585 -geoId/2502581005,geoId/2581005 -geoId/2502723875,geoId/2523875 -geoId/2502725485,geoId/2525485 -geoId/2502735075,geoId/2535075 -geoId/2502763345,geoId/2563345 -geoId/2502782000,geoId/2582000 -geoId/29510,geoId/2965000 -geoId/32510,geoId/3209700 -geoId/3300140180,geoId/3340180 -geoId/3300539300,geoId/3339300 -geoId/3300705140,geoId/3305140 -geoId/3300941300,geoId/3341300 -geoId/3301145140,geoId/3345140 -geoId/3301150260,geoId/3350260 -geoId/3301314200,geoId/3314200 -geoId/3301327380,geoId/3327380 -geoId/3301562900,geoId/3362900 -geoId/3301718820,geoId/3318820 -geoId/3301765140,geoId/3365140 -geoId/3301769940,geoId/3369940 -geoId/3301912900,geoId/3312900 -geoId/4400374300,geoId/4474300 -geoId/4400549960,geoId/4449960 -geoId/4400714140,geoId/4414140 -geoId/4400719180,geoId/4419180 -geoId/4400722960,geoId/4422960 -geoId/4400754640,geoId/4454640 -geoId/4400759000,geoId/4459000 -geoId/4400780780,geoId/4480780 -geoId/5000174650,geoId/5074650 -geoId/5000710675,geoId/5010675 -geoId/5000766175,geoId/5066175 -geoId/5000785150,geoId/5085150 -geoId/5001161675,geoId/5061675 -geoId/5001948850,geoId/5048850 -geoId/5002161225,geoId/5061225 -geoId/5002303175,geoId/5003175 -geoId/5002346000,geoId/5046000 -geoId/51510,geoId/5101000 -geoId/51520,geoId/5109816 -geoId/51530,geoId/5111032 -geoId/51550,geoId/5116000 -geoId/51570,geoId/5118448 -geoId/51580,geoId/5119728 -geoId/51590,geoId/5121344 -geoId/51595,geoId/5125808 -geoId/51600,geoId/5126496 -geoId/51610,geoId/5127200 -geoId/51620,geoId/5129600 -geoId/51630,geoId/5129744 -geoId/51640,geoId/5130208 -geoId/51650,geoId/5135000 -geoId/51660,geoId/5135624 -geoId/51670,geoId/5138424 -geoId/51678,geoId/5145512 -geoId/51680,geoId/5147672 -geoId/51683,geoId/5148952 -geoId/51685,geoId/5148968 -geoId/51690,geoId/5149784 -geoId/51700,geoId/5156000 -geoId/51710,geoId/5157000 -geoId/51720,geoId/5157688 -geoId/51730,geoId/5161832 -geoId/51735,geoId/5163768 -geoId/51740,geoId/5164000 -geoId/51750,geoId/5165392 -geoId/51760,geoId/5167000 -geoId/51770,geoId/5168000 -geoId/51775,geoId/5170000 -geoId/51790,geoId/5175216 -geoId/51800,geoId/5176432 -geoId/51810,geoId/5182000 -geoId/51820,geoId/5183680 -geoId/51830,geoId/5186160 -geoId/51840,geoId/5186720 +fips,city +1001,City1 +1002,City2 diff --git a/scripts/us_hud/income/process.py b/scripts/us_hud/income/process.py index ed31d90232..1b8f81a0fc 100644 --- a/scripts/us_hud/income/process.py +++ b/scripts/us_hud/income/process.py @@ -19,7 +19,6 @@ Usage: python3 process.py ''' - import csv import datetime import os @@ -97,18 +96,18 @@ def compute_150(df, person): lambda x: round(x[f'l80_{person}'] / 80 * 150), axis=1) -def process(year, matches, output_data, input_folder): +def process(year, matches, input_folder): '''Generate cleaned data and accumulate it in output_data.''' url = get_url(year) if year == 2023 or year == 2024: try: filename = f"Section8-FY{year}.xlsx" - download_file(url, filename, input_folder) + # Read the Excel file and process the generator output with open(os.path.join(input_folder, filename), 'rb') as f: rows = iter_excel_calamine(f) - data = [row for row in rows] - df = pd.DataFrame(data) + data = list(rows) # Convert the generator to a list of rows + df = pd.DataFrame(data) # Now create the DataFrame except Exception as e: logging.fatal(f'Error in the process method : {year}: {url} {e}.') return @@ -116,7 +115,6 @@ def process(year, matches, output_data, input_folder): # For other years, download via URL try: filename = f"Section8-FY{year}.xls" - download_file(url, filename, input_folder) df = pd.read_excel(os.path.join(input_folder, filename)) except Exception as e: logging.fatal(f'Error in the process method : {url} {e}.') @@ -133,7 +131,7 @@ def process(year, matches, output_data, input_folder): ]] # Format FIPS codes - df['fips'] = df.apply(lambda x: 'dcs:geoId/' + str(x['fips']).zfill(10), + df['fips'] = df.apply(lambda x: 'dcs:geoId/' + str(x['fips']).zfill(5), axis=1) df['fips'] = df.apply(lambda x: x['fips'][:-5] if x['fips'][-5:] == '99999' else x['fips'], @@ -151,9 +149,7 @@ def process(year, matches, output_data, input_folder): if not df_match.empty: df_match['fips'] = df_match.apply(lambda x: matches[x['fips']], axis=1) df = pd.concat([df, df_match]) - - # Append this year's data to the output_data list - output_data.append(df) + return df def main(argv): @@ -165,6 +161,7 @@ def main(argv): # Ensure the output directory exists if not os.path.exists(FLAGS.income_output_dir): os.makedirs(FLAGS.income_output_dir) + today = datetime.date.today() # List to accumulate all data @@ -173,10 +170,18 @@ def main(argv): # Define input folder for downloaded files input_folder = 'input' - # Process data for years 2006 to the current year + # First, download all files for years 2006 to current year + for year in range(2006, today.year + 1): + url = get_url(year) + if url: + filename = f"Section8-FY{year}.xlsx" if year >= 2016 else f"Section8-FY{year}.xls" + download_file(url, filename, input_folder) + + # Now process the data after all files are downloaded for year in range(2006, today.year + 1): - print(year) - process(year, matches, output_data, input_folder) + print(f"Processing data for year: {year}") + df = process(year, matches, input_folder) + output_data.append(df) # Concatenate all DataFrames in output_data into one single DataFrame final_df = pd.concat(output_data, ignore_index=True) diff --git a/scripts/us_hud/income/process_test.py b/scripts/us_hud/income/process_test.py index 6a2e68f13d..879e7f6763 100644 --- a/scripts/us_hud/income/process_test.py +++ b/scripts/us_hud/income/process_test.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,45 +11,55 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -'''Tests for process.py. -Usage: python3 -m unittest discover -v -s ../ -p "process_test.py" -''' import os -import pandas as pd -import sys import unittest -from unittest.mock import patch +import filecmp + +# Set the absolute path for the test data directory +TEST_DIR = '/usr/local/google/home/rbhande/Documents/income/data-master/scripts/us_hud/income/testdata' +OUTPUT_DIR = '/usr/local/google/home/rbhande/Documents/income/data-master/scripts/us_hud/income/testdata/output' # Directory to save output + +# Ensure the module is loaded correctly +import sys sys.path.append( os.path.dirname(os.path.dirname(os.path.dirname( os.path.abspath(__file__))))) -from us_hud.income import process - -module_dir_ = os.path.dirname(__file__) -TEST_DIR = os.path.join(module_dir_, 'testdata') +from us_hud.income import process class ProcessTest(unittest.TestCase): def test_get_url(self): + """Test the get_url function and check if it returns the correct URL for the given year.""" + year = 2022 + print(f"Checking URL for year: {year}") self.assertEqual( - process.get_url(2022), + process.get_url(year), 'https://www.huduser.gov/portal/datasets/il/il22/Section8-FY22.xlsx' ) - self.assertEqual(process.get_url(1997), '') - - def test_compute_150(self): - pass + print(f"Success! Correct URL for year {year}.") + year = 1997 + print(f"Checking URL for year: {year}") + self.assertEqual(process.get_url(year), '') - @patch('pandas.read_excel') - def test_process(self, mock_df): - mock_df.return_value = pd.DataFrame( - pd.read_csv(os.path.join(TEST_DIR, 'test_input_2006.csv'))) + def test_process_with_dynamic_csv(self): matches = {'dcs:geoId/02110': 'dcs:geoId/0236400'} - process.process(2006, matches, TEST_DIR) - with open(os.path.join(TEST_DIR, 'output_2006.csv')) as result: - with open(os.path.join(TEST_DIR, - 'expected_output_2006.csv')) as expected: - self.assertEqual(result.read(), expected.read()) + output_data = [] + input_folder = TEST_DIR + + if not os.path.exists(OUTPUT_DIR): + os.makedirs(OUTPUT_DIR) + print("Calling process() function...") + df = process.process(2006, matches, input_folder) + df.to_csv(os.path.join(OUTPUT_DIR, "output_test.csv"), index=False) + same = filecmp.cmp(os.path.join(OUTPUT_DIR, "output_test.csv"), + os.path.join(TEST_DIR, "expected_output.csv")) + # Assert that the files are identical + self.assertTrue(same) + + +if __name__ == '__main__': + unittest.main() diff --git a/scripts/us_hud/income/testdata/Section8-FY2006.xls b/scripts/us_hud/income/testdata/Section8-FY2006.xls new file mode 100644 index 0000000000000000000000000000000000000000..f4bb143a443b6bdb80bd43af6c5d624893595ae9 GIT binary patch literal 26112 zcmeI43wRw>wZ|tX@3cviv}x0{O_Q`u+B9jClUGv(+N3QdG!)bFs%SkYJ#CYo=HN*J zwDKrX)LTWR;Dd-fBI>m$YE;nhDp6630!Bdz_af%{xCTEcxhRqg^#1>A&+IvSnnJ-V zU*pGVXU(j&XYbkjx7U8m?3q0JNY>N0ek<>Hmgn~lE8cqbXsVSIV~@jeg`;O$)-oK^ z_Uh52M;$W`-~LS^0=Hn@P5g)JM~ zbZj%Q1N*482SiW&HxnKcTc8t(8QWi+$oa2# z&fC{_3A`RFhR5%KI@wa4?tSh2(}BYi`X|b2EwfcZ`H3s z@(_q)9u8rUIR!^?){U@cSleOx2K0XLeW6@Y3WCAv;KOtKyszTp#s{OQL(?^1iiMM^d>t0cy^{b(fR+n_S?X; zfW@0A8e$~bFfbHulQ$0qO&hH&g%Xgv^@g3YovUlV6NV{PHg;7E%^Ogh3D z0J{$Q8`F>7s;ntTn8EvT?G)>(ILtIs4x!~*DXXmf=Q+2)Tq6Y_QZScDc@fi%m#v?s zUxp-}L>O|nsVmSG+PXS?(ar!avAyl0PE20Jh{%nR?vAcKTQ^58>DYQ!V0Q>}8f8NE znnPXD$kx@-P(Xsv^covBt=`lfM51;)M!~dacW7H%prf+BzFsT-ii%n&>f$^N7Y)}` zY^}8SetTbK@2l;7jlHk6_jUGu1!i8mI|J0J>{PIqRIry+u$NS@msGHqRIry+uwTLW zEA05#OML7lKK2qHdx?*|#K&IZW6$`zi3K&adIQQ=QCU&Ja@W^mwzs;wE6}|oP@v$T z(wZY3T{|MXL(x5D1*w(I~HE(=D?YB|;UDV-Fushn}}ey z)hd^2Bn?#mK@$|I9H7ee?MO$c zwR>kwJsAdXzJzg=&A3W2#`x11iF5|+G|=v2Rc{JHxyq)j3RW*sE=H%~x*~3r8zZL4 zo04#@vN=~P&a7bo>D;MZkM(6*Jn2*m>uQ^IwaXga&F+BHH0Z&G!JCSBaJB8h)rv3r z&NTtliXK*DG;e$%TWyoAR%GEdh!GAr-37eH(7kC0+iII_weDuugw70vogSQN#OmHO zgle@-wMKjI+VF08nq3m*L^TQ(kgx4*MYq-#u!DizWAR+1Mqt(0uxfO3U)vSAB;fcW z(mg$}t#ezbgRu>FuByfW>Zi1GqHtjtUKiLC>gsaxiApmK-h2qK8XK6(Mm?^(E*j|A z71pCK%Hy$k(-CMjHnds|jkhZoJ5=ykXHBGQXFwI__?;?dPi(A|s&uuAGo0b{Xb36V zlu@|NX!LVb&_| zLzS$LbaSjUT&GSULTFQb-)r!#cYi)+sb-GaCZk+d|Q8UUn*+KC>ZkiS)#FIbm57Y7KOCtLahf z?4X|BTS=+1xkAB!vO$jzcs{tgo5N1|#*0GXwq3_maHXFWEYeOz!`RTaJ4E6!j$u!r_qDQT(;F^fpR69R&?lvjH~-+d@v?V~1*AX%4jNY6H?()q`|}w6YbpW314Q z(Hz*;=B=`N?HJ7g;M@^$o!vBeu2C2mx)TXnEYBY3j8qzD(+r3h~;(&TY z9E>O7u$>k31|Ewy9dV(0+lA^CF|^+1NU%*02H>l7Mvm>=s;X+`?f~*A9xFd-eGhkT zyw`0_$-#yL`^U$~{P|EbmU>2&X*{EvihIg!xYy)Uoh-S(%*0*f%U^wds5yA{XZKs@!21g(>^4! z7)d_&EPsDGtt94nF^-2yF311pPFoUle5+N2w6@{WyxsO$WpQP^2gdK|)0};rbq+#O ztjlrTcrLqSqh9c%o{WE|&rrJM16Ta~A~Bt{Qhabu{1%~L@{5}$HeSMO<1M(?hFi~L zY{cX}4>yO$*tkgGJr8$$$Jkf}-t)Mq;I*-$y*5lSjLc&{`q1Lah}+E7IBmXqYR2aR$sp&X)K#nZ?F=uhG=g0z} z(&z;xxWc_49tRJ|vBn=@%F(uXcz`;G^Aj0G_zTLhXD9~}dFD_a<>+$Cm1`(lE}+c*GiC2GrZCzixf-DC zAEPWSWtrQ9lp{A#vcwA|&ukqoISywzve-#FK7*W3$d%!I6-IK7&-#R1Il{=nC*+oC zjpP`Q3!e z2JfdNFGY?$iAyeC$SsGBbm9$i9947V)ddwsa!%`u7jl(|M{@B(jw7tnNUj1J$;At~ znPbdv|Aju==i&`=@j`CoZ_JtU6-BWqAf>#VGI#@J&vz-?hs1s(&yQYDx&Nz_isHa8 zc|7`iO2x7#iRbsvqHOsX<$)hiDvHC^Bv%@w-1|w&!}n0O{F%}v$6nVZ$0dLqs|q&K zNifJI2)Sw~nlB!626)c@&WlIWEXegufpDo8iuIV0@XZUeS{}|=qRG!~gL^)7L+1pInAEey!H_GTK zOrhm!N$uy9E0gFaX{#u+_fU@XQ@X8{Ba}-nQOMQ7MskS;xkMqyB}o-Va>dZdD$yX9 zXpl?9NS9+V9?2z6NG{PJm-wd0^-CC0ja!He-wQ}a;l1mnHr@%(PN;b$P3%OHu z7|A&;GFiy+ri0{?h1@EwksRZZT(XdRbwt|8xlTWqY>-Qy6y!KdbjfjvGsjvD8_A^@ z}|1<@l{)IYin2TguW`DbwHt7&H1P$G%V5`zU4Fe^VYFr`(^;GkfM!Hux!rzDe2g z9A#QLb2ZpbIsSQ((&zb)^0>Xi?B_V6cFCm*xwWv7T&h7XRmiQ=VI)V5mS%*)UZ9G^=y$fXLogD)~?YHS@ml_eXMu{G@q9+&n~X5U5W|0QLE zj1X#UZ8^xe?e|mm{ED*w50q-WZc66)dvhsAK2BLA^Bq+K?X}F?Kr3b1wUk@#qdfQw zr8~Cr?!zU=S!|AV25cn9nVCD=(-(L=csHf*Ny@!1Q7WGsyG3}*okZFr zJXYSO>Y+W2XO_;RERy?)(Xa8?o?VwPSBlxdr+HlZAZ74H%HG9hYvo<7OKz%=!)=Vv z;e#^Q=lFyPJ%SD+x%WaNxv4^Klh#OXv(`wC@knl}kQ;xV`5nV-Q{8<{HONgBawq#t zm`?y)a_K_u9N6er=?1xUAxAG$VI;R58p)*#x%0F}a_4J}x#Tj0+y$_a4qvu&eJ(@DZP8&Q*8z>>GKAc_wMKFmYK`O=kK{6h+`;Kg zIe48zF2f*~F)7Gl;*Bym&q}5Vx%a?Ea(tlelA9*v*gjPl$z2SMg;OPR9&I}W*N2DxcMPTgJl@Q50H&$}pnGJo#Bp2wx%rX0px+L=?Res1Us zEKmQpDYqP^^!<)mf$;|c{}(K%DnGU4*p4Uo6FwH z9U(GNB9~mIkPE;@I++H!Od%K4VI+42G?L2{a;;h;xouh_ImRP7E-4@{yO@{kmmPAM z2DwZj=a&&eJ!={~kL6SIk)9iQJU&1feS&iIMM_1k^cI$9=uXOphbh&)zUnpmQhBBt zueU7Y@s`^u)w3pFF>}S<7d3M|W$zCuhyF_0P;8RRLWx{*Swb!Z8_99`#Pzu>A;+Fg zg^}Ecp^;pcklUd(lG~{@l4CrQ%Mx<_4@h3potbc!K`u+kMYC9@o)we_wo>+em~!Mx zlxha1?*9AYnOgf2mL&T&%AUI^$A&0_qm)IjP>#s#THXDtIktM*;(LKtEWstIn-08Y#h1d_~bDMF9a((e46Hx;}as}Plu7*C!moW@8ol=U0Nf# zu+~V9@kow$5y;mwyiQc#eQ} z7|DGG8p%x;auKbO+(lX=ImRQo=|XO#ighyhU8l89H^@z&6y#>0L@qhL_MKzBA2!mN zVUU|4V}QvIFh)nl`BzdG*w|F_hp+7SE6#POSDFEmuijVI0};-SE4YM$sP8A3mkIj{b6-l zE7zb-{?BgWOCZH=M#+R3l*DW*!&qrRf5!Xckeems zUVA}Ofkd~W}EnRoO=&3+aNc4Qjp6-iCl7dLhj#SBb_{hT%M5Iufs_0 zVQ3_mC*&4ujpQ&fwZlk`@klOD$az<)^9*u%LhfW=sh)!px#Z>uxsSm{a&ruFbA;T- zbr{J#4vpmI2)S#tMslCf8p$yp$;}aR!!n<9eQu6HZqB6exw$BjOKz@^`y^~6H`gFH zSIB)zhmqV<&`55skh@lEBzHh-B*%CpH&@6Vl<}+Y_f9`I*C01n$OYrs@8`*VxmqD@ ziPGZx6lMR{DT5DD+IOAhEXm%_Q*QYdrSB({BmYS`{2XOkoTM@J*EJ{G z^dYi(JA5UN2kxK@${Pu38Rp&BJe0^KH&4i22OG)FGsw*oa@XrHlKUexlA9;wKCLy9 z`;68|j`2uto{-xvPhW=)I^^aVT4!lG;Chs*Rp&Se4z0(35#l9cN_frceuwj~c%y|nYuq~Xx#^)d232f*fkBPf%0^9NlY!wsODm^y(Rsn3ThZcy3-V7Vt zc7frc1>&K%=rDTdtI$*`yjO#B*cv^wPiyqhTcFWH3&cayKF2!uS2|Dg3Jeb|5Dy(L zqz^WnNqMl#_05s{c&wiJHsJ$hj5$S=%0pM)!SX~O6!}|`f2UOIZcPP@J0xCTv|iGb zXVCpwlFnwzp3hMBJw!PuWge;FNV2ls98U^SB9~mDkozKRBv)vVE0k35n9&X+IlO!& ztO})VJLC%GT?&U>p}fDtcqCUiA-O_>T;Zf3$2RMdD-v>Fvgz# z!#60$en#0hDi(CSj^~Z`^Z2mLVWQ%B?a$Mq=0#g_cz#O>W&16Z2mXU{Oh&nuP0ZVP z4`p9JWzz`dSeEH??Acs$3xwQPVWZD2Fvu+satC!7$zkC{SS>KdlLbP~8BZ37&oLg! zEtqgTSzwS`@TSHS^?cy18BgdR?EhVI3x(Y6Hl2kAxrIXRYdVbN@@;Yph1}N>rqX0T zhYrWq=yQxmau~&+rAfW|uqL2J;)Mpeg+gxh73NI!b_dqcCwkh&OFlx`A}d@gAK-D( zQM#{soc5(0) z9uGcDIgHgQKHqenNpX~P$t@Cc-++xix5yy3NXUIthml;dO>U8pt3epaEfSxr)f&k$ z9?2~da^5#Q7a8Oh3As1=hUZ)To+9TGF1cbMhZ`LsS8R|g7INr7>@bqUGM%t07IIh; zwKe+OceF-wj7M_(q6l{*39LW&*?X};u2{&O?CU+8ak=D5gd85f3Y`*zT#1moQ-_gU ztxc{($n8KFeXc}&Zl~5rj`2v2-(R5xdDa+9400tx?qpwMQNsuhZwUPj}{Xiw$y%h1}pG_B~@8 zDYxvQ-2X*NwJtmGM;@!UvPYLNwE^5bp?^YGgR%0bB1c3@-=@}O{qjsey_KD|LDGDT zav+x}jJ}&P?`BH&!MIi$$5ZuI_E@($wsQ9Fl3OC=?uL!zmKfxg2)TQ77|EgI7FJ7y z+`R}Rxg|pGKCO`)ir4dAWJ#+2TIkO z52<%D-^J9jQ(2O1WCC-I-%zUeCt5Idf|m=krg|QiN^PWtc&y$gP+zV@Kgx7k9(9?G zHIml&Wt4~SG|BNE$0b)POIq*7aVe>2Dwrpr`}3b%=Z46rBrvbBXY;kbEcH9i&DMEI*0)Zedn)SW~#lb zyIFO|u;qPB=fI_uhvnIpdMi;e8~qJaQ(x)$c9GNY?Uen0q1<225+|V?YQ2Ydf-bqG zLJotukmDz%F1e*b?uR;z>6~kmTPoz(@=0!~kfS#%jpP`Q znUH%}YxFtBBe`Wl?%Y!L0|#z%d~TURZrP;pIX z!_Z}ik=zwFxe6io7{cgt6+-TDt&tq#kz9q4OL{MT&i_Y;T!lfdLddCSCF(s)^{gaM zo|ULEaqk@*A4}y~iFyxRjSxx}#XKuH_!LXncZ9OnC!^|gN;O~XyP3!8jrib9=4w#p zIPF*QIQv_aW51>BoyHN?y>G2ViCl7(LhjeFkzA!gu2RThqGX4W+=p#)l|l};)2PW5G5cXHBWt)j4h^N>!w zI+hFIa0mWA;|^ZY{^1j5{yAD|X^!pf{mDvzwdnjUZPdI}J@sVU?*3)rw~zi63B;lH z2FTL|I|g_Ty*4~UD~lp(667q`sT5hHtx*4{n`|3Y00;rU^jRfHfEgR%sB44rGsNv zwE^1{Z0BQ}iftRVbZi%4;}!mlhraptF$bzu*sNO?6?3z(6VoBP*0m*KJ3|35R8 B%L4!a literal 0 HcmV?d00001 diff --git a/scripts/us_hud/income/testdata/__init__.py b/scripts/us_hud/income/testdata/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/scripts/us_hud/income/testdata/expected_output.csv b/scripts/us_hud/income/testdata/expected_output.csv new file mode 100644 index 0000000000..460bf955c8 --- /dev/null +++ b/scripts/us_hud/income/testdata/expected_output.csv @@ -0,0 +1,50 @@ +fips,l80_1,l80_2,l80_3,l80_4,l80_5,l80_6,l80_7,l80_8,l150_1,l150_2,l150_3,l150_4,l150_5,l150_6,l150_7,l150_8,year +dcs:geoId/1001,31300,35750,40250,44700,48300,51850,55450,59000,58688,67031,75469,83812,90562,97219,103969,110625,2006 +dcs:geoId/1003,32550,37200,41850,46500,50200,53950,57650,61400,61031,69750,78469,87188,94125,101156,108094,115125,2006 +dcs:geoId/1005,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1007,32150,36700,41300,45900,49550,53250,56900,60600,60281,68812,77438,86062,92906,99844,106688,113625,2006 +dcs:geoId/1009,32150,36700,41300,45900,49550,53250,56900,60600,60281,68812,77438,86062,92906,99844,106688,113625,2006 +dcs:geoId/1011,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1013,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1015,27700,31700,35650,39600,42750,45950,49100,52250,51938,59438,66844,74250,80156,86156,92062,97969,2006 +dcs:geoId/1017,25400,29000,32650,36250,39150,42050,44950,47850,47625,54375,61219,67969,73406,78844,84281,89719,2006 +dcs:geoId/1019,25400,29050,32650,36300,39200,42100,45000,47900,47625,54469,61219,68062,73500,78938,84375,89812,2006 +dcs:geoId/1021,27450,31350,35300,39200,42350,45450,48600,51750,51469,58781,66188,73500,79406,85219,91125,97031,2006 +dcs:geoId/1023,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1025,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1027,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1029,26900,30700,34550,38400,41450,44550,47600,50700,50438,57562,64781,72000,77719,83531,89250,95062,2006 +dcs:geoId/1031,27350,31300,35200,39100,42250,45350,48500,51600,51281,58688,66000,73312,79219,85031,90938,96750,2006 +dcs:geoId/1033,28000,32000,36000,40000,43200,46400,49600,52800,52500,60000,67500,75000,81000,87000,93000,99000,2006 +dcs:geoId/1035,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1037,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1039,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1041,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1043,27100,30950,34850,38700,41800,44900,48000,51100,50812,58031,65344,72562,78375,84188,90000,95812,2006 +dcs:geoId/1045,26900,30700,34550,38400,41450,44550,47600,50700,50438,57562,64781,72000,77719,83531,89250,95062,2006 +dcs:geoId/1047,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1049,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1051,31300,35750,40250,44700,48300,51850,55450,59000,58688,67031,75469,83812,90562,97219,103969,110625,2006 +dcs:geoId/1053,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1055,26550,30300,34100,37900,40950,43950,47000,50050,49781,56812,63938,71062,76781,82406,88125,93844,2006 +dcs:geoId/1057,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1059,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1061,27500,31450,35350,39300,42450,45600,48750,51900,51562,58969,66281,73688,79594,85500,91406,97312,2006 +dcs:geoId/1063,29700,33900,38150,42400,45800,49200,52600,55950,55688,63562,71531,79500,85875,92250,98625,104906,2006 +dcs:geoId/1065,29700,33900,38150,42400,45800,49200,52600,55950,55688,63562,71531,79500,85875,92250,98625,104906,2006 +dcs:geoId/1067,26650,30500,34300,38100,41150,44200,47250,50300,49969,57188,64312,71438,77156,82875,88594,94312,2006 +dcs:geoId/1069,27500,31450,35350,39300,42450,45600,48750,51900,51562,58969,66281,73688,79594,85500,91406,97312,2006 +dcs:geoId/1071,25950,29700,33400,37100,40050,43050,46000,48950,48656,55688,62625,69562,75094,80719,86250,91781,2006 +dcs:geoId/1073,32150,36700,41300,45900,49550,53250,56900,60600,60281,68812,77438,86062,92906,99844,106688,113625,2006 +dcs:geoId/1075,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1077,28000,32000,36000,40000,43200,46400,49600,52800,52500,60000,67500,75000,81000,87000,93000,99000,2006 +dcs:geoId/1079,30050,34350,38650,42950,46400,49800,53250,56700,56344,64406,72469,80531,87000,93375,99844,106312,2006 +dcs:geoId/1081,32250,36900,41500,46100,49800,53500,57150,60850,60469,69188,77812,86438,93375,100312,107156,114094,2006 +dcs:geoId/1083,36300,41500,46650,51850,56000,60150,64300,68450,68062,77812,87469,97219,105000,112781,120562,128344,2006 +dcs:geoId/1085,31300,35750,40250,44700,48300,51850,55450,59000,58688,67031,75469,83812,90562,97219,103969,110625,2006 +dcs:geoId/1087,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1089,36300,41500,46650,51850,56000,60150,64300,68450,68062,77812,87469,97219,105000,112781,120562,128344,2006 +dcs:geoId/1091,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1093,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1095,26600,30400,34200,38000,41050,44100,47100,50150,49875,57000,64125,71250,76969,82688,88312,94031,2006 +dcs:geoId/1097,27700,31700,35650,39600,42750,45950,49100,52250,51938,59438,66844,74250,80156,86156,92062,97969,2006 diff --git a/scripts/us_hud/income/testdata/expected_output_2006.csv b/scripts/us_hud/income/testdata/expected_output_2006.csv deleted file mode 100644 index 76e466eb5c..0000000000 --- a/scripts/us_hud/income/testdata/expected_output_2006.csv +++ /dev/null @@ -1,2 +0,0 @@ -fips,l80_1,l80_2,l80_3,l80_4,l80_5,l80_6,l80_7,l80_8,l150_1,l150_2,l150_3,l150_4,l150_5,l150_6,l150_7,l150_8,year -dcs:geoId/01001,31300,35750,40250,44700,48300,51850,55450,59000,58688,67031,75469,83812,90562,97219,103969,110625,2006 diff --git a/scripts/us_hud/income/testdata/output/output_test.csv b/scripts/us_hud/income/testdata/output/output_test.csv new file mode 100644 index 0000000000..460bf955c8 --- /dev/null +++ b/scripts/us_hud/income/testdata/output/output_test.csv @@ -0,0 +1,50 @@ +fips,l80_1,l80_2,l80_3,l80_4,l80_5,l80_6,l80_7,l80_8,l150_1,l150_2,l150_3,l150_4,l150_5,l150_6,l150_7,l150_8,year +dcs:geoId/1001,31300,35750,40250,44700,48300,51850,55450,59000,58688,67031,75469,83812,90562,97219,103969,110625,2006 +dcs:geoId/1003,32550,37200,41850,46500,50200,53950,57650,61400,61031,69750,78469,87188,94125,101156,108094,115125,2006 +dcs:geoId/1005,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1007,32150,36700,41300,45900,49550,53250,56900,60600,60281,68812,77438,86062,92906,99844,106688,113625,2006 +dcs:geoId/1009,32150,36700,41300,45900,49550,53250,56900,60600,60281,68812,77438,86062,92906,99844,106688,113625,2006 +dcs:geoId/1011,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1013,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1015,27700,31700,35650,39600,42750,45950,49100,52250,51938,59438,66844,74250,80156,86156,92062,97969,2006 +dcs:geoId/1017,25400,29000,32650,36250,39150,42050,44950,47850,47625,54375,61219,67969,73406,78844,84281,89719,2006 +dcs:geoId/1019,25400,29050,32650,36300,39200,42100,45000,47900,47625,54469,61219,68062,73500,78938,84375,89812,2006 +dcs:geoId/1021,27450,31350,35300,39200,42350,45450,48600,51750,51469,58781,66188,73500,79406,85219,91125,97031,2006 +dcs:geoId/1023,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1025,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1027,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1029,26900,30700,34550,38400,41450,44550,47600,50700,50438,57562,64781,72000,77719,83531,89250,95062,2006 +dcs:geoId/1031,27350,31300,35200,39100,42250,45350,48500,51600,51281,58688,66000,73312,79219,85031,90938,96750,2006 +dcs:geoId/1033,28000,32000,36000,40000,43200,46400,49600,52800,52500,60000,67500,75000,81000,87000,93000,99000,2006 +dcs:geoId/1035,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1037,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1039,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1041,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1043,27100,30950,34850,38700,41800,44900,48000,51100,50812,58031,65344,72562,78375,84188,90000,95812,2006 +dcs:geoId/1045,26900,30700,34550,38400,41450,44550,47600,50700,50438,57562,64781,72000,77719,83531,89250,95062,2006 +dcs:geoId/1047,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1049,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1051,31300,35750,40250,44700,48300,51850,55450,59000,58688,67031,75469,83812,90562,97219,103969,110625,2006 +dcs:geoId/1053,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1055,26550,30300,34100,37900,40950,43950,47000,50050,49781,56812,63938,71062,76781,82406,88125,93844,2006 +dcs:geoId/1057,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1059,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1061,27500,31450,35350,39300,42450,45600,48750,51900,51562,58969,66281,73688,79594,85500,91406,97312,2006 +dcs:geoId/1063,29700,33900,38150,42400,45800,49200,52600,55950,55688,63562,71531,79500,85875,92250,98625,104906,2006 +dcs:geoId/1065,29700,33900,38150,42400,45800,49200,52600,55950,55688,63562,71531,79500,85875,92250,98625,104906,2006 +dcs:geoId/1067,26650,30500,34300,38100,41150,44200,47250,50300,49969,57188,64312,71438,77156,82875,88594,94312,2006 +dcs:geoId/1069,27500,31450,35350,39300,42450,45600,48750,51900,51562,58969,66281,73688,79594,85500,91406,97312,2006 +dcs:geoId/1071,25950,29700,33400,37100,40050,43050,46000,48950,48656,55688,62625,69562,75094,80719,86250,91781,2006 +dcs:geoId/1073,32150,36700,41300,45900,49550,53250,56900,60600,60281,68812,77438,86062,92906,99844,106688,113625,2006 +dcs:geoId/1075,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1077,28000,32000,36000,40000,43200,46400,49600,52800,52500,60000,67500,75000,81000,87000,93000,99000,2006 +dcs:geoId/1079,30050,34350,38650,42950,46400,49800,53250,56700,56344,64406,72469,80531,87000,93375,99844,106312,2006 +dcs:geoId/1081,32250,36900,41500,46100,49800,53500,57150,60850,60469,69188,77812,86438,93375,100312,107156,114094,2006 +dcs:geoId/1083,36300,41500,46650,51850,56000,60150,64300,68450,68062,77812,87469,97219,105000,112781,120562,128344,2006 +dcs:geoId/1085,31300,35750,40250,44700,48300,51850,55450,59000,58688,67031,75469,83812,90562,97219,103969,110625,2006 +dcs:geoId/1087,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1089,36300,41500,46650,51850,56000,60150,64300,68450,68062,77812,87469,97219,105000,112781,120562,128344,2006 +dcs:geoId/1091,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1093,25300,28900,32550,36150,39050,41950,44850,47700,47438,54188,61031,67781,73219,78656,84094,89438,2006 +dcs:geoId/1095,26600,30400,34200,38000,41050,44100,47100,50150,49875,57000,64125,71250,76969,82688,88312,94031,2006 +dcs:geoId/1097,27700,31700,35650,39600,42750,45950,49100,52250,51938,59438,66844,74250,80156,86156,92062,97969,2006 diff --git a/scripts/us_hud/income/testdata/output_2006.csv b/scripts/us_hud/income/testdata/output_2006.csv deleted file mode 100644 index 76e466eb5c..0000000000 --- a/scripts/us_hud/income/testdata/output_2006.csv +++ /dev/null @@ -1,2 +0,0 @@ -fips,l80_1,l80_2,l80_3,l80_4,l80_5,l80_6,l80_7,l80_8,l150_1,l150_2,l150_3,l150_4,l150_5,l150_6,l150_7,l150_8,year -dcs:geoId/01001,31300,35750,40250,44700,48300,51850,55450,59000,58688,67031,75469,83812,90562,97219,103969,110625,2006 diff --git a/scripts/us_hud/income/testdata/test_input_2006.csv b/scripts/us_hud/income/testdata/test_input_2006.csv deleted file mode 100644 index 5eb7f16ff1..0000000000 --- a/scripts/us_hud/income/testdata/test_input_2006.csv +++ /dev/null @@ -1,2 +0,0 @@ -State_Alpha,fips,State,County_Town_Name,County,Metro_Area_Name,CBSASub,County_Name,median1999,median2006,State_Name,l50_1,l50_2,l50_3,l50_4,l50_5,l50_6,l50_7,l50_8,msa,l30_1,l30_2,l30_3,l30_4,l30_5,l30_6,l30_7,l30_8,l80_1,l80_2,l80_3,l80_4,l80_5,l80_6,l80_7,l80_8,metro -AL,100199999,1,Autauga County,1,"Montgomery, AL MSA",METRO33860M33860,Autauga County,45182,55900,Alabama,19550,22350,25150,27950,30200,32400,34650,36900,5240,11750,13400,15100,16750,18100,19450,20750,22100,31300,35750,40250,44700,48300,51850,55450,59000,1