From ac1fa3ab0ecd6c47ef37d1c95dfec7bb64b889a5 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 11 Dec 2023 14:15:18 -0500 Subject: [PATCH] 8 --- fastparquet/test/test_pd_optional_types.py | 65 +++++++++++----------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/fastparquet/test/test_pd_optional_types.py b/fastparquet/test/test_pd_optional_types.py index bb162354..3600c80a 100644 --- a/fastparquet/test/test_pd_optional_types.py +++ b/fastparquet/test/test_pd_optional_types.py @@ -10,46 +10,46 @@ import numpy.random as random -EXPECTED_SERIES_INT8 = pd.Series(random.uniform(low=-128, high=127,size=100)).round() -EXPECTED_SERIES_INT16 = pd.Series(random.uniform(low=-32768, high=32767,size=100)).round() -EXPECTED_SERIES_INT32 = pd.Series(random.uniform(low=-2147483648, high=2147483647,size=100)).round() -EXPECTED_SERIES_INT64 = pd.Series(random.uniform(low=-9223372036854775808, high=9223372036854775807,size=100)).round() -EXPECTED_SERIES_UINT8 = pd.Series(random.uniform(low=0, high=255,size=100)).round() -EXPECTED_SERIES_UINT16 = pd.Series(random.uniform(low=0, high=65535,size=100)).round() -EXPECTED_SERIES_UINT32 = pd.Series(random.uniform(low=0, high=4294967295,size=100)).round() -EXPECTED_SERIES_UINT64 = pd.Series(random.uniform(low=0, high=18446744073709551615,size=100)).round() -EXPECTED_SERIES_BOOL = pd.Series(random.choice([False, True], 100)) -EXPECTED_SERIES_STRING = pd.Series(random.choice([ +EXPECTED_SERIES_INT8 = random.uniform(low=-128, high=127, size=100).round() +EXPECTED_SERIES_INT16 = random.uniform(low=-32768, high=32767, size=100).round() +EXPECTED_SERIES_INT32 = random.uniform(low=-2147483648, high=2147483647, size=100).round() +EXPECTED_SERIES_INT64 = random.uniform(low=-9223372036854775808, high=9223372036854775807, size=100).round() +EXPECTED_SERIES_UINT8 = random.uniform(low=0, high=255, size=100).round() +EXPECTED_SERIES_UINT16 = random.uniform(low=0, high=65535, size=100).round() +EXPECTED_SERIES_UINT32 = random.uniform(low=0, high=4294967295, size=100).round() +EXPECTED_SERIES_UINT64 = random.uniform(low=0, high=18446744073709551615, size=100).round() +EXPECTED_SERIES_BOOL = random.choice([False, True], 100) +EXPECTED_SERIES_STRING = random.choice([ 'You', 'are', 'my', 'fire', 'The', 'one', 'desire', 'Believe', 'when', 'I', 'say', 'I', 'want', 'it', 'that', 'way' - ], 100)) + ], 100) -EXPECTED_SERIES_INT8.loc[20:30] = np.nan -EXPECTED_SERIES_INT16.loc[20:30] = np.nan -EXPECTED_SERIES_INT32.loc[20:30] = np.nan -EXPECTED_SERIES_INT64.loc[20:30] = np.nan -EXPECTED_SERIES_UINT8.loc[20:30] = np.nan -EXPECTED_SERIES_UINT16.loc[20:30] = np.nan -EXPECTED_SERIES_UINT32.loc[20:30] = np.nan -EXPECTED_SERIES_UINT64.loc[20:30] = np.nan -EXPECTED_SERIES_BOOL.loc[20:30] = np.nan -EXPECTED_SERIES_STRING.loc[20:30] = np.nan +EXPECTED_SERIES_INT8[20:30] = np.nan +EXPECTED_SERIES_INT16[20:30] = np.nan +EXPECTED_SERIES_INT32[20:30] = np.nan +EXPECTED_SERIES_INT64[20:30] = np.nan +EXPECTED_SERIES_UINT8[20:30] = np.nan +EXPECTED_SERIES_UINT16[20:30] = np.nan +EXPECTED_SERIES_UINT32[20:30] = np.nan +EXPECTED_SERIES_UINT64[20:30] = np.nan +EXPECTED_SERIES_BOOL[20:30] = np.nan +EXPECTED_SERIES_STRING[20:30] = np.nan TEST = pd.DataFrame({ - 'int8': EXPECTED_SERIES_INT8.astype('Int8'), - 'int16': EXPECTED_SERIES_INT16.astype('Int16'), - 'int32': EXPECTED_SERIES_INT32.astype('Int32'), - 'int64': EXPECTED_SERIES_INT64.astype('Int64'), - 'uint8': EXPECTED_SERIES_UINT8.astype('UInt8'), - 'uint16': EXPECTED_SERIES_UINT16.astype('UInt16'), - 'uint32': EXPECTED_SERIES_UINT32.astype('UInt32'), - 'uint64': EXPECTED_SERIES_UINT64.astype('UInt64'), - 'bool': EXPECTED_SERIES_BOOL.astype('boolean'), - 'string': EXPECTED_SERIES_STRING.astype('string') + 'int8': pd.Series(EXPECTED_SERIES_INT8, dtype='Int8'), + 'int16': pd.Series(EXPECTED_SERIES_INT16, dtype='Int16'), + 'int32': pd.Series(EXPECTED_SERIES_INT32, dtype='Int32'), + 'int64': pd.Series(EXPECTED_SERIES_INT64, dtype='Int64'), + 'uint8': pd.Series(EXPECTED_SERIES_UINT8, dtype='UInt8'), + 'uint16': pd.Series(EXPECTED_SERIES_UINT16, dtype='UInt16'), + 'uint32': pd.Series(EXPECTED_SERIES_UINT32, dtype='UInt32'), + 'uint64': pd.Series(EXPECTED_SERIES_UINT64, dtype='UInt64'), + 'bool': pd.Series(EXPECTED_SERIES_BOOL, dtype='boolean'), + 'string': pd.Series(EXPECTED_SERIES_STRING, dtype='string') }) @@ -80,7 +80,8 @@ 'string': 'BYTE_ARRAY' } -@pytest.mark.parametrize('comp', (None,'snappy', 'gzip')) + +@pytest.mark.parametrize('comp', (None, 'snappy', 'gzip')) @pytest.mark.parametrize('scheme', ('simple', 'hive')) def test_write_nullable_columns(tempdir, scheme, comp): fname = os.path.join(tempdir, 'test_write_nullable_columns.parquet')