Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DAE-116] Add method bulk drop #49

Merged
merged 5 commits into from
Feb 23, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion docs/source/getstarted.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ Click on the following links to open the [examples](https://github.com/quintoand

**[#7 Get partition keys names from a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/get_partition_keys_names.py)**

**[#8 Bulk drop partition values from a table](https://github.com/quintoandar/hive-metastore-client/blob/main/examples/bulk_drop_partitions.py)**

## Available methods

You can see all the Hive Metastore server available methods by looking at the
Expand All @@ -47,4 +49,5 @@ the [`HiveMetastoreClient`](https://github.com/quintoandar/hive-metastore-client
- [`create_database_if_not_exists`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.create_database_if_not_exists)
- [`create_external_table`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.create_external_table)
- [`get_partition_keys_objects`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.get_partition_keys_objects)
- [`get_partition_keys_names`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.get_partition_keys_names)
- [`get_partition_keys_names`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.get_partition_keys_names)
- [`bulk_drop_partitions`](https://hive-metastore-client.readthedocs.io/en/latest/hive_metastore_client.html#hive_metastore_client.hive_metastore_client.HiveMetastoreClient.bulk_drop_partitions)
20 changes: 20 additions & 0 deletions examples/bulk_drop_partitions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from hive_metastore_client import HiveMetastoreClient

# Connection settings for the Hive Metastore server; replace the host
# placeholder before running this example.
HIVE_HOST = "<ADD_HIVE_HOST_HERE>"
HIVE_PORT = 9083

# Table whose partitions will be dropped.
DATABASE_NAME = "database_name"
TABLE_NAME = "table_name"

# One entry per partition to drop; each entry is that partition's list of
# values.
partition_list = [
    ["2020", "1", "28"],
    ["2020", "1", "29"],
    ["2020", "1", "30"],
    ["2020", "1", "31"],
]

with HiveMetastoreClient(HIVE_HOST, HIVE_PORT) as hive_client:
    # A single call drops all listed partitions; delete_data=False is
    # forwarded as-is to the client.
    hive_client.bulk_drop_partitions(
        db_name=DATABASE_NAME,
        table_name=TABLE_NAME,
        partition_list=partition_list,
        delete_data=False,
    )
37 changes: 37 additions & 0 deletions hive_metastore_client/hive_metastore_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Database,
AlreadyExistsException,
Table,
NoSuchObjectException,
)


Expand Down Expand Up @@ -259,3 +260,39 @@ def get_partition_keys_names(self, db_name: str, table_name: str) -> List[str]:
db_name=db_name, table_name=table_name
)
return [partition.name for partition in partition_keys]

def bulk_drop_partitions(
    self,
    db_name: str,
    table_name: str,
    partition_list: List[List[str]],
    delete_data: bool = False,
) -> None:
    """
    Drop every partition listed in ``partition_list``.

    The server only supports dropping one partition per call, so this
    method emulates a bulk drop by invoking ``drop_partition`` once for
    each entry. An entry whose drop raises ``NoSuchObjectException`` does
    not interrupt the loop: it is recorded, the remaining entries are
    still attempted, and a single exception listing all recorded entries
    is raised once the loop has finished.

    :param db_name: database name of the table
    :param table_name: table name
    :param partition_list: the partitions to be dropped, one list of
        values per partition
    :param delete_data: indicates whether the data respective to the
        partition should be dropped in the source; forwarded unchanged
        to every ``drop_partition`` call
    :raises NoSuchObjectException: if at least one entry could not be
        dropped because ``drop_partition`` raised this exception for it
    """
    missing_partitions = []
    for values in partition_list:
        try:
            self.drop_partition(db_name, table_name, values, delete_data)
        except NoSuchObjectException:
            # Keep going: the failure is reported after all entries ran.
            missing_partitions.append(values)

    if not missing_partitions:
        return

    raise NoSuchObjectException(
        "m=bulk_drop_partitions, partitions_not_dropped="
        f"{missing_partitions}, msg=Some partition values were not "
        "dropped because they do not exist."
    )
42 changes: 42 additions & 0 deletions tests/unit/hive_metastore_client/test_hive_metastore_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,3 +434,45 @@ def test_get_partition_keys_names_with_partitioned_table(
mocked_get_partition_keys_objects.assert_called_once_with(
db_name=database_name, table_name=table_name
)

@mock.patch.object(HiveMetastoreClient, "drop_partition", return_value=None)
def test_bulk_drop_partitions(self, mock_drop_partition, hive_metastore_client):
    """Each listed partition is forwarded to ``drop_partition`` once, in order."""
    # arrange
    db_name = "db_name"
    table_name = "table_name"
    partition_list = [["1995", "9", "22"], ["2013", "2", "14"], ["2021", "1", "1"]]
    # Use a concrete value: mock.ANY is a matcher for assertions, not an input.
    delete_data = False

    # act
    hive_metastore_client.bulk_drop_partitions(
        db_name, table_name, partition_list, delete_data
    )

    # assert
    assert mock_drop_partition.call_count == len(partition_list)
    # Also verify the arguments, so a wrongly forwarded value is caught.
    mock_drop_partition.assert_has_calls(
        [
            mock.call(db_name, table_name, partition_values, delete_data)
            for partition_values in partition_list
        ]
    )

@mock.patch.object(HiveMetastoreClient, "drop_partition", return_value=None)
def test_bulk_drop_partitions_with_errors(
    self, mock_drop_partition, hive_metastore_client
):
    """Failed drops do not stop the loop and are reported in one exception."""
    # arrange
    db_name = "db_name"
    table_name = "table_name"
    partition_list = [["1995", "9", "22"], ["2021", "1", "1"], ["2021", "1", "2"]]
    # Use a concrete value: mock.ANY is a matcher for assertions, not an input.
    delete_data = False
    # First drop succeeds, the other two report a missing partition.
    mock_drop_partition.side_effect = [
        None,
        NoSuchObjectException(),
        NoSuchObjectException(),
    ]

    # assert
    with raises(NoSuchObjectException) as e:
        # act
        hive_metastore_client.bulk_drop_partitions(
            db_name, table_name, partition_list, delete_data
        )

    # These checks sit outside the ``raises`` block on purpose: statements
    # placed after the raising call inside the block would never execute.
    assert mock_drop_partition.call_count == len(partition_list)
    mock_drop_partition.assert_has_calls(
        [
            mock.call(db_name, table_name, partition_values, delete_data)
            for partition_values in partition_list
        ]
    )
    assert e.value == NoSuchObjectException(
        "m=bulk_drop_partitions, partitions_not_dropped=[['2021', '1', '1'], ['2021', '1', '2']],"
        " msg=Some partition values were not dropped because they do not exist."
    )