-
Notifications
You must be signed in to change notification settings - Fork 54
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: handle empty string case in extraction util (#54)
- Loading branch information
Showing
5 changed files
with
194 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
# Copyright 2023 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from utils import truncate_complete_text | ||
from unittest.mock import MagicMock, patch | ||
|
||
from google.cloud import logging | ||
|
||
|
||
@patch.object(logging, "Client")
def test_truncate_complete_test(mock_logging):
    """Check that truncation keeps the abstract and conclusion of a paper.

    The input mimics an academic paper: a title, an ``Abstract`` heading
    followed by two filler paragraphs, and a ``Conclusion`` heading followed
    by ten filler paragraphs. The assertions look for the lower-cased opening
    sentence of each section in the returned text.

    ``logging.Client`` is patched, as in the no-abstract/no-conclusion test in
    this module, so that the test never builds a real Cloud Logging client
    (which would need credentials) should the helper create one on this path.
    """
    # One "Lorem ipsum" paragraph; the document repeats it verbatim.
    filler_paragraph = (
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor\n"
        "incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis\n"
        "nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.\n"
        "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu\n"
        "fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in\n"
        "culpa qui officia deserunt mollit anim id est laborum.\n"
    )
    abstract_section = (
        "Abstract\n"
        "This is an abstract. An abstract provides an overview of the\n"
        "academic paper. A good abstract is usually about 150 words long. They can\n"
        "sometimes be longer. They can sometimes be shorter. An abstract should help the\n"
        "reader get the gist of the academic paper without having to read the entire\n"
        "paper.\n"
    )
    conclusion_section = (
        "Conclusion\n"
        "This is a conclusion. It describes the results of the academic paper that\n"
        "precedes it.\n"
    )
    # Same text as the former triple-quoted literal: leading newline, title,
    # abstract + 2 filler paragraphs, conclusion + 10 filler paragraphs.
    complete_text = (
        "\nThis is a test paper\n"
        + abstract_section
        + filler_paragraph * 2
        + conclusion_section
        + filler_paragraph * 10
    )

    extracted_text = truncate_complete_text(
        complete_text=complete_text, logger_name="fake_logger"
    )

    assert "this is an abstract" in extracted_text
    assert "this is a conclusion" in extracted_text
|
||
|
||
@patch.object(logging, "Client")
def test_truncate_complete_text_no_abstract_or_conclusion(mock_logging):
    """Check the fallback for text with no abstract or conclusion heading.

    ``logging.Client`` is replaced so that constructing a client yields a
    mock whose ``logger`` attribute is itself a mock. The test then verifies
    that this ``logger`` attribute was called and that the lower-cased input
    appears in the returned text.
    """
    # Arrange: a stand-in for ``client.logger`` and the client exposing it.
    logger_double = MagicMock(spec=logging.Logger)
    client_double = MagicMock(spec=logging.Client)
    client_double.logger = logger_double
    mock_logging.return_value = client_double

    # Act
    result = truncate_complete_text("This is a bad input", "fake_logger")

    # Assert
    logger_double.assert_called()
    assert "this is a bad input" in result