From 76cfcd886019cfb1ad817356c51ec722fb2c797a Mon Sep 17 00:00:00 2001 From: rly0nheart <74001397+rly0nheart@users.noreply.github.com> Date: Sun, 20 Oct 2024 17:21:53 +0200 Subject: [PATCH 1/2] 3.0.0 --- pyproject.toml | 4 +- src/karmakaze/__init__.py | 4 +- src/karmakaze/_main.py | 555 +++---------------------- src/karmakaze/{_tools.py => _utils.py} | 0 tests/test_parse.py | 93 ----- tests/test_sanitise.py | 88 ---- tests/test_sanitise_and_parse.py | 82 ++++ 7 files changed, 144 insertions(+), 682 deletions(-) rename src/karmakaze/{_tools.py => _utils.py} (100%) delete mode 100644 tests/test_parse.py delete mode 100644 tests/test_sanitise.py create mode 100644 tests/test_sanitise_and_parse.py diff --git a/pyproject.toml b/pyproject.toml index 03ae4e0..ad93033 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "karmakaze" -version = "2.0.1" -description = "Data sanitation engine for Knew Karma" +version = "3.0.0" +description = "Response sanitation & parsing engine for KRAW" authors = ["Richard Mwewa ", "Knew Karma IO SimpleNamespace: - """ - Parses a Reddit comment into a SimpleNamespace object. - - :param data: A dictionary containing raw data for a single Reddit comment. - :type data: Dict - :return: A SimpleNamespace object with parsed comment data. - :rtype: SimpleNamespace - """ + def _to_namespace_object( + self, obj: Union[List[Dict], Dict] + ) -> Union[List[SimpleNamespace], SimpleNamespace, Union[List[Dict], Dict]]: - return ( - SimpleNamespace( + if isinstance(obj, Dict): + # Recursively convert any dictionaries within the current dict + return SimpleNamespace( **{ - "body": data.get("body"), - "id": data.get("id"), - "author": data.get("author"), - "author_is_premium": data.get("author_premium"), - "upvotes": data.get("ups"), - "downvotes": data.get("downs"), - "subreddit": data.get("subreddit_name_prefixed"), - "subreddit_type": data.get("subreddit_type"), - "post_id": data.get("link_id"), - "post_title": data.get("link_title"), - "is_nsfw": data.get("over_18"), - "is_edited": data.get("edited"), - "score": data.get("score"), - "hidden_score": data.get("score_hidden"), - "gilded": data.get("gilded"), - "is_stickied": data.get("stickied"), - "is_locked": data.get("locked"), - "is_archived": data.get("archived"), - "subreddit_id": data.get("subreddit_id"), - "author_is_blocked": data.get("author_is_blocked"), - "link_author": data.get("link_author"), - "replies": data.get("replies"), - "saved": data.get("saved"), - "can_mod_post": data.get("can_mod_post"), - "send_replies": data.get("send_replies"), - "parent_id": data.get("parent_id"), - "author_fullname": data.get("author_fullname"), - "controversiality": data.get("controversiality"), - "body_html": data.get("body_html"), - "link_permalink": data.get("link_permalink"), - "name": data.get("name"), - "treatment_tags": data.get("treatment_tags"), - "awarders": data.get("awarders"), - "all_awardings": data.get("all_awardings"), - "quarantine": data.get("quarantine"), - "link_url": data.get("link_url"), - "created": timestamp_to_readable( - timestamp=data.get("created"), - time_format=self._time_format, - ), + key: ( + timestamp_to_readable(value, self._time_format) + if key in {"created", "created_utc", "edited", "revision_date"} + else self._to_namespace_object(obj=value) + ) + for key, value in obj.items() } ) - if isinstance(data, Dict) - else SimpleNamespace - ) + elif isinstance(obj, List): + # If it's a list, check if any of the items are dicts and convert them too + return [self._to_namespace_object(obj=item) for item in obj] + else: + return obj - def comments(self, data: List[Dict]) -> List[SimpleNamespace]: - """ - Parses a list of Reddit comments into a list of SimpleNamespace objects. + def comment(self, response: Dict) -> SimpleNamespace: - :param data: A list of dictionaries representing raw comment data. - :type data: List[Dict] - :return: A list of SimpleNamespace objects with parsed comment data. - :rtype: List[SimpleNamespace] - """ + if isinstance(response, Dict): + return self._to_namespace_object(obj=response) - if isinstance(data, List) and all( - isinstance(comment, Dict) for comment in data - ): - return [self.comment(data=raw_comment) for raw_comment in data] + def comments( + self, response: Union[List[Dict], Dict] + ) -> Union[List[SimpleNamespace], SimpleNamespace]: - def post(self, data: Dict) -> SimpleNamespace: - """ - Parses a subreddit into a SimpleNamespace object. + if isinstance(response, List) and all( + isinstance(comment, Dict) for comment in response + ): + return [self.comment(response=raw_comment) for raw_comment in response] + elif isinstance(response, Dict): + return self._to_namespace_object(obj=response.get("data", {})) - :param data: A dictionary containing raw data for a single subreddit. - :type data: Dict - :return: A SimpleNamespace object with parsed subreddit data. - :rtype: SimpleNamespace - """ + def post(self, response: List[Dict]) -> SimpleNamespace: - return ( - SimpleNamespace( - **{ - "author": data.get("author"), - "title": data.get("title"), - "body": data.get("selftext"), - "id": data.get("id"), - "subreddit": data.get("subreddit"), - "subreddit_id": data.get("subreddit_id"), - "subreddit_type": data.get("subreddit_type"), - "subreddit_subscribers": data.get("subreddit_subscribers"), - "upvotes": data.get("ups"), - "upvote_ratio": data.get("upvote_ratio"), - "downvotes": data.get("downs"), - "thumbnail": data.get("thumbnail"), - "gilded": data.get("gilded"), - "is_video": data.get("is_video"), - "is_nsfw": data.get("over_18"), - "is_shareable": data.get("is_reddit_media_domain"), - "is_robot_indexable": data.get("is_robot_indexable"), - "permalink": data.get("permalink"), - "is_locked": data.get("locked"), - "is_archived": data.get("archived"), - "domain": data.get("domain"), - "score": data.get("score"), - "comments": data.get("num_comments"), - "saved": data.get("saved"), - "clicked": data.get("clicked"), - "hidden": data.get("hidden"), - "pwls": data.get("pwls"), - "hide_score": data.get("hide_score"), - "num_crossposts": data.get("num_crossposts"), - "parent_whitelist_status": data.get("parent_whitelist_status"), - "name": data.get("name"), - "quarantine": data.get("quarantine"), - "link_flair_text_color": data.get("link_flair_text_color"), - "is_original_content": data.get("is_original_content"), - "can_mod_post": data.get("can_mod_post"), - "is_created_from_ads_ui": data.get("is_created_from_ads_ui"), - "author_premium": data.get("author_premium"), - "is_self": data.get("is_self"), - "link_flair_type": data.get("link_flair_type"), - "wls": data.get("wls"), - "author_flair_type": data.get("author_flair_type"), - "allow_live_comments": data.get("allow_live_comments"), - "no_follow": data.get("no_follow"), - "is_crosspostable": data.get("is_crosspostable"), - "pinned": data.get("pinned"), - "author_is_blocked": data.get("author_is_blocked"), - "link_flair_background_color": data.get( - "link_flair_background_color" - ), - "author_fullname": data.get("author_fullname"), - "whitelist_status": data.get("whitelist_status"), - "edited": timestamp_to_readable( - timestamp=data.get("edited"), time_format=self._time_format - ), - "url": data.get("url"), - "created": timestamp_to_readable( - timestamp=data.get("created"), time_format=self._time_format - ), - } - ) - if isinstance(data, Dict) - else SimpleNamespace - ) + if isinstance(response, List) and len(response) == 2: + children = response[0].get("data", {}).get("children") + return self._to_namespace_object(obj=children[0]) def posts( self, - data: List[Dict], - ) -> List[SimpleNamespace]: - """ - Parses a list of Reddit posts into a list of SimpleNamespace objects. - - :param data: A list of dictionaries representing raw post data. - :type data: List[Dict] - :return: A list of SimpleNamespace objects with parsed post data. - :rtype: List[SimpleNamespace] - """ - - if isinstance(data, List) and all(isinstance(post, Dict) for post in data): - return [self.post(data=raw_post) for raw_post in data] - - def subreddit(self, data: Dict) -> SimpleNamespace: - """ - Parses a single raw subreddit into a SimpleNamespace object. - - :param data: A dictionary containing raw data for a single subreddit. - :type data: Dict - :return: A SimpleNamespace object containing parsed subreddit data. - :rtype: SimpleNamespace - """ - - return ( - SimpleNamespace( - **{ - "title": data.get("title"), - "display_name": data.get("display_name"), - "id": data.get("id"), - "description": data.get("public_description"), - "submit_text": data.get("submit_text"), - "submit_text_html": data.get("submit_text_html"), - "icon": ( - data.get("icon_img").split("?")[0] - if data.get("icon_img") - else "" - ), - "type": data.get("subreddit_type"), - "subscribers": data.get("subscribers"), - "current_active_users": data.get("accounts_active"), - "is_nsfw": data.get("over18"), - "language": data.get("lang"), - "whitelist_status": data.get("whitelist_status"), - "url": data.get("url"), - "user_flair_position": data.get("user_flair_position"), - "spoilers_enabled": data.get("spoilers_enabled"), - "allow_galleries": data.get("allow_galleries"), - "show_media_preview": data.get("show_media_preview"), - "allow_videogifs": data.get("allow_videogifs"), - "allow_videos": data.get("allow_videos"), - "allow_images": data.get("allow_images"), - "allow_polls": data.get("allow_polls"), - "public_traffic": data.get("public_traffic"), - "description_html": data.get("description_html"), - "emojis_enabled": data.get("emojis_enabled"), - "primary_color": data.get("primary_color"), - "key_color": data.get("key_color"), - "banner_background_color": data.get("banner_background_color"), - "icon_size": data.get("icon_size"), - "header_size": data.get("header_size"), - "banner_size": data.get("banner_size"), - "link_flair_enabled": data.get("link_flair_enabled"), - "restrict_posting": data.get("restrict_posting"), - "restrict_commenting": data.get("restrict_commenting"), - "submission_type": data.get("submission_type"), - "free_form_reports": data.get("free_form_reports"), - "wiki_enabled": data.get("wiki_enabled"), - "community_icon": ( - data.get("community_icon").split("?")[0] - if data.get("community_icon") - else "" - ), - "banner_background_image": data.get("banner_background_image"), - "mobile_banner_image": data.get("mobile_banner_image"), - "allow_discovery": data.get("allow_discovery"), - "is_crosspostable_subreddit": data.get( - "is_crosspostable_subreddit" - ), - "notification_level": data.get("notification_level"), - "suggested_comment_sort": data.get("suggested_comment_sort"), - "disable_contributor_requests": data.get( - "disable_contributor_requests" - ), - "community_reviewed": data.get("community_reviewed"), - "original_content_tag_enabled": data.get( - "original_content_tag_enabled" - ), - "has_menu_widget": data.get("has_menu_widget"), - "videostream_links_count": data.get("videostream_links_count"), - "created": timestamp_to_readable( - timestamp=data.get("created"), - time_format=self._time_format, - ), - } - ) - if isinstance(data, Dict) - else SimpleNamespace - ) - - def subreddits(self, data: List[Dict]) -> List[SimpleNamespace]: - """ - Parses a list of subreddits into a list of SimpleNamespace objects. - - :param data: A list of dictionaries representing raw subreddit data. - :type data: List[Dict] - :return: A list of SimpleNamespace objects with parsed subreddit data. - :rtype: List[SimpleNamespace] - """ - - if isinstance(data, List) and all( - isinstance(subreddit, Dict) for subreddit in data - ): - return [self.subreddit(data=raw_subreddit) for raw_subreddit in data] - - def user(self, data: Dict) -> SimpleNamespace: - """ - Parses a Reddit user into a SimpleNamespace object. - - :param data: A dictionary containing raw data for a single Reddit user. - :type data: Dict - :return: A SimpleNamespace object with parsed user data. - :rtype: SimpleNamespace - """ - - return ( - SimpleNamespace( - **{ - "name": data.get("name"), - "id": data.get("id"), - "avatar_url": data.get("icon_img"), - "is_verified": data.get("verified"), - "has_verified_email": data.get("has_verified_email"), - "is_gold": data.get("is_gold"), - "is_mod": data.get("is_mod"), - "is_blocked": data.get("is_blocked"), - "is_employee": data.get("is_employee"), - "hidden_from_bots": data.get("hide_from_robots"), - "accepts_followers": data.get("accept_followers"), - "comment_karma": data.get("comment_karma"), - "link_karma": data.get("link_karma"), - "awardee_karma": data.get("awardee_karma"), - "total_karma": data.get("total_karma"), - "subreddit": data.get("subreddit"), - "is_friend": data.get("is_friend"), - "snoovatar_img": data.get("snoovatar_img"), - "awarder_karma": data.get("awarder_karma"), - "pref_show_snoovatar": data.get("pref_show_snoovatar"), - "has_subscribed": data.get("has_subscribed"), - "created": timestamp_to_readable( - timestamp=data.get("created"), time_format=self._time_format - ), - } - ) - if isinstance(data, Dict) - else SimpleNamespace - ) - - def users(self, data: List[Dict]) -> List[SimpleNamespace]: - """ - Parses a list of Reddit users into a list of SimpleNamespace objects. - - :param data: A list of dictionaries representing raw user data. - :type data: List[Dict] - :return: A list of SimpleNamespace objects with parsed user data. - :rtype: List[SimpleNamespace] - """ - - if isinstance(data, List) and all(isinstance(user, Dict) for user in data): - return [self.user(data=raw_user) for raw_user in data] - - def wiki_page(self, data: Dict) -> SimpleNamespace: - """ - Parses a Reddit wiki page into a SimpleNamespace object. - - :param data: A dictionary containing raw data for a Reddit wiki page. - :type data: Dict - :return: A SimpleNamespace object with parsed wiki page data. - :rtype: SimpleNamespace - """ + response: Dict, + ) -> Union[List[SimpleNamespace], SimpleNamespace]: + data: Dict = response.get("data", {}) if isinstance(data, Dict): - return SimpleNamespace( - **{ - "revision_id": data.get("revision_id"), - "revision_date": timestamp_to_readable( - timestamp=data.get("revision_date"), - time_format=self._time_format, - ), - "content_markdown": data.get("content_md"), - "revised_by": self.user(data=data.get("revised_by")), - "kind": data.get("kind"), - "may_revise": data.get("may_revise"), - "reason": data.get("reason"), - "content_html": data.get("content_html"), - } - ) - - -class Sanitise: - """ - Provides static methods to sanitize various types of data - from Reddit API responses. - """ - - @staticmethod - def comments(response: List[Dict]) -> Union[List[Dict], None]: - """ - Sanitizes a Reddit API response to extract and return a list of comment data. - - :param response: A list containing the Reddit API response data. The response - is expected to contain multiple elements, where the second element - (index 1) holds the relevant data. - :type response: List[Dict] - :return: A list of dictionaries, each representing a comment's data. - Returns None if the response is invalid. - :rtype: Union[List[Dict], None] - """ - if isinstance(response, List) and len(response) == 2: - children = response[1].get("data", {}).get("children") - - if isinstance(children, List): - return ( - [child.get("data") for child in children] - if isinstance(children, List) - else None - ) - - @staticmethod - def kind(response: Dict) -> str: - """ - Sanitises a Reddit API response to extract and return the item type/kind. - - :param response: A dictionary containing Reddit API response data. - :type response: Dict - :return: A string representation of the type of item. - :rtype: str - """ - - return response.get("kind") if isinstance(response, Dict) else None - - @staticmethod - def pagination_id(response: Dict) -> Union[str, None]: - """ - Sanitises a Reddit API response to extract and return a pagination ID. - - :param response: A dictionary containing Reddit API response data. - :type response: Dict - :return: A pagination ID from the response, if response is valid. - Returns None if the response is invalid. - :rtype: Union[str, None] - """ - data: Dict = response.get("data") - return data.get("after") if isinstance(data, Dict) else None - - @staticmethod - def post(response: List[Dict]) -> Union[Dict, None]: - """ - Sanitizes a Reddit API response to extract and return the data of a single post. - - :param response: A list containing the Reddit API response data. The response - is expected to contain multiple elements, where the first element - (index 0) holds the relevant data. - :type response: List[Dict] - :return: A dictionary representing the post's data, or None if the response is invalid. - :rtype: Union[Dict, None] - """ - - children: List[Dict] = [] - if isinstance(response, List): - response = response[0] # Extract the first element where data resides. - children = response.get("data").get("children") - - return children[0].get("data") if isinstance(children, List) else None - - @staticmethod - def posts(response: Dict) -> Union[List[Dict], None]: - """ - Sanitizes a Reddit API response to extract and return a list of post data. - - :param response: A dictionary containing the Reddit API response data. - :type response: Dict - :return: A list of dictionaries, each representing a post's data. - Returns None if the response is invalid. - :rtype: Union[List[Dict], None] - """ - - data: Dict = response.get("data") - children: List = data.get("children") - return ( - [child.get("data") for child in children] - if isinstance(children, List) - else None - ) - - @staticmethod - def subreddit_or_user(response: Dict) -> Union[Dict, None]: - """ - Sanitizes a Reddit API response to extract and return the data of a subreddit or user. + return self._to_namespace_object(obj=data) - :param response: A dictionary containing the Reddit API response data. - :type response: Dict - :return: A dictionary representing the subreddit or user's data, or None if the response is invalid. - :rtype: Union[Dict, None] - """ + def subreddit(self, response: Dict) -> SimpleNamespace: - data: Dict = response.get("data") - return data if isinstance(data, Dict) else None + if "data" in response: + return self._to_namespace_object(obj=response) - @staticmethod - def subreddits_or_users(response: Dict) -> Union[List[Dict], None]: - """ - Sanitizes a Reddit API response to extract and return a list of subreddit or user data. + def subreddits( + self, response: Dict + ) -> Union[List[SimpleNamespace], SimpleNamespace]: - :param response: A dictionary containing the Reddit API response data. - :type response: Dict - :return: A list of dictionaries representing subreddit or user data. - Returns None if the response is invalid. - :rtype: Union[List[Dict], None] - """ + if "data" in response: + return self._to_namespace_object(obj=response.get("data", {})) - data: Dict = response.get("data") - children: List = data.get("children") - return ( - [Sanitise.subreddit_or_user(response=child) for child in children] - if isinstance(children, List) - else None - ) + def user(self, response: Dict) -> SimpleNamespace: - @staticmethod - def wiki_page(response: Dict) -> Union[Dict, None]: - """ - Sanitizes a Reddit API response to extract and return the data of a wiki page, - including revision information. + if "data" in response: + return self._to_namespace_object(obj=response) - :param response: A dictionary containing the Reddit API response data. - :type response: Dict - :return: A dictionary representing the wiki page data, including revision information. - Returns None if the response is invalid. - :rtype: Union[Dict, None] - """ + def users(self, response: Dict) -> Union[List[SimpleNamespace], SimpleNamespace]: + if "data" in response: + return self._to_namespace_object(obj=response.get("data", {})) - data: Dict = response.get("data") - if data: - revision_by = data.get("revision_by") - if revision_by and isinstance(revision_by, Dict): - sanitized_revision_by = Sanitise.subreddit_or_user(response=revision_by) - data["revision_by"] = ( - sanitized_revision_by if sanitized_revision_by else revision_by - ) + def wiki_page(self, response: Dict) -> SimpleNamespace: - return data + if "data" in response: + return self._to_namespace_object(obj=response) # -------------------------------- END ----------------------------------------- # diff --git a/src/karmakaze/_tools.py b/src/karmakaze/_utils.py similarity index 100% rename from src/karmakaze/_tools.py rename to src/karmakaze/_utils.py diff --git a/tests/test_parse.py b/tests/test_parse.py deleted file mode 100644 index 70139f1..0000000 --- a/tests/test_parse.py +++ /dev/null @@ -1,93 +0,0 @@ -from types import SimpleNamespace -from typing import List, Dict - -import karmakaze -from conftest import ( - RAW_COMMENTS, - RAW_POST, - RAW_POSTS, - RAW_SUBREDDIT, - RAW_USER, - RAW_SUBREDDITS, - RAW_USERS, - RAW_WIKI_PAGE, -) - -parse = karmakaze.Parse() -sanitise = karmakaze.Sanitise() - - -def test_comments_parsing(): - sanitised_comments = sanitise.comments(RAW_COMMENTS) - print(sanitised_comments) - parsed_comments = parse.comments(sanitised_comments) - assert isinstance(parsed_comments, List) - - for comment in parsed_comments: - assert isinstance(comment, SimpleNamespace) - assert isinstance(comment.subreddit, str) - assert isinstance(comment.replies, (Dict, str)) - assert hasattr(comment, "upvotes") - - -def test_post_parsing(): - sanitised_post = sanitise.post(RAW_POST) - parsed_post = parse.post(sanitised_post) - assert isinstance(parsed_post, SimpleNamespace) - assert isinstance(parsed_post.upvotes, int) - assert isinstance(parsed_post.upvote_ratio, (float, int)) - assert isinstance(parsed_post.is_robot_indexable, bool) - - -def test_posts_parsing(): - sanitised_posts = sanitise.posts(RAW_POSTS) - parsed_posts = parse.posts(sanitised_posts) - assert isinstance(parsed_posts, List) - for post in parsed_posts: - assert isinstance(post, SimpleNamespace) - assert isinstance(post.comments, int) - assert hasattr(post, "url") - - -def test_subreddit_parsing(): - sanitised_subreddit = sanitise.subreddit_or_user(RAW_SUBREDDIT) - parsed_subreddit = parse.subreddit(sanitised_subreddit) - assert isinstance(parsed_subreddit, SimpleNamespace) - assert isinstance(parsed_subreddit.current_active_users, int) - assert hasattr(parsed_subreddit, "display_name") - - -def test_subreddits_parsing(): - sanitised_subreddits = sanitise.subreddits_or_users(RAW_SUBREDDITS) - parsed_subreddits = parse.subreddits(sanitised_subreddits) - assert isinstance(parsed_subreddits, List) - for subreddit in parsed_subreddits: - assert isinstance(subreddit, SimpleNamespace) - assert isinstance(subreddit.subscribers, int) - assert hasattr(subreddit, "description") - - -def test_user_parsing(): - sanitised_user = sanitise.subreddit_or_user(RAW_USER) - parsed_user = parse.user(sanitised_user) - assert isinstance(parsed_user, SimpleNamespace) - assert isinstance(parsed_user.created, str) - assert hasattr(parsed_user, "comment_karma") - - -def test_users_parsing(): - sanitised_users = sanitise.subreddits_or_users(RAW_USERS) - parsed_users = parse.users(sanitised_users) - assert isinstance(parsed_users, List) - for user in parsed_users: - assert isinstance(user, SimpleNamespace) - assert isinstance(user.accepts_followers, bool) - assert hasattr(user, "name") - - -def test_wiki_page_parsing(): - sanitised_wiki_page = sanitise.wiki_page(RAW_WIKI_PAGE) - parsed_wiki_page = parse.wiki_page(sanitised_wiki_page) - assert isinstance(parsed_wiki_page, SimpleNamespace) - assert isinstance(parsed_wiki_page.revision_date, str) - assert hasattr(parsed_wiki_page, "revision_id") diff --git a/tests/test_sanitise.py b/tests/test_sanitise.py deleted file mode 100644 index 2c72d43..0000000 --- a/tests/test_sanitise.py +++ /dev/null @@ -1,88 +0,0 @@ -from typing import List, Dict - -import karmakaze - -from conftest import ( - RAW_COMMENTS, - RAW_POST, - RAW_POSTS, - RAW_SUBREDDIT, - RAW_USER, - RAW_SUBREDDITS, - RAW_USERS, - RAW_WIKI_PAGE, -) - -sanitise = karmakaze.Sanitise() - - -def test_comments_sanitisation(): - sanitised_comments = sanitise.comments(RAW_COMMENTS) - assert isinstance(sanitised_comments, List) - assert [sanitise.kind(raw_comment) == "t3" for raw_comment in RAW_COMMENTS] - for comment in sanitised_comments: - assert isinstance(comment, Dict) - assert "link_id" in comment - - -def test_post_sanitisation(): - sanitised_post = sanitise.post(RAW_POST) - assert isinstance(sanitised_post, Dict) - assert isinstance(sanitised_post.get("ups"), int) - assert sanitise.kind(RAW_POST[1]) == "Listing" - - -def test_posts_sanitisation(): - sanitised_posts = sanitise.posts(RAW_POSTS) - assert isinstance(sanitised_posts, List) - assert [sanitise.kind(response=raw_post) == "Listing" for raw_post in RAW_POSTS] - for post in sanitised_posts: - assert isinstance(post, Dict) - assert "title" or "selftext" in post - - -def test_subreddit_sanitisation(): - sanitised_subreddit = sanitise.subreddit_or_user(RAW_SUBREDDIT) - assert isinstance(sanitised_subreddit, Dict) - assert isinstance(sanitised_subreddit.get("accounts_active"), int) - assert "display_name" in sanitised_subreddit - assert sanitise.kind(RAW_SUBREDDIT) == "t5" - - -def test_subreddits_sanitisation(): - sanitised_subreddits = sanitise.subreddits_or_users(RAW_SUBREDDITS) - assert isinstance(sanitised_subreddits, List) - assert [ - sanitise.kind(response=raw_subreddit) == "t5" - for raw_subreddit in RAW_SUBREDDITS - ] - for subreddit in sanitised_subreddits: - assert isinstance(subreddit, Dict) - assert isinstance(subreddit.get("subscribers"), int) - assert "display_name" in subreddit - - -def test_user_sanitisation(): - sanitised_user = sanitise.subreddit_or_user(RAW_USER) - assert isinstance(sanitised_user, Dict) - assert isinstance(sanitised_user.get("created"), (float, int)) - assert "comment_karma" in sanitised_user - assert sanitise.kind(RAW_USER) == "t2" - - -def test_users_sanitisation(): - sanitised_users = sanitise.subreddits_or_users(RAW_USERS) - assert isinstance(sanitised_users, List) - assert [sanitise.kind(response=raw_user) == "t2" for raw_user in RAW_USERS] - for user in sanitised_users: - assert isinstance(user, Dict) - assert isinstance(user.get("accept_followers"), bool) - assert "name" in user - - -def test_wiki_page_sanitisation(): - sanitised_wiki_page = sanitise.wiki_page(RAW_WIKI_PAGE) - assert isinstance(sanitised_wiki_page, Dict) - assert isinstance(sanitised_wiki_page.get("revision_date"), (float, int)) - assert "link_karma" in sanitised_wiki_page.get("revision_by") - assert sanitise.kind(RAW_WIKI_PAGE) == "wikipage" diff --git a/tests/test_sanitise_and_parse.py b/tests/test_sanitise_and_parse.py new file mode 100644 index 0000000..3811f86 --- /dev/null +++ b/tests/test_sanitise_and_parse.py @@ -0,0 +1,82 @@ +from types import SimpleNamespace +from typing import List + +import karmakaze +from conftest import ( + RAW_COMMENTS, + RAW_POST, + RAW_POSTS, + RAW_SUBREDDIT, + RAW_USER, + RAW_SUBREDDITS, + RAW_USERS, + RAW_WIKI_PAGE, +) + +sanitise_and_parse = karmakaze.SanitiseAndParse() + + +def test_comments_sanitation_and_parsing(): + comments = sanitise_and_parse.comments(RAW_COMMENTS[1]) + assert isinstance(comments.children, List) + + for comment in comments.children: + assert isinstance(comment, SimpleNamespace) + assert isinstance(comment.data.subreddit, str) + assert hasattr(comment.data, "ups") + + +def test_post_sanitation_and_parsing(): + post = sanitise_and_parse.post(RAW_POST) + assert isinstance(post, SimpleNamespace) + assert isinstance(post.data.ups, int) + assert isinstance(post.data.upvote_ratio, (float, int)) + assert isinstance(post.data.is_robot_indexable, bool) + + +def test_posts_sanitation_and_parsing(): + posts = sanitise_and_parse.posts(RAW_POSTS).children + assert isinstance(posts, List) + for post in posts: + assert isinstance(post, SimpleNamespace) + assert isinstance(post.data.num_comments, int) + assert hasattr(post.data, "url") + + +def test_subreddit_sanitation_and_parsing(): + subreddit = sanitise_and_parse.subreddit(RAW_SUBREDDIT) + assert isinstance(subreddit, SimpleNamespace) + assert isinstance(subreddit.data.active_user_count, int) + assert hasattr(subreddit.data, "display_name") + + +def test_subreddits_sanitation_and_parsing(): + subreddits = sanitise_and_parse.subreddits(RAW_SUBREDDITS) + assert isinstance(subreddits.children, List) + for subreddit in subreddits.children: + assert isinstance(subreddit, SimpleNamespace) + assert isinstance(subreddit.data.subscribers, int) + assert hasattr(subreddit.data, "description") + + +def test_user_sanitation_and_parsing(): + user = sanitise_and_parse.user(RAW_USER) + assert isinstance(user, SimpleNamespace) + assert isinstance(user.data.created, str) + assert hasattr(user.data, "comment_karma") + + +def test_users_sanitation_and_parsing(): + users = sanitise_and_parse.users(RAW_USERS) + assert isinstance(users.children, List) + for user in users.children: + assert isinstance(user, SimpleNamespace) + assert isinstance(user.data.accept_followers, bool) + assert hasattr(user.data, "name") + + +def test_wiki_page_sanitation_and_parsing(): + wiki_page = sanitise_and_parse.wiki_page(RAW_WIKI_PAGE) + assert isinstance(wiki_page, SimpleNamespace) + assert isinstance(wiki_page.data.revision_date, str) + assert hasattr(wiki_page.data, "revision_id") From 9ac9b21bcb51b78de5a30f356d128a4e24705720 Mon Sep 17 00:00:00 2001 From: rly0nheart <74001397+rly0nheart@users.noreply.github.com> Date: Sun, 20 Oct 2024 17:27:15 +0200 Subject: [PATCH 2/2] 3.0.0 --- README.md | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e2bd2e3..2d4a114 100644 --- a/README.md +++ b/README.md @@ -10,17 +10,11 @@ import karmakaze import requests +snp = karmakaze.SanitiseAndParse() username = "AutoModerator" response = requests.get(f"https://www.reddit.com/user/{username}/about.json").json() -sanitise = karmakaze.Sanitise() # Provides static methods for sanitising raw response data -parse = karmakaze.Parse() # Provides methods for parsing the sanitised data - -sanitised_data = sanitise.subreddit_or_user(response=response) -parsed_data = parse.user(data=sanitised_data) - -print(sanitised_data) -print(parsed_data) +print(snp.user(response=response)) ``` ## License