diff --git a/bot/cogs/dictionary.py b/bot/cogs/dictionary.py index 6daf80a..50c0ac0 100644 --- a/bot/cogs/dictionary.py +++ b/bot/cogs/dictionary.py @@ -1,38 +1,277 @@ -from typing import Optional +from __future__ import annotations + +import re +from typing import TYPE_CHECKING, Any, Optional import discord -import orjson -from catherinecore import Catherine +import msgspec from discord import app_commands -from discord.ext import commands -from libs.ui.dictionary.pages import ( - InclusivePages, - NounPages, - PronounsPages, - TermsPages, -) -from libs.ui.dictionary.structs import ( - InclusiveContent, - InclusiveEntity, - NounContent, - NounEntity, - PronounsEntity, - PronounsMorphemes, - TermAssets, - TermEntity, -) -from libs.ui.dictionary.utils import format_pronouns_info, split_flags +from discord.ext import commands, menus from libs.utils import Embed +from libs.utils.pages import CatherinePages from yarl import URL +if TYPE_CHECKING: + from catherinecore import Catherine + +BASE_URL = URL("https://pronouns.page/") +CDN_FLAGS_URL = URL("https://dclu0bpcdglik.cloudfront.net/images/") +PRONOUNS_FLAGS_URL = URL("https://en.pronouns.page/flags/") + +### Structs + + +class TermInfo(msgspec.Struct, frozen=True): + term: str + original: str + definition: str + locale: str + author: str + category: str + + +class NounEntity(msgspec.Struct, frozen=True): + regular: str + plural: str + + +class NounInfo(msgspec.Struct, frozen=True): + masc: str + fem: str + neutral: str + author: str + + +class InclusiveInfo(msgspec.Struct, frozen=True): + instead_of: str + say: str + because: str + clarification: Optional[str] + author: str + + +### UI components (Page Sources, Pages) +class TermSource(menus.ListPageSource): + def __init__(self, entries: list[dict[str, Any]], *, bot: Catherine, per_page: int = 1): + super().__init__(entries=entries, per_page=per_page) + self.cog: Dictionary = bot.get_cog("dictionary") # type: ignore + + def format_info(self, entry: dict[str, Any]) 
-> TermInfo: + return TermInfo( + term=", ".join(entry["term"].split("|")), + original=self.cog.format_references(entry["original"]), + definition=self.cog.format_references(entry["definition"]), + locale=entry["locale"], + author=self.cog.determine_author(entry["author"]), + category=", ".join(entry["category"].split(",")), + ) + + async def format_page(self, menu: "TermsPages", entries: dict[str, Any]) -> Embed: + menu.embed.clear_fields() + entry = self.format_info(entries) + + menu.embed.title = entry.term + menu.embed.set_thumbnail(url=self.cog.determine_image_url(entries)) + menu.embed.set_footer(text=f"Page {menu.current_page + 1}/{self.get_max_pages()}") + + menu.embed.description = f"{entry.original}\n\n{entry.definition}" + + # We need to swap the name value for what is in its native locale + menu.embed.add_field(name="Author", value=entry.author) + menu.embed.add_field(name="Category", value=entry.category) + + return menu.embed + + +class TermsPages(CatherinePages): + def __init__(self, entries: list[dict[str, Any]], *, interaction: discord.Interaction): + self.bot: Catherine = interaction.client # type: ignore + self.entries = entries + super().__init__( + source=TermSource(entries, bot=self.bot, per_page=1), + interaction=interaction, + compact=False, + ) + self.embed = Embed() + + +class NounSource(menus.ListPageSource): + def __init__(self, entries: list[dict[str, Any]], *, per_page: int = 1): + super().__init__(entries=entries, per_page=per_page) + + def determine_author(self, author: Optional[str]) -> str: + if author is None: + return "Unknown" + return author + + def format_info(self, entries: dict[str, Any]) -> NounInfo: + def _fmt_prefix(value: str) -> str: + if value: + return f"- {value}" + return value + + return NounInfo( + masc="\n".join(map(_fmt_prefix, f"{entries['masc']}|{entries['mascPl']}".split("|"))), + fem="\n".join(map(_fmt_prefix, f"{entries['fem']}|{entries['femPl']}".split("|"))), + neutral="\n".join( + map(_fmt_prefix, 
f"{entries['neutr']}|{entries['neutrPl']}".split("|")) + ), + author=self.determine_author(entries["author"]), + ) + + async def format_page(self, menu: "TermsPages", entries: dict[str, Any]): + menu.embed.clear_fields() + entry = self.format_info(entries) + + menu.embed.set_footer( + text=f"{entry.author} | Page {menu.current_page + 1}/{self.get_max_pages()}" + ) + + menu.embed.add_field(name="Masculine", value=entry.masc) + menu.embed.add_field(name="Feminine", value=entry.fem) + menu.embed.add_field(name="Neutral", value=entry.neutral) + return menu.embed + + +class NounPages(CatherinePages): + def __init__(self, entries: list[dict[str, Any]], *, interaction: discord.Interaction): + self.bot: Catherine = interaction.client # type: ignore + super().__init__(source=NounSource(entries, per_page=1), interaction=interaction) + self.embed = Embed() + + +class InclusiveSource(menus.ListPageSource): + def __init__(self, entries: list[dict[str, Any]], *, bot: Catherine, per_page: int = 1): + super().__init__(entries=entries, per_page=per_page) + self.cog: Dictionary = bot.get_cog("dictionary") # type: ignore + + def format_info(self, entries: dict[str, Any]) -> InclusiveInfo: + return InclusiveInfo( + instead_of="\n".join( + map(lambda value: f"- ~~{value}~~", entries["insteadOf"].split("|")) + ), + say="\n".join(map(lambda value: f"- **{value}**", entries["say"].split("|"))), + because=entries["because"], + clarification=entries["clarification"], + author=self.cog.determine_author(entries["author"]), + ) + + async def format_page(self, menu: "InclusivePages", entries: dict[str, Any]): + menu.embed.clear_fields() + entry = self.format_info(entries) + + menu.embed.description = ( + f"### Instead of \n{entry.instead_of}\n" + f"### Better say\n{entry.say}\n" + f"### Because\n{entry.because}\n" + ) + + if entry.clarification: + menu.embed.description += f"### Clarification\n{entry.clarification}" + + menu.embed.set_footer(text=f"Page {menu.current_page + 
1}/{self.get_max_pages()}") + + menu.embed.add_field(name="Author", value=entry.author) + return menu.embed + + +class InclusivePages(CatherinePages): + def __init__(self, entries: list[dict[str, Any]], *, interaction: discord.Interaction): + self.bot: Catherine = interaction.client # type: ignore + super().__init__( + source=InclusiveSource(entries, bot=self.bot, per_page=1), interaction=interaction + ) + self.embed = Embed() + class Dictionary(commands.GroupCog, name="dictionary"): """The to-go LGBTQ+ dictionary""" def __init__(self, bot: Catherine) -> None: self.bot = bot + self.decoder = msgspec.json.Decoder() + self.link_regex = re.compile(r"^(http|https)://") self.session = self.bot.session + ### Term utilities + + def split_flags(self, content: str) -> list[str]: + return re.findall(r"(?<=\[).*(?=\])", content) + + def determine_image_url(self, entry: dict[str, Any]) -> str: + flags = self.split_flags(entry["flags"]) + if len(flags[0]) != 0: + flag_entity = flags[0].replace('"', "").split(",") + return str(PRONOUNS_FLAGS_URL / f"{flag_entity[0]}.png") + elif entry["images"] and "[object Object]" not in entry["images"]: + asset = entry["images"].split(",") + return str( + CDN_FLAGS_URL / f"{asset[0]}-flag.png" + ) # For general use, we'll just use the first flag shown + return "" + + def format_inline_term_reference(self, content: str, entities: list[str]): + if len(entities) == 0: + return content + + url = URL.build(scheme="https", host="en.pronouns.page", path="/terminology") + replacements = {} + cleaned_content = re.sub(r"[\{\}]", "", content) + + # The order of formatting goes like this: + # 1. Hashtag term references + # 2. Link references + # 3. Pronouns references + # 4. 
Anything that is automatically assumed to be English terms + for entity in entities: + if entity.startswith("#"): + parts = entity[1:].partition("=") + replacements.update( + {entity: f"[{parts[-1]}]({url.with_query({'filter': parts[0]})})"} + ) + elif self.link_regex.match(entity): + # Special case here + keyword = entity.split("=")[-1] + keyword_length = len(keyword) + 1 + reference_url = URL(entity[:-keyword_length]) + if reference_url.host and reference_url.host == "www.perseus.tufts.edu": + replacements.update( + { + entity: f"[{keyword}]({reference_url.with_query({'doc': reference_url.query['doc'].replace(')', '%29').replace('(', '%28')})})" + } + ) + continue + + link_parts = entity.partition("=") + replacements.update({entity: f"[{link_parts[-1]}]({link_parts[0]})"}) + elif entity.startswith("/"): + # For other languages, this is the slash for the path that would be used + # Since we are only using English for now, this doesn't matter + pronouns_parts = entity[1:].partition("=") + pronouns_url = URL.build( + scheme="https", host="en.pronouns.page", path=f"/{pronouns_parts[0]}" + ) + replacements.update({entity: f"[{pronouns_parts[-1]}]({pronouns_url})"}) + else: + replacements.update({entity: f"[{entity}]({url.with_query({'filter': entity})})"}) + + fmt_regex = re.compile(r"(%s)" % "|".join(map(re.escape, replacements.keys()))) + return fmt_regex.sub(lambda mo: replacements[mo.group()], cleaned_content) + + def extract_reference(self, content: str) -> list[str]: + return re.findall(r"{(.*?)}", content) + + def format_references(self, content: str) -> str: + return self.format_inline_term_reference(content, self.extract_reference(content)) + + ### General utilities + + def determine_author(self, author: Optional[str]) -> str: + if author is None: + return "Unknown" + author_link = str(BASE_URL / f"@{author}") + return f"[{author}]({author_link})" + @app_commands.command(name="terms") @app_commands.describe(query="The term to look for") async def terms(self, 
interaction: discord.Interaction, query: Optional[str] = None) -> None: @@ -42,56 +281,34 @@ async def terms(self, interaction: discord.Interaction, query: Optional[str] = N if query: url = url / "search" / query async with self.session.get(url) as r: - data = await r.json(loads=orjson.loads) + data = await r.json(loads=self.decoder.decode) if len(data) == 0: await interaction.followup.send("No terms were found") return - converted = [ - TermEntity( - term=term["term"], - original=term["original"] if len(term["original"]) > 0 else None, - definition=term["definition"], - key=term["key"], - assets=TermAssets( - flags=split_flags(term["flags"]), - images=term["images"] if len(term["images"]) > 0 else None, - ), - category=term["category"].split(","), - author=term["author"], - ) - for term in data - ] - pages = TermsPages(entries=converted, interaction=interaction) + pages = TermsPages(data, interaction=interaction) await pages.start() @app_commands.command(name="nouns") @app_commands.describe(query="The noun to look for") async def nouns(self, interaction: discord.Interaction, query: Optional[str] = None) -> None: """Looks up gender neutral nouns and language""" - await interaction.response.defer() url = URL("https://en.pronouns.page/api/nouns") if query: url = url / "search" / query async with self.session.get(url) as r: # If people start using this for pronouns, then a generator shows up # so that's in case this happens - if r.content_type == "text/html": - await interaction.followup.send("Uhhhhhhhhhhhh what mate") + if r.status == 204: + await interaction.response.send_message("Uhhhhhhhhhhhh what") return - data = await r.json(loads=orjson.loads) + + data = await r.json(loads=self.decoder.decode) + if len(data) == 0: - await interaction.followup.send("No nouns were found") + await interaction.response.send_message("No nouns were found") return - converted = [ - NounEntity( - masc=NounContent(regular=entry["masc"], plural=entry["mascPl"]), - 
fem=NounContent(regular=entry["fem"], plural=entry["femPl"]), - neutral=NounContent(regular=entry["neutr"], plural=entry["neutrPl"]), - author=entry["author"], - ) - for entry in data - ] - pages = NounPages(entries=converted, interaction=interaction) + + pages = NounPages(data, interaction=interaction) await pages.start() @app_commands.command(name="inclusive") @@ -103,96 +320,13 @@ async def inclusive(self, interaction: discord.Interaction, term: Optional[str] if term: url = url / "search" / term async with self.session.get(url) as r: - data = await r.json(loads=orjson.loads) + data = await r.json(loads=self.decoder.decode) if len(data) == 0: await interaction.followup.send("No inclusive terms were found") return - converted = [ - InclusiveEntity( - content=InclusiveContent( - instead_of=entry["insteadOf"], - say=entry["say"], - because=entry["because"], - clarification=entry["clarification"], - ), - author=entry["author"], - ) - for entry in data - ] - pages = InclusivePages(entries=converted, interaction=interaction) + pages = InclusivePages(entries=data, interaction=interaction) await pages.start() - @app_commands.command(name="lookup") - @app_commands.describe( - pronouns="The pronouns to look up. Examples include she/her, etc. Defaults to all pronouns." - ) - async def lookup( - self, interaction: discord.Interaction, pronouns: Optional[str] = None - ) -> None: - """Lookup info about the given pronouns - - Pronouns include she/her, they/them and many others. Singular pronouns (eg 'she') also work. 
- """ - await interaction.response.defer() - url = URL("https://en.pronouns.page/api/pronouns/") - if pronouns: - url = url / pronouns - async with self.session.get(url) as r: - data = await r.json(loads=orjson.loads) - if data is None: - await interaction.followup.send("The pronouns requested were not found") - return - - if pronouns is not None: - pronouns_entry = PronounsEntity( - name=data["name"], - canonical_name=data["canonicalName"], - description=data["description"], - aliases=data["aliases"], - normative=data["normative"], - morphemes=PronounsMorphemes( - pronoun_subject=data["morphemes"]["pronoun_subject"], - pronoun_object=data["morphemes"]["pronoun_object"], - possessive_determiner=data["morphemes"]["possessive_determiner"], - possessive_pronoun=data["morphemes"]["possessive_pronoun"], - reflexive=data["morphemes"]["reflexive"], - ), - examples=data["examples"], - history=data["history"], - sources_info=data["sourcesInfo"], - ) - - pronouns_info = format_pronouns_info(pronouns_entry) - embed = Embed() - embed.title = pronouns_info["title"] - embed.description = pronouns_info["desc"] - embed.add_field(name="Aliases", value=", ".join(pronouns_entry.aliases).rstrip(",")) - embed.add_field(name="Normative", value=pronouns_entry.normative) - await interaction.followup.send(embed=embed) - else: - converted = [ - PronounsEntity( - name=entry["canonicalName"], - canonical_name=entry["canonicalName"], - description=entry["description"], - aliases=entry["aliases"], - normative=entry["normative"], - morphemes=PronounsMorphemes( - pronoun_subject=entry["morphemes"]["pronoun_subject"], - pronoun_object=entry["morphemes"]["pronoun_object"], - possessive_determiner=entry["morphemes"]["possessive_determiner"], - possessive_pronoun=entry["morphemes"]["possessive_pronoun"], - reflexive=entry["morphemes"]["reflexive"], - ), - examples=entry["examples"], - history=entry["history"], - sources_info=entry["sourcesInfo"], - ) - for entry in data.values() - ] - pages = 
PronounsPages(entries=converted, interaction=interaction) - await pages.start() - async def setup(bot: Catherine) -> None: await bot.add_cog(Dictionary(bot)) diff --git a/bot/libs/cog_utils/dictionary/__init__.py b/bot/libs/cog_utils/dictionary/__init__.py deleted file mode 100644 index 5421f4b..0000000 --- a/bot/libs/cog_utils/dictionary/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .utils import ( - format_inline_references as format_inline_references, - format_multi_reference as format_multi_reference, -) diff --git a/bot/libs/cog_utils/dictionary/utils.py b/bot/libs/cog_utils/dictionary/utils.py deleted file mode 100644 index 00ebce1..0000000 --- a/bot/libs/cog_utils/dictionary/utils.py +++ /dev/null @@ -1,53 +0,0 @@ -import re - - -def format_link_references(content: str) -> str: - keyword = content.split("=")[-1] - keyword_length = len(keyword) + 1 - - # Replace the broken parentheses with a quoted one - url = content[:-keyword_length].replace(")", "%29").replace("(", "%28") - return f"[{keyword}]({url})" - - -def format_pronouns_references(match: str) -> str: - cleaned = re.sub(r"^/", "", match) - parts = cleaned.split("=") - link = f"https://en.pronouns.page/{parts[0]}".replace(" ", "%20") - return f"[{parts[1]}]({link})" - - -# What this does is if it's a term starting with an "#", -# we strip that and give the result -def format_term_references(match: str): - # For terms - if match.startswith("#"): - cleaned = re.sub(r"^#", "", match) - parts = cleaned.split("=") - link = f"https://en.pronouns.page/terminology#{parts[0]}".replace(" ", "%20") - return f"[{parts[1]}]({link})" - - cleaned_link = f"https://en.pronouns.page/terminology#{match}".replace(" ", "%20") - return f"[{match}]({cleaned_link})" - - -def format_inline_references(content: str): - def _format_reference(match: re.Match): - link_regex = re.compile(r"^(http|https)://") - extracted_content = match.group(1) - if link_regex.search(extracted_content) is not None: - return 
format_link_references(extracted_content) - elif extracted_content.startswith("/"): - return format_pronouns_references(extracted_content) - return format_term_references(extracted_content) - - regex = re.compile(r"{(.*?)}") - return regex.sub(lambda match: _format_reference(match), content) - - -def format_multi_reference(content: str, sep: str = "|") -> str: - if sep in content: - parts = content.split(sep) - formatted = "; ".join(format_inline_references(part) for part in parts) - return f"({formatted})" - return format_inline_references(content) diff --git a/bot/libs/ui/dictionary/__init__.py b/bot/libs/ui/dictionary/__init__.py deleted file mode 100644 index f5b20ea..0000000 --- a/bot/libs/ui/dictionary/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -from .pages import ( - InclusivePages as InclusivePages, - NounPages as NounPages, - PronounsPages as PronounsPages, - TermsPages as TermsPages, -) -from .structs import ( - InclusiveContent as InclusiveContent, - InclusiveEntity as InclusiveEntity, - NounContent as NounContent, - NounEntity as NounEntity, - TermAssets as TermAssets, - TermEntity as TermEntity, -) -from .utils import ( - format_pronouns_info as format_pronouns_info, - split_flags as split_flags, -) diff --git a/bot/libs/ui/dictionary/embed_entries.py b/bot/libs/ui/dictionary/embed_entries.py deleted file mode 100644 index 814928a..0000000 --- a/bot/libs/ui/dictionary/embed_entries.py +++ /dev/null @@ -1,127 +0,0 @@ -import re - -from libs.cog_utils.dictionary import ( - format_inline_references, - format_multi_reference, -) - -from .structs import InclusiveEntity, NounEntity, PronounsEntity, TermEntity -from .utils import ( - determine_author, - determine_image_url, - format_gender_neutral_content, - format_inclusive_content, - format_pronouns_info, - format_term_titles, -) - - -class InclusiveEntityEntry: - __slots__ = ("content", "author", "author_base_url") - - def __init__(self, entry: InclusiveEntity): - self.content = entry.content - self.author 
= entry.author - - def to_dict(self): - desc = format_inclusive_content(self.content) - data = { - "description": desc, - "fields": [{"name": "Author", "value": determine_author(self.author)}], - } - return data - - -class TermEntityEntry: - __slots__ = ( - "term", - "original", - "definition", - "key", - "assets", - "category", - "author", - ) - - def __init__(self, entry: TermEntity): - self.term = entry.term - self.original = entry.original - self.definition = entry.definition - self.key = entry.key - self.assets = entry.assets - self.category = entry.category - self.author = entry.author - - def to_dict(self): - dirty_original = ( - f"({format_term_titles(format_inline_references(self.original))})" - if self.original is not None - else "" - ) - cleaning_regex = re.compile(r"[\{\}]") - possible_image_url = determine_image_url(self.assets) - possible_author = determine_author(self.author) - title = format_term_titles(self.term) - formatted_original = format_multi_reference(cleaning_regex.sub("", dirty_original)) - formatted_def = cleaning_regex.sub( - "", format_inline_references(self.definition) - ).capitalize() - formatted_category = ", ".join(self.category).rstrip(",") - desc = f""" - {formatted_original} - - {formatted_def} - """ - data = { - "title": title, - "description": desc, - "thumbnail": possible_image_url, - "fields": [ - {"name": "Author", "value": possible_author, "inline": True}, - {"name": "Category", "value": formatted_category, "inline": True}, - ], - } - return data - - -class NounEntityEntry: - __slots__ = ("entry", "author") - - def __init__(self, entry: NounEntity): - self.entry = entry - self.author = entry.author - - def to_dict(self): - desc = format_gender_neutral_content(self.entry) - possible_author = determine_author(self.author) - - data = { - "description": desc, - "fields": [{"name": "Author", "value": possible_author, "inline": True}], - } - return data - - -class PronounsEntityEntry: - __slots__ = ("entry", "history", 
"sources_info") - - def __init__(self, entry: PronounsEntity): - self.entry = entry - self.history = self.entry.history - self.sources_info = self.entry.sources_info - - def to_dict(self): - info = format_pronouns_info(self.entry) - data = { - "title": info["title"], - "description": info["desc"], - "fields": [ - { - "name": "Aliases", - "value": ", ".join(self.entry.aliases).rstrip(","), - "inline": True, - }, - {"name": "Normative", "value": self.entry.normative, "inline": True}, - ], - } - return data diff --git a/bot/libs/ui/dictionary/pages.py b/bot/libs/ui/dictionary/pages.py deleted file mode 100644 index e418460..0000000 --- a/bot/libs/ui/dictionary/pages.py +++ /dev/null @@ -1,64 +0,0 @@ -from typing import List - -import discord -from libs.utils.pages import CatherinePages, EmbedListSource - -from .embed_entries import ( - InclusiveEntityEntry, - NounEntityEntry, - PronounsEntityEntry, - TermEntityEntry, -) -from .structs import InclusiveEntity, NounEntity, PronounsEntity, TermEntity - - -class TermsPages(CatherinePages): - def __init__( - self, - entries: List[TermEntity], - *, - interaction: discord.Interaction, - per_page: int = 1, - ): - converted = [TermEntityEntry(entry).to_dict() for entry in entries] - super().__init__(EmbedListSource(converted, per_page=per_page), interaction=interaction) - self.embed = discord.Embed(colour=discord.Colour.from_rgb(255, 125, 212)) - - -class InclusivePages(CatherinePages): - def __init__( - self, - entries: List[InclusiveEntity], - *, - interaction: discord.Interaction, - per_page: int = 1, - ): - converted = [InclusiveEntityEntry(entry).to_dict() for entry in entries] - super().__init__(EmbedListSource(converted, per_page=per_page), interaction=interaction) - self.embed = discord.Embed(colour=discord.Colour.from_rgb(255, 125, 212)) - - -class NounPages(CatherinePages): - def __init__( - self, - entries: List[NounEntity], - *, - interaction: discord.Interaction, - per_page: int = 1, - ): - converted = 
[NounEntityEntry(entry).to_dict() for entry in entries] - super().__init__(EmbedListSource(converted, per_page=per_page), interaction=interaction) - self.embed = discord.Embed(colour=discord.Colour.from_rgb(255, 125, 212)) - - -class PronounsPages(CatherinePages): - def __init__( - self, - entries: List[PronounsEntity], - *, - interaction: discord.Interaction, - per_page: int = 1, - ): - converted = [PronounsEntityEntry(entry).to_dict() for entry in entries] - super().__init__(EmbedListSource(converted, per_page=per_page), interaction=interaction) - self.embed = discord.Embed(colour=discord.Colour.from_rgb(255, 125, 212)) diff --git a/bot/libs/ui/dictionary/structs.py b/bot/libs/ui/dictionary/structs.py deleted file mode 100644 index 4d29ea5..0000000 --- a/bot/libs/ui/dictionary/structs.py +++ /dev/null @@ -1,68 +0,0 @@ -from typing import List, Optional - -import msgspec - - -class InclusiveContent(msgspec.Struct): - instead_of: str - say: str - because: str - clarification: Optional[str] - - -class InclusiveEntity(msgspec.Struct): - content: InclusiveContent - author: Optional[str] - - -class TermAssets(msgspec.Struct): - flags: List[str] - images: Optional[str] - - -class TermEntity(msgspec.Struct): - term: str - original: Optional[str] - definition: str - key: str - assets: TermAssets - category: List[str] - author: Optional[str] - - -class NounContent(msgspec.Struct): - regular: str - plural: str - - -class NounEntity(msgspec.Struct): - masc: NounContent - fem: NounContent - neutral: NounContent - author: Optional[str] - - -class PronounsMorphemes(msgspec.Struct): - pronoun_subject: str - pronoun_object: str - possessive_determiner: str - possessive_pronoun: str - reflexive: str - - def to_dict(self): - return {f: getattr(self, f) for f in self.__struct_fields__} - - def values(self) -> List[str]: - return [getattr(self, f) for f in self.__struct_fields__] - - -class PronounsEntity(msgspec.Struct): - name: str - canonical_name: str - description: str - 
aliases: List[str] - normative: bool - morphemes: PronounsMorphemes - examples: List[str] - history: str - sources_info: Optional[str] diff --git a/bot/libs/ui/dictionary/utils.py b/bot/libs/ui/dictionary/utils.py deleted file mode 100644 index 4cec955..0000000 --- a/bot/libs/ui/dictionary/utils.py +++ /dev/null @@ -1,125 +0,0 @@ -import re -from typing import List, Optional, TypedDict - -from libs.cog_utils.dictionary import format_inline_references -from yarl import URL - -from .structs import ( - InclusiveContent, - NounContent, - NounEntity, - PronounsEntity, - TermAssets, -) - - -class PronounsInfo(TypedDict): - title: str - desc: str - - -def format_term_titles(title: str): - title_list = title.split("|") - return ", ".join(title_list).rstrip(",") - - -def format_title_options(title: str): - title_list = title.split("|") - return "\n".join([f"- **{title}**" for title in title_list]) - - -def format_instead_of_options(options: str): - options_list = options.split("|") - return "\n".join([f"- ~~{options}~~" for options in options_list]) - - -def format_pronouns_examples(examples: List[str]) -> str: - subbed = [f"- {item}" for item in examples] - return "\n".join(subbed) - - -def format_pronouns_info(entry: PronounsEntity) -> PronounsInfo: - def format_table(): - data = entry.morphemes.to_dict() - final_form = "\n".join( - [f"- **{k.replace('_', ' ').title()}**: {v}" for k, v in data.items()] - ) - return f"### Morphemes \n{final_form}" - - title = f"{entry.name}" - desc = [ - f"(*{entry.description}*)", - f"{format_table()}", - f"### Examples \n{format_pronouns_examples(entry.examples)}", - ] - if len(entry.history) != 0: - desc.append(f"### History\n{format_inline_references(entry.history)}") - - if entry.sources_info is not None: - desc.append(f"### Source Info\n{format_inline_references(entry.sources_info)}") - final_desc = "\n".join(desc) - return PronounsInfo(title=title, desc=final_desc) - - -def format_inclusive_content(content: InclusiveContent): - 
final_content = ( - f"### Instead of \n{format_instead_of_options(content.instead_of)}", - f"### Better Say\n{format_title_options(content.say)}", - f"### Because\n{content.because}", - ( - f"### Clarification\n{content.clarification}" - if content.clarification is not None - else "" - ), - ) - - return "\n".join(final_content) - - -def format_gender_neutral_content(content: NounEntity) -> str: - def _format_internals(noun_content: NounContent) -> str: - combined = f"{noun_content.regular}|{noun_content.plural}" - if len(noun_content.plural) == 0: - combined = noun_content.regular - internals_list = combined.split("|") - return "\n".join([f"- {item}" for item in internals_list]) - - final_content = ( - f"### Masculine \n{_format_internals(content.masc)}", - f"### Feminine \n{_format_internals(content.fem)}", - f"### Neutral \n{_format_internals(content.neutral)}", - ) - return "\n".join(final_content) - - -def split_flags(content: str) -> List[str]: - regex = re.compile(r"(?<=\[).*(?=\])") - return regex.findall(content) - - -def determine_author(author: Optional[str]) -> str: - author_base_url = URL("https://pronouns.page/") - if author is None: - return "Unknown" - author_link = str(author_base_url / f"@{author}") - return f"[{author}]({author_link})" - - -def determine_image_url(assets: TermAssets) -> str: - if len(assets.flags[0]) != 0: - base_flags_url = URL("https://en.pronouns.page/flags/") - asset = assets.flags[0].replace('"', "") - complete_url = ( - base_flags_url / f"{asset}.png" - ) # Always grab the first one bc i doubt there are two or more flags - return str(complete_url) - else: - # Apparently the "[object Object]" thing is a pronouns.page bug - if assets.images is None or "[object Object]" in assets.images: - return "" - - # If there isn't a flag, then it's probably a custom one - base_cdn_asset = URL("https://dclu0bpcdglik.cloudfront.net/images/") - asset = assets.images.split(",") - image_file = f"{asset[0]}-flag.png" - return str(base_cdn_asset 
/ image_file)
diff --git a/bot/tests/test_dictionary.py b/bot/tests/test_dictionary.py
new file mode 100644
index 0000000..54d210e
--- /dev/null
+++ b/bot/tests/test_dictionary.py
@@ -0,0 +1,188 @@
+"""Tests for the formatting helpers of the ``Dictionary`` cog."""
+
+from typing import Any, Optional
+
+import pytest
+from cogs.dictionary import Dictionary
+
+
+@pytest.fixture
+def cog(bot) -> Dictionary:
+    """Build the ``Dictionary`` cog under test from the ``bot`` fixture.
+
+    NOTE(review): ``bot`` is not defined in this module; presumably it is
+    provided by a shared conftest -- confirm.
+    """
+    return Dictionary(bot)
+
+
+@pytest.mark.parametrize(
+    "flags,expected",
+    [
+        ('["Abroromantic"]', ['"Abroromantic"']),
+        ('["Abrosexual"]', ['"Abrosexual"']),
+        # Two flags still produce ONE element: only the brackets are dropped.
+        ('["Two Spirit","Two Spirit_"]', ['"Two Spirit","Two Spirit_"']),
+    ],
+)
+def test_split_flags(cog: Dictionary, flags: str, expected: list[str]):
+    """``split_flags`` returns the bracket-stripped flag text wrapped in a list."""
+    assert cog.split_flags(flags) == expected
+
+
+@pytest.mark.parametrize(
+    "entry,expected",
+    [
+        # No flags: the image id becomes a "<id>-flag.png" CDN URL.
+        (
+            {"flags": "[]", "images": "01GP3RYDBKEHR6DNCV7HWPZE2S"},
+            "https://dclu0bpcdglik.cloudfront.net/images/01GP3RYDBKEHR6DNCV7HWPZE2S-flag.png",
+        ),
+        # A flag name maps to a PNG under the pronouns.page flags URL.
+        (
+            {"flags": '["Abroromantic"]', "images": ""},
+            "https://en.pronouns.page/flags/Abroromantic.png",
+        ),
+        # Flags win over images; only the first flag is used, percent-encoded.
+        (
+            {
+                "flags": '["Two Spirit","Two Spirit_"]',
+                "images": "01FE7WPBA5YCGNY819Y9REWTXT,01FE7WQS5BCT7Z4YSWNYEWYTSP,01HMNR02Z8ACYXX52B2Y0FZH15",
+            },
+            "https://en.pronouns.page/flags/Two%20Spirit.png",
+        ),
+        # Several images and no flags: only the first image id is used.
+        (
+            {
+                "flags": "[]",
+                "images": "01FE7WPBA5YCGNY819Y9REWTXT,01FE7WQS5BCT7Z4YSWNYEWYTSP,01HMNR02Z8ACYXX52B2Y0FZH15",
+            },
+            "https://dclu0bpcdglik.cloudfront.net/images/01FE7WPBA5YCGNY819Y9REWTXT-flag.png",
+        ),
+        # Unusable or empty image data yields an empty URL.
+        ({"flags": "[]", "images": "[object Object]"}, ""),
+        ({"flags": "[]", "images": ""}, ""),
+    ],
+)
+def test_determine_image_url(cog: Dictionary, entry: dict[str, Any], expected: str):
+    """``determine_image_url`` prefers a flag URL, then an image URL, else empty."""
+    assert cog.determine_image_url(entry) == expected
+
+
+@pytest.mark.parametrize(
+    "content,expected",
+    [
+        (
+            "a term referring to people whose {#AGAB=gender assigned at birth} is different than their {gender identity}. It includes {#trans man=trans men} and {#trans woman=trans women} as well as {non-binary} people. Many, but not all, transgender people experience {gender dysphoria} or {#gender euphoria=euphoria}",
+            [
+                "#AGAB=gender assigned at birth",
+                "gender identity",
+                "#trans man=trans men",
+                "#trans woman=trans women",
+                "non-binary",
+                "gender dysphoria",
+                "#gender euphoria=euphoria",
+            ],
+        ),
+        (
+            "a {neutral}/{non-binary}, {abinary} or unaligned {gender identity}",
+            ["neutral", "non-binary", "abinary", "gender identity"],
+        ),
+        ("{transgender} person.", ["transgender"]),
+    ],
+)
+def test_extract_reference(cog: Dictionary, content: str, expected: list[str]):
+    """``extract_reference`` returns each ``{...}`` body, in order of appearance."""
+    assert cog.extract_reference(content) == expected
+
+
+@pytest.mark.parametrize(
+    "content,entities,expected",
+    [
+        # Bare terms link to the terminology filter; spaces are encoded as "+".
+        (
+            "a {neutral}/{non-binary}, {abinary} or unaligned {gender identity}",
+            ["neutral", "non-binary", "abinary", "gender identity"],
+            "a [neutral](https://en.pronouns.page/terminology?filter=neutral)/[non-binary](https://en.pronouns.page/terminology?filter=non-binary), [abinary](https://en.pronouns.page/terminology?filter=abinary) or unaligned [gender identity](https://en.pronouns.page/terminology?filter=gender+identity)",
+        ),
+        # "#key=label": the label is shown and the filter targets the key.
+        (
+            "a term referring to people whose {#AGAB=gender assigned at birth} is different than their {gender identity}. It includes {#trans man=trans men} and {#trans woman=trans women} as well as {non-binary} people.",
+            [
+                "#AGAB=gender assigned at birth",
+                "gender identity",
+                "#trans man=trans men",
+                "#trans woman=trans women",
+                "non-binary",
+            ],
+            "a term referring to people whose [gender assigned at birth](https://en.pronouns.page/terminology?filter=AGAB) is different than their [gender identity](https://en.pronouns.page/terminology?filter=gender+identity). It includes [trans men](https://en.pronouns.page/terminology?filter=trans+man) and [trans women](https://en.pronouns.page/terminology?filter=trans+woman) as well as [non-binary](https://en.pronouns.page/terminology?filter=non-binary) people.",
+        ),
+        # "/path=label": the label links to that path on en.pronouns.page.
+        (
+            "a person who uses {/he=he/him} pronouns and identifies as a {lesbian}",
+            ["/he=he/him", "lesbian"],
+            "a person who uses [he/him](https://en.pronouns.page/he) pronouns and identifies as a [lesbian](https://en.pronouns.page/terminology?filter=lesbian)",
+        ),
+        # External URL: the label follows the last "="; note the re-encoded query.
+        (
+            "from Ancient Greek {http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0057:entry=a(bro/s=ἁβρός} [habros] - “delicate”",
+            [
+                "http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0057:entry=a(bro/s=ἁβρός"
+            ],
+            "from Ancient Greek [ἁβρός](http://www.perseus.tufts.edu/hopper/text?doc=Perseus:text:1999.04.0057:entry%3Da%2528bro/s) [habros] - “delicate”",
+        ),
+        (
+            "{http://eprints.hud.ac.uk/id/eprint/33535/=Monro i in. z 2017}",
+            ["http://eprints.hud.ac.uk/id/eprint/33535/=Monro i in. z 2017"],
+            "[Monro i in. z 2017](http://eprints.hud.ac.uk/id/eprint/33535/)",
+        ),
+    ],
+)
+def test_format_inline_term_reference(
+    cog: Dictionary, content: str, entities: list[str], expected: str
+):
+    """Each listed entity found in ``content`` is rendered as a Markdown link."""
+    assert cog.format_inline_term_reference(content, entities) == expected
+
+
+@pytest.mark.parametrize(
+    "content,expected",
+    [
+        (
+            "a {neutral}/{non-binary}, {abinary} or unaligned {gender identity}",
+            "a [neutral](https://en.pronouns.page/terminology?filter=neutral)/[non-binary](https://en.pronouns.page/terminology?filter=non-binary), [abinary](https://en.pronouns.page/terminology?filter=abinary) or unaligned [gender identity](https://en.pronouns.page/terminology?filter=gender+identity)",
+        ),
+        (
+            "a {man} whose {#AGAB=gender assigned at birth} is or was different than that of their gender (usually, they were {#AFAB=assigned female at birth}).",
+            "a [man](https://en.pronouns.page/terminology?filter=man) whose [gender assigned at birth](https://en.pronouns.page/terminology?filter=AGAB) is or was different than that of their gender (usually, they were [assigned female at birth](https://en.pronouns.page/terminology?filter=AFAB)).",
+        ),
+        # Un-braced repeats of a referenced term are linked as well: the second
+        # "legal gender" and the "transition" inside "transitions".
+        (
+            "a term referring to people whose {#AGAB=gender assigned at birth} is different than their {gender identity}. It includes {#trans man=trans men} and {#trans woman=trans women} as well as {non-binary} people. Many, but not all, transgender people experience {gender dysphoria} or {#gender euphoria=euphoria}. Some opt for medical {transition} (e.g. {#HRT=hormone replacement therapy} or {#SRS=surgeries}), but not all (they may be unable due to medical, financial or political reasons, or simply may not want to). Some opt for changing their {legal gender} marker, but not all. It should be noted that procedures of changing one's legal gender marker may be difficult or simply non-existent in certain countries; moreover, in many countries there is no option available for {non-binary} people. Whether (and how) a person transitions has no bearing on the validity of their identity.",
+            "a term referring to people whose [gender assigned at birth](https://en.pronouns.page/terminology?filter=AGAB) is different than their [gender identity](https://en.pronouns.page/terminology?filter=gender+identity). It includes [trans men](https://en.pronouns.page/terminology?filter=trans+man) and [trans women](https://en.pronouns.page/terminology?filter=trans+woman) as well as [non-binary](https://en.pronouns.page/terminology?filter=non-binary) people. Many, but not all, transgender people experience [gender dysphoria](https://en.pronouns.page/terminology?filter=gender+dysphoria) or [euphoria](https://en.pronouns.page/terminology?filter=gender+euphoria). Some opt for medical [transition](https://en.pronouns.page/terminology?filter=transition) (e.g. [hormone replacement therapy](https://en.pronouns.page/terminology?filter=HRT) or [surgeries](https://en.pronouns.page/terminology?filter=SRS)), but not all (they may be unable due to medical, financial or political reasons, or simply may not want to). Some opt for changing their [legal gender](https://en.pronouns.page/terminology?filter=legal+gender) marker, but not all. It should be noted that procedures of changing one's [legal gender](https://en.pronouns.page/terminology?filter=legal+gender) marker may be difficult or simply non-existent in certain countries; moreover, in many countries there is no option available for [non-binary](https://en.pronouns.page/terminology?filter=non-binary) people. Whether (and how) a person [transition](https://en.pronouns.page/terminology?filter=transition)s has no bearing on the validity of their identity.",
+        ),
+        # Technically the links are invalid (they target en.pronouns.page), but if
+        # the locale is assumed to be Ukrainian (as this sample is), they do work.
+        (
+            "збірний термін, що позначає явища, де призначений при народженні ґендер ({agab}) відрізняється від фактичної ґендерної ідентичності людини. Це включає в себе {#трансфемінність=транс жінок}, {#трансмаскулінність=транс чоловіків} і {#небінарність=небінарних людей}. Деякі, але не всі, трансґендерні люди відчувають ґендерну дісфорію та/або ейфорію. Деякі, але не всі, хочуть пройти через медичний перехід (наприклад, пройти замісну гормональну терапію та/або хірургічні операції). Ті, хто не хоче проходити медичний перехід, можуть мати будь-які причини на це: від політичних до фінансових, або навіть просто цього не хотіти. Деякі хочуть змінити свою стать у паспорті, але не обов'язково всі. Важливо пам'ятати, що в багатьох країнах цей процес є дуже довгим та енерго і фінансово затратним; в деяких він неможливий; а також, що в багатьох країнах не існує варіанту ґендерного маркеру для небінарних людей.",
+            "збірний термін, що позначає явища, де призначений при народженні ґендер ([agab](https://en.pronouns.page/terminology?filter=agab)) відрізняється від фактичної ґендерної ідентичності людини. Це включає в себе [транс жінок](https://en.pronouns.page/terminology?filter=%D1%82%D1%80%D0%B0%D0%BD%D1%81%D1%84%D0%B5%D0%BC%D1%96%D0%BD%D0%BD%D1%96%D1%81%D1%82%D1%8C), [транс чоловіків](https://en.pronouns.page/terminology?filter=%D1%82%D1%80%D0%B0%D0%BD%D1%81%D0%BC%D0%B0%D1%81%D0%BA%D1%83%D0%BB%D1%96%D0%BD%D0%BD%D1%96%D1%81%D1%82%D1%8C) і [небінарних людей](https://en.pronouns.page/terminology?filter=%D0%BD%D0%B5%D0%B1%D1%96%D0%BD%D0%B0%D1%80%D0%BD%D1%96%D1%81%D1%82%D1%8C). Деякі, але не всі, трансґендерні люди відчувають ґендерну дісфорію та/або ейфорію. Деякі, але не всі, хочуть пройти через медичний перехід (наприклад, пройти замісну гормональну терапію та/або хірургічні операції). Ті, хто не хоче проходити медичний перехід, можуть мати будь-які причини на це: від політичних до фінансових, або навіть просто цього не хотіти. Деякі хочуть змінити свою стать у паспорті, але не обов'язково всі. Важливо пам'ятати, що в багатьох країнах цей процес є дуже довгим та енерго і фінансово затратним; в деяких він неможливий; а також, що в багатьох країнах не існує варіанту ґендерного маркеру для небінарних людей.",
+        ),
+    ],
+)
+def test_format_references(cog: Dictionary, content: str, expected: str):
+    """``format_references`` rewrites the ``{...}`` references as Markdown links."""
+    assert cog.format_references(content) == expected
+
+
+@pytest.mark.parametrize(
+    "author,expected",
+    [
+        (None, "Unknown"),
+        ("No767", "[No767](https://pronouns.page/@No767)"),
+        ("ausir", "[ausir](https://pronouns.page/@ausir)"),
+    ],
+)
+def test_determine_author(cog: Dictionary, author: Optional[str], expected: str):
+    """No author reads "Unknown"; a name links to ``https://pronouns.page/@<name>``."""
+    assert cog.determine_author(author) == expected
diff --git a/changelog.md b/changelog.md index 998e5bc..1430c64 100644 --- a/changelog.md +++ b/changelog.md @@ -26,6 +26,7 @@ - Pride Profiles (https://github.com/No767/Catherine-Chan/pull/198) - HRT Conversion (https://github.com/No767/Catherine-Chan/pull/200) - Pronouns (https://github.com/No767/Catherine-Chan/pull/207) + - Dictionary (https://github.com/No767/Catherine-Chan/pull/221) - Include Taskfile (https://github.com/No767/Catherine-Chan/pull/208) ## ➖ Removals