Skip to content

Commit

Permalink
Documenting citation style (#817)
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesbraza authored Jan 18, 2025
1 parent f653903 commit a1c53d3
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 19 deletions.
23 changes: 4 additions & 19 deletions paperqa/docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from paperqa.settings import MaybeSettings, get_settings
from paperqa.types import Doc, DocDetails, DocKey, PQASession, Text
from paperqa.utils import (
citation_to_docname,
gather_with_concurrency,
get_loop,
maybe_is_html,
Expand Down Expand Up @@ -306,23 +307,7 @@ async def aadd( # noqa: PLR0912
):
citation = f"Unknown, {os.path.basename(path)}, {datetime.now().year}"

if docname is None:
# get first name and year from citation
match = re.search(r"([A-Z][a-z]+)", citation)
if match is not None:
author = match.group(1)
else:
# panicking - no word??
raise ValueError(
f"Could not parse docname from citation {citation}. "
"Consider just passing key explicitly - e.g. docs.py "
"(path, citation, key='mykey')"
)
year = ""
match = re.search(r"(\d{4})", citation)
if match is not None:
year = match.group(1)
docname = f"{author}{year}"
docname = citation_to_docname(citation) if docname is None else docname
docname = self._get_unique_name(docname)

doc = Doc(docname=docname, citation=citation, dockey=dockey)
Expand Down Expand Up @@ -801,8 +786,8 @@ async def aquery( # noqa: PLR0912
answer_text = answer_result.text
session.add_tokens(answer_result)
# it still happens
if prompt_config.EXAMPLE_CITATION in answer_text:
answer_text = answer_text.replace(prompt_config.EXAMPLE_CITATION, "")
if (ex_citation := prompt_config.EXAMPLE_CITATION) in answer_text:
answer_text = answer_text.replace(ex_citation, "")
for c in filtered_contexts:
name = c.text.name
citation = c.text.doc.formatted_citation
Expand Down
1 change: 1 addition & 0 deletions paperqa/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def get_formatted_variables(s: str) -> set[str]:
class PromptSettings(BaseModel):
model_config = ConfigDict(extra="forbid", validate_assignment=True)

# MLA parenthetical in-text citation, SEE: https://nwtc.libguides.com/citations/MLA#s-lg-box-707489
EXAMPLE_CITATION: ClassVar[str] = "(Example2012Example pages 3-4)"

summary: str = summary_prompt
Expand Down
20 changes: 20 additions & 0 deletions paperqa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,3 +544,23 @@ def logging_filters(
log_with_filter = logging.getLogger(logger_name)
for log_filter_to_remove in log_filters_to_remove:
log_with_filter.removeFilter(log_filter_to_remove)


def citation_to_docname(citation: str) -> str:
    """Create a docname that follows MLA parenthetical in-text citation.

    The docname is the first capitalized ASCII word found in the citation
    (treated as the author) immediately followed by a four-digit year, if
    the citation contains one.

    Args:
        citation: Free-form citation text to derive the docname from.

    Returns:
        The author word concatenated with the year, or just the author word
        when no four-digit number is present.

    Raises:
        ValueError: If the citation contains no capitalized word to use as
            the author.
    """
    author_match = re.search(r"([A-Z][a-z]+)", citation)
    if author_match is None:
        # Nothing that looks like a name: the caller has to supply one.
        raise ValueError(
            f"Could not parse docname from citation {citation}. "
            "Consider just passing docname explicitly - e.g. docname='mykey'"
        )
    # Prefer a standalone four-digit run so that longer digit sequences
    # (numeric file names, identifiers) appearing before the real year are not
    # truncated into a bogus year. Fall back to the first four consecutive
    # digits anywhere to keep the previous result when no standalone run exists.
    year_match = re.search(r"(?<!\d)(\d{4})(?!\d)", citation) or re.search(
        r"(\d{4})", citation
    )
    year = year_match.group(1) if year_match is not None else ""
    return f"{author_match.group(1)}{year}"

0 comments on commit a1c53d3

Please sign in to comment.