Skip to content

Commit

Permalink
Move over NLTK code from Coaster; revise Ruff config (#768)
Browse files Browse the repository at this point in the history
  • Loading branch information
jace authored May 13, 2024
1 parent 8b4cd7d commit 0b84f87
Show file tree
Hide file tree
Showing 17 changed files with 94 additions and 84 deletions.
4 changes: 2 additions & 2 deletions hasjob/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import geoip2.database
from flask import Flask
from flask_assets import Bundle
from flask_lastuser import Lastuser
from flask_lastuser.sqlalchemy import UserManager
from flask_mail import Mail
from flask_migrate import Migrate
from flask_redis import FlaskRedis
Expand All @@ -13,6 +11,8 @@
import coaster.app
from baseframe import Version, assets, baseframe
from coaster.assets import WebpackManifest
from flask_lastuser import Lastuser
from flask_lastuser.sqlalchemy import UserManager

from ._version import __version__
from .uploads import configure as uploads_configure
Expand Down
2 changes: 1 addition & 1 deletion hasjob/forms/board.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import tldextract
from flask import Markup
from markupsafe import Markup

import baseframe.forms as forms
from baseframe import _, __
Expand Down
2 changes: 1 addition & 1 deletion hasjob/forms/domain.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# from flask import Markup
# from markupsafe import Markup
import baseframe.forms as forms
from baseframe import __

Expand Down
2 changes: 1 addition & 1 deletion hasjob/forms/filterset.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class FiltersetForm(forms.Form):
__("Tags"), autocomplete_endpoint='/api/1/tag/autocomplete', results_key='tags'
)

def set_queries(self):
def __post_init__(self):
if not self.edit_parent:
self.edit_parent = g.board
self.types.query = (
Expand Down
5 changes: 3 additions & 2 deletions hasjob/forms/jobpost.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
from decimal import Decimal, InvalidOperation
from difflib import SequenceMatcher

from flask import Markup, g, request
from flask_lastuser import LastuserResourceError
from flask import g, request
from markupsafe import Markup

import baseframe.forms as forms
from baseframe import _, __
from baseframe.utils import is_public_email_domain
from coaster.utils import get_email_domain, getbool
from flask_lastuser import LastuserResourceError

from .. import app, lastuser
from ..models import CURRENCY, PAY_TYPE, Domain, JobApplication, JobType, User
Expand Down
3 changes: 2 additions & 1 deletion hasjob/models/board.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from flask import Markup, url_for
from flask import url_for
from markupsafe import Markup
from pytz import timezone
from sqlalchemy.ext.associationproxy import association_proxy
from werkzeug.utils import cached_property
Expand Down
3 changes: 2 additions & 1 deletion hasjob/models/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

from datetime import timedelta

from flask import Markup, request
from flask import request
from markupsafe import Markup
from sqlalchemy import event
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.orderinglist import ordering_list
Expand Down
3 changes: 2 additions & 1 deletion hasjob/models/jobpost.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
from datetime import timedelta

import tldextract
from flask import Markup, escape, url_for
from flask import url_for
from flask_babel import format_datetime
from markupsafe import Markup, escape
from sqlalchemy import event
from sqlalchemy.dialects.postgresql import TSVECTOR
from sqlalchemy.ext.associationproxy import association_proxy
Expand Down
2 changes: 1 addition & 1 deletion hasjob/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from uuid import uuid4

from flask import request
from flask_lastuser.sqlalchemy import UserBase2

from baseframe import _, cache
from coaster.sqlalchemy import JsonDict, UuidMixin
from coaster.utils import utcnow
from flask_lastuser.sqlalchemy import UserBase2

from . import BaseMixin, Model, backref, db, relationship, sa

Expand Down
32 changes: 31 additions & 1 deletion hasjob/tagging.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from collections import defaultdict
from collections.abc import Iterable
from urllib.parse import urljoin

import nltk
import requests

from coaster.nlp import extract_named_entities
from coaster.utils import text_blocks

from . import app, rq
Expand All @@ -25,6 +26,35 @@


@rq.job('hasjob')
def extract_named_entities(text_blocks: Iterable[str]) -> set[str]:
"""Return a set of named entities extracted from the provided text blocks."""
sentences = []
for text in text_blocks:
sentences.extend(nltk.sent_tokenize(text))

tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
chunked_sentences = nltk.ne_chunk_sents(tagged_sentences, binary=True)

def extract_entity_names(tree: nltk.Tree) -> list[str]:
entity_names = []

if hasattr(tree, "label"):
if tree.label() == "NE":
entity_names.append(" ".join(child[0] for child in tree))
else:
for child in tree:
entity_names.extend(extract_entity_names(child))

return entity_names

entity_names = []
for tree in chunked_sentences:
entity_names.extend(extract_entity_names(tree))

return set(entity_names)


def tag_locations(jobpost_id):
with app.test_request_context():
post = JobPost.query.get(jobpost_id)
Expand Down
3 changes: 2 additions & 1 deletion hasjob/views/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
from functools import wraps
from io import StringIO

from flask import Markup, abort, flash, g, redirect, render_template, request, url_for
from flask import abort, flash, g, redirect, render_template, request, url_for
from markupsafe import Markup
from pytz import UTC

from baseframe import __
Expand Down
5 changes: 3 additions & 2 deletions hasjob/views/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
from uuid import uuid4

import bleach
from flask import Markup, copy_current_request_context, g, request, session
from flask_lastuser import signal_user_looked_up
from flask import copy_current_request_context, g, request, session
from geoip2.errors import AddressNotFoundError
from markupsafe import Markup
from pytz import UTC
from sqlalchemy.exc import IntegrityError

from baseframe import _, cache
from baseframe.signals import form_validation_error, form_validation_success
from coaster.sqlalchemy import failsafe_add
from coaster.utils import utcnow
from flask_lastuser import signal_user_looked_up

from .. import app, lastuser, redis_store, rq
from ..extapi import location_geodata
Expand Down
2 changes: 1 addition & 1 deletion hasjob/views/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from uuid import uuid4

from flask import (
Markup,
Response,
abort,
flash,
Expand All @@ -14,6 +13,7 @@
request,
url_for,
)
from markupsafe import Markup

from baseframe import _, request_is_xhr
from coaster.utils import ParseError, getbool, parse_isoformat, utcnow
Expand Down
2 changes: 1 addition & 1 deletion hasjob/views/listing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import bleach
from flask import (
Markup,
abort,
flash,
g,
Expand All @@ -16,6 +15,7 @@
)
from flask_mail import Message
from html2text import html2text
from markupsafe import Markup
from premailer import transform as email_transform
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm.exc import StaleDataError
Expand Down
2 changes: 1 addition & 1 deletion hasjob/views/login.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from flask import Response, flash, g, redirect
from flask_lastuser import signal_user_session_refreshed
from sqlalchemy.exc import IntegrityError

from coaster.views import get_next_url
from flask_lastuser import signal_user_session_refreshed

from .. import app, lastuser
from ..models import UserActiveAt, db
Expand Down
104 changes: 39 additions & 65 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use_parentheses = true
combine_as_imports = true
split_on_trailing_comma = false
extra_standard_library = ['typing_extensions']
known_first_party = ['baseframe', 'coaster']
known_first_party = ['baseframe', 'coaster', 'flask_lastuser']
known_repo = ['hasjob']
default_section = 'THIRDPARTY'
sections = [
Expand Down Expand Up @@ -86,64 +86,6 @@ exclude_dirs = ['node_modules', 'build/lib']
skips = ['*/*_test.py', '*/test_*.py']

[tool.ruff]
# This is a slight customisation of the default rules
# 1. Hasjob still supports Python 3.7 pending its EOL
# 2. Rule E402 (module-level import not top-level) is disabled as isort handles it
# 3. Rule E501 (line too long) is left to Black; some strings are worse for wrapping

# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
select = ["E", "F"]
ignore = ["E402", "E501"]

# Allow autofix for all enabled rules (when `--fix`) is provided.
fixable = [
"A",
"B",
"C",
"D",
"E",
"F",
"G",
"I",
"N",
"Q",
"S",
"T",
"W",
"ANN",
"ARG",
"BLE",
"COM",
"DJ",
"DTZ",
"EM",
"ERA",
"EXE",
"FBT",
"ICN",
"INP",
"ISC",
"NPY",
"PD",
"PGH",
"PIE",
"PL",
"PT",
"PTH",
"PYI",
"RET",
"RSE",
"RUF",
"SIM",
"SLF",
"TCH",
"TID",
"TRY",
"UP",
"YTT",
]
unfixable = []

# Exclude a variety of commonly ignored directories.
exclude = [
".bzr",
Expand Down Expand Up @@ -171,23 +113,48 @@ exclude = [
# Same as Black.
line-length = 88

# Target Python 3.11
target-version = "py311"

[tool.ruff.format]
docstring-code-format = true
quote-style = "preserve"

[tool.ruff.lint]
# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

# Target Python 3.11
target-version = "py311"
# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
select = ["E", "F"]
ignore = ["E402", "E501"]

[tool.ruff.mccabe]
# Allow autofix for all enabled rules (when `--fix` is provided).
fixable = ["ALL"]
unfixable = []

# Allow these characters in strings
allowed-confusables = ["‘", "’"]

[tool.ruff.lint.mccabe]
# Unlike Flake8, default to a complexity level of 10.
max-complexity = 10

[tool.ruff.isort]
[tool.ruff.lint.extend-per-file-ignores]
"__init__.py" = ["E402"] # Allow non-top-level imports
"tests/**.py" = [
"S101", # Allow assert
"ANN001", # Args don't need types (usually fixtures)
"N802", # Fixture returning a class may be named per class name convention
"N803", # Args don't require naming convention (fixture could be a class)
]

[tool.ruff.lint.isort]
# These config options should match isort config above under [tool.isort]
combine-as-imports = true
extra-standard-library = ['typing_extensions']
split-on-trailing-comma = false
relative-imports-order = 'furthest-to-closest'
known-first-party = ['coaster']
known-first-party = ['coaster', 'baseframe', 'flask_lastuser']
section-order = [
'future',
'standard-library',
Expand All @@ -197,5 +164,12 @@ section-order = [
'local-folder',
]

[tool.ruff.isort.sections]
[tool.ruff.lint.isort.sections]
repo = ['hasjob']

[tool.ruff.lint.flake8-pytest-style]
fixture-parentheses = false
mark-parentheses = false

[tool.ruff.lint.pyupgrade]
keep-runtime-typing = true
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ Flask-Migrate
Flask-Redis
Flask-RQ2
Flask-SQLAlchemy
Flask-Testing
git+https://github.com/maxcountryman/flask-uploads.git#egg=Flask-Uploads
Flask-WTF
geoip2
gunicorn
html2text
jsmin
langid
nltk
Pillow
premailer
progressbar2
Expand Down

0 comments on commit 0b84f87

Please sign in to comment.