Skip to content

Commit

Permalink
Merge pull request #75 from adsabs/sort
Browse files Browse the repository at this point in the history
removing bibcode desc sorts
  • Loading branch information
femalves authored Aug 26, 2024
2 parents 2f49386 + d033550 commit 8deffbc
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 11 deletions.
101 changes: 101 additions & 0 deletions alembic/versions/21bf40903ec3_update_json_queries_with_bibcode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""Update JSON queries with bibcode
Revision ID: 21bf40903ec3
Revises: ffdbd392dc89
Create Date: 2024-06-25 14:02:38.782780

"""

# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the ID of the
# migration it revises; both match the values in the module docstring.
revision = '21bf40903ec3'
down_revision = 'ffdbd392dc89'

from alembic import op
import sqlalchemy as sa
import json
from flask import current_app


def _rewrite_bibcode_sort(query):
    """Return `query` with its bibcode sort replaced, or None if none is present.

    The rules are tried in order and only the first one whose marker occurs
    in `query` is applied (to every occurrence of its `old` substring).
    """
    rules = (
        # (marker that selects the rule, substring to replace, replacement)
        ('date+desc%2C+bibcode+desc', '%2C+bibcode+desc', '%2C+score+desc'),
        ('date+asc%2C+bibcode+asc', '%2C+bibcode+asc', '%2C+score+asc'),
        ('bibcode+desc', 'bibcode+desc', 'date+desc'),
        ('bibcode+asc', 'bibcode+asc', 'date+asc'),
    )
    for marker, old, new in rules:
        if marker in query:
            return query.replace(old, new)
    return None


def update_queries(main_session):
    """Replace bibcode sorts in the saved queries referenced by myADS records.

    Selects every row of ``public.queries`` that is joined to ``public.myads``,
    rewrites the URL-encoded ``query`` string stored in its JSON payload and
    writes the re-serialized JSON back. All updates are committed together.

    :param main_session: SQLAlchemy session used for the SELECT and UPDATEs;
        it is always closed before this function returns.
    :raises Exception: any error is logged, the session is rolled back and
        the original exception is re-raised.
    """
    # Rows whose stored text contains an escaped NUL are left out before the
    # ::jsonb cast (presumably because jsonb cannot represent \u0000).
    select_sql = sa.text("""
        WITH problematic_ids AS (
            SELECT q.id
            FROM public.queries AS q
            WHERE convert_from(q.query, 'UTF-8')::text LIKE '%\\u0000%'
        ),
        filtered_queries AS (
            SELECT
                q.id, convert_from(q.query, 'UTF-8')::jsonb AS json_query
            FROM
                public.queries AS q
            INNER JOIN
                public.myads AS m
            ON
                q.id = m.query_id
            WHERE
                q.id NOT IN (SELECT id FROM problematic_ids)
        )
        SELECT *
        FROM filtered_queries
    """)
    # Built once instead of once per updated row.
    update_sql = sa.text("""
        UPDATE public.queries
        SET query = :query
        WHERE id = :id
    """)

    try:
        current_app.logger.info('Getting records...')
        records = main_session.execute(select_sql).fetchall()

        # Keyed by query id: the join yields one row per myADS record, so a
        # query shared by several records would otherwise be updated (and
        # counted) more than once.
        pending = {}
        for query_id, saved_query in records:
            if query_id in pending:
                continue
            query = saved_query.get('query')
            if not query:
                # Nothing to rewrite; do not abort the migration over it.
                continue
            rewritten = _rewrite_bibcode_sort(query)
            if rewritten is None:
                continue
            saved_query['query'] = rewritten
            pending[query_id] = json.dumps(saved_query).encode('utf-8')

        current_app.logger.info('Records to update: {}'.format(len(pending)))
        for query_id, payload in pending.items():
            current_app.logger.info('Updating record with id: {}'.format(query_id))
            main_session.execute(update_sql, {'query': payload, 'id': query_id})

        current_app.logger.info('Total records updated: {}'.format(len(pending)))
        main_session.commit()
    except Exception as e:
        main_session.rollback()
        current_app.logger.error('Error occurred during update: {}'.format(str(e)))
        raise
    finally:
        main_session.close()

def upgrade():
    """Run the data migration that rewrites bibcode sorts in saved queries.

    A session is bound to the connection Alembic is migrating with and handed
    to `update_queries`.

    NOTE(review): a failure in `update_queries` is logged here and not
    re-raised, so the exception does not propagate to Alembic. Confirm this
    best-effort behavior is intended.
    """
    bind = op.get_bind()
    db_session = sa.orm.Session(bind=bind)
    try:
        update_queries(db_session)
    except Exception as exc:
        current_app.logger.error('Upgrade failed: {}'.format(str(exc)))

def downgrade():
    """Do nothing: this revision does not restore the previous sort values."""
    return None
2 changes: 1 addition & 1 deletion vault_service/tests/test_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ def test_myads_execute_notification(self):
self.assertStatus(r, 200)
self.assertEqual(r.json, [{'q': 'author:"Kurtz, Michael" entdate:["{0}Z00:00" TO "{1}Z23:59"] '
'pubdate:[{2}-00 TO *]'.format(start_date, now, beg_pubyear),
'sort': 'score desc, bibcode desc'}])
'sort': 'score desc, date desc'}])

@httpretty.activate
def test_myads_import(self):
Expand Down
5 changes: 3 additions & 2 deletions vault_service/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ def test_exc():
self.assertTrue(r == {'query': 'fq=%7B%21bitset%7D&q=foo', 'bigquery': 'foo\nbar'})

# Typical general myADS notification
r = utils.cleanup_payload({'query': {'fq': ['{!type=aqp v=$fq_database}'], 'fq_database': ['(database:astronomy)'], 'q': ['star'], 'sort': ['citation_count desc, bibcode desc']},
r = utils.cleanup_payload({'query': {'fq': ['{!type=aqp v=$fq_database}'], 'fq_database': ['(database:astronomy)'], 'q': ['star'], 'sort': ['citation_count desc, date desc']},
})
self.assertTrue(r == {'bigquery': '', 'query': 'fq=%7B%21type%3Daqp+v%3D%24fq_database%7D&fq_database=%28database%3Aastronomy%29&q=star&sort=citation_count+desc%2C+bibcode+desc'})

self.assertTrue(r == {'bigquery': '', 'query': 'fq=%7B%21type%3Daqp+v%3D%24fq_database%7D&fq_database=%28database%3Aastronomy%29&q=star&sort=citation_count+desc%2C+date+desc'})

@httpretty.activate
def test_upsert_myads(self):
Expand Down
17 changes: 9 additions & 8 deletions vault_service/views/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,25 +655,26 @@ def _create_myads_query(template_type, frequency, data, classes=None, start_isod
if frequency == 'daily':
connector = [' ', ' NOT ']
# keyword search should be sorted by score, "other recent" should be sorted by date
sort_w_keywords = ['score desc, bibcode desc', 'bibcode desc']
sort_w_keywords = ['score desc, date desc', 'date desc']
elif frequency == 'weekly':
connector = [' ']
sort_w_keywords = ['score desc, bibcode desc']
sort_w_keywords = ['score desc, date desc']
if not keywords:
q = 'bibstem:arxiv {0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
format(classes, start_date, end_date, beg_pubyear)
sort = 'bibcode desc'
sort = 'date desc'
out.append({'q': q, 'sort': sort})
else:
for c, s in zip(connector, sort_w_keywords):
q = 'bibstem:arxiv ({0}{1}({2})) entdate:["{3}Z00:00" TO "{4}Z23:59"] pubdate:[{5}-00 TO *]'.\
format(classes, c, keywords, start_date, end_date, beg_pubyear)
sort = s

out.append({'q': q, 'sort': sort})
elif template_type == 'citations':
keywords = data
q = 'citations({0})'.format(keywords)
sort = 'entry_date desc, bibcode desc'
sort = 'entry_date desc, date desc'
out.append({'q': q, 'sort': sort})
elif template_type == 'authors':
keywords = data
Expand All @@ -682,7 +683,7 @@ def _create_myads_query(template_type, frequency, data, classes=None, start_isod
start_date = start_isodate
q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
format(keywords, start_date, end_date, beg_pubyear)
sort = 'score desc, bibcode desc'
sort = 'score desc, date desc'
out.append({'q': q, 'sort': sort})
elif template_type == 'keyword':
keywords = data
Expand All @@ -692,15 +693,15 @@ def _create_myads_query(template_type, frequency, data, classes=None, start_isod
# most recent
q = '{0} entdate:["{1}Z00:00" TO "{2}Z23:59"] pubdate:[{3}-00 TO *]'.\
format(keywords, start_date, end_date, beg_pubyear)
sort = 'entry_date desc, bibcode desc'
sort = 'entry_date desc, date desc'
out.append({'q': q, 'sort': sort})
# most popular
q = 'trending({0})'.format(keywords)
sort = 'score desc, bibcode desc'
sort = 'score desc, date desc'
out.append({'q': q, 'sort': sort})
# most cited
q = 'useful({0})'.format(keywords)
sort = 'score desc, bibcode desc'
sort = 'score desc, date desc'
out.append({'q': q, 'sort': sort})
elif template_type is None and data:
# General query - for consistency with the rest of templates,
Expand Down

0 comments on commit 8deffbc

Please sign in to comment.