-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implementar verificação de disponibilidade de artigos no site QA e public (opac_5) #462
Changes from 14 commits
b8901ab
490530f
793a84e
48118d4
783ae05
0e7779d
5582b74
91a5b6c
4a6b2e8
4db2b20
627c144
465f4a6
c47344c
f3f615f
9bb5aea
4bc73af
5f978ff
a99427b
2706e9d
f8fd6fe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
# Generated by Django 5.0.3 on 2024-05-21 00:43 | ||
|
||
import django.db.models.deletion | ||
from django.conf import settings | ||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Create the ``ScieloSiteStatus`` and ``CheckArticleAvailability`` models."""

    # Runs after the initial ``article`` migration and after whichever
    # migration provides the configured user model.
    dependencies = [
        ("article", "0001_initial"),
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # One row per checked URL (the URL is unique) with its latest result.
        migrations.CreateModel(
            name="ScieloSiteStatus",
            options={"abstract": False},
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        verbose_name="ID",
                        primary_key=True,
                        auto_created=True,
                        serialize=False,
                    ),
                ),
                (
                    "created",
                    models.DateTimeField(
                        verbose_name="Creation date",
                        auto_now_add=True,
                    ),
                ),
                (
                    "updated",
                    models.DateTimeField(
                        verbose_name="Last update date",
                        auto_now=True,
                    ),
                ),
                ("check_date", models.DateTimeField(null=True, blank=True)),
                ("url_site_scielo", models.SlugField(unique=True, max_length=500)),
                ("available", models.BooleanField(default=False)),
                (
                    "creator",
                    models.ForeignKey(
                        to=settings.AUTH_USER_MODEL,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="%(class)s_creator",
                        verbose_name="Creator",
                        editable=False,
                    ),
                ),
                (
                    "updated_by",
                    models.ForeignKey(
                        to=settings.AUTH_USER_MODEL,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="%(class)s_last_mod_user",
                        verbose_name="Updater",
                        editable=False,
                        null=True,
                        blank=True,
                    ),
                ),
            ],
        ),
        # Links an article to the status rows of the URLs checked for it.
        migrations.CreateModel(
            name="CheckArticleAvailability",
            options={"abstract": False},
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        verbose_name="ID",
                        primary_key=True,
                        auto_created=True,
                        serialize=False,
                    ),
                ),
                (
                    "created",
                    models.DateTimeField(
                        verbose_name="Creation date",
                        auto_now_add=True,
                    ),
                ),
                (
                    "updated",
                    models.DateTimeField(
                        verbose_name="Last update date",
                        auto_now=True,
                    ),
                ),
                (
                    "article",
                    models.ForeignKey(
                        to="article.article",
                        on_delete=django.db.models.deletion.SET_NULL,
                        null=True,
                    ),
                ),
                (
                    "creator",
                    models.ForeignKey(
                        to=settings.AUTH_USER_MODEL,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="%(class)s_creator",
                        verbose_name="Creator",
                        editable=False,
                    ),
                ),
                (
                    "updated_by",
                    models.ForeignKey(
                        to=settings.AUTH_USER_MODEL,
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="%(class)s_last_mod_user",
                        verbose_name="Updater",
                        editable=False,
                        null=True,
                        blank=True,
                    ),
                ),
                ("site_status", models.ManyToManyField(to="article.scielositestatus")),
            ],
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import logging | ||
import datetime | ||
|
||
from django.contrib.auth import get_user_model | ||
from django.db import models | ||
|
@@ -314,3 +315,152 @@ def __str__(self) -> str: | |
return f"{self.article or self.pid_v3} - {self.deadline}" | ||
|
||
base_form_class = RequestArticleChangeForm | ||
|
||
|
||
class CheckArticleAvailability(CommonControlField):
    """
    Model that stores an article's availability status on the scielo.br
    sites, both the new version and the old one.
    """

    # NOTE(review): add the collection so that availability is checked in all
    # collections. Availability on the QA site should also be considered,
    # including whether the article is available on both sites or only one.
    article = models.ForeignKey(
        Article,
        on_delete=models.SET_NULL,
        null=True,
    )
    # NOTE(review): rename to url_status and "URLStatus".
    site_status = models.ManyToManyField(
        "ScieloSiteStatus"
    )

    def __str__(self):
        # ``article`` is nullable (on_delete=SET_NULL), so it may be missing.
        if self.article is None:
            return f"{type(self).__name__} #{self.pk} (no article)"
        return f"{self.article.pid_v3}"

    @classmethod
    def get(cls, article):
        """Return the record whose ``article`` matches.

        Raises:
            cls.DoesNotExist: when no record references ``article``.
        """
        return cls.objects.get(article=article)

    def create_or_update_scielo_site_status(
        self,
        url,
        status,
        user,
        date=None,
    ):
        """Register the result of checking ``url`` and attach it to this record.

        Args:
            url: checked URL, used to look up the ``ScieloSiteStatus`` row.
            status: value stored as the availability flag.
            user: user recorded as creator when a new status row is created.
            date: moment of the check; forwarded unchanged.
        """
        obj = ScieloSiteStatus.create_or_update(
            url=url,
            status=status,
            date=date,
            user=user,
        )
        self.site_status.add(obj)
        self.save()

    @classmethod
    def create(
        cls,
        article,
        status,
        url,
        user,
        date=None,
    ):
        """Create a record for ``article`` and register its first URL status.

        Returns:
            The newly saved instance.
        """
        obj = cls(
            article=article,
            creator=user,
        )
        # The instance must be saved before the many-to-many relation is used.
        obj.save()
        obj.create_or_update_scielo_site_status(
            url=url,
            status=status,
            user=user,
            date=date,
        )
        return obj

    @classmethod
    def create_or_update(
        cls,
        article,
        status,
        url,
        user,
        date=None,
    ):
        """Register the status of ``url`` for ``article``, creating the record
        when it does not exist yet.

        Returns:
            The existing or newly created instance.
        """
        try:
            obj = cls.get(article=article)
        except cls.DoesNotExist:
            # Return the created instance so both branches yield the record.
            return cls.create(
                article=article,
                status=status,
                url=url,
                date=date,
                user=user,
            )
        obj.create_or_update_scielo_site_status(
            url=url,
            status=status,
            date=date,
            user=user,
        )
        return obj
|
||
class ScieloSiteStatus(CommonControlField):
    """Latest availability result recorded for a single URL."""

    # NOTE(review): is check_date needed given that ``updated`` exists?
    check_date = models.DateTimeField(null=True, blank=True)
    # NOTE(review): why SlugField instead of URLField?
    url_site_scielo = models.SlugField(max_length=500, unique=True)
    # NOTE(review): what is the difference between status and available?
    # Reply: status can give the user a better message, distinguishing a
    # wrong URL from instability at the URL. The field name could be better.
    available = models.BooleanField(default=False)

    def update(
        self,
        status,
        date=None,
    ):
        """Store a new check result and save the row.

        Args:
            status: value stored in ``available``.
            date: moment of the check; the current time is used when falsy.

        Returns:
            This instance.
        """
        if not date:
            # timezone.now() is aware or naive according to USE_TZ, so the
            # DateTimeField never receives a mismatched naive datetime.
            from django.utils import timezone

            date = timezone.now()
        self.check_date = date
        self.available = status
        self.save()
        return self

    @classmethod
    def get(cls, url):
        """Return the row whose ``url_site_scielo`` equals ``url``.

        Raises:
            cls.DoesNotExist: when no row has that URL.
        """
        return cls.objects.get(url_site_scielo=url)

    @classmethod
    def create(
        cls,
        url,
        status,
        user,
        date=None,
    ):
        """Create and save a status row for ``url``.

        Args:
            url: value stored in ``url_site_scielo``.
            status: value stored in ``available``.
            user: stored as ``creator``.
            date: moment of the check; the current time is used when falsy.

        Returns:
            The newly saved instance.
        """
        if not date:
            # See ``update``: avoid naive datetimes when USE_TZ is enabled.
            from django.utils import timezone

            date = timezone.now()
        obj = cls(
            check_date=date,
            url_site_scielo=url,
            available=status,
            creator=user,
        )
        obj.save()
        return obj

    @classmethod
    def create_or_update(
        cls,
        url,
        status,
        user,
        date=None,
    ):
        """Update the row for ``url`` or create it when missing.

        Returns:
            The updated or newly created instance.
        """
        try:
            obj = cls.get(url=url)
        except cls.DoesNotExist:
            return cls.create(
                url=url,
                status=status,
                user=user,
                date=date,
            )
        obj.update(
            status=status,
            date=date,
        )
        return obj
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from article.tasks import initiate_article_availability_check | ||
|
||
|
||
def run(pid_v3, username=None, user_id=None):
    """Enqueue ``initiate_article_availability_check`` for one article.

    Args:
        pid_v3: forwarded to the task as ``article_pid_v3``.
        username: forwarded to the task unchanged.
        user_id: forwarded to the task unchanged.
    """
    task_kwargs = {
        "username": username,
        "user_id": user_id,
        "article_pid_v3": pid_v3,
    }
    initiate_article_availability_check.apply_async(kwargs=task_kwargs)
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -0,0 +1,89 @@ | ||||
import sys | ||||
from django.db.models import Q | ||||
|
||||
from config import celery_app | ||||
from core.utils.get_user import _get_user | ||||
from core.utils.requester import fetch_data | ||||
from article.models import CheckArticleAvailability, Article | ||||
from collection.models import Collection | ||||
from tracker.models import UnexpectedEvent | ||||
|
||||
@celery_app.task(bind=True)
def initiate_article_availability_check(
    self,
    username,
    user_id,
    issn_print=None,
    issn_electronic=None,
    publication_year=None,
    updated=None,
    article_pid_v3=None,
    collection_acron=None,
):
    """Select articles and enqueue one availability check per article language.

    Args:
        username: forwarded to ``process_article_availability``.
        user_id: forwarded to ``process_article_availability``.
        issn_print: optional filter on the journal's print ISSN.
        issn_electronic: optional filter on the journal's electronic ISSN.
        publication_year: optional filter on the issue's publication year.
        updated: when truthy, the optional filters are not applied.
        article_pid_v3: optional filter on the article's pid_v3.
        collection_acron: restricts the check to one collection; all
            collections are used when omitted.
    """
    if collection_acron:
        collection = Collection.objects.filter(acron=collection_acron)
    else:
        collection = Collection.objects.all()

    query = Q(journal__journalproc__collection__in=collection)
    # NOTE(review): the original indentation was lost; all four optional
    # filters are assumed to sit under ``if not updated`` - confirm.
    if not updated:
        # Combine with AND so each supplied filter narrows the selection;
        # OR-ing them onto the collection filter matched every article.
        if article_pid_v3:
            query &= Q(pid_v3=article_pid_v3)
        if issn_print:
            query &= Q(journal__official_journal__issn_print=issn_print)
        if issn_electronic:
            query &= Q(journal__official_journal__issn_electronic=issn_electronic)
        if publication_year:
            query &= Q(issue__publication_year=publication_year)

    # The join through journalproc may repeat an article; distinct() avoids
    # enqueuing the same check more than once.
    articles = Article.objects.filter(query).distinct()

    for article in articles.iterator():
        # NOTE(review): assumes ``doi_with_lang.lang`` is iterable and yields
        # language codes - confirm against the Article model.
        for article_per_lang in article.doi_with_lang.lang:
            process_article_availability.apply_async(
                kwargs=dict(
                    user_id=user_id,
                    username=username,
                    pid_v3=article.pid_v3,
                    journal_acron=article.journal.journal_acron,
                    lang=article_per_lang,
                )
            )
|
||||
|
||||
@celery_app.task(bind=True)
def process_article_availability(self, user_id, username, pid_v3, journal_acron, lang):
    """Fetch the article's URLs and record whether each one responded.

    Args:
        user_id: used with ``username`` to resolve the acting user.
        username: used with ``user_id`` to resolve the acting user.
        pid_v3: identifies the article and is interpolated into both URLs.
        journal_acron: journal acronym interpolated into the second URL.
        lang: language code interpolated into both URLs.

    Any failure outside the URL fetch itself is recorded as an
    ``UnexpectedEvent`` instead of being raised.
    """
    # NOTE(review): the www.scielo.br part should come from the Collection
    # class, which probably differs from the one in Core and needs updating
    # (see scms-upload/collection/models.py, line 65 in 91234dd).
    # NOTE(review): also add the PDF formats.
    # NOTE(review): recording the response time would be a good improvement.
    urls = [
        f"https://www.scielo.br/scielo.php?script=sci_arttext&pid={pid_v3}&lng={lang}&nrm=iso",
        f"https://www.scielo.br/j/{journal_acron}/a/{pid_v3}/?lang={lang}",
    ]
    try:
        user = _get_user(self.request, user_id=user_id, username=username)
        article = Article.objects.get(pid_v3=pid_v3)

        for url in urls:
            # Only the fetch decides availability; a failure while saving must
            # not be recorded as "unavailable", so it stays outside this try.
            try:
                fetch_data(url, timeout=2, verify=True)
            except Exception:
                available = False
            else:
                available = True

            # NOTE(review): replace True / False with the HTTP error code, or
            # store it in a separate attribute.
            CheckArticleAvailability.create_or_update(
                article=article,
                status=available,
                url=url,
                user=user,
            )
    except Exception as e:
        exc_traceback = sys.exc_info()[2]
        UnexpectedEvent.create(
            e=e,
            exc_traceback=exc_traceback,
            detail={
                "function": "article.tasks.process_article_availability",
                "urls": urls,
            },
        )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@samuelveigarangel Use ArticleAvailability no lugar de CheckArticleAvailability
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@samuelveigarangel mova este modelo para dentro de publication, pois dentro de publication podemos importar quaisquer outros modelos como JournalProc, IssueProc e outros minimizando problemas de dependências circulares