Fix: Correct the way articles are selected by date. #876

Open

wants to merge 5 commits into base: main

Changes from 4 commits
15 changes: 0 additions & 15 deletions article/models.py
@@ -267,21 +267,6 @@ def get_or_create(
except cls.DoesNotExist:
return cls.create(pid_v3=pid_v3, user=user)

# @classmethod
# def get_or_create(cls, doi, pid_v2, fundings, user):
# try:
# return cls.objects.get(doi__in=doi, pid_v2=pid_v2)
# except cls.DoesNotExist:
# article = cls()
# article.pid_v2 = pid_v2
# article.creator = user
# article.save()
# article.doi.set(doi)
# if fundings:
# for funding in fundings:
# article.fundings.add(funding)
# return article

def set_date_pub(self, dates):
if dates:
self.pub_date_day = dates.get("day")
57 changes: 25 additions & 32 deletions article/tasks.py
@@ -5,6 +5,7 @@
from django.db.models import Q, Count
from django.contrib.auth import get_user_model
from django.utils.translation import gettext as _
from django.db.models import Subquery

from article.models import Article, ArticleFormat
from article.sources import xmlsps
@@ -42,54 +43,46 @@ def load_article(self, user_id=None, username=None, file_path=None, v3=None):
xmlsps.load_article(user, file_path=file_path, v3=v3)


def _items_to_load_article(from_date, force_update):
def _items_to_load_article(from_date):
if from_date:
try:
from_date = datetime.strptime(from_date, "%Y-%m-%d")
except Exception:
from_date = None
if not from_date:
# get the most recently updated Article
try:
article = Article.objects.filter(
~Q(valid=True)
).order_by("-updated").first()
if not article:
article = Article.objects.filter(valid=True).order_by("-updated").first()
if article:
from_date = article.updated
except Article.DoesNotExist:
# Get the date of the last valid article
last_valid_article = Article.objects.all().order_by("-updated").first()
if last_valid_article:
from_date = last_valid_article.updated

@samuelveigarangel thinking it over: the date of the last created or updated article will not guarantee that the PidProvider records are selected correctly. Since the article-loading procedure is not synchronized with the receipt of the XML, the selection of PidProvider records may be incomplete.
To solve this problem, do the following:

Instead of

        last_valid_article = Article.objects.all().order_by("-updated").first()
        if last_valid_article:
            from_date = last_valid_article.updated 

use:

        last_created_article = Article.objects.all().order_by("-created").first()
        if last_created_article:
            pid_v3 = last_created_article.pid_v3
            from_date = PidProviderXML.objects.filter(v3=pid_v3).order_by("created").first().created
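
Editor's follow-up sketch (not part of the diff): a minimal version of _items_to_load_article with the suggestion above applied. It assumes PidProviderXML is already imported in article/tasks.py (it is referenced elsewhere in this module) and adds a defensive guard for the case where no PidProviderXML record matches the article's pid_v3; that guard is an assumption, not part of the original suggestion.

    def _items_to_load_article(from_date):
        if from_date:
            try:
                from_date = datetime.strptime(from_date, "%Y-%m-%d")
            except Exception:
                from_date = None
        if not from_date:
            # Derive from_date from the PidProviderXML record behind the most
            # recently created Article, as suggested above.
            last_created_article = Article.objects.all().order_by("-created").first()
            if last_created_article:
                pid_xml = (
                    PidProviderXML.objects.filter(v3=last_created_article.pid_v3)
                    .order_by("created")
                    .first()
                )
                if pid_xml:  # assumption: guard against a missing record
                    from_date = pid_xml.created
        if not from_date:
            from_date = datetime(1900, 1, 1)

        for item in PidProviderXML.public_items(from_date):
            yield item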

else:
from_date = datetime(1900, 1, 1)

if not from_date:
from_date = datetime(1900, 1, 1)

items = PidProviderXML.public_items(from_date)
if force_update:
yield from items

for item in items:
try:
article = Article.objects.get(
~Q(valid=True),
pid_v3=item.v3,
updated__lt=item.updated,
created__lt=item.created,
)
if article:
yield item
except Article.DoesNotExist:
yield item
yield item


def items_to_load_article_with_valid_false():
# Get the PidProviderXML objects whose v3 matches the pid_v3 of an Article
articles = Article.objects.filter(valid=False).values("pid_v3")
items = PidProviderXML.objects.filter(v3__in=Subquery(articles))
for item in items:
yield item


@celery_app.task(bind=True, name=_("load_articles"))
def load_articles(
self, user_id=None, username=None, from_date=None, force_update=False
self, user_id=None, username=None, from_date=None, load_invalid_articles=False, force_update=False
):
try:
user = _get_user(self.request, username, user_id)

for item in _items_to_load_article(from_date, force_update):
if load_invalid_articles:
generator_articles = items_to_load_article_with_valid_false()
else:
generator_articles = _items_to_load_article(from_date)

for item in generator_articles:
try:
load_article.apply_async(
kwargs={
@@ -270,9 +263,9 @@ def remove_duplicate_articles(pid_v3=None):
ids_to_exclude = []
try:
if pid_v3:
duplicates = Article.objects.filter(pid_v3=pid_v3).values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1)
duplicates = Article.objects.filter(pid_v3=pid_v3).values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1, valid=False)
else:
duplicates = Article.objects.values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1)
duplicates = Article.objects.values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1, valid=False)
for duplicate in duplicates:
article_ids = Article.objects.filter(
pid_v3=duplicate["pid_v3"]
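
Editorial note on the load_articles signature change above: a hedged sketch of how the updated task could be invoked once this PR lands. The username and date values are placeholders, not values taken from the PR.

    # Hedged usage sketch; "adm" and the date are example placeholders.
    from article.tasks import load_articles

    # Reprocess only articles currently marked valid=False:
    load_articles.apply_async(
        kwargs={"username": "adm", "load_invalid_articles": True}
    )

    # Regular incremental load, selecting PidProviderXML items from an explicit date:
    load_articles.apply_async(
        kwargs={"username": "adm", "from_date": "2024-01-01"}
    )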