From 9e6c0c9e5c9f30244934f379f14a2d42eb51f815 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 11 Oct 2024 10:44:31 -0300 Subject: [PATCH 1/5] Faz melhorias em _items_to_load_article --- article/tasks.py | 37 +++++++------------------------------ 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/article/tasks.py b/article/tasks.py index 1fc6eaa0..3be6d1df 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -42,44 +42,21 @@ def load_article(self, user_id=None, username=None, file_path=None, v3=None): xmlsps.load_article(user, file_path=file_path, v3=v3) -def _items_to_load_article(from_date, force_update): +def _items_to_load_article(from_date): if from_date: try: from_date = datetime.strptime(from_date, "%Y-%m-%d") - except Exception: + except ValueError: from_date = None if not from_date: - # obtém a última atualização de Article - try: - article = Article.objects.filter( - ~Q(valid=True) - ).order_by("-updated").first() - if not article: - article = Article.objects.filter(valid=True).order_by("-updated").first() - if article: - from_date = article.updated - except Article.DoesNotExist: - from_date = datetime(1900, 1, 1) - - if not from_date: - from_date = datetime(1900, 1, 1) + # Obtém a data do último artigo válido + last_valid_article = Article.objects.all().order_by("-updated").first() + from_date = last_valid_article.updated items = PidProviderXML.public_items(from_date) - if force_update: - yield from items for item in items: - try: - article = Article.objects.get( - ~Q(valid=True), - pid_v3=item.v3, - updated__lt=item.updated, - created__lt=item.created, - ) - if article: - yield item - except Article.DoesNotExist: - yield item + yield item @celery_app.task(bind=True, name=_("load_articles")) @@ -89,7 +66,7 @@ def load_articles( try: user = _get_user(self.request, username, user_id) - for item in _items_to_load_article(from_date, force_update): + for item in _items_to_load_article(from_date): try: load_article.apply_async( kwargs={ From 8f49c8ec9f16810773d86cb1f4ebcd656124e03a Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 11 Oct 2024 11:44:53 -0300 Subject: [PATCH 2/5] =?UTF-8?q?Cria=20op=C3=A7=C3=A3o=20para=20reprocessar?= =?UTF-8?q?=20os=20artigos=20com=20valid=3DFalse?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- article/tasks.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/article/tasks.py b/article/tasks.py index 3be6d1df..3f076e7c 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -5,6 +5,7 @@ from django.db.models import Q, Count from django.contrib.auth import get_user_model from django.utils.translation import gettext as _ +from django.db.models import Subquery from article.models import Article, ArticleFormat from article.sources import xmlsps @@ -59,14 +60,25 @@ def _items_to_load_article(from_date): yield item +def items_to_load_article_with_valid_false(): + # Obtém os objetos PidProviderXMl onde o campo pid_v3 de article e v3 possuem o mesmo valor + articles = Article.objects.filter(valid=False).values("pid_v3") + items = PidProviderXML.objects.filter(v3__in=Subquery(articles)) + for item in items: + yield item + + @celery_app.task(bind=True, name=_("load_articles")) def load_articles( - self, user_id=None, username=None, from_date=None, force_update=False + self, user_id=None, username=None, from_date=None, load_invalid_articles=True, force_update=False ): try: user = _get_user(self.request, username, user_id) - - for item in _items_to_load_article(from_date): + if load_invalid_articles: + generator_articles = _items_to_load_article(from_date) + else: + generator_articles = items_to_load_article_with_valid_false() + for item in generator_articles: try: load_article.apply_async( kwargs={ @@ -247,9 +259,9 @@ def remove_duplicate_articles(pid_v3=None): ids_to_exclude = [] try: if pid_v3: - duplicates = Article.objects.filter(pid_v3=pid_v3).values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1) + duplicates = Article.objects.filter(pid_v3=pid_v3).values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1, valid=False) else: - duplicates = Article.objects.values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1) + duplicates = Article.objects.values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1, valid=False) for duplicate in duplicates: article_ids = Article.objects.filter( pid_v3=duplicate["pid_v3"] From 3e8612d6f749145bfd318d52b950b0d11a587821 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 11 Oct 2024 11:45:01 -0300 Subject: [PATCH 3/5] Remove comentarios --- article/models.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/article/models.py b/article/models.py index 69adbceb..a9ca8785 100755 --- a/article/models.py +++ b/article/models.py @@ -267,21 +267,6 @@ def get_or_create( except cls.DoesNotExist: return cls.create(pid_v3=pid_v3, user=user) - # @classmethod - # def get_or_create(cls, doi, pid_v2, fundings, user): - # try: - # return cls.objects.get(doi__in=doi, pid_v2=pid_v2) - # except cls.DoesNotExist: - # article = cls() - # article.pid_v2 = pid_v2 - # article.creator = user - # article.save() - # article.doi.set(doi) - # if fundings: - # for funding in fundings: - # article.fundings.add(funding) - # return article - def set_date_pub(self, dates): if dates: self.pub_date_day = dates.get("day") From a62a120b617a6f1ca9adc751466919a129f993ce Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Thu, 17 Oct 2024 14:10:29 -0300 Subject: [PATCH 4/5] =?UTF-8?q?Insere=20uma=20exce=C3=A7=C3=A3o=20generica?= =?UTF-8?q?=20e=20fix=20condi=C3=A7=C3=A3o=20if?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- article/tasks.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/article/tasks.py b/article/tasks.py index 3f076e7c..4b7ea6c1 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -47,12 +47,15 @@ def _items_to_load_article(from_date): if from_date: try: from_date = datetime.strptime(from_date, "%Y-%m-%d") - except ValueError: + except Exception: from_date = None if not from_date: # Obtém a data do último artigo válido last_valid_article = Article.objects.all().order_by("-updated").first() - from_date = last_valid_article.updated + if last_valid_article: + from_date = last_valid_article.updated + else: + from_date = datetime(1900, 1, 1) items = PidProviderXML.public_items(from_date) @@ -70,14 +73,15 @@ def items_to_load_article_with_valid_false(): @celery_app.task(bind=True, name=_("load_articles")) def load_articles( - self, user_id=None, username=None, from_date=None, load_invalid_articles=True, force_update=False + self, user_id=None, username=None, from_date=None, load_invalid_articles=False, force_update=False ): try: user = _get_user(self.request, username, user_id) if load_invalid_articles: - generator_articles = _items_to_load_article(from_date) - else: generator_articles = items_to_load_article_with_valid_false() + else: + generator_articles = _items_to_load_article(from_date) + for item in generator_articles: try: load_article.apply_async( From 559028e7b4b8ef85405307dbf33a0222cfed83e3 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Thu, 5 Dec 2024 15:04:54 -0300 Subject: [PATCH 5/5] Altera logica para obter a data do ultimo artigo valido --- article/tasks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/article/tasks.py b/article/tasks.py index 4b7ea6c1..73f31316 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -51,9 +51,10 @@ def _items_to_load_article(from_date): from_date = None if not from_date: # Obtém a data do último artigo válido - last_valid_article = Article.objects.all().order_by("-updated").first() - if last_valid_article: - from_date = last_valid_article.updated + last_created_article = Article.objects.all().order_by("-created").first() + if last_created_article: + pid_v3 = last_created_article.pid_v3 + from_date = PidProviderXML.objects.filter(v3=pid_v3).order_by("created").first().created else: from_date = datetime(1900, 1, 1)