Skip to content
This repository has been archived by the owner on Sep 21, 2020. It is now read-only.

Commit

Permalink
add log to scrape table
Browse files Browse the repository at this point in the history
  • Loading branch information
drkane committed May 11, 2020
1 parent 1d3b0f2 commit 22facb2
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 0 deletions.
28 changes: 28 additions & 0 deletions alembic/versions/5c0f09a5b2ef_add_log_to_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Add log to scraper
Revision ID: 5c0f09a5b2ef
Revises: 66e2f2bd33e6
Create Date: 2020-05-11 15:00:10.910038
"""
from alembic import op
import sqlalchemy as sa


# Revision identifiers, used by Alembic.
# `revision` is this migration's own id; `down_revision` is the id listed
# under "Revises" in the module docstring.
revision = '5c0f09a5b2ef'
down_revision = '66e2f2bd33e6'
# No branch labels or cross-revision dependencies are declared.
branch_labels = None
depends_on = None


def upgrade():
    """Apply this revision: add a nullable text column ``log`` to ``scrape``."""
    log_column = sa.Column('log', sa.Text(), nullable=True)
    op.add_column('scrape', log_column)


def downgrade():
    """Revert this revision: remove the ``log`` column from ``scrape``."""
    table_name, column_name = 'scrape', 'log'
    op.drop_column(table_name, column_name)
1 change: 1 addition & 0 deletions findthatcharity_import/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,5 @@
Column("errors", Integer),
Column("start_time", DateTime),
Column("finish_time", DateTime),
Column("log", Text),
)
10 changes: 10 additions & 0 deletions findthatcharity_import/pipelines/sqlsave_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import uuid
import datetime
from io import StringIO

from sqlalchemy import create_engine, and_
from sqlalchemy.exc import InternalError, IntegrityError
Expand All @@ -23,6 +24,14 @@ def __init__(self, db_uri, chunk_size, stats):
self.stats = stats
self.spider_name = None
self.crawl_id = uuid.uuid4().hex

self.log_stream = StringIO()
self.log_handler = logging.StreamHandler(self.log_stream)
formatter = logging.Formatter('%(asctime)s [%(name)s] %(levelname)s: %(message)s')
self.log_handler.setLevel(logging.INFO)
self.log_handler.setFormatter(formatter)
self.log = logging.getLogger()
self.log.addHandler(self.log_handler)

# logging.basicConfig()
# logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
Expand Down Expand Up @@ -158,5 +167,6 @@ def save_stats(self):
"items": stats.get('item_scraped_count', 0),
"start_time": stats.get('start_time', datetime.datetime.utcnow()),
"finish_time": stats.get('finish_time'),
"log": self.log_stream.getvalue(),
}
self.records['scrape'].append(to_save)

0 comments on commit 22facb2

Please sign in to comment.