"""Tiny script for practising with the debugger via the built-in ``breakpoint()``."""

# Enter the debugger before anything else runs so every statement below can
# be stepped through one at a time.
breakpoint()

print("This is line number 1")

a = 1
b = 2

sum1 = a + b

# Fixed: the original interpolated the built-in ``sum`` function rather than
# the ``sum1`` variable, so it printed "<built-in function sum>".
print(f"The sum is {sum1}")

# Kept as an explicit loop (instead of ``sum1 += 5``) so there is something
# to step through and watch change in the debugger.
for i in range(5):
    sum1 += 1

print(f"The sum at the end is {sum1}")
def main():
    """Select the project settings module and hand ``sys.argv`` to Django's CLI."""
    # setdefault leaves an already-exported DJANGO_SETTINGS_MODULE untouched.
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myworld1.settings')
    try:
        from django.core.management import execute_from_command_line as run_cli
    except ImportError as exc:
        hint = (
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        )
        # Chain the original error so the real import failure stays visible.
        raise ImportError(hint) from exc
    run_cli(sys.argv)
class DjJob(admin.ModelAdmin):
    """Admin options for ``Job``: list columns plus per-row "Run" and "Stats" links."""

    # The original assigned ``list_display`` twice; only the second tuple
    # (the one that includes "run") ever took effect, so the dead first
    # assignment has been removed.
    list_display = ("job_name", "start_date", "end_date", "no_of_blogs", "start_no", "created_date", "run", "view_stats")
    list_filter = ("job_name", "start_date")
    readonly_fields = ("created_date",)

    def view_stats(self, obj):
        """Return a link to the JobStats changelist with ``?q=<job pk>``."""
        path = "../jobstats/?q={}".format(obj.pk)
        # The original computed ``path`` but never used it, and passed an
        # f-string with no placeholders to format_html.
        # NOTE(review): the anchor markup is reconstructed from the unused
        # ``path`` variable -- confirm it matches the intended HTML.
        return format_html('<a href="{}">stats</a>', path)

    # ``allow_tags`` is no longer set: Django ignores it, and format_html
    # already returns markup that is marked safe.
    view_stats.short_description = 'Stats'

    def run(self, obj):
        """Return a link to the ``scraping`` URL for this job."""
        # ``args`` must be a sequence. The original ``(str(obj.pk))`` is just a
        # string, which reverse() unpacks character by character, so any pk
        # with two or more digits produced the wrong number of arguments.
        url = reverse('scraping', args=(obj.pk,))
        # The original format string had no placeholder, so ``url`` was
        # silently discarded.
        # NOTE(review): anchor markup reconstructed -- confirm.
        return format_html('<a href="{}">RUN</a>', url)

    run.short_description = 'Run'
def start_extraction(start_date=None, end_date=None, no_of_articles=None, start_id=None):
    """Scrape the posts listed on blog.python.org and store them as blog rows.

    The blog table is emptied with ``truncate_table()`` before new rows are
    written through ``add_row_to_blog()``, so each run replaces the previous
    contents.

    Args:
        start_date: Optional date string; posts dated earlier are skipped.
        end_date: Optional date string; posts dated later are skipped.
        no_of_articles: Optional maximum number of posts to store.
        start_id: Optional 1-based position of the first page entry to
            consider; entries before it are skipped.

    Raises:
        requests.RequestException: If the page cannot be fetched (timeout,
            connection error, or a non-2xx response).
    """
    print("Extraction started")
    url = "https://blog.python.org/"

    # Fail on a timeout or HTTP error *before* the table is truncated, so a
    # broken fetch no longer wipes the existing rows and inserts nothing.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    page_soup = BeautifulSoup(response.text, 'html.parser')

    if start_date:
        start_date = parse(start_date)
    if end_date:
        end_date = parse(end_date)

    # Convert the numeric options once instead of on every iteration.
    first_position = int(start_id) if start_id else None
    limit = int(no_of_articles) if no_of_articles else None

    blogs = page_soup.select('div.date-outer')
    truncate_table()
    stored = 0
    for position, blog in enumerate(blogs, start=1):
        if first_position and position < first_position:
            continue
        if limit is not None and stored >= limit:
            # Nothing after this point can be stored, so stop scanning
            # (the original kept looping with ``continue``).
            break
        date = blog.select('.date-header span')[0].get_text()

        converted_date = parse(date)

        if start_date and converted_date < start_date:
            continue
        if end_date and converted_date > end_date:
            continue

        post = blog.select('.post')[0]

        title_bar = post.select('.post-title')
        if title_bar:
            title = title_bar[0].text
        else:
            # No title element: fall back to the first node of the body.
            title = post.select('.post-body')[0].contents[0].text

        # getting the author and blog time
        post_footer = post.select('.post-footer')[0]
        author = post_footer.select('.post-author span')[0].text
        time = post_footer.select('abbr')[0].text

        add_row_to_blog(title, author, date, time)

        print("\nTitle:", title.strip('\n'))
        print("Date:", date, )
        print("Time:", time)
        print("Author:", author)

        print(
            "\n---------------------------------------------------------------------------------------------------------------\n")
        stored += 1
0000000..35b345f --- /dev/null +++ b/myworld1/members/migrations/0002_students_delete_members.py @@ -0,0 +1,28 @@ +# Generated by Django 4.2 on 2023-04-25 08:51 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0001_initial'), + ] + + operations = [ + migrations.CreateModel( + name='Students', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('first_name', models.CharField(max_length=200)), + ('last_name', models.CharField(max_length=200)), + ('address', models.CharField(max_length=200)), + ('roll_number', models.IntegerField()), + ('mobile', models.CharField(max_length=10)), + ('branch', models.CharField(choices=[('BA', 'BA'), ('B.COM', 'B.COM'), ('MBA', 'MBA'), ('CA', 'CA')], max_length=10)), + ], + ), + migrations.DeleteModel( + name='Members', + ), + ] diff --git a/myworld1/members/migrations/0003_blog.py b/myworld1/members/migrations/0003_blog.py new file mode 100644 index 0000000..4c4cd65 --- /dev/null +++ b/myworld1/members/migrations/0003_blog.py @@ -0,0 +1,24 @@ +# Generated by Django 4.2.1 on 2023-05-23 04:25 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0002_students_delete_members'), + ] + + operations = [ + migrations.CreateModel( + name='Blog', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('title', models.CharField(max_length=500)), + ('release_date', models.DateTimeField(verbose_name='Realse Date')), + ('blog_time', models.CharField(max_length=50)), + ('author', models.CharField(max_length=200)), + ('created_date', models.DateTimeField(auto_now_add=True, null=True, verbose_name='Created Date')), + ], + ), + ] diff --git a/myworld1/members/migrations/0004_blog_content.py b/myworld1/members/migrations/0004_blog_content.py new file mode 100644 index 0000000..8214234 
--- /dev/null +++ b/myworld1/members/migrations/0004_blog_content.py @@ -0,0 +1,19 @@ +# Generated by Django 4.2.1 on 2023-05-23 09:53 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0003_blog'), + ] + + operations = [ + migrations.AddField( + model_name='blog', + name='content', + field=models.CharField(default=None, max_length=20000), + preserve_default=False, + ), + ] diff --git a/myworld1/members/migrations/0005_blog_path.py b/myworld1/members/migrations/0005_blog_path.py new file mode 100644 index 0000000..08327be --- /dev/null +++ b/myworld1/members/migrations/0005_blog_path.py @@ -0,0 +1,19 @@ +# Generated by Django 4.2.1 on 2023-05-28 12:30 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0004_blog_content'), + ] + + operations = [ + migrations.AddField( + model_name='blog', + name='path', + field=models.CharField(default=None, max_length=500), + preserve_default=False, + ), + ] diff --git a/myworld1/members/migrations/0006_alter_blog_path.py b/myworld1/members/migrations/0006_alter_blog_path.py new file mode 100644 index 0000000..46547f1 --- /dev/null +++ b/myworld1/members/migrations/0006_alter_blog_path.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.1 on 2023-05-28 12:39 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0005_blog_path'), + ] + + operations = [ + migrations.AlterField( + model_name='blog', + name='path', + field=models.CharField(max_length=500, null=True), + ), + ] diff --git a/myworld1/members/migrations/0007_alter_blog_author_alter_blog_blog_time_and_more.py b/myworld1/members/migrations/0007_alter_blog_author_alter_blog_blog_time_and_more.py new file mode 100644 index 0000000..b7cd122 --- /dev/null +++ b/myworld1/members/migrations/0007_alter_blog_author_alter_blog_blog_time_and_more.py @@ -0,0 +1,38 @@ +# 
Generated by Django 4.2.1 on 2023-05-28 13:57 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0006_alter_blog_path'), + ] + + operations = [ + migrations.AlterField( + model_name='blog', + name='author', + field=models.CharField(max_length=200, null=True), + ), + migrations.AlterField( + model_name='blog', + name='blog_time', + field=models.CharField(max_length=50, null=True), + ), + migrations.AlterField( + model_name='blog', + name='content', + field=models.CharField(max_length=20000, null=True), + ), + migrations.AlterField( + model_name='blog', + name='release_date', + field=models.DateTimeField(null=True, verbose_name='Realse Date'), + ), + migrations.AlterField( + model_name='blog', + name='title', + field=models.CharField(max_length=500, null=True), + ), + ] diff --git a/myworld1/members/migrations/0008_alter_blog_author_alter_blog_blog_time_and_more.py b/myworld1/members/migrations/0008_alter_blog_author_alter_blog_blog_time_and_more.py new file mode 100644 index 0000000..36ee48e --- /dev/null +++ b/myworld1/members/migrations/0008_alter_blog_author_alter_blog_blog_time_and_more.py @@ -0,0 +1,48 @@ +# Generated by Django 4.2.1 on 2023-05-28 14:02 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0007_alter_blog_author_alter_blog_blog_time_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='blog', + name='author', + field=models.CharField(max_length=200), + ), + migrations.AlterField( + model_name='blog', + name='blog_time', + field=models.CharField(default=None, max_length=50), + preserve_default=False, + ), + migrations.AlterField( + model_name='blog', + name='content', + field=models.CharField(default=None, max_length=20000), + preserve_default=False, + ), + migrations.AlterField( + model_name='blog', + name='path', + field=models.CharField(default=None, max_length=500), + 
preserve_default=False, + ), + migrations.AlterField( + model_name='blog', + name='release_date', + field=models.DateTimeField(default=None, verbose_name='Realse Date'), + preserve_default=False, + ), + migrations.AlterField( + model_name='blog', + name='title', + field=models.CharField(default=None, max_length=500), + preserve_default=False, + ), + ] diff --git a/myworld1/members/migrations/0009_alter_blog_path.py b/myworld1/members/migrations/0009_alter_blog_path.py new file mode 100644 index 0000000..eb26a5f --- /dev/null +++ b/myworld1/members/migrations/0009_alter_blog_path.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.1 on 2023-05-28 14:11 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0008_alter_blog_author_alter_blog_blog_time_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='blog', + name='path', + field=models.CharField(max_length=500, null=True), + ), + ] diff --git a/myworld1/members/migrations/0010_alter_blog_author_alter_blog_blog_time_and_more.py b/myworld1/members/migrations/0010_alter_blog_author_alter_blog_blog_time_and_more.py new file mode 100644 index 0000000..11bc263 --- /dev/null +++ b/myworld1/members/migrations/0010_alter_blog_author_alter_blog_blog_time_and_more.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.1 on 2023-05-28 14:23 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0009_alter_blog_path'), + ] + + operations = [ + migrations.AlterField( + model_name='blog', + name='author', + field=models.CharField(max_length=200, null=True), + ), + migrations.AlterField( + model_name='blog', + name='blog_time', + field=models.CharField(max_length=50, null=True), + ), + migrations.AlterField( + model_name='blog', + name='content', + field=models.CharField(max_length=20000, null=True), + ), + migrations.AlterField( + model_name='blog', + name='release_date', + 
field=models.DateTimeField(null=True, verbose_name='Realse Date'), + ), + migrations.AlterField( + model_name='blog', + name='title', + field=models.CharField(max_length=500, null=True), + ), + ] diff --git a/myworld1/members/migrations/0011_alter_blog_author_alter_blog_blog_time_and_more.py b/myworld1/members/migrations/0011_alter_blog_author_alter_blog_blog_time_and_more.py new file mode 100644 index 0000000..b979ff7 --- /dev/null +++ b/myworld1/members/migrations/0011_alter_blog_author_alter_blog_blog_time_and_more.py @@ -0,0 +1,49 @@ +# Generated by Django 4.2.1 on 2023-05-29 13:34 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0010_alter_blog_author_alter_blog_blog_time_and_more'), + ] + + operations = [ + migrations.AlterField( + model_name='blog', + name='author', + field=models.CharField(default=None, max_length=200), + preserve_default=False, + ), + migrations.AlterField( + model_name='blog', + name='blog_time', + field=models.CharField(default=None, max_length=50), + preserve_default=False, + ), + migrations.AlterField( + model_name='blog', + name='content', + field=models.CharField(default=None, max_length=20000), + preserve_default=False, + ), + migrations.AlterField( + model_name='blog', + name='path', + field=models.CharField(default=None, max_length=500), + preserve_default=False, + ), + migrations.AlterField( + model_name='blog', + name='release_date', + field=models.DateTimeField(default=None, verbose_name='Realse Date'), + preserve_default=False, + ), + migrations.AlterField( + model_name='blog', + name='title', + field=models.CharField(default=None, max_length=500), + preserve_default=False, + ), + ] diff --git a/myworld1/members/migrations/0012_blog_recommended.py b/myworld1/members/migrations/0012_blog_recommended.py new file mode 100644 index 0000000..8b9c303 --- /dev/null +++ b/myworld1/members/migrations/0012_blog_recommended.py @@ -0,0 +1,19 @@ +# Generated by 
Django 4.2.1 on 2023-05-29 13:53 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0011_alter_blog_author_alter_blog_blog_time_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='blog', + name='recommended', + field=models.CharField(default=None, max_length=500), + preserve_default=False, + ), + ] diff --git a/myworld1/members/migrations/0013_job_jobstats_joblogs.py b/myworld1/members/migrations/0013_job_jobstats_joblogs.py new file mode 100644 index 0000000..9d42066 --- /dev/null +++ b/myworld1/members/migrations/0013_job_jobstats_joblogs.py @@ -0,0 +1,48 @@ +# Generated by Django 4.2.1 on 2023-09-26 09:55 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('members', '0012_blog_recommended'), + ] + + operations = [ + migrations.CreateModel( + name='Job', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('job_name', models.CharField(max_length=500)), + ('start_date', models.DateTimeField(null=True, verbose_name='Blog start date')), + ('end_date', models.DateTimeField(null=True, verbose_name='Blog end date')), + ('start_no', models.IntegerField(null=True, verbose_name='No of blogs to skip')), + ('no_of_blogs', models.IntegerField(null=True, verbose_name='No of blogs to extract')), + ('created_date', models.DateTimeField(auto_now_add=True, null=True, verbose_name='Job created date')), + ], + ), + migrations.CreateModel( + name='JobStats', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('status', models.CharField(max_length=50)), + ('total_blogs', models.IntegerField(null=True, verbose_name='Total blogs found')), + ('no_of_blogs_extracted', models.IntegerField(null=True, verbose_name='No of blogs extracted')), + ('start_date', 
models.DateTimeField(null=True, verbose_name='Extraction start date')), + ('end_date', models.DateTimeField(null=True, verbose_name='Extraction start date')), + ('job', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='members.job')), + ], + ), + migrations.CreateModel( + name='JobLogs', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('log', models.TextField(verbose_name='job logs')), + ('function_name', models.TextField(verbose_name='Function name')), + ('date', models.DateTimeField(auto_now_add=True, null=True, verbose_name='Log date')), + ('job_stats', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='members.jobstats')), + ], + ), + ] diff --git a/myworld1/members/migrations/__init__.py b/myworld1/members/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/myworld1/members/migrations/__pycache__/0001_initial.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0001_initial.cpython-310.pyc new file mode 100644 index 0000000..3dcb15a Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0001_initial.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0002_students_delete_members.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0002_students_delete_members.cpython-310.pyc new file mode 100644 index 0000000..e8a6d2d Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0002_students_delete_members.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0003_blog.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0003_blog.cpython-310.pyc new file mode 100644 index 0000000..e25eb97 Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0003_blog.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0004_blog_content.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0004_blog_content.cpython-310.pyc 
new file mode 100644 index 0000000..3e3b3a0 Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0004_blog_content.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0005_blog_path.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0005_blog_path.cpython-310.pyc new file mode 100644 index 0000000..959d33f Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0005_blog_path.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0006_alter_blog_path.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0006_alter_blog_path.cpython-310.pyc new file mode 100644 index 0000000..a841782 Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0006_alter_blog_path.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0007_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0007_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc new file mode 100644 index 0000000..24bf11f Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0007_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0008_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0008_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc new file mode 100644 index 0000000..b2e89f1 Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0008_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0009_alter_blog_path.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0009_alter_blog_path.cpython-310.pyc new file mode 100644 index 0000000..c3571da Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0009_alter_blog_path.cpython-310.pyc differ diff --git 
a/myworld1/members/migrations/__pycache__/0010_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0010_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc new file mode 100644 index 0000000..88d91bf Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0010_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0011_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0011_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc new file mode 100644 index 0000000..cf43569 Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0011_alter_blog_author_alter_blog_blog_time_and_more.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0012_blog_recommended.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0012_blog_recommended.cpython-310.pyc new file mode 100644 index 0000000..db337c2 Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0012_blog_recommended.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/0013_job_jobstats_joblogs.cpython-310.pyc b/myworld1/members/migrations/__pycache__/0013_job_jobstats_joblogs.cpython-310.pyc new file mode 100644 index 0000000..b5f43ad Binary files /dev/null and b/myworld1/members/migrations/__pycache__/0013_job_jobstats_joblogs.cpython-310.pyc differ diff --git a/myworld1/members/migrations/__pycache__/__init__.cpython-310.pyc b/myworld1/members/migrations/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..96bf2fb Binary files /dev/null and b/myworld1/members/migrations/__pycache__/__init__.cpython-310.pyc differ diff --git a/myworld1/members/models.py b/myworld1/members/models.py new file mode 100644 index 0000000..4162cc1 --- /dev/null +++ b/myworld1/members/models.py @@ -0,0 +1,64 @@ +from django.db import models 
class JobStats(models.Model):
    """Statistics recorded for one run of a ``Job``."""

    # Deleting the Job deletes its stats rows as well.
    job = models.ForeignKey(Job, on_delete=models.CASCADE)
    status = models.CharField(max_length=50)
    total_blogs = models.IntegerField(verbose_name="Total blogs found", null=True)
    no_of_blogs_extracted = models.IntegerField(verbose_name='No of blogs extracted', null=True)
    start_date = models.DateTimeField('Extraction start date', null=True)
    # NOTE(review): the verbose name below repeats "start date" and looks like
    # a copy-paste slip for "Extraction end date". It is left unchanged here
    # because altering it requires a matching migration.
    end_date = models.DateTimeField('Extraction start date', null=True)

    def __str__(self):
        """Return the related job's string form."""
        # __str__ must return a str; the original returned the Job instance
        # itself, which raises "TypeError: __str__ returned non-string".
        return str(self.job)
def _log_job_event(job_stats_obj, message):
    """Save one JobLogs row for the ``extract`` task."""
    JobLogs(job_stats=job_stats_obj, log=message, function_name="extract", date=datetime.datetime.now()).save()


@app.task(bind=True, name="extract")
def extract(self, job_id):
    """Run the blog extraction described by a ``Job`` and record its outcome.

    Creates a ``JobStats`` row in status "IN PROGRESS", scrapes
    https://blog.python.org/, saves one ``Blog`` per accepted post, and
    finally marks the stats row "COMPLETED" or "FAILED". Progress and errors
    are written to ``JobLogs``.

    Args:
        job_id: Primary key of the ``Job`` to run.

    Raises:
        Job.DoesNotExist: If no job has the given primary key. Errors during
            the extraction itself are caught and recorded, not raised.
    """
    job_obj = Job.objects.get(pk=job_id)
    job_stats_obj = JobStats(job=job_obj, status="IN PROGRESS", start_date=datetime.datetime.now(), no_of_blogs_extracted=0)
    job_stats_obj.save()
    _log_job_event(job_stats_obj, "Extraction stated")
    start_date = job_obj.start_date
    end_date = job_obj.end_date
    start_id = job_obj.start_no
    no_of_articles = job_obj.no_of_blogs
    url = "https://blog.python.org/"

    # Defined before the ``try`` so the failure path can always read it; the
    # original hit UnboundLocalError there when the fetch itself failed.
    total_found = 0
    try:
        # Time out and reject error responses so a broken fetch is recorded
        # as FAILED instead of hanging or "completing" with zero posts.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        page_soup = BeautifulSoup(response.text, 'html.parser')

        blogs = page_soup.select('div.date-outer')
        total_found = len(blogs)

        # Convert the numeric options once instead of on every iteration.
        first_position = int(start_id) if start_id else None
        limit = int(no_of_articles) if no_of_articles else None

        extracted = 0
        for article_count, blog in enumerate(blogs, start=1):
            if first_position and article_count < first_position:
                continue
            if limit is not None and extracted >= limit:
                # Limit reached: nothing further can be stored.
                break
            date = blog.select('.date-header span')[0].get_text()

            converted_date = parse(date)
            _log_job_event(job_stats_obj, f"Extracting {article_count}")
            if start_date and utc.localize(converted_date) < start_date:
                continue
            if end_date and utc.localize(converted_date) > end_date:
                continue

            post = blog.select('.post')[0]

            title_bar = post.select('.post-title')
            if title_bar:
                title = title_bar[0].text
            else:
                # No title element: fall back to the first node of the body.
                title = post.select('.post-body')[0].contents[0].text

            # getting the author and blog time
            post_footer = post.select('.post-footer')[0]
            author = post_footer.select('.post-author span')[0].text
            time = post_footer.select('abbr')[0].text

            # Store the parsed datetime: the raw header text is not in a
            # format DateTimeField accepts, so the original save failed.
            blog_obj = Blog(title=title, author=author, release_date=converted_date, blog_time=time)
            blog_obj.save()
            # Fixed: the original added the counter to itself, so it stayed 0.
            job_stats_obj.no_of_blogs_extracted += 1
            job_stats_obj.save()

            print("\nTitle:", title.strip('\n'))
            print("Date:", date, )
            print("Time:", time)
            print("Author:", author)
            extracted += 1
        # Report the real count (the original counter started at 1).
        _log_job_event(job_stats_obj, f"Total {extracted} articles extracted: ")
        final_status = "COMPLETED"
    except Exception as ex:
        # Task boundary: record the failure on the job instead of raising.
        _log_job_event(job_stats_obj, str(ex))
        final_status = "FAILED"

    job_stats_obj.end_date = datetime.datetime.now()
    job_stats_obj.total_blogs = total_found
    job_stats_obj.status = final_status
    job_stats_obj.save()
    _log_job_event(job_stats_obj, "Extraction Done")

Hello World!

+

Welcome to my first Django project!

# ---- myworld1/members/tests.py (new file in the diff) ----
from django.test import TestCase

# Create your tests here.

# ---- myworld1/members/urls.py (new file in the diff) ----
from django.urls import path
from . import views

# URL routes of the members app.
# Identical repeated entries were removed: Django dispatches to the first
# matching pattern, so exact duplicates can never be reached.
# NOTE(review): StudentView.get accepts rolno/branch keyword arguments but no
# route here captures them -- confirm whether converter routes were intended.
urlpatterns = [
    path('', views.index, name='index'),
    path('rest/student/', views.StudentView.as_view()),
    path('start_python_blog_scraping', views.python_blog_scrap, name='triger'),
    path('rest/blog/', views.BlogView.as_view()),
    # views.python_blog_scraping(request, job_id) requires job_id, so the
    # route has to capture it from the URL.
    path('python_blog_scraping/<int:job_id>', views.python_blog_scraping, name="scraping"),
]

# ---- myworld1/members/views.py (new file in the diff) ----
from django.shortcuts import render

# Create your views here.
from django.http import HttpResponse
from django.http import JsonResponse
from django.shortcuts import redirect
from django.template import loader
from django.utils.decorators import method_decorator
from django.views import View
from django.views.decorators.csrf import csrf_exempt

from members.tasks import extract
from . import apps
from .models import Students, Blog


def index(request):
    """Render the ``myfirst.html`` template with no context."""
    template = loader.get_template('myfirst.html')
    return HttpResponse(template.render())


@method_decorator(csrf_exempt, name='dispatch')
class BlogView(View):
    """CSRF-exempt endpoint that runs an extraction and lists stored blogs."""

    def post(self, request):
        """Run ``apps.start_extraction`` with the posted filters, then return all blogs.

        Reads ``start_date``, ``end_date``, ``no_of_articles`` and
        ``start_id`` from the form data (each ``None`` when absent).
        """
        # NOTE(review): apps.start_extraction is defined outside this file
        # section; its accepted keyword arguments are assumed -- confirm.
        apps.start_extraction(
            start_date=request.POST.get('start_date', None),
            end_date=request.POST.get('end_date', None),
            no_of_articles=request.POST.get('no_of_articles', None),
            start_id=request.POST.get('start_id', None),
        )

        blogs = [
            {
                "Title": blog.title,
                "Release Date": blog.release_date,
                "Author": blog.author,
                "Blog time": blog.blog_time,
            }
            for blog in Blog.objects.filter()
        ]
        return JsonResponse({'status': 'success', "Blogs": blogs}, status=200)


# NOTE(review): unlike BlogView this view is not csrf_exempt, so POST requests
# need a CSRF token -- confirm that is intended for this endpoint.
class StudentView(View):
    """List students filtered by roll number or branch, and create students."""

    # Every model column must be supplied when creating a student.
    _REQUIRED_FIELDS = ('first_name', 'last_name', 'address', 'roll_number', 'mobile', 'branch')

    def get(self, request, rolno=None, branch=None):
        """Return students matching ``rolno`` or, failing that, ``branch``.

        With neither argument the result list is empty.
        """
        # filter() is lazy and never raises DoesNotExist, so no try/except is
        # needed around building the queryset.
        if rolno:
            student_model_list = Students.objects.filter(roll_number=rolno)
        elif branch:
            student_model_list = Students.objects.filter(branch=branch)
        else:
            student_model_list = []

        students = [
            {
                "first_name": student.first_name,
                "last_name": student.last_name,
                "address": student.address,
                "roll_number": student.roll_number,
                "mobile": student.mobile,
                "branch": student.branch,
            }
            for student in student_model_list
        ]
        return JsonResponse({'status': 'success', "students": students}, status=200)

    def post(self, request):
        """Create a student from form data; every field is required.

        Returns HTTP 400 when a field is missing or empty: that is a client
        error, not a server failure. ``branch`` is checked too, because the
        model column does not allow NULL.
        """
        values = {name: request.POST.get(name) for name in self._REQUIRED_FIELDS}
        if not all(values.values()):
            return JsonResponse({'status': 'failed', "message": "all fields required"}, status=400)

        Students.objects.create(**values)
        # NOTE(review): 'sucess' is misspelled but left unchanged because
        # clients may compare against this exact value.
        return JsonResponse({'status': 'sucess'}, status=200)


def python_blog_scrap(request):
    """Run ``apps.start_extraction`` synchronously and report success."""
    apps.start_extraction()
    return JsonResponse({'status': 'sucess', "message": "Extracted and populated the table."}, status=200)


def python_blog_scraping(request, job_id):
    """Queue the ``extract`` Celery task for ``job_id`` and redirect to the Job admin list."""
    extract.delay(job_id)
    return redirect('/admin/members/job/')


# ---- myworld1/myworld1/__init__.py (new file in the diff) ----
# This will make sure the app is always imported when
# Django starts so that shared_task will use this app.
from .celery import app as celery_app

__all__ = ('celery_app',)

# (The diff lists binary __pycache__/*.cpython-310.pyc files here; they carry no source text.)

# ---- myworld1/myworld1/asgi.py (new file in the diff) ----
"""
ASGI config for myworld1 project.

It exposes the ASGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/
"""

import os

from django.core.asgi import get_asgi_application

# Only sets the variable when it is not already present in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myworld1.settings')

application = get_asgi_application()

# ---- myworld1/myworld1/celery.py (new file in the diff) ----
import os
from celery import Celery

# Set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myworld1.settings')

app = Celery('myworld1')

# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
#   should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')

# Load task modules from all registered Django apps.
app.autodiscover_tasks()

# Periodic schedule: runs the task registered under the name 'extract' with
# the single positional argument 1.
# NOTE(review): the entry is named "ten-seconds" but 'schedule' is 60.0 --
# confirm which interval is intended.
app.conf.beat_schedule = {
    #Scheduler Name
    'run-task-ten-seconds': {
        # Task Name (Name Specified in Decorator)
        'task': 'extract',
        # Schedule
        'schedule': 60.0,
        # Function Arguments
        'args': (1,)
    }
}


@app.task(bind=True)
def debug_task(self):
    # Prints the repr of the bound task's request object.
    print(f'Request: {self.request!r}')


# ---- myworld1/myworld1/settings.py (new file in the diff) ----
"""
Django settings for myworld1 project.

Generated by 'django-admin startproject' using Django 4.2.

For more information on this file, see
https://docs.djangoproject.com/en/4.2/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/4.2/ref/settings/
"""

from pathlib import Path
import os

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent


# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# Read from the environment so a deployment can supply its own key without
# editing this file; the fallback is the development key.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    'django-insecure-di5t*_$2r$6+4u_j6%)04vpc^49&428z6ru^dnlcc@*%u#ar_b',
)

# SECURITY WARNING: don't run with debug turned on in production!
# Defaults to True (development); set DJANGO_DEBUG to e.g. "false" to disable.
DEBUG = os.environ.get('DJANGO_DEBUG', 'True').strip().lower() in ('1', 'true', 'yes', 'on')

ALLOWED_HOSTS = [
    '0.0.0.0',
    'localhost',
]


# Application definition

INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'members.apps.MembersConfig',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'myworld1.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'myworld1.wsgi.application'


# Database
# https://docs.djangoproject.com/en/4.2/ref/settings/#databases

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': 'member_db',
        'USER': 'postgres',
        # Overridable through the environment; the fallback keeps the
        # existing development password.
        'PASSWORD': os.environ.get('DB_PASSWORD', '123456'),
        'HOST': 'psql-db',
        'PORT': 5432,
    }
}


# Password validation
# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
    {
        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]


# Internationalization
# https://docs.djangoproject.com/en/4.2/topics/i18n/

LANGUAGE_CODE = 'en-us'

TIME_ZONE = 'UTC'

USE_I18N = True

USE_TZ = True


# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/4.2/howto/static-files/

STATIC_URL = 'static/'

# Default primary key field type
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field

DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'



CELERY_TASK_SERIALIZER = 'json'
CELERY_RESULT_SERIALIZER = 'json'
# NOTE(review): differs from TIME_ZONE ('UTC') above -- confirm this is intentional.
CELERY_TIMEZONE = 'America/Los_Angeles'
# This configures rabbitmq as the datastore between Django + Celery
# Each os.environ[...] lookup raises KeyError when its variable is unset, so
# all five variables must be defined before this module is imported.
CELERY_BROKER_URL = 'amqp://{0}:{1}@{2}:{3}/{4}'.format(
    os.environ["RABBITMQ_DEFAULT_USER"], os.environ["RABBITMQ_DEFAULT_PASS"],
    os.environ["BROKER_HOST"], os.environ["BROKER_PORT"],
    os.environ["RABBITMQ_DEFAULT_VHOST"])

# ---- myworld1/myworld1/urls.py (new file in the diff) ----
"""
URL configuration for myworld1 project.

The `urlpatterns` list routes URLs to views. For more information please see:
    https://docs.djangoproject.com/en/4.2/topics/http/urls/
Examples:
Function views
    1. Add an import:  from my_app import views
    2. Add a URL to urlpatterns:  path('', views.home, name='home')
Class-based views
    1. Add an import:  from other_app.views import Home
    2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
Including another URLconf
    1. Import the include() function: from django.urls import include, path
    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import include, path

# Project-level routes: the members app is mounted under 'members/' and the
# Django admin site under 'admin/'.
urlpatterns = [
    path('members/', include('members.urls')),
    path('admin/', admin.site.urls),
]

# ---- myworld1/myworld1/wsgi.py (new file in the diff) ----
"""
WSGI config for myworld1 project.

It exposes the WSGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

# Only sets the variable when it is not already present in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'myworld1.settings')

application = get_wsgi_application()

# ---- myworld1/scrapped_data.html (new file in the diff; its text follows) ----
+ +

+Python 3.12.0 beta 1 released +

+
+
+
+
+

I'm pleased to announce the release of Python 3.12 beta 1 (and feature freeze for Python 3.12).

https://www.python.org/downloads/release/python-3120b1/

This is a beta preview of Python 3.12


Python 3.12 is still in development. This release, 3.12.0b1, is the first of four planned beta release previews of 3.12.

Beta release previews are intended to give the wider community the opportunity to test new features and bug fixes and to prepare their projects to support the new feature release.

We strongly encourage maintainers of third-party Python projects to test with 3.12 during the beta phase and report issues found to the Python bug tracker (Issues · python/cpython · GitHub) as soon as possible. While the release is planned to be feature complete entering the beta phase, it is possible that features may be modified or, in rare cases, deleted up until the start of the release candidate phase (Monday, 2023-07-31). Our goal is to have no ABI changes after beta 4 and as few code changes as possible after 3.12.0rc1, the first release candidate. To achieve that, it will be extremely important to get as much exposure for 3.12 as possible during the beta phase.

Please keep in mind that this is a preview release and its use is not recommended for production environments.


Major new features of the 3.12 series, compared to 3.11

Some of the major new features and changes in Python 3.12 are:

  • New type annotation syntax for generic classes (PEP 695).
  • More flexible f-string parsing, allowing many things previously disallowed (PEP 701).
  • Even more improved error messages. More exceptions potentially caused by typos now make suggestions to the user.
  • Many large and small performance improvements (like PEP 709).
  • Support for the Linux perf profiler to report Python function names in traces.
  • The deprecated wstr and wstr_length members of the C implementation of unicode objects were removed, per PEP 623.
  • In the unittest module, a number of long deprecated methods and classes were removed. (They had been deprecated since Python 3.1 or 3.2).
  • The deprecated smtpd and distutils modules have been removed (see PEP 594 and PEP 632). The setuptools package (installed by default in virtualenvs and many other places) continues to provide the distutils module.
  • A number of other old, broken and deprecated functions, classes and methods have been removed.
  • Invalid backslash escape sequences in strings now warn with SyntaxWarning instead of DeprecationWarning, making them more visible. (They will become syntax errors in the future.)
  • The internal representation of integers has changed in preparation for performance enhancements. (This should not affect most users as it is an internal detail, but it may cause problems for Cython-generated code.)
  • (Hey, fellow core developer, if a feature you find important is missing from this list, let Thomas know.)

For more details on the changes to Python 3.12, see What’s new in Python 3.12. The next pre-release of Python 3.12 will be 3.12.0b2, currently scheduled for 2023-05-29.


More resources


PEP 693, the Python 3.12 Release Schedule.
Report bugs via GitHub Issues.


And now for something completely different

As the first beta release marks the point at which we fork off the release branch from the main development branch, here’s a poem about forks in the road.

Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;

Then took the other, as just as fair,
And having perhaps the better claim,
Because it was grassy and wanted wear;
Though as for that the passing there
Had worn them really about the same,

And both that morning equally lay
In leaves, no step had trodden black.
Oh, I kept the first for another day!
Yet knowing how way leads on to way,
I doubted if I should ever come back.

I shall be telling this with a sigh
Somewhere ages and ages hence:
Two roads diverged in a wood, and I —
I took the one less traveled by,
And that has made all the difference.

The Road Not Taken, by Robert Frost.

Enjoy the new release


Thanks to all of the many volunteers who help make Python Development and these releases possible! Please consider supporting our efforts by volunteering yourself or through organization contributions to the Python Software Foundation.

Your release team,
Thomas Wouters
Ned Deily
Steve Dower
+
+
+ +
# ---- myworld1/web_scrapper.py (new file in the diff) ----
import psycopg2
import requests
import re
from bs4 import BeautifulSoup, element
import os

# For the credentials mentioned below, you may refer the docker-compose.yml present in myworld .
db_name = 'member_db'
db_user = 'postgres'
db_pass = '123456'
db_host = 'psql-db'
db_port = '5432'

# This will create the connection to the postgres database.
# The connection is opened at import time and shared by the functions below.
conn = psycopg2.connect(dbname=db_name, user=db_user, password=db_pass, host=db_host, port=db_port)


def add_row_to_blog(title, author, date, time, content, recommended, path):
    """Insert one row into ``members_blog``.

    ``date`` and ``time`` are cast by the database (``::DATE`` / ``::TIME``);
    ``created_date`` is set to ``NOW()``. U+202F characters are stripped
    from ``time`` before the insert.
    """
    sql = """INSERT INTO members_blog (title, release_date, blog_time, created_date,content, author,recommended,path) VALUES (%s, %s::DATE, %s::TIME, NOW(),%s,%s,%s,%s)"""

    # Use a separate local instead of rebinding the parameter.
    cleaned_time = time.replace('\u202f', "")
    # ``with conn`` wraps the statement in a transaction.
    with conn:
        with conn.cursor() as curs:
            curs.execute(sql, (title, date, cleaned_time, content, author, recommended, path))


def truncate_table():
    """Delete every existing row of ``members_blog`` (``TRUNCATE ... CASCADE``)."""
    with conn:
        with conn.cursor() as curs:
            curs.execute("TRUNCATE members_blog CASCADE;")


def start_extraction():
    """Scrape https://blog.python.org/ into ``members_blog`` and ``scrapped_data.html``.

    The page is downloaded and parsed before the table is truncated, so a
    failed download leaves the existing rows untouched. Each post's HTML is
    appended to ``scrapped_data.html`` in the current working directory.

    Raises:
        requests.RequestException: If the page cannot be fetched or the
            server answers with an error status.
    """
    print("Extraction started")
    url = "https://blog.python.org/"

    # Fetch first: truncating before a failed download would wipe the table.
    data = requests.get(url, timeout=30)
    data.raise_for_status()
    page_soup = BeautifulSoup(data.text, 'html.parser')

    # Getting all the articles
    blogs = page_soup.select('div.date-outer')

    # Each time when we add new entry we delete the existing entries.
    truncate_table()

    path = os.path.abspath("scrapped_data.html")

    # The context manager closes the file even when a post fails to parse.
    with open("scrapped_data.html", "w", encoding="utf-8") as fp:
        for blog in blogs:
            # loop through each article
            date = blog.select('.date-header span')[0].get_text()

            post = blog.select('.post')[0]
            fp.write(str(post))

            content = post.select('.post-body')[0].get_text()
            recommended = post.select('.post-share-buttons')[0].get_text()

            title_bar = post.select('.post-title')
            if title_bar:
                title = title_bar[0].text
            else:
                # Posts without a title element fall back to the first body node.
                title = post.select('.post-body')[0].contents[0].text

            # getting the author and blog time
            post_footer = post.select('.post-footer')[0]
            author = post_footer.select('.post-author span')[0].text
            time = post_footer.select('abbr')[0].text

            # Inserting data into database
            add_row_to_blog(title, author, date, time, content, recommended, path)

            print("\nTitle:", title.strip('\n'))
            print("Date:", date, )
            print("Time:", time)
            print("Author:", author)
            print("Content:", content)
            print("recommended: ", recommended)

            print(
                "\n---------------------------------------------------------------------------------------------------------------\n")


if __name__ == "__main__":
    start_extraction()

# ---- test_celery/__init__.py (new empty file in the diff) ----
# (The diff lists binary test_celery/__pycache__/*.cpython-310.pyc files here; they carry no source text.)
b/test_celery/__pycache__/celery.cpython-310.pyc differ diff --git a/test_celery/__pycache__/run_tasks.cpython-310.pyc b/test_celery/__pycache__/run_tasks.cpython-310.pyc new file mode 100644 index 0000000..3e5349d Binary files /dev/null and b/test_celery/__pycache__/run_tasks.cpython-310.pyc differ diff --git a/test_celery/__pycache__/tasks.cpython-310.pyc b/test_celery/__pycache__/tasks.cpython-310.pyc new file mode 100644 index 0000000..82231f4 Binary files /dev/null and b/test_celery/__pycache__/tasks.cpython-310.pyc differ diff --git a/test_celery/celery.py b/test_celery/celery.py new file mode 100644 index 0000000..a5041f9 --- /dev/null +++ b/test_celery/celery.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import +from celery import Celery + +app = Celery('test_celery', + broker='amqp://jimmy:jimmy123@localhost/jimmy_vhost', + backend='rpc://', + include=['test_celery.tasks']) + + diff --git a/test_celery/run_tasks.py b/test_celery/run_tasks.py new file mode 100644 index 0000000..24a1442 --- /dev/null +++ b/test_celery/run_tasks.py @@ -0,0 +1,13 @@ +from .tasks import longtime_add +import time + +if __name__ == '__main__': + result = longtime_add.delay(1,2) + # at this time, our task is not finished, so it will return False + print ('Task finished? ', result.ready()) + print ('Task result: ', result.result) + # sleep 10 seconds to ensure the task has been finished + time.sleep(10) + # now the task should be finished and ready method will return True + print ('Task finished? ', result.ready()) + print ('Task result: ', result.result) diff --git a/test_celery/tasks.py b/test_celery/tasks.py new file mode 100644 index 0000000..88f2f1e --- /dev/null +++ b/test_celery/tasks.py @@ -0,0 +1,11 @@ +from __future__ import absolute_import +from test_celery.celery import app +import time + +@app.task +def longtime_add(x, y): + print ('long time task begins') + # sleep 5 seconds + time.sleep(5) + print ('long time task finished') + return x + y