From d7ed879bd14b69329b02a845072e122e54f1790f Mon Sep 17 00:00:00 2001 From: Stefano Gerli Date: Sun, 5 Mar 2023 01:07:03 +0000 Subject: [PATCH 1/3] feat: add itcr parser --- courses/serializers.py | 1 + parsing/schools/active | 1 + parsing/schools/itcr/__init__.py | 14 ++ parsing/schools/itcr/config.json | 36 +++++ parsing/schools/itcr/courses.py | 196 ++++++++++++++++++++++++++ parsing/schools/itcr/data/.gitkeep | 0 parsing/schools/itcr/logs/.gitkeep | 0 static/js/redux/constants/schools.jsx | 18 +++ 8 files changed, 266 insertions(+) create mode 100644 parsing/schools/itcr/__init__.py create mode 100644 parsing/schools/itcr/config.json create mode 100644 parsing/schools/itcr/courses.py create mode 100644 parsing/schools/itcr/data/.gitkeep create mode 100644 parsing/schools/itcr/logs/.gitkeep diff --git a/courses/serializers.py b/courses/serializers.py index 33b8689083..f3ac2d1ac3 100644 --- a/courses/serializers.py +++ b/courses/serializers.py @@ -119,6 +119,7 @@ def get_regexed_courses(self, course): "umich": r"([A-Z]{2,8}\s\d{3})", "chapman": r"([A-Z]{2,4}\s\d{3})", "salisbury": r"([A-Z]{3,4} \d{2,3})", + "itcr": r"([A-Z]{2}\d{4})" } course_code_to_name = {} if self.context["school"] in school_to_course_regex: diff --git a/parsing/schools/active b/parsing/schools/active index 8d872b2807..8f8181e071 100644 --- a/parsing/schools/active +++ b/parsing/schools/active @@ -7,3 +7,4 @@ umd umich uoft vandy +itcr \ No newline at end of file diff --git a/parsing/schools/itcr/__init__.py b/parsing/schools/itcr/__init__.py new file mode 100644 index 0000000000..e4c9c3323a --- /dev/null +++ b/parsing/schools/itcr/__init__.py @@ -0,0 +1,14 @@ +# Copyright (C) 2023 Semester.ly Technologies, LLC +# +# Semester.ly is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Semester.ly is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +from parsing.schools import load_school_logger +load_school_logger('itcr') diff --git a/parsing/schools/itcr/config.json b/parsing/schools/itcr/config.json new file mode 100644 index 0000000000..03a85769c4 --- /dev/null +++ b/parsing/schools/itcr/config.json @@ -0,0 +1,36 @@ +{ + "school": { + "code": "itcr", + "name": "Instituto Tecnológico de Costa Rica" + }, + "course_code_regex": "([A-Z]{2}\\d{4})$", + "terms": [ + "1", + "2", + "V" + ], + "granularity": 5, + "ampm": false, + "full_academic_year_registration": false, + "single_access": false, + "active_semesters": { + "2023": [ + "1" + ], + "2022": [ + "V", + "2", + "1" + ], + "2021": [ + "V", + "2", + "1" + ], + "2020": [ + "2", + "1" + ] + }, + "registrar": false +} \ No newline at end of file diff --git a/parsing/schools/itcr/courses.py b/parsing/schools/itcr/courses.py new file mode 100644 index 0000000000..2e534127cf --- /dev/null +++ b/parsing/schools/itcr/courses.py @@ -0,0 +1,196 @@ +# Copyright (C) 2023 Semester.ly Technologies, LLC +# +# Semester.ly is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Semester.ly is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + +import json + + +from parsing.library.base_parser import BaseParser +from parsing.library.utils import dict_filter_by_dict +from datetime import datetime + + +import urllib3 +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + +class Parser(BaseParser): + """TEC course parser. + + Attributes: + API_URL (str): Description + DAY_MAP (TYPE): Description + last_course (dict): Description + schools (list): Description + semester (TYPE): Description + verbosity (TYPE): Description + """ + + API_URL = 'https://tec-appsext.itcr.ac.cr/guiahorarios/escuela.aspx/' + DAY_MAP = { + 'LUNES': 'M', + 'MARTES': 'T', + 'MIERCOLES': 'W', + 'JUEVES': 'R', + 'VIERNES': 'F', + 'SABADO': 'S', + 'DOMINGO': 'U' + } + + def __new__(cls, *args, **kwargs): + """Set static variables within closure. + + Returns: + Parser + """ + new_instance = object.__new__(cls) + return new_instance + + def __init__(self, **kwargs): + """Construct itcr parser object.""" + self.schools = [] + self.last_course = {} + super(Parser, self).__init__('itcr', **kwargs) + + def _get_schools(self): + headers = { + 'Content-Type': 'application/json' + } + request = self.requester.post( + Parser.API_URL + 'cargaEscuelas', data="{}", headers=headers, verify=False) + self.schools = json.loads(request['d']) + + def _get_courses(self, school): + headers = { + 'Content-Type': 'application/json' + } + payload = json.dumps( + {'escuela': school['IDE_DEPTO'], 'ano': self.year}) + request = self.requester.post( + Parser.API_URL + 'getdatosEscuelaAno', data=payload, headers=headers, verify=False) + try: + data = json.loads(request['d']) + return data + except: + return [] + + def _parse_schools(self): + for school in self.schools: + self._parse_school(school) + + def _parse_school(self, school): + courses = self._get_courses(school) + if self.term.isdigit(): + courses = [course for course in courses if ( + course['IDE_MODALIDAD'] == "S" and course["IDE_PER_MOD"] == int(self.term))] + elif self.term == "V": + courses = [course for course in courses if ( + course['IDE_MODALIDAD'] == "V" and course["IDE_PER_MOD"] == 1)] + else: + courses = [] + + sections = self._parse_sections(courses) + for courseCode in sections: + course = sections[courseCode] + self._load_ingestor(course[0], course) + + def _parse_sections(self, courses): + res = {} + for course in courses: + section_code = course['IDE_MATERIA'] + str(course['IDE_GRUPO']) + if res.get(section_code, None) is None: + res[section_code] = [] + res[section_code].append(course) + + return res + + def _load_ingestor(self, course, section): + try: + num_credits = float(course['CAN_CREDITOS']) + except: + num_credits = 0 + + # Load core course fields + self.ingestor['name'] = course['DSC_MATERIA'] + self.ingestor['description'] = '' + self.ingestor['code'] = course['IDE_MATERIA'] + self.ingestor['num_credits'] = num_credits + self.ingestor['department_name'] = course['DSC_DEPTO'] + self.ingestor['campus'] = course['DSC_SEDE'] + + created_course = self.ingestor.ingest_course() + + if self.last_course \ + and created_course['code'] == course['IDE_MATERIA'] \ + and created_course['name'] != course['DSC_MATERIA']: + self.ingestor['section_name'] = course['IDE_MATERIA'] + self.last_course = created_course + + for meeting in section: + # Load core section fields + self.ingestor['section_code'] = str(meeting["IDE_GRUPO"]) + self.ingestor['instrs'] = meeting["NOM_PROFESOR"] + + self.ingestor['section_type'] = meeting["TIPO_CURSO"] + + # We have no data on the capacity + self.ingestor['size'] = 1 + self.ingestor['enrollment'] = 0 + self.ingestor['waitlist'] = 0 + + created_section = self.ingestor.ingest_section(created_course) + + # Theres no real way to get this data from the current api + # so for now just filling with the current date + self.ingestor["date_start"] = datetime.now() + self.ingestor["date_end"] = datetime.now() + + self.ingestor['time_start'] = meeting['HINICIO'] + self.ingestor['time_end'] = meeting['HFIN'] + self.ingestor['days'] = [ + Parser.DAY_MAP.get(meeting['NOM_DIA'], '') + ] + course_campus = f'{meeting["DSC_SEDE"]} ({meeting["TIPO_CURSO"]})' + self.ingestor['location'] = { + 'campus': course_campus, + 'building': course_campus, + 'room': '' + } + self.ingestor.ingest_meeting(created_section) + + def start( + self, + verbosity=3, + textbooks=False, + departments_filter=None, + years_and_terms_filter=None + ): + """Start parse.""" + self.verbosity = verbosity + + # Default to hardcoded current year. + years = {'2023', '2022', '2021', '2020'} + terms = {'1', '2', 'V'} + + years_and_terms = dict_filter_by_dict( + {year: [term for term in terms] for year in years}, + years_and_terms_filter + ) + + for year, terms in list(years_and_terms.items()): + self.ingestor['year'] = year + self.year = year + for term in terms: + self.ingestor['term'] = term + self.term = term + self._get_schools() + self._parse_schools() diff --git a/parsing/schools/itcr/data/.gitkeep b/parsing/schools/itcr/data/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/parsing/schools/itcr/logs/.gitkeep b/parsing/schools/itcr/logs/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/static/js/redux/constants/schools.jsx b/static/js/redux/constants/schools.jsx index 88f6393607..38b9a04ee8 100644 --- a/static/js/redux/constants/schools.jsx +++ b/static/js/redux/constants/schools.jsx @@ -23,6 +23,7 @@ export const VALID_SCHOOLS = [ "umich", "chapman", "salisbury", + "itcr" ]; export const getSchoolSpecificInfo = (school) => { @@ -148,6 +149,23 @@ export const getSchoolSpecificInfo = (school) => { 1: "", }, }; + case 'itcr': + return { + primaryDisplay: 'name', + areasName: 'Areas', + departmentsName: 'Deparments', + levelsName: 'Levels', + timesName: 'Times', + courseRegex: '([A-Z]{2}\\d{4})', + campuses: { + 'CAMPUS TECNOLOGICO CENTRAL CARTAGO': 'CAMPUS TECNOLOGICO CENTRAL CARTAGO', + 'CENTRO ACADEMICO DE LIMON': 'CENTRO ACADEMICO DE LIMON', + 'CENTRO ACADEMICO DE ALAJUELA': 'CENTRO ACADEMICO DE ALAJUELA', + 'CAMPUS TECNOLOGICO LOCAL SAN JOSE': 'CAMPUS TECNOLOGICO LOCAL SAN JOSE', + 'CAMPUS TECNOLOGICO LOCAL SAN CARLOS': 'CAMPUS TECNOLOGICO LOCAL SAN CARLOS' + + }, + }; default: return { primaryDisplay: "code", From d3ec30a9de09263c792a9bb32d2ef62807b48c23 Mon Sep 17 00:00:00 2001 From: Stefano Gerli Date: Sun, 5 Mar 2023 19:56:59 +0000 Subject: [PATCH 2/3] fix: prettier linter issues --- static/js/redux/constants/schools.jsx | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/static/js/redux/constants/schools.jsx b/static/js/redux/constants/schools.jsx index 38b9a04ee8..6dce80aaea 100644 --- a/static/js/redux/constants/schools.jsx +++ b/static/js/redux/constants/schools.jsx @@ -23,7 +23,7 @@ export const VALID_SCHOOLS = [ "umich", "chapman", "salisbury", - "itcr" + "itcr", ]; export const getSchoolSpecificInfo = (school) => { @@ -149,21 +149,20 @@ export const getSchoolSpecificInfo = (school) => { 1: "", }, }; - case 'itcr': + case "itcr": return { - primaryDisplay: 'name', - areasName: 'Areas', - departmentsName: 'Deparments', - levelsName: 'Levels', - timesName: 'Times', - courseRegex: '([A-Z]{2}\\d{4})', + primaryDisplay: "name", + areasName: "Areas", + departmentsName: "Deparments", + levelsName: "Levels", + timesName: "Times", + courseRegex: "([A-Z]{2}\\d{4})", campuses: { - 'CAMPUS TECNOLOGICO CENTRAL CARTAGO': 'CAMPUS TECNOLOGICO CENTRAL CARTAGO', - 'CENTRO ACADEMICO DE LIMON': 'CENTRO ACADEMICO DE LIMON', - 'CENTRO ACADEMICO DE ALAJUELA': 'CENTRO ACADEMICO DE ALAJUELA', - 'CAMPUS TECNOLOGICO LOCAL SAN JOSE': 'CAMPUS TECNOLOGICO LOCAL SAN JOSE', - 'CAMPUS TECNOLOGICO LOCAL SAN CARLOS': 'CAMPUS TECNOLOGICO LOCAL SAN CARLOS' - + "CAMPUS TECNOLOGICO CENTRAL CARTAGO": "CAMPUS TECNOLOGICO CENTRAL CARTAGO", + "CENTRO ACADEMICO DE LIMON": "CENTRO ACADEMICO DE LIMON", + "CENTRO ACADEMICO DE ALAJUELA": "CENTRO ACADEMICO DE ALAJUELA", + "CAMPUS TECNOLOGICO LOCAL SAN JOSE": "CAMPUS TECNOLOGICO LOCAL SAN JOSE", + "CAMPUS TECNOLOGICO LOCAL SAN CARLOS": "CAMPUS TECNOLOGICO LOCAL SAN CARLOS", }, }; default: From 082ad7e3ac1715a634c98fbd48a5f50d73fabe03 Mon Sep 17 00:00:00 2001 From: Stefano Gerli Date: Sun, 5 Mar 2023 20:09:38 +0000 Subject: [PATCH 3/3] fix: python lint issues --- courses/serializers.py | 2 +- parsing/schools/itcr/__init__.py | 3 +- parsing/schools/itcr/courses.py | 137 +++++++++++++++++-------------- 3 files changed, 77 insertions(+), 65 deletions(-) diff --git a/courses/serializers.py b/courses/serializers.py index f3ac2d1ac3..36b267666a 100644 --- a/courses/serializers.py +++ b/courses/serializers.py @@ -119,7 +119,7 @@ def get_regexed_courses(self, course): "umich": r"([A-Z]{2,8}\s\d{3})", "chapman": r"([A-Z]{2,4}\s\d{3})", "salisbury": r"([A-Z]{3,4} \d{2,3})", - "itcr": r"([A-Z]{2}\d{4})" + "itcr": r"([A-Z]{2}\d{4})", } course_code_to_name = {} if self.context["school"] in school_to_course_regex: diff --git a/parsing/schools/itcr/__init__.py b/parsing/schools/itcr/__init__.py index e4c9c3323a..44d9c9c76f 100644 --- a/parsing/schools/itcr/__init__.py +++ b/parsing/schools/itcr/__init__.py @@ -11,4 +11,5 @@ # GNU General Public License for more details. from parsing.schools import load_school_logger -load_school_logger('itcr') + +load_school_logger("itcr") diff --git a/parsing/schools/itcr/courses.py b/parsing/schools/itcr/courses.py index 2e534127cf..497b612e5d 100644 --- a/parsing/schools/itcr/courses.py +++ b/parsing/schools/itcr/courses.py @@ -20,6 +20,7 @@ import urllib3 + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) @@ -35,15 +36,15 @@ class Parser(BaseParser): verbosity (TYPE): Description """ - API_URL = 'https://tec-appsext.itcr.ac.cr/guiahorarios/escuela.aspx/' + API_URL = "https://tec-appsext.itcr.ac.cr/guiahorarios/escuela.aspx/" DAY_MAP = { - 'LUNES': 'M', - 'MARTES': 'T', - 'MIERCOLES': 'W', - 'JUEVES': 'R', - 'VIERNES': 'F', - 'SABADO': 'S', - 'DOMINGO': 'U' + "LUNES": "M", + "MARTES": "T", + "MIERCOLES": "W", + "JUEVES": "R", + "VIERNES": "F", + "SABADO": "S", + "DOMINGO": "U", } def __new__(cls, *args, **kwargs): @@ -52,35 +53,37 @@ def __new__(cls, *args, **kwargs): Returns: Parser """ - new_instance = object.__new__(cls) - return new_instance + return object.__new__(cls) def __init__(self, **kwargs): """Construct itcr parser object.""" self.schools = [] self.last_course = {} - super(Parser, self).__init__('itcr', **kwargs) + super(Parser, self).__init__("itcr", **kwargs) def _get_schools(self): - headers = { - 'Content-Type': 'application/json' - } + headers = {"Content-Type": "application/json"} request = self.requester.post( - Parser.API_URL + 'cargaEscuelas', data="{}", headers=headers, verify=False) - self.schools = json.loads(request['d']) + f"{Parser.API_URL}cargaEscuelas", + data="{}", + headers=headers, + verify=False, + ) + self.schools = json.loads(request["d"]) def _get_courses(self, school): - headers = { - 'Content-Type': 'application/json' - } - payload = json.dumps( - {'escuela': school['IDE_DEPTO'], 'ano': self.year}) + headers = {"Content-Type": "application/json"} + payload = json.dumps({"escuela": school["IDE_DEPTO"], "ano": self.year}) request = self.requester.post( - Parser.API_URL + 'getdatosEscuelaAno', data=payload, headers=headers, verify=False) + f"{Parser.API_URL}getdatosEscuelaAno", + data=payload, + headers=headers, + verify=False, + ) try: - data = json.loads(request['d']) + data = json.loads(request["d"]) return data - except: + except Exception: return [] def _parse_schools(self): @@ -90,11 +93,20 @@ def _parse_schools(self): def _parse_school(self, school): courses = self._get_courses(school) if self.term.isdigit(): - courses = [course for course in courses if ( - course['IDE_MODALIDAD'] == "S" and course["IDE_PER_MOD"] == int(self.term))] + courses = [ + course + for course in courses + if ( + course["IDE_MODALIDAD"] == "S" + and course["IDE_PER_MOD"] == int(self.term) + ) + ] elif self.term == "V": - courses = [course for course in courses if ( - course['IDE_MODALIDAD'] == "V" and course["IDE_PER_MOD"] == 1)] + courses = [ + course + for course in courses + if (course["IDE_MODALIDAD"] == "V" and course["IDE_PER_MOD"] == 1) + ] else: courses = [] @@ -106,7 +118,7 @@ def _parse_school(self, school): def _parse_sections(self, courses): res = {} for course in courses: - section_code = course['IDE_MATERIA'] + str(course['IDE_GRUPO']) + section_code = course["IDE_MATERIA"] + str(course["IDE_GRUPO"]) if res.get(section_code, None) is None: res[section_code] = [] res[section_code].append(course) @@ -115,37 +127,39 @@ def _parse_sections(self, courses): def _load_ingestor(self, course, section): try: - num_credits = float(course['CAN_CREDITOS']) - except: + num_credits = float(course["CAN_CREDITOS"]) + except Exception: num_credits = 0 # Load core course fields - self.ingestor['name'] = course['DSC_MATERIA'] - self.ingestor['description'] = '' - self.ingestor['code'] = course['IDE_MATERIA'] - self.ingestor['num_credits'] = num_credits - self.ingestor['department_name'] = course['DSC_DEPTO'] - self.ingestor['campus'] = course['DSC_SEDE'] + self.ingestor["name"] = course["DSC_MATERIA"] + self.ingestor["description"] = "" + self.ingestor["code"] = course["IDE_MATERIA"] + self.ingestor["num_credits"] = num_credits + self.ingestor["department_name"] = course["DSC_DEPTO"] + self.ingestor["campus"] = course["DSC_SEDE"] created_course = self.ingestor.ingest_course() - if self.last_course \ - and created_course['code'] == course['IDE_MATERIA'] \ - and created_course['name'] != course['DSC_MATERIA']: - self.ingestor['section_name'] = course['IDE_MATERIA'] + if ( + self.last_course + and created_course["code"] == course["IDE_MATERIA"] + and created_course["name"] != course["DSC_MATERIA"] + ): + self.ingestor["section_name"] = course["IDE_MATERIA"] self.last_course = created_course for meeting in section: # Load core section fields - self.ingestor['section_code'] = str(meeting["IDE_GRUPO"]) - self.ingestor['instrs'] = meeting["NOM_PROFESOR"] + self.ingestor["section_code"] = str(meeting["IDE_GRUPO"]) + self.ingestor["instrs"] = meeting["NOM_PROFESOR"] - self.ingestor['section_type'] = meeting["TIPO_CURSO"] + self.ingestor["section_type"] = meeting["TIPO_CURSO"] # We have no data on the capacity - self.ingestor['size'] = 1 - self.ingestor['enrollment'] = 0 - self.ingestor['waitlist'] = 0 + self.ingestor["size"] = 1 + self.ingestor["enrollment"] = 0 + self.ingestor["waitlist"] = 0 created_section = self.ingestor.ingest_section(created_course) @@ -154,16 +168,14 @@ def _load_ingestor(self, course, section): self.ingestor["date_start"] = datetime.now() self.ingestor["date_end"] = datetime.now() - self.ingestor['time_start'] = meeting['HINICIO'] - self.ingestor['time_end'] = meeting['HFIN'] - self.ingestor['days'] = [ - Parser.DAY_MAP.get(meeting['NOM_DIA'], '') - ] + self.ingestor["time_start"] = meeting["HINICIO"] + self.ingestor["time_end"] = meeting["HFIN"] + self.ingestor["days"] = [Parser.DAY_MAP.get(meeting["NOM_DIA"], "")] course_campus = f'{meeting["DSC_SEDE"]} ({meeting["TIPO_CURSO"]})' - self.ingestor['location'] = { - 'campus': course_campus, - 'building': course_campus, - 'room': '' + self.ingestor["location"] = { + "campus": course_campus, + "building": course_campus, + "room": "", } self.ingestor.ingest_meeting(created_section) @@ -172,25 +184,24 @@ def start( verbosity=3, textbooks=False, departments_filter=None, - years_and_terms_filter=None + years_and_terms_filter=None, ): """Start parse.""" self.verbosity = verbosity # Default to hardcoded current year. - years = {'2023', '2022', '2021', '2020'} - terms = {'1', '2', 'V'} + years = {"2023", "2022", "2021", "2020"} + terms = {"1", "2", "V"} years_and_terms = dict_filter_by_dict( - {year: [term for term in terms] for year in years}, - years_and_terms_filter + {year: list(terms) for year in years}, years_and_terms_filter ) for year, terms in list(years_and_terms.items()): - self.ingestor['year'] = year + self.ingestor["year"] = year self.year = year for term in terms: - self.ingestor['term'] = term + self.ingestor["term"] = term self.term = term self._get_schools() self._parse_schools()