From 3d41fb080a718d93f45f4e3be9f5bf87001aeb43 Mon Sep 17 00:00:00 2001
From: bedo
Date: Tue, 23 Aug 2022 17:14:16 +0200
Subject: [PATCH 01/10] 1st version of bedo scripts, fixtures generation for
 auth,content,lessons and exams

---
 .../management/commands/generate_auth_data.py | 630 ++++++++++++++++++
 .../commands/generate_content_data.py         | 543 +++++++++++++++
 requirements/base.txt                         |   2 +-
 3 files changed, 1174 insertions(+), 1 deletion(-)
 create mode 100644 kolibri/core/auth/management/commands/generate_auth_data.py
 create mode 100644 kolibri/core/content/management/commands/generate_content_data.py

diff --git a/kolibri/core/auth/management/commands/generate_auth_data.py b/kolibri/core/auth/management/commands/generate_auth_data.py
new file mode 100644
index 00000000000..0711203ab63
--- /dev/null
+++ b/kolibri/core/auth/management/commands/generate_auth_data.py
@@ -0,0 +1,630 @@
+import csv
+import datetime
+import io
+import os
+import random
+
+from django.core.management import call_command
+from django.core.management.base import BaseCommand
+from le_utils.constants import content_kinds
+
+from kolibri.core.auth.constants import demographics
+from kolibri.core.auth.constants import facility_presets
+from kolibri.core.auth.constants import role_kinds
+from kolibri.core.auth.models import AdHocGroup
+from kolibri.core.auth.models import Classroom
+from kolibri.core.auth.models import Facility
+from kolibri.core.auth.models import FacilityDataset
+from kolibri.core.auth.models import FacilityUser
+from kolibri.core.auth.models import LearnerGroup
+from kolibri.core.content.management.commands.generate_content_data import (
+    generate_channels,
+)
+from kolibri.core.content.management.commands.generate_content_data import (
+    get_app_models,
+)
+from kolibri.core.content.management.commands.generate_content_data import (
+    switch_to_memory,
+)
+from kolibri.core.content.models import ChannelMetadata
+from kolibri.core.content.models import ContentNode
+from kolibri.core.exams.models import Exam
+from kolibri.core.exams.models import ExamAssignment
+from kolibri.core.lessons.models import Lesson
+from kolibri.core.lessons.models import LessonAssignment
+from kolibri.utils.time_utils import local_now
+
+users_data_iterator = 0
+all_users_base_data = []
+
+
+def generate_random_id():
+    import uuid
+
+    return uuid.uuid4().hex
+
+
+def read_user_data_file():
+    # not the best way of reading the csv file i know, how to better read the file?
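+    # A possible alternative (a sketch; the 2nd commit in this series adopts this helper):
+    #
+    #     from kolibri.core.utils.csv import open_csv_for_reading
+    #     reader = open_csv_for_reading(data_path)
+    #     all_users_base_data = list(csv.DictReader(reader))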
+    data_path = os.path.abspath(
+        os.path.join(
+            os.path.dirname(__file__),
+            "..",
+            "..",
+            "..",
+            "logger/management/commands/user_data.csv",
+        )
+    )
+
+    with io.open(data_path, mode="r", encoding="utf-8") as f:
+        all_users_base_data = [data for data in csv.DictReader(f)]
+    return all_users_base_data
+
+
+# ever time called it gets the next (different) user data from user_data.csv
+def get_user_base_data(n_facility_users):
+    global users_data_iterator
+    users_data_iterator += 1
+    # making sure we aren't generating more users than the specified number of total facility users
+    if users_data_iterator > n_facility_users:
+        exit()
+    return all_users_base_data[users_data_iterator - 1]
+
+
+def generate_facility_user(facility, n_facility_users):
+    def get_birth_year(user_age):
+        current_year = datetime.datetime.now().year
+        return str(current_year - int(user_age))
+
+    user_data = get_user_base_data(n_facility_users)
+
+    user = FacilityUser(
+        username=user_data["Username"],
+        full_name=user_data["GivenName"] + " " + user_data["Surname"],
+        birth_year=get_birth_year(user_data["Age"]),
+        gender=random.choice(demographics.choices)[0],
+        id_number=str(users_data_iterator),
+        facility=facility,
+    )
+    # dummy password
+    user.set_password("password")
+    user.save()
+    return user
+
+
+# the following function doesn't work, it generates nothing (not throwing errors though)
+# and when called it stops the execution of the script! don't know why
+def generate_superadmin(n_facility_users):
+    user_data = get_user_base_data(n_facility_users)
+    username = user_data["Username"] + "_superuser"
+    full_name = f"{username} is the device superuser"
+    FacilityUser.objects.create_superuser(
+        username,
+        "password",
+        full_name=full_name,
+    )
+
+
+def generate_facility_dataset(facility_name, device_name):
+
+    preset_to_use = random.choice(facility_presets.choices)[0]
+    facility_settings = facility_presets.mappings[preset_to_use]
+
+    dataset = FacilityDataset(
+        description=f"{facility_name} DataSet",
+        location=device_name,
+        preset=preset_to_use,
+        registered=random.choice([True, False]),
+    )
+    for attribute, value in facility_settings.items():
+        setattr(dataset, attribute, value)
+
+    dataset.save()
+    return dataset
+
+
+def generate_facility(facility_name, device_name):
+
+    new_facility = Facility.objects.create(
+        name=facility_name,
+        dataset=generate_facility_dataset(facility_name, device_name),
+    )
+    return new_facility
+
+
+def generate_classroom(name, parent_facility):
+    classroom = Classroom.objects.create(name=name, parent=parent_facility)
+    return classroom
+
+
+def generate_group(name, parent_classroom):
+    group = LearnerGroup.objects.create(name=name, parent=parent_classroom)
+    return group
+
+
+def generate_adhoc_group(name, parent_classroom, learners):
+    adhoc_group = AdHocGroup.objects.create(name=name, parent=parent_classroom)
+    # assign learners to adhoc-group (lesson or exam)
+    adhoc_group.add_learners(learners)
+    return adhoc_group
+
+
+def generate_lesson(title, description, collection, creator):
+
+    lesson = Lesson.objects.create(
+        title=title,
+        description=description,
+        is_active=random.choice([True, False]),
+        resources=get_or_generate_lesson_resources(),
+        collection=collection,
+        created_by=creator,
+    )
+    return lesson
+
+
+def generate_lesson_assignment(lesson, collection, assigner_user):
+
+    lesson_assignment = LessonAssignment.objects.create(
+        lesson=lesson, collection=collection, assigned_by=assigner_user
+    )
+    return lesson_assignment
+
+
+def generate_exam(title, collection,
creator): + data_model_version = random.randint(0, 2) + q_sources = get_question_sources(data_model_version) + is_active = random.choice([True, False]) + + exam = Exam.objects.create( + title=title, + active=is_active, + question_sources=q_sources, + question_count=len(q_sources), + learners_see_fixed_order=random.choice([True, False]), + collection=collection, + creator=creator, + date_activated=local_now() if is_active else None, + data_model_version=data_model_version, + ) + return exam + + +def generate_exam_assignment(exam, collection, assigner_user): + exam_assignment = ExamAssignment.objects.create( + exam=exam, collection=collection, assigned_by=assigner_user + ) + return exam_assignment + + +# resources (non-topic ContentNodes) for each lesson +def get_or_generate_lesson_resources(): + + lesson_resources = [] + + channels = ChannelMetadata.objects.all() + + # generate a new channel if there are no local channels + if not channels: + channels = generate_channels(n_channels=1, levels=2) + + channel = random.choice(channels) + + channel_resources = ContentNode.objects.filter(channel_id=channel.id).exclude( + kind=content_kinds.TOPIC + ) + + channel_resources = random.sample( + list(channel_resources), + min(random.randint(1, channel_resources.count() - 1), 10), + ) + + for contentnode in channel_resources: + lesson_resources.append( + { + "contentnode_id": contentnode.id, + "channel_id": channel.id, + "content_id": contentnode.content_id, + } + ) + + return lesson_resources + + +# resources (random ids) for each exam +def get_question_sources(v): + def get_json_data(v, q): + + # model_version_to_question_sources_mapper + mapper = { + 0: { + "exercise_id": generate_random_id(), + "number_of_questions": 6, + "title": f"question_{q+1}", + }, + 1: { + "exercise_id": generate_random_id(), + "question_id": generate_random_id(), + "title": f"question_{q+1}", + }, + 2: { + "exercise_id": generate_random_id(), + "question_id": generate_random_id(), + "title": f"question_{q+1}", + "counter_in_exercise": "", + }, + } + + return mapper[v] + + return [get_json_data(v, q) for q in range(random.randint(3, 10))] + + +# flake8: noqa: C901 +def start_generating( + n_facilities, + n_facility_users, + n_facility_admins, + n_facility_coaches, + n_classes, + n_class_coaches, + n_class_learners, + n_groups, + n_group_learners, + n_class_lessons, + n_classs_exams, + n_adhoc_lessons, + n_adhoc_lesson_learners, + n_adhoc_exams, + n_adhoc_exam_learners, +): + + global all_users_base_data + all_users_base_data = read_user_data_file() + + facilities = [] + + for f in range(n_facilities): + new_facility = generate_facility(f"Test Facility_{f+1}", "testing device") + + # authorized users in the facility + facility_coaches_and_admins = [] + + # generating admin/s for the whole facility + for _ in range(n_facility_admins): + new_admin = generate_facility_user(new_facility, n_facility_users) + new_facility.add_admin(new_admin) + facility_coaches_and_admins.append(new_admin) + + # generating coach/s for the whole facility + for _ in range(n_facility_coaches): + facility_coach = generate_facility_user(new_facility, n_facility_users) + new_facility.add_coach(facility_coach) + facility_coaches_and_admins.append(facility_coach) + + # generating class/s + for c in range(n_classes): + class_name = f"class_{c+1}" + random.choice("ABCDEF") + new_class = generate_classroom(class_name, new_facility) + + # generate and assign assignable_coache/s to the class + for _ in range(n_class_coaches): + assignable_coach = 
generate_facility_user( + new_facility, n_facility_users + ) + # ASSIGNABLE_COACH with respect to the facility + new_facility.add_role(assignable_coach, role_kinds.ASSIGNABLE_COACH) + # COACH with respect to the class + new_class.add_coach(assignable_coach) + + # generate and assign learner/s to the class + all_class_learners = [] + for _ in range(n_class_learners): + class_learner = generate_facility_user(new_facility, n_facility_users) + all_class_learners.append(class_learner) + new_class.add_member(class_learner) + + # 'facility_coaches_and_admins' is constant for all facility classes + # but for each new_class coaches/admins are differnt that's why we construct this for every class + creators_and_assigners_users = [ + *facility_coaches_and_admins, + *new_class.get_coaches(), + *new_class.get_admins(), + ] + + # generating learner_group/s for the above class + for g in range(n_groups): + learner_group = generate_group( + name=f"learner_group_{g}", parent_classroom=new_class + ) + # randomly assign class learners to each learner_group + for class_learner in random.sample( + all_class_learners, n_group_learners + ): + learner_group.add_learner(class_learner) + + # generate and assign lesson/s to the whole class + for l in range(n_class_lessons): + lesson = generate_lesson( + title=f"Lesson_{l+1}", + description=f"Lesson_{l+1} for {class_name}", + collection=new_class, + creator=random.choice(creators_and_assigners_users), + ) + + generate_lesson_assignment( + lesson=lesson, + collection=new_class, + assigner_user=random.choice(creators_and_assigners_users), + ) + + # generate and assign exam/s to the class + for e in range(n_classs_exams): + exam = generate_exam( + title=f"exam_{e+1} for the whole {class_name}", + collection=new_class, + creator=random.choice(creators_and_assigners_users), + ) + + generate_exam_assignment( + exam=exam, + collection=new_class, + assigner_user=random.choice(creators_and_assigners_users), + ) + + # generate lesson/s for specific learners + for l in range(n_adhoc_lessons): + lesson = generate_lesson( + title=f"Lesson_{l+1}", + description=f"Lesson_{l+1} for {class_name}", + collection=new_class, + creator=random.choice(creators_and_assigners_users), + ) + generate_lesson_assignment( + lesson=lesson, + collection=generate_adhoc_group( + name=f"adhoc group_{l+1} for {lesson.title} in {class_name}", + parent_classroom=new_class, + learners=random.sample( + all_class_learners, n_adhoc_lesson_learners + ), + ), + assigner_user=random.choice(creators_and_assigners_users), + ) + + # generate exam/s for specific learners + for e in range(n_adhoc_exams): + + exam = generate_exam( + title=f"exam_{e+1} for specific learners", + collection=new_class, + creator=random.choice(creators_and_assigners_users), + ) + + generate_exam_assignment( + exam=exam, + collection=generate_adhoc_group( + name=f"adhoc group_{l+1} for {exam.title} in {class_name}", + parent_classroom=new_class, + learners=random.sample( + all_class_learners, n_adhoc_exam_learners + ), + ), + assigner_user=random.choice(creators_and_assigners_users), + ) + + # generating left users (not assigned to any collection, just the facility) + n_not_assigned_users = n_facility_users - ( + n_facility_admins + + n_facility_coaches + + ((n_class_coaches + n_class_learners) * n_classes) + ) + + for _ in range(n_not_assigned_users): + generate_facility_user(new_facility, n_facility_users) + + facilities.append(new_facility) + + return facilities + + +class Command(BaseCommand): + def add_arguments(self, parser): + + 
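+        # Example invocation (a sketch; every flag below is optional and the values are arbitrary):
+        #
+        #     kolibri manage generate_auth_data --mode fixtures --classes 2 --class_learners 25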
parser.add_argument(
+            "--mode",
+            type=str,
+            choices=["fixtures", "default_db"],
+            default="fixtures",
+            help="where should the data be after generation? dumped into fixtures and deleted, or saved in default db",
+        )
+
+        parser.add_argument(
+            "--facilities",
+            type=int,
+            choices=range(1, 10),
+            default=1,
+            help="number of facilities to generate",
+        )
+
+        parser.add_argument(
+            "--users",
+            type=int,
+            choices=range(20, 500),
+            default=35,
+            help="number of total users (learners and coaches and admins) in each facility",
+        )
+
+        parser.add_argument(
+            "--admins",
+            type=int,
+            choices=range(1, 5),
+            default=1,
+            help="number of facility admins",
+        )
+
+        parser.add_argument(
+            "--coaches",
+            type=int,
+            choices=range(1, 10),
+            default=1,
+            help="number of facility coaches",
+        )
+
+        parser.add_argument(
+            "--classes",
+            type=int,
+            choices=range(1, 30),
+            default=1,
+            help="number of classes to generate",
+        )
+
+        parser.add_argument(
+            "--class_coaches",
+            type=int,
+            choices=range(1, 5),
+            default=1,
+            help="number of assigned coaches per class",
+        )
+
+        parser.add_argument(
+            "--class_learners",
+            type=int,
+            choices=range(1, 100),
+            default=20,
+            help="number of learners per class",
+        )
+
+        parser.add_argument(
+            "--class_lessons",
+            type=int,
+            choices=range(1, 20),
+            default=1,
+            help="total number of lessons per class",
+        )
+
+        parser.add_argument(
+            "--class_exams",
+            type=int,
+            choices=range(1, 20),
+            default=0,
+            help="total number of exams per class",
+        )
+
+        parser.add_argument(
+            "--groups",
+            type=int,
+            choices=range(1, 20),
+            default=0,
+            help="number of learner groups to generate per class",
+        )
+
+        parser.add_argument(
+            "--group_members",
+            type=int,
+            choices=range(1, 20),
+            default=5,
+            help="number of group learners",
+        )
+
+        parser.add_argument(
+            "--adhoc_lessons",
+            type=int,
+            choices=range(1, 20),
+            default=0,
+            help="number of lessons assigned for specific learners",
+        )
+
+        parser.add_argument(
+            "--adhoc_lesson_learners",
+            type=int,
+            choices=range(1, 20),
+            default=5,
+            help="number of learners for the adhoc_lesson",
+        )
+
+        parser.add_argument(
+            "--adhoc_exams",
+            type=int,
+            choices=range(1, 20),
+            default=0,
+            help="number of exams assigned for specific learners",
+        )
+
+        parser.add_argument(
+            "--adhoc_exam_learners",
+            type=int,
+            choices=range(1, 20),
+            default=5,
+            help="number of learners for the adhoc_exam",
+        )
+
+    def handle(self, *args, **options):
+
+        # Generated Data destination
+        mode = options["mode"]
+
+        # Facilities
+        n_facilities = options["facilities"]
+        n_facility_users = options["users"]
+        n_facility_admins = options["admins"]
+        n_facility_coaches = options["coaches"]
+
+        # Classrooms
+        n_classes = options["classes"]
+        n_class_coaches = options["class_coaches"]
+        n_class_learners = options["class_learners"]
+        n_classs_exams = options["class_exams"]
+        n_class_lessons = options["class_lessons"]
+
+        # Groups
+        n_groups = options["groups"]
+        n_group_learners = options["group_members"]
+
+        # AdHocGroups (for assigning lessons/exams to specific learners)
+        n_adhoc_lessons = options["adhoc_lessons"]
+        n_adhoc_lesson_learners = options["adhoc_lesson_learners"]
+        n_adhoc_exams = options["adhoc_exams"]
+        n_adhoc_exam_learners = options["adhoc_exam_learners"]
+
+        if mode == "fixtures":
+
+            switch_to_memory()
+
+            facilities = start_generating(
+                n_facilities,
+                n_facility_users,
+                n_facility_admins,
+                n_facility_coaches,
+                n_classes,
+                n_class_coaches,
+                n_class_learners,
+                n_groups,
+                n_group_learners,
+                n_class_lessons,
+                n_classs_exams,
n_adhoc_lessons, + n_adhoc_lesson_learners, + n_adhoc_exams, + n_adhoc_exam_learners, + ) + + print( + "\n start dumping fixtures for facilities and all its related data \n" + ) + + # dumping after generation is done + call_command( + "dumpdata", + *get_app_models("kolibriauth", "lessons", "exams"), + indent=4, + # for json file creation to work correctly your pwd (in terminal) have to be ../kolibri/core/auth + # we want to fix that (i.e. creating the file correctly regardless of our current terminal path), how ? + output="fixtures/all_facility_data.json", + interactive=False, + ) + + # although we are in memory (data will be cleared by default) but just in case we didn't switch to memory + [facility.delete() for facility in facilities] + + else: + start_generating() diff --git a/kolibri/core/content/management/commands/generate_content_data.py b/kolibri/core/content/management/commands/generate_content_data.py new file mode 100644 index 00000000000..aa69b75cbe5 --- /dev/null +++ b/kolibri/core/content/management/commands/generate_content_data.py @@ -0,0 +1,543 @@ +import random + +from django.apps import apps +from django.conf import settings +from django.core.management import call_command +from django.core.management.base import BaseCommand +from django.db import connections +from le_utils.constants import content_kinds +from le_utils.constants import format_presets +from le_utils.constants import languages +from le_utils.constants import mastery_criteria +from le_utils.constants.labels import learning_activities +from le_utils.constants.labels.levels import LEVELSLIST +from le_utils.constants.labels.needs import NEEDSLIST +from le_utils.constants.labels.subjects import SUBJECTSLIST + +from kolibri.core.content.models import AssessmentMetaData +from kolibri.core.content.models import ChannelMetadata +from kolibri.core.content.models import ContentNode +from kolibri.core.content.models import ContentTag +from kolibri.core.content.models import File +from kolibri.core.content.models import Language +from kolibri.core.content.models import LocalFile + + +# we are using a set in case we accidentally tried deleting the same object twice +generated_objects = set() + +tags_generated = [] + +IGNORED_KINDS = ["quiz", "zim"] + +ALL_RESOURCES_KINDS = [ + kind.id + for kind in content_kinds.KINDLIST + if kind.id not in IGNORED_KINDS and kind.id != "topic" +] + +RESOURCES_COUNT = len(ALL_RESOURCES_KINDS) +LICENSE_NAME = "testing license" +LICENSE_NAME_DESCRIPTION = ( + "ABC organization authorizes kolibri to use this these resources" +) +LICENSE_OWNER = "ABC org" +MIN_SCHEMA_VERSION = 1 +DEVELOPER_NAME = "bedo khaled" + + +def switch_to_memory(): + print("\n initializing the testing environment in memory....\n") + for db in settings.DATABASES: + settings.DATABASES[db] = { + "ENGINE": "django.db.backends.sqlite3", + "NAME": ":memory:", + } + try: + del connections[db] + except AttributeError: + pass + call_command("migrate", interactive=False, database=db) + + +# get app/s models names that will be dumped +def get_app_models(*apps_names): + models_names = [] + + # first way, extracts only the base (main) models + for app_name in apps_names: + for model in list(apps.get_app_config(app_name).get_models()): + models_names.append(f"{app_name}.{model.__name__}") + + return models_names + + # another way of getting app models (which gets more models but are in lowercase) + + # for app_name in apps_names: + # for model in dict(apps.all_models[app_name]): + # models_names.append(f"{app_name}.{model}") + + # 
which one should we use ? (i.e which models will be dumped ? the ones extraced through the first or the second way)
+
+
+def generate_random_id():
+    import uuid
+
+    return uuid.uuid4().hex
+
+
+# for returning random choices
+
+
+def choices(sequence, k):
+    return [random.choice(sequence) for _ in range(0, k)]
+
+
+# puprpose: if we have a content node of certain kind what type of file (file_preset) should maps to this node ?
+
+content_kind_to_file_preset = {}
+
+# format_presets.PRESETLIST to a dictionary for convenient access
+format_prestets_data = {}
+
+for format_object in format_presets.PRESETLIST:
+    if format_object.kind:
+        format_prestets_data[format_object.id] = format_object
+
+        if format_object.kind not in content_kind_to_file_preset:
+            content_kind_to_file_preset[format_object.kind] = [format_object.id]
+        else:
+            content_kind_to_file_preset[format_object.kind].append(format_object.id)
+
+
+def generate_assessmentmetadata(node=None, randomize=False, is_manipulable=False):
+    number_of_assessments = random.randint(1, 30)
+    assessment_item_ids = [
+        str(generate_random_id()) for _ in range(number_of_assessments)
+    ]
+
+    random_criteria = random.choice(mastery_criteria.MASTERYCRITERIALIST)
+
+    mapper = {
+        mastery_criteria.DO_ALL: {
+            "type": random_criteria,
+            "n": number_of_assessments,
+            "m": number_of_assessments,
+        },
+        mastery_criteria.NUM_CORRECT_IN_A_ROW_10: {
+            "type": random_criteria,
+            "n": 10,
+            "m": 10,
+        },
+        mastery_criteria.NUM_CORRECT_IN_A_ROW_2: {
+            "type": random_criteria,
+            "n": 2,
+            "m": 2,
+        },
+        mastery_criteria.NUM_CORRECT_IN_A_ROW_3: {
+            "type": random_criteria,
+            "n": 3,
+            "m": 3,
+        },
+        mastery_criteria.NUM_CORRECT_IN_A_ROW_5: {
+            "type": random_criteria,
+            "n": 5,
+            "m": 5,
+        },
+        mastery_criteria.M_OF_N: {
+            "type": random_criteria,
+            "n": random.randint(5, 7),
+            "m": random.randint(1, 3),
+        },
+    }
+
+    meta_data = AssessmentMetaData.objects.create(
+        id=generate_random_id(),
+        contentnode=node,
+        assessment_item_ids=assessment_item_ids,
+        number_of_assessments=number_of_assessments,
+        mastery_model=mapper[random_criteria],
+        randomize=randomize,
+        is_manipulable=is_manipulable,
+    )
+    generated_objects.add(meta_data)
+    return meta_data
+
+
+def generate_some_tags():
+
+    # dummy tag names
+    TAG_NAMES = [
+        "Math",
+        "science_related",
+        "have_fun",
+        "children",
+        "experiment",
+        "bedo_tag",
+        "course",
+        "culture",
+        "introduction",
+        "whatever",
+        "another_tag",
+        "nice tag",
+    ]
+
+    for tag_name in TAG_NAMES:
+
+        tag = ContentTag.objects.create(tag_name=tag_name, id=generate_random_id())
+        tags_generated.append(tag)
+        generated_objects.add(tag)
+
+
+def get_or_generate_language(lang_id):
+    try:
+        return Language.objects.get(id=lang_id)
+
+    except Language.DoesNotExist:
+
+        # fetched languages from le_utils/resources/languagelookup.json
+        fetched_lang_data = languages.getlang(lang_id)
+
+        if not fetched_lang_data:
+            return None
+        new_lang = Language.objects.create(
+            id=lang_id,
+            lang_code=fetched_lang_data.primary_code,
+            lang_subcode=fetched_lang_data.subcode,
+            lang_name=fetched_lang_data.native_name,
+            lang_direction=languages.getlang_direction(lang_id),
+        )
+
+        generated_objects.add(new_lang)
+
+        return new_lang
+
+
+def generate_localfile(file_preset):
+
+    # this was calculated by taking the average of file_size of localfiles of each extension in QA channel
+    # so it has to be manually written here as this information doesn't exist; it was calculated by me. Why?
+    # Well, instead of just generating random numbers, I wanted the file_size values to be more relevant to their corresponding extension
+    extension_to_file_size = {
+        "mp4": 16293436.885714285,
+        "webm": None,
+        "vtt": 3227.507692307692,
+        "pdf": 6655360.057142857,
+        "epub": 13291472.210526315,
+        "mp3": 2102685.625,
+        "jpg": 20291943.133333333,
+        "jpeg": 30457141.25,
+        "png": 2833124.8260869565,
+        "gif": None,
+        "json": 3529.0,
+        "svg": None,
+        "graphie": None,
+        "perseus": 357012.67441860464,
+        "h5p": 10699889.2,
+        "zim": None,
+        "zip": 5285446.041666667,
+    }
+
+    extensions_choices = format_prestets_data[file_preset].allowed_formats
+
+    extension_to_use = random.choice(extensions_choices)
+
+    new_localfile = LocalFile.objects.create(
+        id=generate_random_id(),
+        extension=extension_to_use,
+        available=True,
+        file_size=extension_to_file_size[extension_to_use],
+    )
+
+    generated_objects.add(new_localfile)
+    return new_localfile
+
+
+def generate_file(contentnode):
+
+    preset_options = content_kind_to_file_preset[contentnode.kind]
+
+    file_preset = random.choice(preset_options)
+
+    local_file = generate_localfile(file_preset)
+
+    file = File.objects.create(
+        id=generate_random_id(),
+        local_file=local_file,
+        contentnode=contentnode,
+        lang=contentnode.lang,
+        supplementary=format_prestets_data[file_preset].supplementary,
+        thumbnail=format_prestets_data[file_preset].thumbnail,
+        preset=file_preset,
+    )
+
+    return file
+
+
+def generate_channel(name, root_node, channel_id):
+
+    channel = ChannelMetadata.objects.create(
+        id=channel_id,
+        name=name,
+        description=f"this is the testing channel {name}, generated for testing purposes",
+        author=DEVELOPER_NAME,
+        min_schema_version=MIN_SCHEMA_VERSION,
+        root=root_node,
+    )
+
+    return channel
+
+
+def generate_one_contentNode(
+    kind=None,
+    title="",
+    description=None,
+    channel_id=None,
+    parent=None,
+    available=True,
+    is_root=False,
+    lang_id="en",
+    node_tags=[],
+):
+
+    kind_to_learninactivity = {
+        "topic": "",
+        "slideshow": "",
+        "document": f"{learning_activities.READ},{learning_activities.REFLECT}",
+        "video": f"{learning_activities.WATCH},{learning_activities.REFLECT}",
+        "html5": f"{learning_activities.EXPLORE},{learning_activities.REFLECT}",
+        "audio": f"{learning_activities.LISTEN},{learning_activities.REFLECT}",
+        "exercise": f"{learning_activities.PRACTICE},{learning_activities.REFLECT}",
+        "h5p": f"{learning_activities.EXPLORE}.{learning_activities.REFLECT}",
+    }
+
+    new_node = ContentNode.objects.create(
+        id=generate_random_id(),
+        parent=None if is_root else parent,
+        channel_id=channel_id,
+        content_id=generate_random_id(),
+        kind=kind,
+        title=title,
+        lang=get_or_generate_language(lang_id),
+        license_name=LICENSE_NAME,
+        license_description=LICENSE_NAME_DESCRIPTION,
+        description=description,
+        license_owner=LICENSE_OWNER,
+        author=DEVELOPER_NAME,
+        available=available,
+        learning_activities=kind_to_learninactivity[kind],
+        categories=",".join(set(choices(SUBJECTSLIST, k=random.randint(1, 10)))),
+        learner_needs=",".join(set(choices(NEEDSLIST, k=random.randint(1, 5)))),
+        grade_levels=",".join(set(choices(LEVELSLIST, k=random.randint(1, 2)))),
+    )
+
+    if node_tags:
+        new_node.tags.add(*node_tags)
+        new_node.save()
+
+    # generate related File object for this node
+    generate_file(new_node)
+
+    # generate assessment metadata for this contentnode if its kind is exercise, correct or not?
+ if kind == content_kinds.EXERCISE: + generate_assessmentmetadata(node=new_node) + + return new_node + + +def generate_topic( + title="", channel_id=None, parent=None, is_root=False, description="" +): + return generate_one_contentNode( + kind=content_kinds.TOPIC, + title=title, + channel_id=channel_id, + parent=parent, + is_root=is_root, + description=description, + ) + + +def generate_leaf( + title="random leaf node", + resource_kind=None, + channel_id=None, + parent=None, + description="", +): + return generate_one_contentNode( + kind=resource_kind, + title=title, + channel_id=channel_id, + parent=parent, + description=description, + node_tags=random.sample(tags_generated, random.randint(1, 5)), + ) + + +def recurse_and_generate( + parent, + channel_id, + levels, + kind_iterator, + num_children=RESOURCES_COUNT, +): + children = [] + for i in range(num_children): + current_resource_kind = ALL_RESOURCES_KINDS[kind_iterator % RESOURCES_COUNT] + if levels == 0: + current_node = generate_leaf( + title=f"{current_resource_kind}_{i+1}", + resource_kind=current_resource_kind, + channel_id=channel_id, + parent=parent, + ) + + else: + topic_title = f"level {levels}, topic_{i+1}" + # last parent nodes (parent of the actual resources) + if levels == 1: + topic_title = f"level {levels}, {current_resource_kind}_resources" + + current_node = generate_topic( + title=topic_title, + channel_id=channel_id, + parent=parent, + description="", + ) + + current_node.children.add( + *recurse_and_generate( + parent=current_node, + channel_id=channel_id, + levels=levels - 1, + kind_iterator=kind_iterator, + ) + ) + + children.append(current_node) + + kind_iterator += 1 + return children + + +def generate_channels(n_channels, levels): + generated_channels = [] + + print("\n generating channel/s and its related data...\n") + + generate_some_tags() + + for c in range(n_channels): + kind_iterator = 0 + + channel_id = generate_random_id() + + root_node = generate_topic( + title="root node (main folder)", + is_root=True, + channel_id=channel_id, + description="first and main contentnode in this testing tree", + ) + + channel = generate_channel( + name=f"Testing channel _{c+1} of {levels} levels", + root_node=root_node, + channel_id=channel_id, + ) + + # generating tree nodes starting from the root node + root_node.children.add( + *recurse_and_generate( + parent=root_node, + channel_id=channel_id, + levels=levels, + kind_iterator=kind_iterator, + ) + ) + + channel_contents = ContentNode.objects.filter( + channel_id=channel_id, + ).exclude(kind=content_kinds.TOPIC) + + channel.total_resource_count = channel_contents.count() + + for each_content in channel_contents: + if each_content.lang: + channel.included_languages.add(each_content.lang) + + generated_channels.append(channel) + + return generated_channels + + +class Command(BaseCommand): + + help = "generate fixtures/data for the specified app" + + def add_arguments(self, parser): + + parser.add_argument( + "--mode", + type=str, + choices=["fixtures", "default_db"], + default="fixtures", + help="where should the data be after generation? 
dumped into fixtures and deleted or saved in default db", + ) + + parser.add_argument( + "--n_channels", + type=int, + choices=range(1, 10), + default=1, + help="number of tree levels", + ) + + parser.add_argument( + "--levels", + type=int, + choices=range(1, 10), + default=2, + help="number of tree levels", + ) + + def handle(self, *args, **options): + generating_mode = options["mode"] + n_channels = options["n_channels"] + required_levels = options["levels"] + + if generating_mode == "fixtures": + + # takes much time for switching, alternatives ?? + switch_to_memory() + + channels_generated = generate_channels( + n_channels=n_channels, levels=required_levels + ) + + # dumping after generation is done + print("\n start dumping fixtures for content app \n") + + call_command( + "dumpdata", + *get_app_models("content"), + indent=4, + # for json file creation to work correctly your pwd (in terminal) have to be ../kolibri/core/content + # we want to fix that (i.e. creating the file correctly regardless of our current terminal path), how ? + output="fixtures/all_content_data.json", + interactive=False, + ) + + # although we are in memory (data will be cleared by default) but just in case we didn't switch to memory + [ + each_channel.delete_content_tree_and_files() + for each_channel in channels_generated + ] + + [ + each_generated_object.delete() + for each_generated_object in generated_objects + ] + + else: + generate_channels(n_channels=n_channels, levels=required_levels) diff --git a/requirements/base.txt b/requirements/base.txt index df3a4a20eed..cc85d6f1ccb 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -13,7 +13,7 @@ cheroot==8.5.2 magicbus==4.1.2 futures==3.1.1 # Temporarily pinning this until we can do a Python 2/3 compatible solution of newer versions # pyup: <=3.1.1 more-itertools==5.0.0 # Last Python 2.7 friendly release # pyup: <6.0 -le-utils==0.1.38 +le-utils==0.1.41 jsonfield==2.0.2 requests-toolbelt==0.9.1 morango==0.6.14 From aff830a5455c758ac0c6bc2a4aeba72243468b5a Mon Sep 17 00:00:00 2001 From: bedo Date: Wed, 24 Aug 2022 17:17:25 +0200 Subject: [PATCH 02/10] 2nd version of bedo scripts (fixtures generation) for content,kolibriauth,lessons and exams apps --- .../management/commands/generate_auth_data.py | 143 +++++++++++------- .../commands/generate_content_data.py | 58 +++---- 2 files changed, 110 insertions(+), 91 deletions(-) diff --git a/kolibri/core/auth/management/commands/generate_auth_data.py b/kolibri/core/auth/management/commands/generate_auth_data.py index 0711203ab63..8fd1e40e038 100644 --- a/kolibri/core/auth/management/commands/generate_auth_data.py +++ b/kolibri/core/auth/management/commands/generate_auth_data.py @@ -1,6 +1,5 @@ import csv import datetime -import io import os import random @@ -20,9 +19,6 @@ from kolibri.core.content.management.commands.generate_content_data import ( generate_channels, ) -from kolibri.core.content.management.commands.generate_content_data import ( - get_app_models, -) from kolibri.core.content.management.commands.generate_content_data import ( switch_to_memory, ) @@ -32,6 +28,7 @@ from kolibri.core.exams.models import ExamAssignment from kolibri.core.lessons.models import Lesson from kolibri.core.lessons.models import LessonAssignment +from kolibri.core.utils.csv import open_csv_for_reading from kolibri.utils.time_utils import local_now users_data_iterator = 0 @@ -44,24 +41,28 @@ def generate_random_id(): return uuid.uuid4().hex -def read_user_data_file(): - # not the best way of reading the csv file i know, 
how to better read the file? - data_path = os.path.abspath( - os.path.join( - os.path.dirname(__file__), - "..", - "..", - "..", - "logger/management/commands/user_data.csv", +def read_user_data_file(file_path): + + if not file_path: + file_path = os.path.abspath( + os.path.join( + os.path.dirname(__file__), + "..", + "..", + "..", + "logger", + "management", + "commands", + "user_data.csv", + ) ) - ) - with io.open(data_path, mode="r", encoding="utf-8") as f: - all_users_base_data = [data for data in csv.DictReader(f)] - return all_users_base_data + global all_users_base_data + file_reader = open_csv_for_reading(file_path) + all_users_base_data = [data for data in csv.DictReader(file_reader)] -# ever time called it gets the next (different) user data from user_data.csv +# ever time gets called it gets the next (different) user data from user_data.csv def get_user_base_data(n_facility_users): global users_data_iterator users_data_iterator += 1 @@ -97,7 +98,7 @@ def get_birth_year(user_age): def generate_superadmin(n_facility_users): user_data = get_user_base_data(n_facility_users) username = user_data["Username"] + "_superuser" - full_name = f"{username} is the device superuser" + full_name = "{} is the device superuser".format(username) FacilityUser.objects.create_superuser( username, "password", @@ -111,7 +112,7 @@ def generate_facility_dataset(facility_name, device_name): facility_settings = facility_presets.mappings[preset_to_use] dataset = FacilityDataset( - description=f"{facility_name} DataSet", + description="{} DataSet".format(facility_name), location=device_name, preset=preset_to_use, registered=random.choice([True, False]), @@ -203,7 +204,7 @@ def get_or_generate_lesson_resources(): channels = ChannelMetadata.objects.all() - # generate a new channel if there are no local channels + # generate new channel/s if there are no local channels if not channels: channels = generate_channels(n_channels=1, levels=2) @@ -239,17 +240,17 @@ def get_json_data(v, q): 0: { "exercise_id": generate_random_id(), "number_of_questions": 6, - "title": f"question_{q+1}", + "title": "question_{}".format(q + 1), }, 1: { "exercise_id": generate_random_id(), "question_id": generate_random_id(), - "title": f"question_{q+1}", + "title": "question_{}".format(q + 1), }, 2: { "exercise_id": generate_random_id(), "question_id": generate_random_id(), - "title": f"question_{q+1}", + "title": "question_{}".format(q + 1), "counter_in_exercise": "", }, } @@ -276,15 +277,17 @@ def start_generating( n_adhoc_lesson_learners, n_adhoc_exams, n_adhoc_exam_learners, + data_path, ): - global all_users_base_data - all_users_base_data = read_user_data_file() + read_user_data_file(data_path) facilities = [] for f in range(n_facilities): - new_facility = generate_facility(f"Test Facility_{f+1}", "testing device") + new_facility = generate_facility( + facility_name="Facility_{}".format(f + 1), device_name="testing device" + ) # authorized users in the facility facility_coaches_and_admins = [] @@ -303,7 +306,7 @@ def start_generating( # generating class/s for c in range(n_classes): - class_name = f"class_{c+1}" + random.choice("ABCDEF") + class_name = "class_{}".format(c + 1) + random.choice("ABCDEF") new_class = generate_classroom(class_name, new_facility) # generate and assign assignable_coache/s to the class @@ -334,7 +337,7 @@ def start_generating( # generating learner_group/s for the above class for g in range(n_groups): learner_group = generate_group( - name=f"learner_group_{g}", parent_classroom=new_class + 
name="learner_group_{}".format(g), parent_classroom=new_class ) # randomly assign class learners to each learner_group for class_learner in random.sample( @@ -345,8 +348,8 @@ def start_generating( # generate and assign lesson/s to the whole class for l in range(n_class_lessons): lesson = generate_lesson( - title=f"Lesson_{l+1}", - description=f"Lesson_{l+1} for {class_name}", + title="Lesson_{}".format(l + 1), + description="Lesson_{} for {}".format(l + 1, class_name), collection=new_class, creator=random.choice(creators_and_assigners_users), ) @@ -360,7 +363,7 @@ def start_generating( # generate and assign exam/s to the class for e in range(n_classs_exams): exam = generate_exam( - title=f"exam_{e+1} for the whole {class_name}", + title="exam_{} for the whole {}".format(e + 1, class_name), collection=new_class, creator=random.choice(creators_and_assigners_users), ) @@ -374,15 +377,15 @@ def start_generating( # generate lesson/s for specific learners for l in range(n_adhoc_lessons): lesson = generate_lesson( - title=f"Lesson_{l+1}", - description=f"Lesson_{l+1} for {class_name}", + title="Lesson_{}".format(l + 1), + description="Lesson_{} for {}".format(l + 1, class_name), collection=new_class, creator=random.choice(creators_and_assigners_users), ) generate_lesson_assignment( lesson=lesson, collection=generate_adhoc_group( - name=f"adhoc group_{l+1} for {lesson.title} in {class_name}", + name="adhoc_{} in {}".format(lesson.title, class_name), parent_classroom=new_class, learners=random.sample( all_class_learners, n_adhoc_lesson_learners @@ -395,7 +398,7 @@ def start_generating( for e in range(n_adhoc_exams): exam = generate_exam( - title=f"exam_{e+1} for specific learners", + title="exam_{} for specific learners".format(e + 1), collection=new_class, creator=random.choice(creators_and_assigners_users), ) @@ -403,7 +406,7 @@ def start_generating( generate_exam_assignment( exam=exam, collection=generate_adhoc_group( - name=f"adhoc group_{l+1} for {exam.title} in {class_name}", + name="adhoc_{} in {}".format(exam.title, class_name), parent_classroom=new_class, learners=random.sample( all_class_learners, n_adhoc_exam_learners @@ -558,11 +561,21 @@ def add_arguments(self, parser): help="number of learners for the adhoc_exam", ) + parser.add_argument( + "--data_path", + type=str, + default="", + help="path to the csv file which containts users base data", + ) + def handle(self, *args, **options): # Generated Data destination mode = options["mode"] + # Csv data file_path + data_path = options["data_path"] + # Facilities n_facilities = options["facilities"] n_facility_users = options["users"] @@ -591,21 +604,22 @@ def handle(self, *args, **options): switch_to_memory() facilities = start_generating( - n_facilities, - n_facility_users, - n_facility_admins, - n_facility_coaches, - n_classes, - n_class_coaches, - n_class_learners, - n_groups, - n_group_learners, - n_class_lessons, - n_classs_exams, - n_adhoc_lessons, - n_adhoc_lesson_learners, - n_adhoc_exams, - n_adhoc_exam_learners, + n_facilities=n_facilities, + n_facility_users=n_facility_users, + n_facility_admins=n_facility_admins, + n_facility_coaches=n_facility_coaches, + n_classes=n_classes, + n_class_coaches=n_class_coaches, + n_class_learners=n_class_learners, + n_groups=n_groups, + n_group_learners=n_group_learners, + n_class_lessons=n_class_lessons, + n_classs_exams=n_classs_exams, + n_adhoc_lessons=n_adhoc_lessons, + n_adhoc_lesson_learners=n_adhoc_lesson_learners, + n_adhoc_exams=n_adhoc_exams, + 
n_adhoc_exam_learners=n_adhoc_exam_learners, + data_path=data_path, ) print( @@ -615,10 +629,12 @@ def handle(self, *args, **options): # dumping after generation is done call_command( "dumpdata", - *get_app_models("kolibriauth", "lessons", "exams"), + "kolibriauth", + "lessons", + "exams", indent=4, # for json file creation to work correctly your pwd (in terminal) have to be ../kolibri/core/auth - # we want to fix that (i.e. creating the file correctly regardless of our current terminal path), how ? + # we want to fix that (i.e. creating the file correctly regardless of our current pwd ), how ? output="fixtures/all_facility_data.json", interactive=False, ) @@ -627,4 +643,21 @@ def handle(self, *args, **options): [facility.delete() for facility in facilities] else: - start_generating() + start_generating( + n_facilities=n_facilities, + n_facility_users=n_facility_users, + n_facility_admins=n_facility_admins, + n_facility_coaches=n_facility_coaches, + n_classes=n_classes, + n_class_coaches=n_class_coaches, + n_class_learners=n_class_learners, + n_groups=n_groups, + n_group_learners=n_group_learners, + n_class_lessons=n_class_lessons, + n_classs_exams=n_classs_exams, + n_adhoc_lessons=n_adhoc_lessons, + n_adhoc_lesson_learners=n_adhoc_lesson_learners, + n_adhoc_exams=n_adhoc_exams, + n_adhoc_exam_learners=n_adhoc_exam_learners, + data_path=data_path, + ) diff --git a/kolibri/core/content/management/commands/generate_content_data.py b/kolibri/core/content/management/commands/generate_content_data.py index aa69b75cbe5..3761a6bc1a0 100644 --- a/kolibri/core/content/management/commands/generate_content_data.py +++ b/kolibri/core/content/management/commands/generate_content_data.py @@ -1,6 +1,5 @@ import random -from django.apps import apps from django.conf import settings from django.core.management import call_command from django.core.management.base import BaseCommand @@ -46,6 +45,7 @@ DEVELOPER_NAME = "bedo khaled" +# takes much time to migrate, alternatives ? def switch_to_memory(): print("\n initializing the testing environment in memory....\n") for db in settings.DATABASES: @@ -60,26 +60,6 @@ def switch_to_memory(): call_command("migrate", interactive=False, database=db) -# get app/s models names that will be dumped -def get_app_models(*apps_names): - models_names = [] - - # first way, extracts only the base (main) models - for app_name in apps_names: - for model in list(apps.get_app_config(app_name).get_models()): - models_names.append(f"{app_name}.{model.__name__}") - - return models_names - - # another way of getting app models (which gets more models but are in lowercase) - - # for app_name in apps_names: - # for model in dict(apps.all_models[app_name]): - # models_names.append(f"{app_name}.{model}") - - # which one should we use ? (i.e which models will be dumped ? the ones extraced through the first or the second way) - - def generate_random_id(): import uuid @@ -87,14 +67,11 @@ def generate_random_id(): # for returning random choices - - def choices(sequence, k): return [random.choice(sequence) for _ in range(0, k)] # puprpose: if we have a content node of certain kind what type of file (file_preset) should maps to this node ? 
- content_kind_to_file_preset = {} # format_presets.PRESETLIST to a dictionary for convenient access @@ -303,12 +280,20 @@ def generate_one_contentNode( kind_to_learninactivity = { "topic": "", "slideshow": "", - "document": f"{learning_activities.READ},{learning_activities.REFLECT}", - "video": f"{learning_activities.WATCH},{learning_activities.REFLECT}", - "html5": f"{learning_activities.EXPLORE},{learning_activities.REFLECT}", - "audio": f"{learning_activities.LISTEN},{learning_activities.REFLECT}", - "exercise": f"{learning_activities.PRACTICE},{learning_activities.REFLECT}", - "h5p": f"{learning_activities.EXPLORE}.{learning_activities.REFLECT}", + "document": "{},{}".format( + learning_activities.READ, learning_activities.REFLECT + ), + "video": "{},{}".format(learning_activities.WATCH, learning_activities.REFLECT), + "html5": "{},{}".format( + learning_activities.EXPLORE, learning_activities.REFLECT + ), + "audio": "{},{}".format( + learning_activities.LISTEN, learning_activities.REFLECT + ), + "exercise": "{},{}".format( + learning_activities.PRACTICE, learning_activities.REFLECT + ), + "h5p": "{}.{}".format(learning_activities.EXPLORE, learning_activities.REFLECT), } new_node = ContentNode.objects.create( @@ -387,17 +372,19 @@ def recurse_and_generate( current_resource_kind = ALL_RESOURCES_KINDS[kind_iterator % RESOURCES_COUNT] if levels == 0: current_node = generate_leaf( - title=f"{current_resource_kind}_{i+1}", + title="{}_{}".format(current_resource_kind, i + 1), resource_kind=current_resource_kind, channel_id=channel_id, parent=parent, ) else: - topic_title = f"level {levels}, topic_{i+1}" + topic_title = "level {}, topic_{}".format(levels, i + 1) # last parent nodes (parent of the actual resources) if levels == 1: - topic_title = f"level {levels}, {current_resource_kind}_resources" + topic_title = "level {}, {}_resources".format( + levels, current_resource_kind + ) current_node = generate_topic( title=topic_title, @@ -441,7 +428,7 @@ def generate_channels(n_channels, levels): ) channel = generate_channel( - name=f"Testing channel _{c+1} of {levels} levels", + name="Testing channel _{} of {} levels".format(c + 1, levels), root_node=root_node, channel_id=channel_id, ) @@ -508,7 +495,6 @@ def handle(self, *args, **options): if generating_mode == "fixtures": - # takes much time for switching, alternatives ?? switch_to_memory() channels_generated = generate_channels( @@ -520,7 +506,7 @@ def handle(self, *args, **options): call_command( "dumpdata", - *get_app_models("content"), + "content", indent=4, # for json file creation to work correctly your pwd (in terminal) have to be ../kolibri/core/content # we want to fix that (i.e. creating the file correctly regardless of our current terminal path), how ? 
From 2df152577419311cd8be5574ba18d02c97d68691 Mon Sep 17 00:00:00 2001 From: bedo Date: Tue, 30 Aug 2022 21:51:16 +0200 Subject: [PATCH 03/10] 3rd version of fixtures/default_db generation for content, kolibriauth, lessons and exams apps --- .../management/commands/generate_auth_data.py | 155 +++---- .../commands/generate_content_data.py | 396 ++++++++++-------- 2 files changed, 310 insertions(+), 241 deletions(-) diff --git a/kolibri/core/auth/management/commands/generate_auth_data.py b/kolibri/core/auth/management/commands/generate_auth_data.py index 8fd1e40e038..4e90584a9d4 100644 --- a/kolibri/core/auth/management/commands/generate_auth_data.py +++ b/kolibri/core/auth/management/commands/generate_auth_data.py @@ -1,7 +1,9 @@ import csv import datetime +import logging import os import random +import uuid from django.core.management import call_command from django.core.management.base import BaseCommand @@ -31,14 +33,7 @@ from kolibri.core.utils.csv import open_csv_for_reading from kolibri.utils.time_utils import local_now -users_data_iterator = 0 -all_users_base_data = [] - - -def generate_random_id(): - import uuid - - return uuid.uuid4().hex +logger = logging.getLogger(__name__) def read_user_data_file(file_path): @@ -57,34 +52,33 @@ def read_user_data_file(file_path): ) ) - global all_users_base_data file_reader = open_csv_for_reading(file_path) - all_users_base_data = [data for data in csv.DictReader(file_reader)] + return [data for data in csv.DictReader(file_reader)] + + +def generator(): + for i in range(500): + yield i + i += 1 -# ever time gets called it gets the next (different) user data from user_data.csv -def get_user_base_data(n_facility_users): - global users_data_iterator - users_data_iterator += 1 - # making sure we aren't generating more users than the specefied number of total facility users - if users_data_iterator > n_facility_users: - exit() - return all_users_base_data[users_data_iterator - 1] +users_data_iterator = generator() -def generate_facility_user(facility, n_facility_users): +def generate_facility_user(facility, all_users_base_data): def get_birth_year(user_age): current_year = datetime.datetime.now().year return str(current_year - int(user_age)) - user_data = get_user_base_data(n_facility_users) + i = next(users_data_iterator) + user_data = all_users_base_data[i] user = FacilityUser( username=user_data["Username"], full_name=user_data["GivenName"] + " " + user_data["Surname"], birth_year=get_birth_year(user_data["Age"]), gender=random.choice(demographics.choices)[0], - id_number=str(users_data_iterator), + id_number=str(i), facility=facility, ) # dummy password @@ -95,8 +89,10 @@ def get_birth_year(user_age): # the following function doesn't work, it generates nothing (not throwing errors though) # and when called it stops the execution of the script! 
don't know why -def generate_superadmin(n_facility_users): - user_data = get_user_base_data(n_facility_users) +def generate_superadmin(all_users_base_data): + i = next(users_data_iterator) + user_data = all_users_base_data[i] + username = user_data["Username"] + "_superuser" full_name = "{} is the device superuser".format(username) FacilityUser.objects.create_superuser( @@ -206,7 +202,7 @@ def get_or_generate_lesson_resources(): # generate new channel/s if there are no local channels if not channels: - channels = generate_channels(n_channels=1, levels=2) + channels = generate_channels(n_channels=1, levels=2, n_children=3) channel = random.choice(channels) @@ -238,18 +234,18 @@ def get_json_data(v, q): # model_version_to_question_sources_mapper mapper = { 0: { - "exercise_id": generate_random_id(), + "exercise_id": uuid.uuid4().hex, "number_of_questions": 6, "title": "question_{}".format(q + 1), }, 1: { - "exercise_id": generate_random_id(), - "question_id": generate_random_id(), + "exercise_id": uuid.uuid4().hex, + "question_id": uuid.uuid4().hex, "title": "question_{}".format(q + 1), }, 2: { - "exercise_id": generate_random_id(), - "question_id": generate_random_id(), + "exercise_id": uuid.uuid4().hex, + "question_id": uuid.uuid4().hex, "title": "question_{}".format(q + 1), "counter_in_exercise": "", }, @@ -263,9 +259,9 @@ def get_json_data(v, q): # flake8: noqa: C901 def start_generating( n_facilities, - n_facility_users, n_facility_admins, n_facility_coaches, + n_not_assigned_users, n_classes, n_class_coaches, n_class_learners, @@ -277,13 +273,13 @@ def start_generating( n_adhoc_lesson_learners, n_adhoc_exams, n_adhoc_exam_learners, - data_path, + file_path, ): - read_user_data_file(data_path) - facilities = [] + all_users_base_data = read_user_data_file(file_path) + for f in range(n_facilities): new_facility = generate_facility( facility_name="Facility_{}".format(f + 1), device_name="testing device" @@ -294,13 +290,13 @@ def start_generating( # generating admin/s for the whole facility for _ in range(n_facility_admins): - new_admin = generate_facility_user(new_facility, n_facility_users) + new_admin = generate_facility_user(new_facility, all_users_base_data) new_facility.add_admin(new_admin) facility_coaches_and_admins.append(new_admin) # generating coach/s for the whole facility for _ in range(n_facility_coaches): - facility_coach = generate_facility_user(new_facility, n_facility_users) + facility_coach = generate_facility_user(new_facility, all_users_base_data) new_facility.add_coach(facility_coach) facility_coaches_and_admins.append(facility_coach) @@ -312,7 +308,7 @@ def start_generating( # generate and assign assignable_coache/s to the class for _ in range(n_class_coaches): assignable_coach = generate_facility_user( - new_facility, n_facility_users + new_facility, all_users_base_data ) # ASSIGNABLE_COACH with respect to the facility new_facility.add_role(assignable_coach, role_kinds.ASSIGNABLE_COACH) @@ -322,18 +318,24 @@ def start_generating( # generate and assign learner/s to the class all_class_learners = [] for _ in range(n_class_learners): - class_learner = generate_facility_user(new_facility, n_facility_users) + class_learner = generate_facility_user( + new_facility, all_users_base_data + ) all_class_learners.append(class_learner) new_class.add_member(class_learner) # 'facility_coaches_and_admins' is constant for all facility classes # but for each new_class coaches/admins are differnt that's why we construct this for every class creators_and_assigners_users = [ - 
*facility_coaches_and_admins, *new_class.get_coaches(), *new_class.get_admins(), ] + [ + creators_and_assigners_users.append(authorized_facility_user) + for authorized_facility_user in facility_coaches_and_admins + ] + # generating learner_group/s for the above class for g in range(n_groups): learner_group = generate_group( @@ -416,14 +418,8 @@ def start_generating( ) # generating left users (not assigned to any collection, just the facility) - n_not_assigned_users = n_facility_users - ( - n_facility_admins - + n_facility_coaches - + ((n_class_coaches + n_class_learners) * n_classes) - ) - for _ in range(n_not_assigned_users): - generate_facility_user(new_facility, n_facility_users) + generate_facility_user(new_facility, all_users_base_data) facilities.append(new_facility) @@ -437,8 +433,20 @@ def add_arguments(self, parser): "--mode", type=str, choices=["fixtures", "default_db"], - default="fixtures", - help="where should the data be after generation? dumped into fixtures and deleted, or saved in default db", + default="default_db", + help="data destination after generation, dumped into fixtures and deleted, or saved in default db", + ) + + parser.add_argument( + "--fixtures_path", + type=str, + ) + + parser.add_argument( + "--file_path", + type=str, + default="", + help="path to the csv file which containts users base data", ) parser.add_argument( @@ -450,11 +458,11 @@ def add_arguments(self, parser): ) parser.add_argument( - "--users", + "--not_assigned_users", type=int, - choices=range(20, 500), - default=35, - help="number of total users (learners and coaches and admins) in each facility", + choices=range(5, 20), + default=5, + help="number of facility users that aren't assigned to any collection", ) parser.add_argument( @@ -477,7 +485,7 @@ def add_arguments(self, parser): "--classes", type=int, choices=range(1, 30), - default=1, + default=2, help="number of classes to generate", ) @@ -500,8 +508,8 @@ def add_arguments(self, parser): parser.add_argument( "--class_lessons", type=int, - choices=range(1, 20), - default=1, + choices=range(5, 20), + default=5, help="total number of lessons per class", ) @@ -561,26 +569,22 @@ def add_arguments(self, parser): help="number of learners for the adhoc_exam", ) - parser.add_argument( - "--data_path", - type=str, - default="", - help="path to the csv file which containts users base data", - ) - def handle(self, *args, **options): # Generated Data destination mode = options["mode"] - # Csv data file_path - data_path = options["data_path"] + # Fixtures File destination + fixtures_path = options["fixtures_path"] + + # users_base_data file path + file_path = options["file_path"] # Facilities n_facilities = options["facilities"] - n_facility_users = options["users"] n_facility_admins = options["admins"] n_facility_coaches = options["coaches"] + n_not_assigned_users = options["not_assigned_users"] # Classrooms n_classes = options["classes"] @@ -599,15 +603,21 @@ def handle(self, *args, **options): n_adhoc_exams = options["adhoc_exams"] n_adhoc_exam_learners = options["adhoc_exam_learners"] + logger.info("\n start generating facility/s...\n") + if mode == "fixtures": + if not fixtures_path: + raise ValueError( + "\n--fixtures_path is missing : please provide a fixtures file path" + ) switch_to_memory() facilities = start_generating( n_facilities=n_facilities, - n_facility_users=n_facility_users, n_facility_admins=n_facility_admins, n_facility_coaches=n_facility_coaches, + n_not_assigned_users=n_not_assigned_users, n_classes=n_classes, 
n_class_coaches=n_class_coaches, n_class_learners=n_class_learners, @@ -619,12 +629,10 @@ def handle(self, *args, **options): n_adhoc_lesson_learners=n_adhoc_lesson_learners, n_adhoc_exams=n_adhoc_exams, n_adhoc_exam_learners=n_adhoc_exam_learners, - data_path=data_path, + file_path=file_path, ) - print( - "\n start dumping fixtures for facilities and all its related data \n" - ) + logger.info("\n dumping and creating fixtures for generated channels... \n") # dumping after generation is done call_command( @@ -633,9 +641,7 @@ def handle(self, *args, **options): "lessons", "exams", indent=4, - # for json file creation to work correctly your pwd (in terminal) have to be ../kolibri/core/auth - # we want to fix that (i.e. creating the file correctly regardless of our current pwd ), how ? - output="fixtures/all_facility_data.json", + output=fixtures_path, interactive=False, ) @@ -645,9 +651,9 @@ def handle(self, *args, **options): else: start_generating( n_facilities=n_facilities, - n_facility_users=n_facility_users, n_facility_admins=n_facility_admins, n_facility_coaches=n_facility_coaches, + n_not_assigned_users=n_not_assigned_users, n_classes=n_classes, n_class_coaches=n_class_coaches, n_class_learners=n_class_learners, @@ -659,5 +665,6 @@ def handle(self, *args, **options): n_adhoc_lesson_learners=n_adhoc_lesson_learners, n_adhoc_exams=n_adhoc_exams, n_adhoc_exam_learners=n_adhoc_exam_learners, - data_path=data_path, + file_path=file_path, ) + logger.info("\n done\n") diff --git a/kolibri/core/content/management/commands/generate_content_data.py b/kolibri/core/content/management/commands/generate_content_data.py index 3761a6bc1a0..65697513ee7 100644 --- a/kolibri/core/content/management/commands/generate_content_data.py +++ b/kolibri/core/content/management/commands/generate_content_data.py @@ -1,4 +1,6 @@ +import logging import random +import uuid from django.conf import settings from django.core.management import call_command @@ -22,11 +24,15 @@ from kolibri.core.content.models import LocalFile +logger = logging.getLogger(__name__) + + # we are using a set in case we accidentally tried deleting the same object twice generated_objects = set() tags_generated = [] +# they are not used in kolibri yet IGNORED_KINDS = ["quiz", "zim"] ALL_RESOURCES_KINDS = [ @@ -47,7 +53,7 @@ # takes much time to migrate, alternatives ? def switch_to_memory(): - print("\n initializing the testing environment in memory....\n") + logger.info("\n initializing the testing environment in memory....\n") for db in settings.DATABASES: settings.DATABASES[db] = { "ENGINE": "django.db.backends.sqlite3", @@ -60,20 +66,11 @@ def switch_to_memory(): call_command("migrate", interactive=False, database=db) -def generate_random_id(): - import uuid - - return uuid.uuid4().hex - - # for returning random choices def choices(sequence, k): return [random.choice(sequence) for _ in range(0, k)] -# puprpose: if we have a content node of certain kind what type of file (file_preset) should maps to this node ? 
-content_kind_to_file_preset = {} - # format_presets.PRESETLIST to a dictionary for convenient access format_prestets_data = {} @@ -81,17 +78,95 @@ def choices(sequence, k): if format_object.kind: format_prestets_data[format_object.id] = format_object - if format_object.kind not in content_kind_to_file_preset: - content_kind_to_file_preset[format_object.kind] = [format_object.id] - else: - content_kind_to_file_preset[format_object.kind].append(format_object.id) + +# purpose : if we have a node of certain kind what type of main_file_preset (not supplementary) should map to that node +content_kind_to_main_file_preset = { + content_kinds.VIDEO: [format_presets.VIDEO_LOW_RES, format_presets.VIDEO_HIGH_RES], + content_kinds.AUDIO: [format_presets.AUDIO], + content_kinds.EXERCISE: [format_presets.EXERCISE, format_presets.QTI_ZIP], + content_kinds.DOCUMENT: [format_presets.DOCUMENT, format_presets.EPUB], + content_kinds.HTML5: [format_presets.HTML5_ZIP], + content_kinds.SLIDESHOW: [ + format_presets.SLIDESHOW_IMAGE, + format_presets.SLIDESHOW_MANIFEST, + ], + content_kinds.H5P: [format_presets.H5P_ZIP], + content_kinds.TOPIC: [format_presets.TOPIC_THUMBNAIL], +} + +# purpose : generates thumbnail preset along with the main file preset (both map to the same node) +main_file_preset_to_thumbnail_preset = { + # just two exceptions as these file_preset are very common together + format_presets.SLIDESHOW_IMAGE: [ + format_presets.SLIDESHOW_THUMBNAIL, + format_presets.SLIDESHOW_MANIFEST, + ], + format_presets.SLIDESHOW_MANIFEST: [format_presets.SLIDESHOW_IMAGE], + format_presets.VIDEO_LOW_RES: [format_presets.VIDEO_THUMBNAIL], + format_presets.VIDEO_HIGH_RES: [format_presets.VIDEO_THUMBNAIL], + format_presets.AUDIO: [format_presets.AUDIO_THUMBNAIL], + format_presets.EXERCISE: [format_presets.EXERCISE_THUMBNAIL], + format_presets.ZIM: [format_presets.ZIM_THUMBNAIL], + format_presets.HTML5_ZIP: [format_presets.HTML5_THUMBNAIL], + format_presets.H5P_ZIP: [format_presets.H5P_THUMBNAIL], + format_presets.QTI_ZIP: [format_presets.QTI_THUMBNAIL], + format_presets.DOCUMENT: [format_presets.DOCUMENT_THUMBNAIL], + format_presets.EPUB: [format_presets.DOCUMENT_THUMBNAIL], +} + + +def generate_some_tags(): + + # dummy tag names + TAG_NAMES = [ + "Math", + "science_related", + "have_fun", + "children", + "experiment", + "bedo_tag", + "course", + "culture", + "introduction", + "whatever", + "another_tag", + "nice tag", + ] + + for tag_name in TAG_NAMES: + + tag = ContentTag.objects.create(tag_name=tag_name, id=uuid.uuid4().hex) + tags_generated.append(tag) + generated_objects.add(tag) + + +def get_or_generate_language(lang_id): + try: + return Language.objects.get(id=lang_id) + + except Language.DoesNotExist: + + # fetched languages from le_utils/resources/languagelookup.json + fetched_lang_data = languages.getlang(lang_id) + + if not fetched_lang_data: + return None + new_lang = Language.objects.create( + id=lang_id, + lang_code=fetched_lang_data.primary_code, + lang_subcode=fetched_lang_data.subcode, + lang_name=fetched_lang_data.native_name, + lang_direction=languages.getlang_direction(lang_id), + ) + + generated_objects.add(new_lang) + + return new_lang def generate_assessmentmetadata(node=None, randomize=False, is_manipulable=False): number_of_assessments = random.randint(1, 30) - assessment_item_ids = [ - str(generate_random_id()) for _ in range(number_of_assessments) - ] + assessment_item_ids = [str(uuid.uuid4().hex) for _ in range(number_of_assessments)] random_criteria = 
random.choice(mastery_criteria.MASTERYCRITERIALIST) @@ -129,7 +204,7 @@ def generate_assessmentmetadata(node=None, randomize=False, is_manipulable=False } meta_data = AssessmentMetaData.objects.create( - id=generate_random_id(), + id=uuid.uuid4().hex, contentnode=node, assessment_item_ids=assessment_item_ids, number_of_assessments=number_of_assessments, @@ -141,53 +216,18 @@ def generate_assessmentmetadata(node=None, randomize=False, is_manipulable=False return meta_data -def generate_some_tags(): - - # dummy tag names - TAG_NAMES = [ - "Math", - "science_related", - "have_fun", - "children", - "experiment", - "bedo_tag", - "course", - "culture", - "introduction", - "whatever", - "another_tag", - "nice tag", - ] - - for tag_name in TAG_NAMES: - - tag = ContentTag.objects.create(tag_name=tag_name, id=generate_random_id()) - tags_generated.append(tag) - generated_objects.add(tag) - - -def get_or_generate_language(lang_id): - try: - return Language.objects.get(id=lang_id) - - except Language.DoesNotExist: - - # fetched languages from le_utils/resources/languagelookup.json - fetched_lang_data = languages.getlang(lang_id) - - if not fetched_lang_data: - return None - new_lang = Language.objects.create( - id=lang_id, - lang_code=fetched_lang_data.primary_code, - lang_subcode=fetched_lang_data.subcode, - lang_name=fetched_lang_data.native_name, - lang_direction=languages.getlang_direction(lang_id), - ) +def generate_channel(name, root_node, channel_id): - generated_objects.add(new_lang) + channel = ChannelMetadata.objects.create( + id=channel_id, + name=name, + description="Testing channel generated by Bedo {}".format(name), + author=DEVELOPER_NAME, + min_schema_version=MIN_SCHEMA_VERSION, + root=root_node, + ) - return new_lang + return channel def generate_localfile(file_preset): @@ -220,7 +260,7 @@ def generate_localfile(file_preset): extension_to_use = random.choice(extensions_choices) new_localfile = LocalFile.objects.create( - id=generate_random_id(), + id=uuid.uuid4().hex, extension=extension_to_use, available=True, file_size=extension_to_file_size[extension_to_use], @@ -232,47 +272,45 @@ def generate_localfile(file_preset): def generate_file(contentnode): - preset_options = content_kind_to_file_preset[contentnode.kind] + main_preset = random.choice(content_kind_to_main_file_preset[contentnode.kind]) + + # generating the thumbnail_preset file (not supplementary preset) + # aka checking if it's not a prest of 'topic' node (e.g. 
topic_thumbnail) + if main_preset in main_file_preset_to_thumbnail_preset: - file_preset = random.choice(preset_options) + thumbnail_preset = random.choice( + main_file_preset_to_thumbnail_preset[main_preset] + ) - local_file = generate_localfile(file_preset) + File.objects.create( + id=uuid.uuid4().hex, + local_file=generate_localfile(thumbnail_preset), + contentnode=contentnode, + lang=contentnode.lang, + supplementary=format_prestets_data[thumbnail_preset].supplementary, + thumbnail=format_prestets_data[thumbnail_preset].thumbnail, + preset=thumbnail_preset, + ) - file = File.objects.create( - id=generate_random_id(), - local_file=local_file, + # generating the main_preset file (most probably a renderable resource) + File.objects.create( + id=uuid.uuid4().hex, + local_file=generate_localfile(main_preset), contentnode=contentnode, lang=contentnode.lang, - supplementary=format_prestets_data[file_preset].supplementary, - thumbnail=format_prestets_data[file_preset].thumbnail, - preset=file_preset, + supplementary=format_prestets_data[main_preset].supplementary, + thumbnail=format_prestets_data[main_preset].thumbnail, + preset=main_preset, ) - return file - - -def generate_channel(name, root_node, channel_id): - - channel = ChannelMetadata.objects.create( - id=channel_id, - name=name, - description="this is the testing channel {name}, generated for testing purposes", - author=DEVELOPER_NAME, - min_schema_version=MIN_SCHEMA_VERSION, - root=root_node, - ) - - return channel - def generate_one_contentNode( - kind=None, - title="", - description=None, - channel_id=None, + kind, + title, + channel_id, + description="", parent=None, available=True, - is_root=False, lang_id="en", node_tags=[], ): @@ -297,10 +335,10 @@ def generate_one_contentNode( } new_node = ContentNode.objects.create( - id=generate_random_id(), - parent=None if is_root else parent, + id=uuid.uuid4().hex, + parent=parent, channel_id=channel_id, - content_id=generate_random_id(), + content_id=uuid.uuid4().hex, kind=kind, title=title, lang=get_or_generate_language(lang_id), @@ -330,24 +368,21 @@ def generate_one_contentNode( return new_node -def generate_topic( - title="", channel_id=None, parent=None, is_root=False, description="" -): +def generate_topic(parent=None, title="topic node", channel_id=None, description=""): return generate_one_contentNode( kind=content_kinds.TOPIC, title=title, channel_id=channel_id, parent=parent, - is_root=is_root, description=description, ) def generate_leaf( - title="random leaf node", + parent, + title="leaf node", resource_kind=None, channel_id=None, - parent=None, description="", ): return generate_one_contentNode( @@ -360,87 +395,86 @@ def generate_leaf( ) -def recurse_and_generate( - parent, - channel_id, - levels, - kind_iterator, - num_children=RESOURCES_COUNT, -): - children = [] - for i in range(num_children): - current_resource_kind = ALL_RESOURCES_KINDS[kind_iterator % RESOURCES_COUNT] - if levels == 0: - current_node = generate_leaf( - title="{}_{}".format(current_resource_kind, i + 1), - resource_kind=current_resource_kind, - channel_id=channel_id, - parent=parent, - ) +def recurse_and_generate(parent, channel_id, levels, kind, n_children): - else: - topic_title = "level {}, topic_{}".format(levels, i + 1) - # last parent nodes (parent of the actual resources) - if levels == 1: - topic_title = "level {}, {}_resources".format( - levels, current_resource_kind + if levels == 0: + children_nodes = [] + for child_number in range(n_children): + children_nodes.append( + generate_leaf( + 
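# A quick shape check (a sketch, not part of the patch): in this version of
# the recursion each resource kind contributes a single chain of topics of
# depth `levels`, ending in `n_children` leaves. With levels=2 and
# n_children=3, each kind yields:
#   level-2 topic -> level-1 topic -> 3 leaf resources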
title="{} content_{}".format(kind, child_number + 1), + resource_kind=kind, + channel_id=channel_id, + parent=parent, ) + ) + return children_nodes - current_node = generate_topic( - title=topic_title, - channel_id=channel_id, + else: + current_node = generate_topic( + title="Level {} {}_resources".format(levels, kind), + channel_id=channel_id, + parent=parent, + description="", + ) + + current_node.children.add( + *recurse_and_generate( parent=parent, - description="", + channel_id=channel_id, + levels=levels - 1, + kind=kind, + n_children=n_children, ) + ) - current_node.children.add( - *recurse_and_generate( - parent=current_node, - channel_id=channel_id, - levels=levels - 1, - kind_iterator=kind_iterator, - ) - ) + return [current_node] - children.append(current_node) - kind_iterator += 1 - return children +def generate_tree_nodes(root_node, channel_id, levels, n_children): + for each_kind in ALL_RESOURCES_KINDS: + root_node.children.add( + *recurse_and_generate( + parent=root_node, + channel_id=channel_id, + levels=levels, + kind=each_kind, + n_children=n_children, + ) + ) + + +def generate_channels(n_channels, levels, n_children): -def generate_channels(n_channels, levels): generated_channels = [] - print("\n generating channel/s and its related data...\n") + logger.info("\n generating channel/s and its related data...\n") generate_some_tags() for c in range(n_channels): - kind_iterator = 0 - channel_id = generate_random_id() + channel_id = uuid.uuid4().hex root_node = generate_topic( - title="root node (main folder)", - is_root=True, + title="Root Node of Channel_{}".format(c + 1), channel_id=channel_id, - description="first and main contentnode in this testing tree", + description="First Node of channel tree", ) channel = generate_channel( - name="Testing channel _{} of {} levels".format(c + 1, levels), + name="Channel_{} of {} levels".format(c + 1, levels), root_node=root_node, channel_id=channel_id, ) # generating tree nodes starting from the root node - root_node.children.add( - *recurse_and_generate( - parent=root_node, - channel_id=channel_id, - levels=levels, - kind_iterator=kind_iterator, - ) + generate_tree_nodes( + root_node=root_node, + channel_id=channel_id, + levels=levels, + n_children=n_children, ) channel_contents = ContentNode.objects.filter( @@ -468,12 +502,17 @@ def add_arguments(self, parser): "--mode", type=str, choices=["fixtures", "default_db"], - default="fixtures", - help="where should the data be after generation? 
dumped into fixtures and deleted or saved in default db", + default="default_db", + help="data destination after generation: dumped into fixtures and deleted, or saved in the default db", ) parser.add_argument( - "--n_channels", + "--fixtures_path", + type=str, + ) + + parser.add_argument( + "--channels", type=int, choices=range(1, 10), default=1, @@ -488,29 +527,48 @@ def add_arguments(self, parser): help="number of tree levels", ) + parser.add_argument( + "--children", + type=int, + choices=range(1, 10), + default=3, + help="number of content resources children", + ) + def handle(self, *args, **options): + generating_mode = options["mode"] - n_channels = options["n_channels"] + n_channels = options["channels"] required_levels = options["levels"] + n_children = options["children"] + + # Fixtures File destination + fixtures_path = options["fixtures_path"] + + logger.info("\n start generating channel/s...\n") if generating_mode == "fixtures": + if not fixtures_path: + raise ValueError( + "\n--fixtures_path is missing : please provide a fixtures file path" + ) + switch_to_memory() channels_generated = generate_channels( - n_channels=n_channels, levels=required_levels + n_channels=n_channels, levels=required_levels, n_children=n_children ) - # dumping after generation is done - print("\n start dumping fixtures for content app \n") + logger.info( + "\n dumping and creating fixtures for facilities and its data... \n" ) call_command( "dumpdata", "content", indent=4, - # for json file creation to work correctly your pwd (in terminal) have to be ../kolibri/core/content - # we want to fix that (i.e. creating the file correctly regardless of our current terminal path), how ? - output="fixtures/all_content_data.json", + output=fixtures_path, interactive=False, ) @@ -526,4 +584,8 @@ def handle(self, *args, **options): ] else: - generate_channels(n_channels=n_channels, levels=required_levels) + + generate_channels( + n_channels=n_channels, levels=required_levels, n_children=n_children + ) + logger.info("\n done\n") From e562e82a71ebe379b7a97586062b875959da6ed6 Mon Sep 17 00:00:00 2001 From: bedo Date: Tue, 30 Aug 2022 22:12:47 +0200 Subject: [PATCH 04/10] 4th version of fixtures/default_db generation for content, kolibriauth, lessons and exams apps --- .../management/commands/generate_auth_data.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/kolibri/core/auth/management/commands/generate_auth_data.py b/kolibri/core/auth/management/commands/generate_auth_data.py index 4e90584a9d4..3ac55f3c2d4 100644 --- a/kolibri/core/auth/management/commands/generate_auth_data.py +++ b/kolibri/core/auth/management/commands/generate_auth_data.py @@ -324,11 +324,18 @@ def start_generating( all_class_learners.append(class_learner) new_class.add_member(class_learner) - # 'facility_coaches_and_admins' is constant for all facility classes - # but for each new_class coaches/admins are differnt that's why we construct this for every class - creators_and_assigners_users = [ - *new_class.get_coaches(), - *new_class.get_admins(), + # 'facility_coaches_and_admins' is the same across all facility classes + # but for each new_class the class_coaches/class_admins are different, which is why we construct this for every class + creators_and_assigners_users = [] + + [ + creators_and_assigners_users.append(class_coach) + for class_coach in new_class.get_coaches() + ] + + [ + creators_and_assigners_users.append(class_admin) + for class_admin in new_class.get_admins() ] [ From
41f192ccd6a4692485fd8a76072baeb7ab822962 Mon Sep 17 00:00:00 2001 From: bedo Date: Mon, 5 Sep 2022 01:42:33 +0200 Subject: [PATCH 05/10] default to random if no resource_kind is specified, else generate a tree of only that resource_kind --- .../management/commands/generate_auth_data.py | 14 +- .../commands/generate_content_data.py | 136 +++++++++--------- 2 files changed, 77 insertions(+), 73 deletions(-) diff --git a/kolibri/core/auth/management/commands/generate_auth_data.py b/kolibri/core/auth/management/commands/generate_auth_data.py index 3ac55f3c2d4..4625a2a21fd 100644 --- a/kolibri/core/auth/management/commands/generate_auth_data.py +++ b/kolibri/core/auth/management/commands/generate_auth_data.py @@ -281,6 +281,7 @@ def start_generating( all_users_base_data = read_user_data_file(file_path) for f in range(n_facilities): + new_facility = generate_facility( facility_name="Facility_{}".format(f + 1), device_name="testing device" ) @@ -449,6 +450,8 @@ def add_arguments(self, parser): type=str, ) + parser.add_argument("--seed", type=int, default=1, help="Random seed") + parser.add_argument( "--file_path", type=str, @@ -578,6 +581,8 @@ def add_arguments(self, parser): def handle(self, *args, **options): + seed_n = options["seed"] + # Generated Data destination mode = options["mode"] @@ -610,13 +615,16 @@ def handle(self, *args, **options): n_adhoc_exams = options["adhoc_exams"] n_adhoc_exam_learners = options["adhoc_exam_learners"] - logger.info("\n start generating facility/s...\n") + # Set the random seed so that all operations will be randomized predictably + random.seed(seed_n) + + logger.info("\nstart generating facility/s...\n") if mode == "fixtures": if not fixtures_path: raise ValueError( - "\n--fixtures_path is missing : please provide a fixtures file path" + "\n--fixtures_path is missing : please provide a fixtures file path\n" ) switch_to_memory() @@ -639,7 +647,7 @@ def handle(self, *args, **options): file_path=file_path, ) - logger.info("\n dumping and creating fixtures for generated channels... \n") + logger.info("\ndumping and creating fixtures for generated facilities... 
\n") # dumping after generation is done call_command( diff --git a/kolibri/core/content/management/commands/generate_content_data.py b/kolibri/core/content/management/commands/generate_content_data.py index 65697513ee7..9c3b4240c8a 100644 --- a/kolibri/core/content/management/commands/generate_content_data.py +++ b/kolibri/core/content/management/commands/generate_content_data.py @@ -26,13 +26,12 @@ logger = logging.getLogger(__name__) - # we are using a set in case we accidentally tried deleting the same object twice generated_objects = set() tags_generated = [] -# they are not used in kolibri yet +# not used in kolibri yet IGNORED_KINDS = ["quiz", "zim"] ALL_RESOURCES_KINDS = [ @@ -41,7 +40,6 @@ if kind.id not in IGNORED_KINDS and kind.id != "topic" ] -RESOURCES_COUNT = len(ALL_RESOURCES_KINDS) LICENSE_NAME = "testing license" LICENSE_NAME_DESCRIPTION = ( "ABC organization authorizes kolibri to use this these resources" @@ -221,7 +219,7 @@ def generate_channel(name, root_node, channel_id): channel = ChannelMetadata.objects.create( id=channel_id, name=name, - description="Testing channel generated by Bedo {}".format(name), + description="Testing channel generated by Bedo", author=DEVELOPER_NAME, min_schema_version=MIN_SCHEMA_VERSION, root=root_node, @@ -380,14 +378,13 @@ def generate_topic(parent=None, title="topic node", channel_id=None, description def generate_leaf( parent, - title="leaf node", - resource_kind=None, - channel_id=None, + resource_kind, + channel_id, description="", ): return generate_one_contentNode( kind=resource_kind, - title=title, + title="{} resource".format(resource_kind), channel_id=channel_id, parent=parent, description=description, @@ -395,61 +392,48 @@ def generate_leaf( ) -def recurse_and_generate(parent, channel_id, levels, kind, n_children): - - if levels == 0: - children_nodes = [] - for child_number in range(n_children): - children_nodes.append( - generate_leaf( - title="{} content_{}".format(kind, child_number + 1), - resource_kind=kind, +def recurse_and_generate(channel_id, parent, levels, n_children, resources_kind): + children = [] + for i in range(n_children): + if levels == 0: + node = generate_leaf( + parent=parent, + channel_id=channel_id, + resource_kind=resources_kind + if resources_kind + else random.choice(ALL_RESOURCES_KINDS), + ) + else: + node = generate_topic( + parent=parent, + channel_id=channel_id, + title="Level_{} Topic_{}".format(levels, i + 1), + ) + node.children.add( + *recurse_and_generate( channel_id=channel_id, parent=parent, + levels=levels - 1, + n_children=n_children, + resources_kind=resources_kind, ) ) - return children_nodes + children.append(node) + return children - else: - current_node = generate_topic( - title="Level {} {}_resources".format(levels, kind), - channel_id=channel_id, - parent=parent, - description="", - ) - current_node.children.add( - *recurse_and_generate( - parent=parent, - channel_id=channel_id, - levels=levels - 1, - kind=kind, - n_children=n_children, - ) - ) - - return [current_node] +def generate_channels(n_channels, levels, n_children, resources_kind=None): + generated_channels = [] -def generate_tree_nodes(root_node, channel_id, levels, n_children): + if resources_kind: - for each_kind in ALL_RESOURCES_KINDS: - root_node.children.add( - *recurse_and_generate( - parent=root_node, - channel_id=channel_id, - levels=levels, - kind=each_kind, - n_children=n_children, - ) + logger.info( + "\n generating Channel/s of {} resources ...\n".format(resources_kind) ) + else: - -def 
generate_channels(n_channels, levels, n_children): +def generate_channels(n_channels, levels, n_children, resources_kind=None): generated_channels = [] - logger.info("\n generating channel/s and its related data...\n") + if resources_kind: + + logger.info( + "\n generating Channel/s of {} resources ...\n".format(resources_kind) + ) + else: + + logger.info("\n generating Channel/s of random resources kinds...\n") generate_some_tags() @@ -464,17 +448,19 @@ def generate_channels(n_channels, levels, n_children): ) channel = generate_channel( - name="Channel_{} of {} levels".format(c + 1, levels), + name="Testing Channel_{}".format(c + 1), root_node=root_node, channel_id=channel_id, ) - # generating tree nodes starting from the root node - generate_tree_nodes( - root_node=root_node, - channel_id=channel_id, - levels=levels, - n_children=n_children, + root_node.children.add( + *recurse_and_generate( + channel_id=channel_id, + parent=root_node, + levels=levels, + n_children=n_children, + resources_kind=resources_kind, + ) ) channel_contents = ContentNode.objects.filter( @@ -511,6 +497,8 @@ def add_arguments(self, parser): type=str, ) + parser.add_argument("--seed", type=int, default=1, help="Random seed") + parser.add_argument( "--channels", type=int, @@ -532,23 +520,26 @@ def add_arguments(self, parser): type=int, choices=range(1, 10), default=3, - help="number of content resources children", + help="number of children per node", ) + parser.add_argument("--resources_kind", type=str, choices=ALL_RESOURCES_KINDS) + def handle(self, *args, **options): + seed_n = options["seed"] generating_mode = options["mode"] + fixtures_path = options["fixtures_path"] + n_channels = options["channels"] required_levels = options["levels"] n_children = options["children"] + resources_kind = options["resources_kind"] - # Fixtures File destination - fixtures_path = options["fixtures_path"] - - logger.info("\n start generating channel/s...\n") + # Set the random seed so that all operations will be randomized predictably + random.seed(seed_n) if generating_mode == "fixtures": - if not fixtures_path: raise ValueError( "\n--fixtures_path is missing : please provide a fixtures file path" ) @@ switch_to_memory() channels_generated = generate_channels( - n_channels=n_channels, levels=required_levels, n_children=n_children + n_channels=n_channels, + levels=required_levels, + n_children=n_children, + resources_kind=resources_kind, ) logger.info( - "\n dumping and creating fixtures for facilities and its data... \n" + "\ndumping and creating fixtures for channels and their data... 
\n" ) call_command( @@ -584,8 +578,10 @@ def handle(self, *args, **options): ] else: - generate_channels( - n_channels=n_channels, levels=required_levels, n_children=n_children + n_channels=n_channels, + levels=required_levels, + n_children=n_children, + resources_kind=resources_kind, ) - logger.info("\n done\n") + logger.info("\n done \n") From 10399cdfab9992b0a56fc4236b95dd9f182a5721 Mon Sep 17 00:00:00 2001 From: bedo Date: Mon, 5 Sep 2022 23:13:18 +0200 Subject: [PATCH 06/10] avoidance of hardcoding strings --- .../commands/generate_content_data.py | 62 ++++++++++--------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/kolibri/core/content/management/commands/generate_content_data.py b/kolibri/core/content/management/commands/generate_content_data.py index 9c3b4240c8a..081fafe30a4 100644 --- a/kolibri/core/content/management/commands/generate_content_data.py +++ b/kolibri/core/content/management/commands/generate_content_data.py @@ -7,6 +7,7 @@ from django.core.management.base import BaseCommand from django.db import connections from le_utils.constants import content_kinds +from le_utils.constants import file_formats from le_utils.constants import format_presets from le_utils.constants import languages from le_utils.constants import mastery_criteria @@ -94,7 +95,7 @@ def choices(sequence, k): # purpose : generates thumbnail preset along with the main file preset (both map to the same node) main_file_preset_to_thumbnail_preset = { - # just two exceptions as these file_preset are very common together + # two exceptions as these file_preset are very common together format_presets.SLIDESHOW_IMAGE: [ format_presets.SLIDESHOW_THUMBNAIL, format_presets.SLIDESHOW_MANIFEST, @@ -230,27 +231,26 @@ def generate_channel(name, root_node, channel_id): def generate_localfile(file_preset): - # this was calculated by taking the average of file_size of localfiles of each extension in QA channel - # so it has to be manully written here as this information doesn't exist, it was calculcated by me, why? 
- # well instead of just generating random numbers i wanted the file_size values to be more relevant to their corresponding extension + # precalculated by taking the average file_size of actual existing localfiles + # instead of just random generated sizes, these are more relvant to the corresponding extension extension_to_file_size = { - "mp4": 16293436.885714285, - "webm": None, - "vtt": 3227.507692307692, - "pdf": 6655360.057142857, - "epub": 13291472.210526315, - "mp3": 2102685.625, - "jpg": 20291943.133333333, - "jpeg": 30457141.25, - "png": 2833124.8260869565, - "gif": None, - "json": 3529.0, - "svg": None, - "graphie": None, - "perseus": 357012.67441860464, - "h5p": 10699889.2, - "zim": None, - "zip": 5285446.041666667, + file_formats.MP4: 5933249, + file_formats.WEBM: None, + file_formats.VTT: 1242, + file_formats.PDF: 6655360, + file_formats.EPUB: 13291472, + file_formats.MP3: 2102685, + file_formats.JPG: 5604683, + file_formats.JPEG: 30433803, + file_formats.PNG: 609113, + file_formats.GIF: None, + file_formats.JSON: 3529, + file_formats.SVG: None, + file_formats.GRAPHIE: None, + file_formats.PERSEUS: 131841, + file_formats.H5P: 10699889, + file_formats.ZIM: None, + file_formats.HTML5: 1315774, } extensions_choices = format_prestets_data[file_preset].allowed_formats @@ -314,22 +314,26 @@ def generate_one_contentNode( ): kind_to_learninactivity = { - "topic": "", - "slideshow": "", - "document": "{},{}".format( + content_kinds.TOPIC: "", + content_kinds.SLIDESHOW: "", + content_kinds.DOCUMENT: "{},{}".format( learning_activities.READ, learning_activities.REFLECT ), - "video": "{},{}".format(learning_activities.WATCH, learning_activities.REFLECT), - "html5": "{},{}".format( + content_kinds.VIDEO: "{},{}".format( + learning_activities.WATCH, learning_activities.REFLECT + ), + content_kinds.HTML5: "{},{}".format( learning_activities.EXPLORE, learning_activities.REFLECT ), - "audio": "{},{}".format( + content_kinds.AUDIO: "{},{}".format( learning_activities.LISTEN, learning_activities.REFLECT ), - "exercise": "{},{}".format( + content_kinds.EXERCISE: "{},{}".format( learning_activities.PRACTICE, learning_activities.REFLECT ), - "h5p": "{}.{}".format(learning_activities.EXPLORE, learning_activities.REFLECT), + content_kinds.H5P: "{}.{}".format( + learning_activities.EXPLORE, learning_activities.REFLECT + ), } new_node = ContentNode.objects.create( From 58f4bffd55bc2d259b93599dd5b29b640d90c06b Mon Sep 17 00:00:00 2001 From: bedo Date: Tue, 6 Sep 2022 00:14:11 +0200 Subject: [PATCH 07/10] adding forgotten updates --- .../commands/generate_content_data.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/kolibri/core/content/management/commands/generate_content_data.py b/kolibri/core/content/management/commands/generate_content_data.py index 081fafe30a4..f4303fe01c2 100644 --- a/kolibri/core/content/management/commands/generate_content_data.py +++ b/kolibri/core/content/management/commands/generate_content_data.py @@ -163,7 +163,7 @@ def get_or_generate_language(lang_id): return new_lang -def generate_assessmentmetadata(node=None, randomize=False, is_manipulable=False): +def generate_assessmentmetadata(node): number_of_assessments = random.randint(1, 30) assessment_item_ids = [str(uuid.uuid4().hex) for _ in range(number_of_assessments)] @@ -208,8 +208,8 @@ def generate_assessmentmetadata(node=None, randomize=False, is_manipulable=False assessment_item_ids=assessment_item_ids, number_of_assessments=number_of_assessments, 
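# (Reading aid, a sketch only; the full `mapper` is defined earlier in this
# file and its exact contents are elided here.) A drawn criterion such as
# "m_of_n" ends up stored as a dict of the shape
#   {"type": "m_of_n", "m": 5, "n": 5}
# which matches the MasteryLog.mastery_criterion shape used later in this
# patch series.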
mastery_model=mapper[random_criteria], - randomize=randomize, - is_manipulable=is_manipulable, + randomize=random.choice([True, False]), + is_manipulable=random.choice([True, False]), ) generated_objects.add(meta_data) return meta_data @@ -430,14 +430,11 @@ def generate_channels(n_channels, levels, n_children, resources_kind=None): generated_channels = [] - if resources_kind: - - logger.info( - "\n generating Channel/s of {} resources ...\n".format(resources_kind) + logger.info( + "\n generating Channel/s of {} resources ...\n".format( + resources_kind if resources_kind else "random" ) - else: - - logger.info("\n generating Channel/s of random resources kinds...\n") + ) generate_some_tags() From 1f9ddc077a7b60001c0fca97020b73e752970cc1 Mon Sep 17 00:00:00 2001 From: bedo Date: Sun, 11 Sep 2022 11:00:01 +0200 Subject: [PATCH 08/10] minimum number of assessments, updated some parameters --- .../core/auth/management/commands/generate_auth_data.py | 6 ++++-- .../content/management/commands/generate_content_data.py | 7 ++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/kolibri/core/auth/management/commands/generate_auth_data.py b/kolibri/core/auth/management/commands/generate_auth_data.py index 4625a2a21fd..97e5b5b233f 100644 --- a/kolibri/core/auth/management/commands/generate_auth_data.py +++ b/kolibri/core/auth/management/commands/generate_auth_data.py @@ -9,6 +9,7 @@ from django.core.management.base import BaseCommand from le_utils.constants import content_kinds +from kolibri.core.auth.apps import KolibriAuthConfig from kolibri.core.auth.constants import demographics from kolibri.core.auth.constants import facility_presets from kolibri.core.auth.constants import role_kinds @@ -33,6 +34,7 @@ from kolibri.core.utils.csv import open_csv_for_reading from kolibri.utils.time_utils import local_now + logger = logging.getLogger(__name__) @@ -202,7 +204,7 @@ def get_or_generate_lesson_resources(): # generate new channel/s if there are no local channels if not channels: - channels = generate_channels(n_channels=1, levels=2, n_children=3) + channels = generate_channels(n_channels=2) channel = random.choice(channels) @@ -652,7 +654,7 @@ def handle(self, *args, **options): # dumping after generation is done call_command( "dumpdata", - "kolibriauth", + KolibriAuthConfig.label, "lessons", "exams", indent=4, diff --git a/kolibri/core/content/management/commands/generate_content_data.py b/kolibri/core/content/management/commands/generate_content_data.py index f4303fe01c2..71ac9a3d794 100644 --- a/kolibri/core/content/management/commands/generate_content_data.py +++ b/kolibri/core/content/management/commands/generate_content_data.py @@ -16,6 +16,7 @@ from le_utils.constants.labels.needs import NEEDSLIST from le_utils.constants.labels.subjects import SUBJECTSLIST +from kolibri.core.content.apps import KolibriContentConfig from kolibri.core.content.models import AssessmentMetaData from kolibri.core.content.models import ChannelMetadata from kolibri.core.content.models import ContentNode @@ -164,7 +165,7 @@ def get_or_generate_language(lang_id): def generate_assessmentmetadata(node): - number_of_assessments = random.randint(1, 30) + number_of_assessments = random.randint(10, 35) assessment_item_ids = [str(uuid.uuid4().hex) for _ in range(number_of_assessments)] random_criteria = random.choice(mastery_criteria.MASTERYCRITERIALIST) @@ -426,7 +427,7 @@ def recurse_and_generate(channel_id, parent, levels, n_children, resources_kind) return children -def generate_channels(n_channels, levels, 
n_children, resources_kind=None): +def generate_channels(n_channels=1, levels=2, n_children=3, resources_kind=None): generated_channels = [] @@ -561,7 +562,7 @@ def handle(self, *args, **options): call_command( "dumpdata", - "content", + KolibriContentConfig.label, indent=4, output=fixtures_path, interactive=False, From 8b82fe7bbf3fa1386c9685b68e8a9bbd6d55bddc Mon Sep 17 00:00:00 2001 From: bedo Date: Mon, 12 Sep 2022 14:32:18 +0200 Subject: [PATCH 09/10] Logs generation v1 --- .../commands/generate_interactions.py | 583 ++++++++++++++++++ 1 file changed, 583 insertions(+) create mode 100644 kolibri/core/logger/management/commands/generate_interactions.py diff --git a/kolibri/core/logger/management/commands/generate_interactions.py b/kolibri/core/logger/management/commands/generate_interactions.py new file mode 100644 index 00000000000..d62dd23974b --- /dev/null +++ b/kolibri/core/logger/management/commands/generate_interactions.py @@ -0,0 +1,583 @@ +import datetime +import logging +import os +import random +import uuid + +import pytz +from django.core.management import call_command +from django.core.management.base import BaseCommand +from django.utils import timezone +from le_utils.constants import content_kinds + +from kolibri.core.auth.management.commands.generate_auth_data import generate_facility +from kolibri.core.auth.management.commands.generate_auth_data import ( + generate_facility_user, +) +from kolibri.core.auth.management.commands.generate_auth_data import read_user_data_file +from kolibri.core.auth.models import Facility +from kolibri.core.auth.models import FacilityUser +from kolibri.core.content.management.commands.generate_content_data import ( + generate_channels, +) +from kolibri.core.content.management.commands.generate_content_data import ( + switch_to_memory, +) +from kolibri.core.content.models import ContentNode +from kolibri.core.logger.apps import KolibriLoggerConfig +from kolibri.core.logger.models import AttemptLog +from kolibri.core.logger.models import ContentSessionLog +from kolibri.core.logger.models import ContentSummaryLog +from kolibri.core.logger.models import MasteryLog +from kolibri.core.logger.models import UserSessionLog + + +logger = logging.getLogger(__name__) + + +def get_or_generate_resources(n_channels): + nodes = ContentNode.objects.exclude(kind=content_kinds.TOPIC) + if not nodes: + generate_channels(n_channels=n_channels) + return ContentNode.objects.exclude(kind=content_kinds.TOPIC) + + return nodes + + +def get_or_generate_new_facility(): + facilities = Facility.objects.all() + if not facilities: + return generate_facility("a nice Facility for interactions", "Testing device") + return facilities[0] + + +def get_or_generate_facility_users(n_users, facility): + existing_users = FacilityUser.objects.all() + + if existing_users.count() < n_users: + new_users = [] + file_path = os.path.abspath( + os.path.join(os.path.dirname(__file__), "user_data.csv") + ) + users_data = read_user_data_file(file_path) + for _ in range(n_users): + new_users.append(generate_facility_user(facility, users_data)) + return new_users + + return existing_users[:n_users] + + +# provide default values here in case this function is called directly, without the Command +def get_random_date_within_range(start=None, end=None): + if not start: + start = datetime.datetime(2022, 1, 1, 0, 0, 0, 0, pytz.UTC) + if not end: + end = timezone.now() + return start + (end - start) * random.random() + + +# get completion_timestamp for ContentSummaryLog inferred from ContentSessionLog +def 
get_completion_timestamp(content_session_logs): + cumulative_progress = 0 + for content_log in content_session_logs: + cumulative_progress = min(cumulative_progress + content_log.progress, 1.0) + if cumulative_progress == 1.0: + return content_log.end_timestamp + + return None + + +# get random number of content_sessions with random progress +def get_n_content_sessions(is_exercise_resource): + random_progress_set = [random.random() for _ in range(random.randint(1, 5))] + acc_progress = sum(random_progress_set) + + if not is_exercise_resource or acc_progress < 1.0: + return random_progress_set + + # make those random progresses sum to 1 if it's an exercise and its cumulative progress is >= 1 + return [progress / acc_progress for progress in random_progress_set] + + +def generate_attempt_log( + user, + start_timestamp, + end_timestamp, + attempt_duration, + item, + hinted, + is_correct, + completed, + interaction_history, + sessionlog, + masterylog, +): + AttemptLog.objects.create( + user=user, + item=item, + start_timestamp=start_timestamp, + end_timestamp=end_timestamp, + time_spent=attempt_duration, + completion_timestamp="to do", + hinted=hinted, + complete=completed, + correct=is_correct, + answer={}, + simple_answer="", + interaction_history=interaction_history, + error=random.choice([True, False]), + masterylog=masterylog, + sessionlog=sessionlog, + ) + + +def generate_attempts_logs_for_exercise_sessions( + user, contentsessionlogs, masterylog, assessments +): + + exercise_completed = masterylog.complete + + for sessionlog in contentsessionlogs: + + # since mastery_criterion contains (just for now) 5 questions + n_questions = 5 + + attempt_duration_sec = sessionlog.time_spent / n_questions + + # first question attempt starts within a second after the content session starts + start_timestamp = sessionlog.start_timestamp + datetime.timedelta( + milliseconds=random.randint(100, 1000) + ) + + for _ in range(n_questions): + correct_answer = random.choice([True, False]) + + # if the answer is wrong there is a low probability (weights 5:1 against) that there was a hint + # else (correct_answer=True) 'not correct_answer' evaluates to False, so there is no hint + hinted = random.choices([False, not correct_answer], weights=[5, 1], k=1)[0] + + if hinted: + first_interaction = {"correct": False, "type": "hint"} + else: + first_interaction = {"correct": correct_answer, "type": "answer"} + + end_timestamp = min( + start_timestamp + datetime.timedelta(seconds=attempt_duration_sec), + sessionlog.end_timestamp, + ) + + interaction_history = first_interaction + + AttemptLog.objects.create( + user=user, + item=random.choice(assessments), + start_timestamp=min(start_timestamp, sessionlog.end_timestamp), + end_timestamp=end_timestamp, + time_spent=attempt_duration_sec, + completion_timestamp=end_timestamp if exercise_completed else None, + hinted=hinted, + complete=exercise_completed, + correct=correct_answer, + answer={}, + simple_answer="", + interaction_history=interaction_history, + masterylog=masterylog, + sessionlog=sessionlog, + ) + start_timestamp += datetime.timedelta(seconds=attempt_duration_sec) + + +def generate_content_session_log( + content_node, + progress, + start_timestamp, + end_timestamp, + session_active_time, + user=None, + visitor_id=None, +): + return ContentSessionLog.objects.create( + user=user, + visitor_id=visitor_id, + channel_id=content_node.channel_id, + content_id=content_node.content_id, + start_timestamp=start_timestamp, + end_timestamp=end_timestamp, + time_spent=session_active_time, + 
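# Worked example (illustrative, based on get_n_content_sessions above): for a
# non-exercise node a draw like [0.7, 0.8] is kept as-is, and the summary
# progress is only capped at 1.0 later via min(); for an exercise a draw like
# [0.3, 0.9] (sum 1.2) is rescaled to [0.25, 0.75] so the per-session
# progresses sum to exactly 1.0.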
progress=progress, + kind=content_node.kind, + ) + + +def generate_user_content_session_logs( + user, content_node, kolibri_user_session, user_session_duration +): + content_session_logs = [] + + content_sessions = get_n_content_sessions( + is_exercise_resource=content_node.kind == content_kinds.EXERCISE + ) + + content_session_startime = kolibri_user_session.start_timestamp + + # divide user_session duration by the number of content_sessions to : + # 1- get equal duration time for each content_session + # 2- make sure each content_session duration is within the range of user_session duration in kolibri + # better ideas ? + diff_between_each_content_session = user_session_duration / len(content_sessions) + + for each_progress in content_sessions: + + # time of interaction with a resource up to 12 mins + content_session_min = random.random() * 12 + + # randomly decide whether idle_time is zero or a portion of session_duration_mins + idle_time = random.choice([0, random.random() * content_session_min]) + + end_timestamp = content_session_startime + datetime.timedelta( + seconds=content_session_min * 60 + ) + + content_session = generate_content_session_log( + user=user, + content_node=content_node, + progress=each_progress, + # basically making sure any timestamp doesn't exceed the max_time + # ( which is the last_interaction_timestamp of the current user session) + start_timestamp=min( + content_session_startime, + kolibri_user_session.last_interaction_timestamp, + ), + end_timestamp=min( + end_timestamp, kolibri_user_session.last_interaction_timestamp + ), + session_active_time=(content_session_min - idle_time) * 60, + ) + content_session_startime += datetime.timedelta( + seconds=diff_between_each_content_session * 60 + ) + + content_session_logs.append(content_session) + + return content_session_logs + + +def generate_visitor_content_session_logs( + visitor_id, content_node, content_session_startime +): + + for each_progress in get_n_content_sessions( + is_exercise_resource=content_node.kind == content_kinds.EXERCISE + ): + + # time of interaction with a resource up to 10 mins + content_session_min = random.random() * 10 + + # randomly decide whether idle_time is zero or part of content_session_time_min + idle_time = random.choice([0, random.random() * content_session_min]) + + generate_content_session_log( + visitor_id=visitor_id, + content_node=content_node, + progress=each_progress, + start_timestamp=content_session_startime, + end_timestamp=content_session_startime + + datetime.timedelta(seconds=content_session_min * 60), + session_active_time=(content_session_min - idle_time) * 60, + ) + # since these are content sessions for a visitor so we are not constrained by a user_session_log time + # next session start time, will be after (0:2 days, random part of a day) + content_session_startime += datetime.timedelta( + days=random.randint(0, 2), seconds=(1440 * random.random()) * 60 + ) + + +def generate_content_summary_log(content_session_logs): + return ContentSummaryLog.objects.create( + user=content_session_logs[0].user, + kind=content_session_logs[0].kind, + content_id=content_session_logs[0].content_id, + channel_id=content_session_logs[0].channel_id, + start_timestamp=min( + each_session.start_timestamp for each_session in content_session_logs + ), + end_timestamp=max( + each_session.end_timestamp for each_session in content_session_logs + ), + completion_timestamp=get_completion_timestamp(content_session_logs), + time_spent=sum( + each_session.time_spent for each_session in 
content_session_logs + ), + progress=min( + sum(each_session.progress for each_session in content_session_logs), 1.0 + ), + ) + + +def generate_mastery_log(summary_log): + + return MasteryLog.objects.create( + user=summary_log.user, + summarylog=summary_log, + mastery_criterion={"m": 5, "n": 5, "type": "m_of_n"}, + mastery_level=1 if summary_log.progress >= 1.0 else -1, + start_timestamp=summary_log.start_timestamp, + end_timestamp=summary_log.end_timestamp, + completion_timestamp=summary_log.completion_timestamp, + complete=summary_log.progress >= 1.0, + time_spent=summary_log.time_spent, + ) + + +def generate_user_session_log( + user, session_start_time, session_duration_min, time_guaranteed=False +): + + last_interaction_time = session_start_time + datetime.timedelta( + seconds=session_duration_min * 60 + ) + + if not time_guaranteed and random.randint(0, 1) == 1: + last_interaction_time = session_start_time + + return UserSessionLog.objects.create( + user=user, + start_timestamp=session_start_time, + last_interaction_timestamp=last_interaction_time, + ) + + +# flake8: noqa: C901 +def generate_interactions( + n_users, + n_visitors, + start_time_range, + end_time_range, + user_session_duration, + affected_channels, +): + + # get or generate facility + facility = get_or_generate_new_facility() + + # get or generate channel/s whose resources will be interacted with by the users/visitors + content_nodes = get_or_generate_resources(n_channels=affected_channels) + + # Generate content_sessions_logs for kolibri visitors (anonymous users) + for _ in range(n_visitors): + generate_visitor_content_session_logs( + visitor_id=uuid.uuid4().hex, + content_node=random.choice(content_nodes), + content_session_startime=get_random_date_within_range( + start=start_time_range, end=end_time_range + ), + ) + + # get or generate facility users who will have interactions + users = get_or_generate_facility_users(n_users=n_users, facility=facility) + + # Generate authenticated facility users and their relevant interactions/logs + for user in users: + + random_content_node = random.choice(content_nodes) + + # generate user session in kolibri + # as a result all the following interactions start_timestamp will be >= session start_timestamp + kolibri_user_session = generate_user_session_log( + user=user, + session_start_time=get_random_date_within_range( + start=start_time_range, end=end_time_range + ), + session_duration_min=user_session_duration, + ) + + # generate interactions if only there is an available session time for current usr + if ( + kolibri_user_session.start_timestamp + == kolibri_user_session.last_interaction_timestamp + ): + continue + + # generate random number of user interactions with specific resource/content + user_content_session_logs = generate_user_content_session_logs( + content_node=random_content_node, + kolibri_user_session=kolibri_user_session, + user_session_duration=user_session_duration, + user=user, + ) + + # generate summary of user interactions for the above generated content logs + user_content_summary_logs = generate_content_summary_log( + user_content_session_logs + ) + + if random_content_node.kind == content_kinds.EXERCISE: + + mastery_log = generate_mastery_log(user_content_summary_logs) + + assessments = random.choice( + random_content_node.assessmentmetadata.all() + ).assessment_item_ids + + if assessments: + # generate attemptlogs (will be multiple for each content session log) for that exercise + generate_attempts_logs_for_exercise_sessions( + user=user, + 
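# Note on the time arithmetic used around here (a reading aid, not new logic):
# 1440 is the number of minutes in a day, so an expression like
#   datetime.timedelta(seconds=(1440 * random.random()) * 60)
# offsets a timestamp by a uniformly random fraction of one day.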
contentsessionlogs=user_content_session_logs, + masterylog=mastery_log, + assessments=assessments, + ) + + # if completion is achieved then randomly decide to generate more content_session_logs within another user session in kolibri + # as users often review resources after completion as well + if user_content_summary_logs.completion_timestamp and random.randint(0, 1) == 1: + second_user_session = generate_user_session_log( + user=user, + session_start_time=kolibri_user_session.last_interaction_timestamp + + datetime.timedelta( + days=random.randint(1, 2), seconds=(1440 * random.random()) * 60 + ), + session_duration_min=user_session_duration, + time_guaranteed=True, + ) + generate_user_content_session_logs( + content_node=random_content_node, + kolibri_user_session=second_user_session, + user_session_duration=user_session_duration, + user=user, + ) + + +class Command(BaseCommand): + def add_arguments(self, parser): + + parser.add_argument( + "--mode", + type=str, + choices=["fixtures", "default_db"], + default="default_db", + help="data destination after generation: dumped into fixtures and deleted, or saved in the default db", + ) + + parser.add_argument("--fixtures_path", type=str, default=None) + + parser.add_argument("--seed", type=int, default=1, help="Random seed") + + parser.add_argument( + "--users", type=int, default=20, help="number of authenticated users" + ) + + parser.add_argument( + "--visitors", type=int, default=3, help="number of anonymous users" + ) + + parser.add_argument("--affected_channels", type=int, default=1) + + parser.add_argument( + "--start_time", + type=lambda d: datetime.datetime.strptime(d, "%Y,%m,%d"), + default=None, + help="minimum sessions start_timestamp (default = 2022,1,1)", + ) + + parser.add_argument( + "--end_time", + type=lambda d: datetime.datetime.strptime(d, "%Y,%m,%d"), + default=None, + help="maximum sessions start_timestamp (default = current time)", + ) + + parser.add_argument( + "--session", + type=int, + default=15, + choices=range(15, 120), + help="user session duration in kolibri, in the range (15 : 120) mins", + ) + + parser.add_argument( + "--n_sessions", + type=int, + default=1, + choices=range(1, 10), + help="number of user sessions in kolibri", + ) + + parser.add_argument( + "--n_resources", + type=int, + default=1, + choices=range(1, 5), + help="number of resources each user has interacted with", + ) + + def handle(self, *args, **options): + mode = options["mode"] + seed_n = options["seed"] + fixtures_path = options["fixtures_path"] + + affected_channels = options["affected_channels"] + n_users = options["users"] + n_visitors = options["visitors"] + start_time_range = options["start_time"] + end_time_range = options["end_time"] + user_session_duration = options["session"] + + # also good to have (currently unused) + # n_sessions = options["n_sessions"] + # n_resources = options["n_resources"] + + # Set the random seed so that all operations will be randomized predictably + random.seed(seed_n) + + if mode == "fixtures": + + if not fixtures_path: + raise ValueError( + "\n fixtures_path is missing : please provide a fixtures file path\n" + ) + + switch_to_memory() + + logger.info( + "\n generating sessions/logs and interactions for {} authenticated users and {} visitors...\n".format( + n_users, n_visitors + ) + ) + + generate_interactions( + n_users=n_users, + n_visitors=n_visitors, + start_time_range=start_time_range, + end_time_range=end_time_range, + user_session_duration=user_session_duration, + affected_channels=affected_channels, + ) + + 
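# A hypothetical invocation (a sketch only; the flags are the ones defined in
# add_arguments above, and --start_time/--end_time use the "%Y,%m,%d" format):
#
#   kolibri manage generate_interactions --users 20 --visitors 5 \
#       --start_time 2022,1,1 --end_time 2022,9,1 --session 30 --seed 7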
logger.info("\n creating fixtures... \n") + + # dumping after generation is done + call_command( + "dumpdata", + KolibriLoggerConfig.label, + indent=4, + output=fixtures_path, + interactive=False, + ) + + else: + logger.info( + "\n generating sessions/logs and interactions for {} users and {} visitors...\n".format( + n_users, n_visitors + ) + ) + + generate_interactions( + n_users=n_users, + n_visitors=n_visitors, + start_time_range=start_time_range, + end_time_range=end_time_range, + user_session_duration=user_session_duration, + affected_channels=affected_channels, + ) + logger.info("\n done\n") From 9fe264c5d70c2be22eed0712a790c6127ded2e4e Mon Sep 17 00:00:00 2001 From: bedo Date: Fri, 23 Sep 2022 17:39:18 +0200 Subject: [PATCH 10/10] create device settings if not exist --- .../management/commands/generate_auth_data.py | 18 ++++++++++++------ .../commands/generate_interactions.py | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/kolibri/core/auth/management/commands/generate_auth_data.py b/kolibri/core/auth/management/commands/generate_auth_data.py index 97e5b5b233f..da4c4878955 100644 --- a/kolibri/core/auth/management/commands/generate_auth_data.py +++ b/kolibri/core/auth/management/commands/generate_auth_data.py @@ -27,6 +27,8 @@ ) from kolibri.core.content.models import ChannelMetadata from kolibri.core.content.models import ContentNode +from kolibri.core.device.utils import device_provisioned +from kolibri.core.device.utils import provision_device from kolibri.core.exams.models import Exam from kolibri.core.exams.models import ExamAssignment from kolibri.core.lessons.models import Lesson @@ -34,7 +36,6 @@ from kolibri.core.utils.csv import open_csv_for_reading from kolibri.utils.time_utils import local_now - logger = logging.getLogger(__name__) @@ -433,6 +434,11 @@ def start_generating( facilities.append(new_facility) + # if device has not been provisioned, set it up + + if not device_provisioned(): + provision_device() + return facilities @@ -521,7 +527,7 @@ def add_arguments(self, parser): "--class_lessons", type=int, choices=range(5, 20), - default=5, + default=3, help="total number of lessons per class", ) @@ -529,7 +535,7 @@ def add_arguments(self, parser): "--class_exams", type=int, choices=range(1, 20), - default=0, + default=3, help="total number of lessons per class", ) @@ -537,7 +543,7 @@ def add_arguments(self, parser): "--groups", type=int, choices=range(1, 20), - default=0, + default=1, help="number of learnergroups to generate per class", ) @@ -561,7 +567,7 @@ def add_arguments(self, parser): "--adhoc_lesson_learners", type=int, choices=range(1, 20), - default=5, + default=0, help="number of learners for the adhoc_lesson", ) @@ -577,7 +583,7 @@ def add_arguments(self, parser): "--adhoc_exam_learners", type=int, choices=range(1, 20), - default=5, + default=0, help="number of learners for the adhoc_exam", ) diff --git a/kolibri/core/logger/management/commands/generate_interactions.py b/kolibri/core/logger/management/commands/generate_interactions.py index d62dd23974b..841f05734df 100644 --- a/kolibri/core/logger/management/commands/generate_interactions.py +++ b/kolibri/core/logger/management/commands/generate_interactions.py @@ -468,7 +468,7 @@ def add_arguments(self, parser): ) parser.add_argument( - "--visitors", type=int, default=3, help="number of anonymous users" + "--visitors", type=int, default=5, help="number of anonymous users" ) parser.add_argument("--affected_channels", type=int, default=1)
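# ---
# A combined usage sketch for the three generators in this series
# (hypothetical; it assumes Kolibri's "kolibri manage" entry point and the
# defaults defined in the patches above):
#
#   kolibri manage generate_content_data --channels 2 --levels 3 --seed 7
#   kolibri manage generate_auth_data --classes 2 --seed 7
#   kolibri manage generate_interactions --users 20 --visitors 5 --seed 7
#
# With --mode fixtures, each command additionally requires --fixtures_path.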