diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000000..1dad209ed5 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1 @@ +August 10 2024: The language modes (that keep track of programming language names, corresponding extensions, and spoken forms) have been migrated to a CSV file. If you have made changes directly to the language_modes.py file to customize that information, you can migrate your changes by letting the new community code run to generate the CSV file and then editing it. Items within the same column can be separated by semicolons if you want to have multiple spoken forms or file extensions associated with the same language. diff --git a/core/edit_text_file.py b/core/edit_text_file.py index f246baf763..e5cb92ca42 100644 --- a/core/edit_text_file.py +++ b/core/edit_text_file.py @@ -19,6 +19,7 @@ "alphabet": "alphabet.csv", "directories": "directories.csv", "file extensions": "file_extensions.csv", + "language modes": "language_modes.csv", "search engines": "search_engines.csv", "system paths": "system_paths.csv", "unix utilities": "unix_utilities.csv", diff --git a/core/modes/language_modes.py b/core/modes/language_modes.py index 936c147b12..75fc00a513 100644 --- a/core/modes/language_modes.py +++ b/core/modes/language_modes.py @@ -1,65 +1,13 @@ -from talon import Context, Module, actions - -# Maps language mode names to the extensions that activate them. Only put things -# here which have a supported language mode; that's why there are so many -# commented out entries. TODO: make this a csv file? 
-language_extensions = { - # 'assembly': 'asm s', - # 'bash': 'bashbook sh', - "batch": "bat", - "c": "c h", - # 'cmake': 'cmake', - # "cplusplus": "cpp hpp", - "csharp": "cs", - "css": "css", - # 'elisp': 'el', - # 'elm': 'elm', - "gdb": "gdb", - "go": "go", - "java": "java", - "javascript": "js", - "javascriptreact": "jsx", - # "json": "json", - "kotlin": "kt", - "lua": "lua", - "markdown": "md", - # 'perl': 'pl', - "php": "php", - # 'powershell': 'ps1', - "python": "py", - "protobuf": "proto", - "r": "r", - # 'racket': 'rkt', - "ruby": "rb", - "rust": "rs", - "scala": "scala", - "scss": "scss", - # 'snippets': 'snippets', - "sql": "sql", - "stata": "do ado", - "talon": "talon", - "talonlist": "talon-list", - "terraform": "tf", - "tex": "tex", - "typescript": "ts", - "typescriptreact": "tsx", - # 'vba': 'vba', - "vimscript": "vim vimrc", - # html doesn't actually have a language mode, but we do have snippets. - "html": "html", -} - -# Override speakable forms for language modes. If not present, a language mode's -# name is used directly. -language_name_overrides = { - "cplusplus": ["see plus plus"], - "csharp": ["see sharp"], - "css": ["c s s"], - "gdb": ["g d b"], - "go": ["go", "go lang", "go language"], - "r": ["are language"], - "tex": ["tech", "lay tech", "latex"], -} +from typing import Callable + +from talon import Context, Module, actions, resource + +from ..user_settings import ( + compute_csv_path, + compute_spoken_form_to_key_dictionary, + create_three_columns_csv_from_default_if_nonexistent, + get_key_value_pairs_and_spoken_forms_from_three_column_csv, +) mod = Module() @@ -74,20 +22,165 @@ mod.tag("code_language_forced", "This tag is active when a language mode is forced") mod.list("language_mode", desc="Name of a programming language mode.") -ctx.lists["self.language_mode"] = { - name: language - for language in language_extensions - for name in language_name_overrides.get(language, [language]) -} - # Maps extension to languages. 
def make_sure_settings_file_exists():
    """Create the language modes CSV from the built-in defaults if it is missing.

    Each default row is ``[language, extensions]`` or
    ``[language, extensions, spoken_forms]``. Extensions and spoken forms are
    always tuples, even when they hold a single item, so that the CSV writer
    never iterates over the characters of a bare string.
    """
    # Maps language mode names to the extensions that activate them and to the
    # language spoken forms. Only put things here which have a supported
    # language mode or snippets; that's why there are so many commented out
    # entries.
    default_csv_contents = [
        # ["assembly", ("asm", "s")],
        # ["bash", ("bashbook", "sh")],
        ["batch", ("bat",)],
        ["c", ("c", "h")],
        # ["cmake", ("cmake",)],
        # ["cplusplus", ("cpp", "hpp"), ("see plus plus",)],
        ["csharp", ("cs",), ("see sharp",)],
        ["css", ("css",), ("c s s",)],
        # ["elisp", ("el",)],
        # ["elm", ("elm",)],
        ["gdb", ("gdb",), ("g d b",)],
        ["go", ("go",), ("go", "go lang", "go language")],
        ["java", ("java",)],
        ["javascript", ("js",)],
        ["javascriptreact", ("jsx",)],
        # ["json", ("json",)],
        ["kotlin", ("kt",)],
        ["lua", ("lua",)],
        ["markdown", ("md",)],
        # ["perl", ("pl",)],
        ["php", ("php",)],
        # ["powershell", ("ps1",)],
        ["python", ("py",)],
        ["protobuf", ("proto",)],
        # One-element tuple: the trailing comma matters.
        ["r", ("r",), ("are language",)],
        # ["racket", ("rkt",)],
        ["ruby", ("rb",)],
        ["rust", ("rs",)],
        ["scala", ("scala",)],
        ["scss", ("scss",)],
        # ["snippets", ("snippets",)],
        ["sql", ("sql",)],
        ["stata", ("do", "ado")],
        ["talon", ("talon",)],
        ["talonlist", ("talon-list",)],
        ["terraform", ("tf",)],
        ["tex", ("tex",), ("tech", "lay tech", "latex")],
        ["typescript", ("ts",)],
        ["typescriptreact", ("tsx",)],
        # ["vba", ("vba",)],
        ["vimscript", ("vim", "vimrc")],
        # html doesn't actually have a language mode, but we do have snippets.
        ["html", ("html",)],
    ]
    create_three_columns_csv_from_default_if_nonexistent(
        SETTINGS_FILENAME, LANGUAGE_HEADERS, default_csv_contents
    )
def get_key_value_pairs_and_spoken_forms_from_three_column_csv(
    filename: str, headers: tuple[str, str, str]
):
    """Parse a settings CSV laid out as name,values,spoken_forms.

    The file is located with ``compute_csv_path(filename)``, read into rows,
    and handed to the row converter together with the expected *headers*.

    Returns the converter's result: a pair of dictionaries, the first mapping
    each name to its list of values and the second mapping names to their
    lists of spoken forms.
    """
    csv_rows = _obtain_rows_from_csv(compute_csv_path(filename))
    return _convert_rows_from_file_with_headers_to_key_value_pairs_and_spoken_forms(
        csv_rows, filename, headers
    )
def _create_three_columns_csv_from_default(path, headers, default):
    """Write *headers* and one packed row per entry of *default* to *path*.

    The file is created (or truncated) and written as UTF-8.
    """
    with open(path, "w", encoding="utf-8", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(headers)
        writer.writerows(
            _compute_row_for_three_column_csv(entry) for entry in default
        )


def _compute_row_for_three_column_csv(input_tuple):
    """Flatten ``(name, values[, spoken_forms])`` into a CSV row.

    The values and the spoken forms are each packed into one
    semicolon-separated column. The third column is omitted when there are no
    spoken forms. Entries that are not two or three items long raise
    ``ValueError`` from the unpacking.
    """
    if len(input_tuple) == 3:
        name, values, spoken_forms = input_tuple
    else:
        name, values = input_tuple
        spoken_forms = None
    row = [name, _compute_values_packed_into_column(values)]
    if spoken_forms:
        row.append(_compute_values_packed_into_column(spoken_forms))
    return row


def _compute_values_packed_into_column(values):
    """Join *values* into a single semicolon-delimited, CSV-quoted string."""
    # A bare string (e.g. ``("el")`` written without the trailing comma) would
    # otherwise be iterated character by character by ``writerow``.
    if isinstance(values, str):
        values = (values,)
    output = io.StringIO()
    csv.writer(output, delimiter=";").writerow(values)
    # strip() removes the line terminator appended by the csv writer.
    return output.getvalue().strip()


def _obtain_rows_from_csv(path):
    """Return every row of the CSV file at *path* as a list of lists.

    The file is decoded as UTF-8 to match how the defaults are written;
    ``utf-8-sig`` additionally drops a leading byte-order mark so that a file
    re-saved by an editor that adds one still has a clean first header.
    """
    with open(path, "r", encoding="utf-8-sig", newline="") as f:
        return list(csv.reader(f))
def _get_intermediate_values_from_column(values_text):
    """Split a semicolon-packed column into its stripped, non-empty values.

    Empty items, such as those produced by a trailing or doubled semicolon,
    are dropped so they cannot become an empty extension or an empty spoken
    form.
    """
    reader = csv.reader([values_text], delimiter=";")
    # The default keeps this safe should the reader produce no row at all.
    stripped_values = (value.strip() for value in next(reader, []))
    return [value for value in stripped_values if value]


def _complain_if_invalid_headers_found_in_file(rows, expected_headers, filename):
    """Print a warning when the first row differs from *expected_headers*."""
    actual_headers = rows[0]
    if actual_headers != list(expected_headers):
        print(
            f'"{filename}": Malformed headers - {actual_headers}.'
            + f" Should be {list(expected_headers)}. Ignoring row."
        )


def compute_csv_path(filename: str):
    """Return the location of *filename* inside ``SETTINGS_DIR``.

    Raises ``AssertionError`` when *filename* does not end in ``.csv``.
    """
    path = SETTINGS_DIR / filename
    # NOTE(review): assert is skipped under ``python -O``; kept so the raised
    # exception type stays the same for existing callers.
    assert filename.endswith(".csv")
    return path


def compute_spoken_form_to_key_dictionary(key_value_pairs, spoken_forms):
    """Map every spoken form to the key it stands for.

    A key with no spoken forms (missing from *spoken_forms*, or present with
    an empty list) is spoken as the key itself. *spoken_forms* may be empty
    or ``None``.
    """
    if not spoken_forms:
        return {key: key for key in key_value_pairs}
    return {
        name: key
        for key in key_value_pairs
        for name in (spoken_forms.get(key) or [key])
    }