diff --git a/ChangeLog b/ChangeLog index 8df5e19674..e4ff641ed2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,7 @@ Version 5.8.0 (XXX 2020) * English dict: support for archaic/poetic abbreviations * English dict: introduce OH link for vocatives/invocations. * English dict: improved parsing of imperatives. + * Add !!word/ link-parser command for displaying extended word dict info. Version 5.7.0 (13 Sept 2019) * Minor efficiency improvements to the SQL-backed dictionary. diff --git a/data/command-help-en.txt b/data/command-help-en.txt index 11e11aed0c..2338123643 100644 --- a/data/command-help-en.txt +++ b/data/command-help-en.txt @@ -305,3 +305,61 @@ Examples: !dialect=irish !dialect=irish,headline !dialect=instructions,bad-spelling:2.2 + +[!] +This command is for debugging the dictionary or the library. +It gets as an argument a word, and optionally a regex and flags. +It splits the given word to tokens according to the current language, +and for each token it prints its matching dictionary words along with its +expression or disjunct list. The word may include a wildcard * to find +multiple matches, and a subscript can be used to limit the matches to this +subscript only. + +Examples ("test.n" is an example word): + +Show the expression: + !!test.n + +Show the expression using macro tags: + !!test.n/m +Each macro tag is followed by its content on the same line. +The other lines are direct expression components (before and after a macro). + +Show also low-level memory details of the expression: + !!test.n/l + +Show the disjuncts (without duplicates): + !!test.n// + +Show selected disjuncts according to the supplied regex: + !!test.n/ Wd .*<-->.*@M\b/ + +Display all the words that start with "test": + !!test* + +Display all the words that start with "test" and have subscript ".q": + !!test*.q + +A sample output of a disjunct-list display: + Token "test.n" matches: + test.n 8509 disjuncts + + Token "test.n" disjuncts: + test.n 4273/4501 disjuncts + + ... 
+ test.n: [4070]1.500= Wd @hCO Ds**c <--> Ss*s @M NM + ... + +In this sample output: + 8509 Number of disjuncts in the dictionary expression. + 4501 Number of disjuncts after applying cost-max. + 4273 Number of disjuncts w/o duplicates. + 4070 Disjunct ordinal number. + 1.500 Disjunct cost. + = A separator to enable regex anchoring. + <--> A separator of the "-" (LHS) and "+" (RHS) connector lists. + +These variables affect the output: +Disjuncts, expressions: !dialect +Disjuncts only: !cost-max diff --git a/link-grammar/dict-common/dict-common.c b/link-grammar/dict-common/dict-common.c index 1af1af61ae..01ed00a42a 100644 --- a/link-grammar/dict-common/dict-common.c +++ b/link-grammar/dict-common/dict-common.c @@ -303,6 +303,8 @@ void dictionary_delete(Dictionary dict) free_dialect(dict->dialect); free(dict->dialect_tag.name); string_id_delete(dict->dialect_tag.set); + if (dict->macro_tag != NULL) free(dict->macro_tag->name); + free(dict->macro_tag); free((void *)dict->suppress_warning); free_regexs(dict->regex_root); diff --git a/link-grammar/dict-common/dict-common.h b/link-grammar/dict-common/dict-common.h index 89c3a5722b..dac23cf4df 100644 --- a/link-grammar/dict-common/dict-common.h +++ b/link-grammar/dict-common/dict-common.h @@ -94,6 +94,7 @@ struct Dictionary_s Dialect *dialect; /* "4.0.dialect" info */ expression_tag dialect_tag; /* Expression dialect tag info */ + expression_tag *macro_tag; /* Macro tags for expression debug */ /* Affixes are used during the tokenization stage. */ Dictionary affix_table; diff --git a/link-grammar/dict-common/dict-structures.h b/link-grammar/dict-common/dict-structures.h index 25970ef683..8074589528 100644 --- a/link-grammar/dict-common/dict-structures.h +++ b/link-grammar/dict-common/dict-structures.h @@ -38,7 +38,7 @@ static const int cost_max_dec_places = 3; static const double cost_epsilon = 1E-5; #define EXPTAG_SZ 100 /* Initial size for the Exptag array.
*/ -typedef enum { Exptag_none=0, Exptag_dialect } Exptag_type; +typedef enum { Exptag_none=0, Exptag_dialect, Exptag_macro } Exptag_type; /** * The Exp structure defined below comprises the expression trees that are diff --git a/link-grammar/dict-common/dict-utils.h b/link-grammar/dict-common/dict-utils.h index e94bd1eb0a..43a9828091 100644 --- a/link-grammar/dict-common/dict-utils.h +++ b/link-grammar/dict-common/dict-utils.h @@ -15,12 +15,17 @@ #define _DICT_UTILS_H_ #include "dict-common.h" +#include "utilities.h" // dyn_str /* Exp utilities ... */ void free_Exp(Exp *); int size_of_expression(Exp *); Exp * copy_Exp(Exp *, Pool_desc *, Parse_Options); bool is_exp_like_empty_word(Dictionary dict, Exp *); +void prt_exp_all(dyn_str *,Exp *, int, Dictionary); +#ifdef DEBUG +void prt_exp(Exp *, int); +#endif /* DEBUG */ /* X_node utilities ... */ X_node * catenate_X_nodes(X_node *, X_node *); diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index 0832207493..1ecd6916a6 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -18,9 +18,13 @@ #include "dict-defines.h" #include "dict-file/word-file.h" #include "dict-file/read-dict.h" +#include "dict-utils.h" // copy_Exp +#include "disjunct-utils.h" #include "print/print.h" #include "print/print-util.h" #include "regex-morph.h" +#include "tokenize/tokenize.h" // word_add +#include "utilities.h" // GNU_UNUSED /* ======================================================================== */ @@ -42,166 +46,176 @@ const char *cost_stringify(double cost) return buf; } -static void print_expression_tag(Dictionary dict, dyn_str *e, const Exp *n) -{ - if ((NULL == dict) || (Exptag_none == n->tag_type)) return; +#define MACRO_INDENTATION 4 - dyn_strcat(e, "]"); - dyn_strcat(e, dict->dialect_tag.name[n->tag_id]); +static void print_expression_tag_start(Dictionary dict, dyn_str *e, const Exp *n, + int *indent) +{ + switch (n->tag_type) + { + case 
Exptag_none: + break; + case Exptag_dialect: + dyn_strcat(e, "["); + break; + case Exptag_macro: + if (*indent < 0) break; + dyn_strcat(e, "\n"); + for(int i = 0; i < *indent; i++) dyn_strcat(e, " "); + dyn_strcat(e, dict->macro_tag->name[n->tag_id]); + dyn_strcat(e, ": "); + *(indent) += MACRO_INDENTATION; + break; + default: + for(int i = 0; i < *indent; i++) dyn_strcat(e, " "); + append_string(e, "Unknown tag type %d: ", (int)n->tag_type); + *(indent) += MACRO_INDENTATION; + } } -/** - * print the expression, in infix-style - */ -static dyn_str *print_expression_parens(Dictionary dict, dyn_str *e, - const Exp * n, int need_parens) +static void print_expression_tag_end(Dictionary dict, dyn_str *e, const Exp *n, + int *indent) { - Exp *operand; - int i, icost; - double dcost; + if (NULL == dict) return; - if (n == NULL) + switch (n->tag_type) { - dyn_strcat(e, "NULL expression"); - return e; + case Exptag_none: + break; + case Exptag_dialect: + dyn_strcat(e, "]"); + dyn_strcat(e, dict->dialect_tag.name[n->tag_id]); + break; + case Exptag_macro: + if (*indent < 0) break; + dyn_strcat(e, "\n"); + for(int i = 0; i < *indent - MACRO_INDENTATION/2; i++) + dyn_strcat(e, " "); + (*indent) -= MACRO_INDENTATION; + break; + default: + /* Handled in print_expression_tag_start(). 
*/ + ; } +} - if (n->cost < -cost_epsilon) +static void get_expression_cost(const Exp *e, unsigned int *icost, double *dcost) +{ + if (e->cost < -cost_epsilon) { - icost = 1; - dcost = n->cost; + *icost = 1; + *dcost = e->cost; } - else if (cost_eq(n->cost, 0.0)) + else if (cost_eq(e->cost, 0.0)) { /* avoid [X+]-0.00 */ - icost = 0; - dcost = 0; + *icost = 0; + *dcost = 0; } else { - icost = (int) (n->cost); - dcost = n->cost - icost; - if (dcost > cost_epsilon) + *icost = (int) (e->cost); + *dcost = e->cost - *icost; + if (*dcost > cost_epsilon) { - dcost = n->cost; - icost = 1; + *dcost = e->cost; + *icost = 1; } else { - if (icost > 4) + if (*icost > 4) { /* don't print too many [] levels */ - dcost = icost; - icost = 1; + *dcost = *icost; + *icost = 1; } else { - dcost = 0; + *dcost = 0; } } } +} - if (Exptag_none != n->tag_type) dyn_strcat(e, "["); - - /* print the connector only */ - if (n->type == CONNECTOR_type) - { - for (i=0; imulti) dyn_strcat(e, "@"); - append_string(e, "%s%c", n->condesc?n->condesc->string:"(null)", n->dir); - for (i=0; ioperand_first; - if (operand == NULL) - { - for (i=0; ioperand_first; - for (i=0; itype == OR_type) && (o != NULL) && (o->type == AND_type) && + (NULL == o->operand_first) && (o->cost == 0) && + (o->tag_type = Exptag_none); +} - /* look for optional, and print only that */ - if ((n->type == OR_type) && operand && (operand->type == AND_type) && - operand->cost == 0 && (NULL == operand->operand_first)) - { - dyn_strcat(e, "{"); - if (NULL == operand->operand_next) dyn_strcat(e, "error-no-next"); - else print_expression_parens(dict, e, operand->operand_next, false); - dyn_strcat(e, "}"); - for (i=0; ioperand_first; - /* get a funny "and optional" when it's a named expression thing. */ - if ((n->type == AND_type) && (operand->operand_next == NULL)) + if (n->type == CONNECTOR_type) { - for (i=0; imulti) dyn_strcat(e, "@"); + dyn_strcat(e, n->condesc ? 
n->condesc->string : "error-null-connector"); + dyn_strcat(e, (const char []){ n->dir, '\0' }); } - - if (n->type == AND_type) dyn_strcat(e, " & "); - if (n->type == OR_type) dyn_strcat(e, " or "); - - /* print right side of binary expr */ - operand = operand->operand_next; - if (operand == NULL) + else if (is_expression_optional(n)) { - if (n->type == OR_type) - dyn_strcat(e, "error-no-next"); + dyn_strcat(e, "{"); + if (NULL == opd->operand_next) + dyn_strcat(e, "error-no-next"); /* unary OR */ else - dyn_strcat(e, "()"); + print_expression_parens(dict, e, opd->operand_next, false, indent); + dyn_strcat(e, "}"); } else { - do + if (n->type == AND_type) + opr = " & "; + else if (n->type == OR_type) + opr = " or "; + else + append_string(e, "error-exp-type-%d", (int)n->type); + + if (opr != NULL) { - if (operand->type == n->type) - { - print_expression_parens(dict, e, operand, false); - } - else - { - print_expression_parens(dict, e, operand, true); - } + /* (opd == NULL) means this is a null expression. 
*/ + if (((icost == 0) && need_parens) || (opd == NULL)) dyn_strcat(e, "("); - operand = operand->operand_next; - if (operand != NULL) + if ((opd == NULL) && (n->type == OR_type)) + dyn_strcat(e, "error-zeroary-or"); + + for (Exp *l = opd; l != NULL; l = l->operand_next) { - if (n->type == AND_type) dyn_strcat(e, " & "); - if (n->type == OR_type) dyn_strcat(e, " or "); + print_expression_parens(dict, e, l, true, indent); + + if (l->operand_next != NULL) + dyn_strcat(e, opr); + else if ((n->type == OR_type) && (l == n->operand_first)) + dyn_strcat(e, " or error-no-next"); /* unary OR */ } - } while (operand != NULL); - } - for (i=0; itype, e->dir, e->multi, cost_stringify(e->cost)); + if (e->type != CONNECTOR_type) + { + for (e = e->operand_next; e != NULL; e = e->operand_next) prt_exp(e, i+2); + } + else + { + for(int j =0; jcondesc->string); + } +} +#endif + +static const char *stringify_Exp_type(Exp_type type) +{ + static TLS char unknown_type[32] = ""; + const char *type_str; + + if (type > 0 && type <= 3) + { + type_str = ((const char *[]) {"OR", "AND", "CONNECTOR"}) [type-1]; + } + else + { + snprintf(unknown_type, sizeof(unknown_type), "unknown_type-%d", + (int)(type)); + type_str = unknown_type; + } + + return type_str; +} + +static const char *stringify_Exp_tag(Exp *e, Dictionary dict) +{ + static TLS char tag_info[64]; + + switch (e->tag_type) + { + case Exptag_none: + return ""; + case Exptag_dialect: + if (dict == NULL) + { + snprintf(tag_info, sizeof(tag_info), " dialect_tag=%u", + e->tag_id); + } + else + { + snprintf(tag_info, sizeof(tag_info), " dialect_tag=%s", + dict->dialect_tag.name[e->tag_id]); + } + break; + case Exptag_macro: + if (dict == NULL) + { + snprintf(tag_info, sizeof(tag_info), " macro_tag"); + } + else + { + snprintf(tag_info, sizeof(tag_info), " macro_tag=%s", + dict->macro_tag->name[e->tag_id]); + } + break; + default: + snprintf(tag_info, sizeof(tag_info), " unknown_tag_type-%d", + (int)(e->tag_type)); + ; + } + + return 
tag_info; +} + +static bool is_ASAN_uninitialized(uintptr_t a) +{ + static const uintptr_t asan_uninitialized = (uintptr_t)0xbebebebebebebebeULL; + + return (a == asan_uninitialized); +} + +void prt_exp_all(dyn_str *s, Exp *e, int i, Dictionary dict) +{ + if (is_ASAN_uninitialized((uintptr_t)e)) + { + dyn_strcat(s, "e=UNINITIALIZED\n"); + return; + } + if (e == NULL) return; + + for(int j =0; jtype)); + + if (is_ASAN_uninitialized((uintptr_t)e->operand_first)) + dyn_strcat(s, " (UNINITIALIZED operand_first)"); + if (is_ASAN_uninitialized((uintptr_t)e->operand_next)) + dyn_strcat(s, " (UNINITIALIZED operand_next)"); + + if (e->type != CONNECTOR_type) + { + int operand_count = 0; + for (Exp *opd = e->operand_first; NULL != opd; opd = opd->operand_next) + { + operand_count++; + if (is_ASAN_uninitialized((uintptr_t)opd->operand_next)) + { + append_string(s, " (operand %d: UNINITIALIZED operand_next)\n", + operand_count); + return; + } + } + append_string(s, " (%d operand%s) cost=%s%s\n", operand_count, + operand_count == 1 ? "" : "s", cost_stringify(e->cost), + stringify_Exp_tag(e, dict)); + for (Exp *opd = e->operand_first; NULL != opd; opd = opd->operand_next) + { + prt_exp_all(s, opd, i+2, dict); + } + } + else + { + append_string(s, " %s%s%c cost=%s%s\n", + e->multi ? "@" : "", + e->condesc ? e->condesc->string : "(condesc=(null))", + e->dir, cost_stringify(e->cost), + stringify_Exp_tag(e, dict)); + } } +GNUC_UNUSED static void prt_exp_mem(Exp *e) +{ + dyn_str *s = dyn_str_new(); + + prt_exp_all(s, e, 0, NULL); + char *e_str = dyn_str_take(s); + printf("%s", e_str); + free(e_str); +} + +/* ================ Display word expressions / disjuncts ================= */ -/* ======================================================================= */ +const char do_display_expr; /* a sentinel to request an expression display */ /** * Display the information about the given word. @@ -236,9 +398,9 @@ const char *lg_exp_stringify(const Exp *n) * In this case no split is done. 
*/ static char *display_word_split(Dictionary dict, - const char * word, - Parse_Options opts, - char * (*display)(Dictionary, const char *)) + const char * word, Parse_Options opts, + char * (*display)(Dictionary, const char *, const void **), + const char **arg) { Sentence sent; @@ -256,13 +418,58 @@ static char *display_word_split(Dictionary dict, int spell_option = parse_options_get_spell_guess(opts); parse_options_set_spell_guess(opts, 0); sent = sentence_create(pword, dict); - if (0 == sentence_split(sent, opts)) + + if (pword[0] == '<' && pword[strlen(pword)-1] == '>') { - /* List the splits */ - print_sentence_word_alternatives(s, sent, false, NULL, NULL); - /* List the disjuncts information. */ - print_sentence_word_alternatives(s, sent, false, display, NULL); + /* Dictionary macro - don't split. */ + if (!word0_set(sent, pword, opts)) goto display_word_split_error; } + else + { + if (0 != sentence_split(sent, opts)) goto display_word_split_error; + } + + /* List the splits */ + print_sentence_word_alternatives(s, sent, false, NULL, NULL, NULL); + /* List the expression / disjunct information */ + + /* Initialize the callback arguments */ + const void *carg[3] = { /*regex*/NULL, /*flags*/NULL, opts }; + + Regex_node *rn = NULL; + if (arg != NULL) + { + carg[1] = arg[1]; /* flags */ + if (arg[0] == &do_display_expr) + { + carg[0] = &do_display_expr; + } + else if (arg[0] != NULL) + { + /* A regex is specified, which means displaying disjuncts. 
*/ + if (arg[0][0] != '\0') + { + rn = malloc(sizeof(Regex_node)); + rn->name = strdup("Disjunct regex"); + rn->pattern = strdup(arg[0]); + rn->re = NULL; + rn->neg = false; + rn->next = NULL; + + if (compile_regexs(rn, NULL) != 0) + { + prt_error("Error: Failed to compile regex \"%s\".\n", arg[0]); + return strdup(""); /* not NULL (NULL means no dict entry) */ + } + + carg[0] = rn; + } + } + } + print_sentence_word_alternatives(s, sent, false, display, carg, NULL); + if (rn != NULL) free_regexs(rn); + +display_word_split_error: sentence_delete(sent); parse_options_set_spell_guess(opts, spell_option); @@ -328,7 +535,7 @@ static char *display_counts(const char *word, Dict_node *dn) { dyn_str *s = dyn_str_new(); - append_string(s, "matches:\n"); + dyn_strcat(s, "matches:\n"); for (; dn != NULL; dn = dn->right) { append_string(s, " %-*s %8u disjuncts", @@ -339,32 +546,61 @@ static char *display_counts(const char *word, Dict_node *dn) { append_string(s, " <%s>", dn->file->file); } - append_string(s, "\n\n"); + dyn_strcat(s, "\n\n"); } return dyn_str_take(s); } /** - * Display the number of disjuncts associated with this dict node + * Display the expressions associated with this dict node. */ -static char *display_expr(Dictionary dict, const char *word, Dict_node *dn) +static char *display_expr(Dictionary dict, const char *word, Dict_node *dn, + const void **arg) { - dyn_str *s = dyn_str_new(); + const char *flags = arg[1]; + const Parse_Options opts = (Parse_Options)arg[2]; + bool show_macros = ((flags != NULL) && (strchr(flags, 'm') != NULL)); + bool low_level = ((flags != NULL) && (strchr(flags, 'l') != NULL)); - append_string(s, "expressions:\n"); + /* copy_Exp() needs an Exp memory pool. 
*/ + Pool_desc *Exp_pool = pool_new(__func__, "Exp", /*num_elements*/256, + sizeof(Exp), /*zero_out*/false, + /*align*/false, /*exact*/false); + + dyn_str *s = dyn_str_new(); + dyn_strcat(s, "expressions:\n"); for (; dn != NULL; dn = dn->right) { - const char *expstr = lg_exp_stringify_with_tags(dict, dn->exp); + Exp *e = copy_Exp(dn->exp, Exp_pool, opts); /* assign dialect costs */ + pool_reuse(Exp_pool); + + if (low_level) + { + append_string(s, " %s\n", dn->string); + prt_exp_all(s, e, 0, dict); + dyn_strcat(s, "\n\n"); + } + + const char *expstr = lg_exp_stringify_with_tags(dict, e, show_macros); append_string(s, " %-*s %s", display_width(DJ_COL_WIDTH, dn->string), dn->string, expstr); - append_string(s, "\n\n"); + dyn_strcat(s, "\n\n"); } + + if (Exp_pool != NULL) pool_delete(Exp_pool); return dyn_str_take(s); } -static char *display_word_info(Dictionary dict, const char * word) +/** + * A callback function to display \p word number of disjuncts and file name. + * + * @arg Callback args (unused). + * @return String to display. Must be freed by the caller. + */ +static char *display_word_info(Dictionary dict, const char *word, + const void **arg) { const char * regex_name; Dict_node *dn_head; @@ -381,21 +617,38 @@ static char *display_word_info(Dictionary dict, const char * word) regex_name = match_regex(dict->regex_root, word); if (regex_name) { - return display_word_info(dict, regex_name); + return display_word_info(dict, regex_name, arg); } return NULL; } -static char *display_word_expr(Dictionary dict, const char * word) +/** + * A callback function to display \p word expressions or disjuncts. + * @param arg Callback data as follows: + * arg[0]: &do_display_expr or disjunct selection regex. + * arg[1]: flags + * argv[2]: Parse_Options + * @return String to display. Must be freed by the caller. 
+ */ +static char *display_word_expr(Dictionary dict, const char *word, + const void **arg) { const char * regex_name; Dict_node *dn_head; + char *out = NULL; dn_head = dictionary_lookup_wild(dict, word); if (dn_head) { - char *out = display_expr(dict, word, dn_head); + if (arg[0] == &do_display_expr) + { + out = display_expr(dict, word, dn_head, arg); + } + else + { + out = display_disjuncts(dict, dn_head, arg); + } free_lookup_list(dict, dn_head); return out; } @@ -404,19 +657,57 @@ static char *display_word_expr(Dictionary dict, const char * word) regex_name = match_regex(dict->regex_root, word); if (regex_name) { - return display_word_expr(dict, regex_name); + return display_word_expr(dict, regex_name, arg); } return NULL; } +/** + * Break "word", "word/flags" or "word/regex/flags" into components. + * "regex" and "flags" are optional. "word/" means an empty regex. + * \p re and \p flags can be both NULL. + * @param re[out] the regex component, unless \c NULL. + * @param flags[out] the flags component, unless \c NULL. + * @return The word component. + */ +static const char *display_word_extract(char *word, const char **re, + const char **flags) +{ + if (re != NULL) *re = NULL; + if (flags != NULL) *flags = NULL; + + char *r = strchr(word, '/'); + if (r == NULL) return word; + *r = '\0'; + + if (re != NULL) + { + char *f = strchr(r + 1, '/'); + if (f != NULL) + { + *re = r + 1; + *f = '\0'; + *flags = f + 1; /* disjunct display flags */ + } + else + { + *flags = r + 1; /* expression display flags */ + } + } + return word; +} + /** * dict_display_word_info() - display the information about the given word. 
*/ -char *dict_display_word_info(Dictionary dict, const char * word, +char *dict_display_word_info(Dictionary dict, const char *word, Parse_Options opts) { - return display_word_split(dict, word, opts, display_word_info); + char *wordbuf = strdupa(word); + word = display_word_extract(wordbuf, NULL, NULL); + + return display_word_split(dict, word, opts, display_word_info, NULL); } /** @@ -424,5 +715,12 @@ char *dict_display_word_info(Dictionary dict, const char * word, */ char *dict_display_word_expr(Dictionary dict, const char * word, Parse_Options opts) { - return display_word_split(dict, word, opts, display_word_expr); + const char *arg[2]; + char *wordbuf = strdupa(word); + word = display_word_extract(wordbuf, &arg[0], &arg[1]); + + /* If no regex component, then it's a request to display expressions. */ + if (arg[0] == NULL) arg[0] = &do_display_expr; + + return display_word_split(dict, word, opts, display_word_expr, arg); } diff --git a/link-grammar/dict-file/dictionary.c b/link-grammar/dict-file/dictionary.c index 7d84a81cfc..dbf035ece7 100644 --- a/link-grammar/dict-file/dictionary.c +++ b/link-grammar/dict-file/dictionary.c @@ -141,6 +141,12 @@ dictionary_six_str(const char * lang, dict->dialect_tag.set = string_id_create(); condesc_init(dict, 1<<13); Exp_pool_size = 1<<13; + + if (!test_enabled("no-macro-tag")) + { + dict->macro_tag = malloc(sizeof(*dict->macro_tag)); + memset(dict->macro_tag, 0, sizeof(*dict->macro_tag)); + } } else { diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index 36752e2010..43c36c082b 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -937,6 +937,29 @@ static Exp * make_dir_connector(Dictionary dict, int i) } /* ======================================================================== */ +/** + * Add an optional macro/word tag, for expression debugging. + * Enabled by !test="macro-tag". This tag is used only in expression printing. 
+ */ +static unsigned int exptag_macro_add(Dictionary dict, const char *tag) +{ + expression_tag *mt = dict->macro_tag; + if (mt == NULL) return 0; + + if (mt->num == mt->size) + { + if (mt->num == 0) + mt->size = 128; + else + mt->size *= 2; + + mt->name = realloc(mt->name, mt->size * sizeof(*mt->name)); + } + mt->name[mt->num] = tag; + + return mt->num++; +} + /** * make_connector() -- make a node for a connector or dictionary word. * @@ -970,8 +993,10 @@ static Exp * make_connector(Dictionary dict) return NULL; } - /* Wrap it in a unary node as a placeholder for a cost if needed. */ + /* Wrap it in a unary node as a placeholder for a macro tag and cost. */ n = make_unary_node(dict->Exp_pool, dn->exp); + n->tag_id = exptag_macro_add(dict, dn->string); + if (n->tag_id != 0) n->tag_type = Exptag_macro; file_free_lookup(dn_head); } @@ -1182,7 +1207,7 @@ static Exp *make_expression(Dictionary dict) badchar); return NULL; } - if (nl->tag_id != Exptag_none) + if (nl->tag_type != Exptag_none) { nl = make_unary_node(dict->Exp_pool, nl); } diff --git a/link-grammar/disjunct-utils.c b/link-grammar/disjunct-utils.c index 671a0369eb..3bbd132e48 100644 --- a/link-grammar/disjunct-utils.c +++ b/link-grammar/disjunct-utils.c @@ -1,7 +1,7 @@ /*************************************************************************/ /* Copyright (c) 2004 */ /* Daniel Sleator, David Temperley, and John Lafferty */ -/* Copyright 2018, 2019, Amir Plivatsky */ +/* Copyright 2018-2020, Amir Plivatsky */ /* All rights reserved */ /* */ /* Use of the link grammar parsing system is subject to the terms of the */ @@ -14,8 +14,12 @@ #include "api-structures.h" // Sentence #include "connectors.h" +#include "dict-common/dict-structures.h" +#include "dict-common/dict-utils.h" // copy_Exp +#include "dict-common/regex-morph.h" // match_regex #include "disjunct-utils.h" #include "memory-pool.h" +#include "prepare/build-disjuncts.h" #include "print/print-util.h" #include "tokenize/tok-structures.h" // XXX TODO 
provide gword access methods! #include "tokenize/word-structures.h" @@ -413,41 +417,74 @@ void count_disjuncts_and_connectors(Sentence sent, unsigned int *dca, } /* ================ Print disjuncts and connectors ============== */ +static bool is_flag(uint32_t flags, char flag) +{ + return (flags>>(flag-'a')) & 1; +} + +static uint32_t make_flag(char flag) +{ + return ((flag >= 'a') && (flag <= 'z')) ? (uint32_t)1<<(flag-'a') : 0; /* Other chars would make the shift undefined. */ +} + /* Print one connector with all the details. - * mCnameD(nearest_word, length_limit)x - * optional m: "@" for multi (else nothing) - * Cname: Connector name - * Optional D: "-" / "+" (if dir != -1) - * Optional : tracon_id (if not 0) - * Optional (nearest_word, length_limit): if both are not 0 + * mCnameD{refcount}(nearest_word, length_limit)x + * Optional m: "@" for multi (else nothing). + * Cname: Connector name. + * Optional D: "-" / "+" (if dir != -1). + * Optional : (flag 't'). + * Optional [nearest_word, length_limit or farthest_word]: (flag 'l'). * x: Shallow/deep indication as "s" / "d" (if shallow != -1) */ -void print_one_connector(Connector * e, int dir, int shallow) +void dyn_print_one_connector(dyn_str *s, Connector *e, int dir, int shallow, + uint32_t flags) { - printf("%s%s", e->multi ? "@" : "", connector_string(e)); - if (-1 != dir) printf("%c", "-+"[dir]); - if (e->tracon_id) - { - if ((-1 != shallow) && e->refcount) - printf("<%d,%d>", e->tracon_id, e->refcount); - else - printf("<%d>", e->tracon_id); - } - printf("(%d,%d)", e->nearest_word, e->length_limit); + if (e->multi) + dyn_strcat(s, "@"); + dyn_strcat(s, connector_string(e)); + if (-1 != dir) dyn_strcat(s, (0 == dir) ? "-" : "+"); /* One char only; &"-+"[0] would append "-+". */ + if (is_flag(flags, 't') && e->tracon_id) + append_string(s, "<%d>", e->tracon_id); + if (is_flag(flags, 'r') && e->refcount) + append_string(s, "{%d}",e->refcount); + if (is_flag(flags, 'l')) + append_string(s, "(%d,%d)", e->nearest_word, e->length_limit); if (-1 != shallow) - printf("%c", (0 == shallow) ? 'd' : 's'); + dyn_strcat(s, (0 == shallow) ?
"d" : "s"); +} + +void print_one_connector(Connector *e, int dir, int shallow, uint32_t flags) +{ + dyn_str *s = dyn_str_new(); + + dyn_print_one_connector(s, e, dir, shallow, flags); + + char *t = dyn_str_take(s); + puts(t); + free(t); } -void print_connector_list(Connector * e) +void dyn_print_connector_list(dyn_str *s, Connector *e, uint32_t flags) { - for (;e != NULL; e=e->next) + for (;e != NULL; e = e->next) { - print_one_connector(e, /*dir*/-1, /*shallow*/-1); - if (e->next != NULL) printf(" "); + dyn_print_one_connector(s, e, /*dir*/-1, /*shallow*/-1, flags); + if (e->next != NULL) dyn_strcat(s, " "); } } -void print_disjunct_list(Disjunct * dj) +void print_connector_list(Connector *e, uint32_t flags) +{ + dyn_str *s = dyn_str_new(); + + dyn_print_connector_list(s, e, flags); + + char *t = dyn_str_take(s); + puts(t); + free(t); +} + +void dyn_print_disjunct_list(dyn_str *s, Disjunct *dj, uint32_t flags) { int i = 0; char word[MAX_WORD + 32]; @@ -458,26 +495,137 @@ void print_disjunct_list(Disjunct * dj) lg_strlcpy(word, dj->word_string, sizeof(word)); patch_subscript_mark(word); - printf("%16s", word); - if (print_disjunct_address) printf("(%p)", dj); - printf(": "); + append_string(s, "%16s", word); + if (print_disjunct_address) append_string(s, "(%p)", dj); + dyn_strcat(s, ": "); - printf("[%d](%s) ", i++, cost_stringify(dj->cost)); + append_string(s, "[%d]%s= ", i++, cost_stringify(dj->cost)); - print_connector_list(dj->left); - printf(" <--> "); - print_connector_list(dj->right); - printf("\n"); + dyn_print_connector_list(s, dj->left, flags); + dyn_strcat(s, " <--> "); + dyn_print_connector_list(s, dj->right, flags); + dyn_strcat(s, "\n"); } } void print_all_disjuncts(Sentence sent) { - for (WordIdx w = 0; w < sent->length; w++) + dyn_str *s = dyn_str_new(); + uint32_t flags = make_flag('l') | make_flag('t'); + + for (WordIdx w = 0; w < sent->length; w++) + { + append_string(s, "Word %zu:\n", w); + dyn_print_disjunct_list(s, sent->word[w].d, flags); + + 
} + + char *t = dyn_str_take(s); + puts(t); + free(t); +} + +/** + * Display the disjuncts of expressions in \p dn. + */ +char *display_disjuncts(Dictionary dict, const Dict_node *dn, const void **arg) +{ + const void *rn = arg[0]; + const char *flags = arg[1]; + const Parse_Options opts = (Parse_Options)arg[2]; + double max_cost = opts->disjunct_cost; + + uint32_t int_flags = 0; + if (flags != NULL) + { + for (const char *f = flags; *f != '\0'; f++) + int_flags |= make_flag(*f); + } + + /* build_disjuncts_for_exp() needs memory pools for efficiency. */ + Sentence dummy_sent = sentence_create("", dict); /* For memory pools. */ + dummy_sent->Disjunct_pool = pool_new(__func__, "Disjunct", + /*num_elements*/8192, sizeof(Disjunct), + /*zero_out*/false, /*align*/false, false); + dummy_sent->Connector_pool = pool_new(__func__, "Connector", + /*num_elements*/65536, sizeof(Connector), + /*zero_out*/true, /*align*/false, false); + + /* copy_Exp() needs an Exp memory pool. */ + Pool_desc *Exp_pool = pool_new(__func__, "Exp", /*num_elements*/256, + sizeof(Exp), /*zero_out*/false, + /*align*/false, /*exact*/false); + + dyn_str *s = dyn_str_new(); + dyn_strcat(s, "disjuncts:\n"); + for (; dn != NULL; dn = dn->right) + { + /* Use copy_Exp() to assign dialect cost. */ + Exp *e = copy_Exp(dn->exp, Exp_pool, opts); + Disjunct *d = build_disjuncts_for_exp(dummy_sent, e, dn->string, NULL, + max_cost, NULL); + unsigned int dnum0 = count_disjuncts(d); + d = eliminate_duplicate_disjuncts(d); + unsigned int dnum1 = count_disjuncts(d); + + dyn_str *dyn_pdl = dyn_str_new(); + dyn_print_disjunct_list(dyn_pdl, d, int_flags); + char *dliststr = dyn_str_take(dyn_pdl); + + pool_reuse(Exp_pool); + pool_reuse(dummy_sent->Disjunct_pool); + pool_reuse(dummy_sent->Connector_pool); + + /* Count number of disjuncts with tunnel connectors. 
*/ + unsigned int tnum = 0; + for (const char *p = dliststr; *p != '\0'; p++) + if ((p[0] == ' ') && (p[1] == 'x')) tnum++; + + unsigned int dnum_selected = 0; + dyn_str *selected = NULL; + char *dstr = dliststr; + char *end; + if (rn != NULL) + { + selected = dyn_str_new(); + + /* An empty disjunct list has no newline to find. */ + while ((end = strchr(dstr, '\n')) != NULL) + { + *end = '\0'; + if (match_regex(rn , dstr) != NULL) + { + dyn_strcat(selected, dstr); + dyn_strcat(selected, "\n"); + dnum_selected++; + } + + dstr = end + 1; + } + + free(dliststr); + dliststr = dyn_str_take(selected); + } + + append_string(s, " %s %u/%u disjuncts", dn->string, dnum1, dnum0); + if (tnum != 0) append_string(s, " (%u tunnels)", tnum); + dyn_strcat(s, "\n"); + dyn_strcat(s, dliststr); + dyn_strcat(s, "\n"); + free(dliststr); + + if (rn != NULL) { - printf("Word %zu:\n", w); - print_disjunct_list(sent->word[w].d); + if (dnum_selected == dnum1) + dyn_strcat(s, "(all the disjuncts matched)\n\n"); + else + append_string(s, "(%u disjuncts matched)\n\n", dnum_selected); } + } + pool_delete(Exp_pool); + sentence_delete(dummy_sent); + + return dyn_str_take(s); } /* ============= Connector encoding, sharing and packing ============= */ diff --git a/link-grammar/disjunct-utils.h b/link-grammar/disjunct-utils.h index b76af9be9c..463785a3c2 100644 --- a/link-grammar/disjunct-utils.h +++ b/link-grammar/disjunct-utils.h @@ -72,10 +72,14 @@ Tracon_sharing *pack_sentence_for_parsing(Sentence); void free_tracon_sharing(Tracon_sharing *); void count_disjuncts_and_connectors(Sentence, unsigned int *, unsigned int *); -void print_one_connector(Connector *, int, int); -void print_connector_list(Connector *); -void print_disjunct_list(Disjunct *); +void print_one_connector(Connector *, int, int, uint32_t); +void dyn_print_one_connector(dyn_str *s, Connector *, int, int, uint32_t); +void print_connector_list(Connector *, uint32_t); +void dyn_print_connector_list(dyn_str *s, Connector *, uint32_t); +void print_disjunct_list(Disjunct *,
uint32_t); +void dyn_print_disjunct_list(dyn_str *s, Disjunct *, uint32_t); void print_all_disjuncts(Sentence); +char *display_disjuncts(Dictionary, const Dict_node *, const void **); /* Save and restore sentence disjuncts */ typedef struct diff --git a/link-grammar/prepare/build-disjuncts.c b/link-grammar/prepare/build-disjuncts.c index 90ea419d63..f75fa813dc 100644 --- a/link-grammar/prepare/build-disjuncts.c +++ b/link-grammar/prepare/build-disjuncts.c @@ -16,6 +16,7 @@ #include "build-disjuncts.h" #include "connectors.h" #include "dict-common/dict-structures.h" // Exp_struct, lg_exp_stringify +#include "dict-common/dict-common.h" // Dictionary #include "disjunct-utils.h" #include "utilities.h" @@ -261,37 +262,35 @@ build_disjunct(Sentence sent, Clause * cl, const char * string, for (; cl != NULL; cl = cl->next) { if (NULL == cl->c) continue; /* no connectors */ + if (cl->maxcost > cost_cutoff) continue; - if (cl->maxcost <= cost_cutoff) + if (NULL == sent) /* For the SAT-parser, until fixed. */ { - if (NULL == sent) /* For the SAT-parser, until fixed. */ - { - ndis = xalloc(sizeof(Disjunct)); - } - else - { - ndis = pool_alloc(sent->Disjunct_pool); - connector_pool = sent->Connector_pool; - } - ndis->left = ndis->right = NULL; - - /* Build a list of connectors from the Tconnectors. */ - for (Tconnector *t = cl->c; t != NULL; t = t->next) - { - Connector *n = connector_new(connector_pool, t->e->condesc, opts); - Connector **loc = ('-' == t->e->dir) ? &ndis->left : &ndis->right; + ndis = xalloc(sizeof(Disjunct)); + } + else + { + ndis = pool_alloc(sent->Disjunct_pool); + connector_pool = sent->Connector_pool; + } + ndis->left = ndis->right = NULL; - n->multi = t->e->multi; - n->next = *loc; /* prepend the connector to the current list */ - *loc = n; /* update the connector list */ - } + /* Build a list of connectors from the Tconnectors. 
*/ + for (Tconnector *t = cl->c; t != NULL; t = t->next) + { + Connector *n = connector_new(connector_pool, t->e->condesc, opts); + Connector **loc = ('-' == t->e->dir) ? &ndis->left : &ndis->right; - ndis->word_string = string; - ndis->cost = cl->cost; - ndis->originating_gword = (gword_set*)gs; /* XXX remove constness */ - ndis->next = dis; - dis = ndis; + n->multi = t->e->multi; + n->next = *loc; /* prepend the connector to the current list */ + *loc = n; /* update the connector list */ } + + ndis->word_string = string; + ndis->cost = cl->cost; + ndis->originating_gword = (gword_set*)gs; /* XXX remove constness */ + ndis->next = dis; + dis = ndis; } return dis; } @@ -345,96 +344,4 @@ GNUC_UNUSED static void print_clause_list(Clause * c) printf("\n"); } } - -/* There is a much better lg_exp_stringify() elsewhere - * This one is for low-level debug. */ -GNUC_UNUSED void prt_exp(Exp *e, int i) -{ - if (e == NULL) return; - - for(int j =0; jtype, e->dir, e->multi, cost_stringify(e->cost)); - if (e->type != CONNECTOR_type) - { - for (e = e->operand_next; e != NULL; e = e->operand_next) prt_exp(e, i+2); - } - else - { - for(int j =0; jcondesc->string); - } -} - -static const char *stringify_Exp_type(Exp_type type) -{ - static TLS char unknown_type[32] = ""; - const char *type_str; - - if (type > 0 && type <= 3) - { - type_str = ((const char *[]) {"OR", "AND", "CONNECTOR"}) [type-1]; - } - else - { - snprintf(unknown_type, sizeof(unknown_type)-1, "unknown_type-%d", - (int)(type)); - type_str = unknown_type; - } - - return type_str; -} - -static bool is_ASAN_uninitialized(uintptr_t a) -{ - static const uintptr_t asan_uninitialized = (uintptr_t)0xbebebebebebebebeULL; - - return (a == asan_uninitialized); -} - -GNUC_UNUSED void prt_exp_mem(Exp *e, int i) -{ - if (is_ASAN_uninitialized((uintptr_t)e)) - { - printf ("e=UNINITIALIZED\n"); - return; - } - if (e == NULL) return; - - for(int j =0; jtype)); - - if (is_ASAN_uninitialized((uintptr_t)e->operand_first)) - printf(" 
(UNINITIALIZED operand_first)"); - if (is_ASAN_uninitialized((uintptr_t)e->operand_next)) - printf(" (UNINITIALIZED operand_next)"); - - if (e->type != CONNECTOR_type) - { - int operand_count = 0; - for (Exp *opd = e->operand_first; NULL != opd; opd = opd->operand_next) - { - operand_count++; - if (is_ASAN_uninitialized((uintptr_t)opd->operand_next)) - { - printf(" (operand %d: UNINITIALIZED operand_next)\n", operand_count); - return; - } - } - printf(" (%d operand%s) cost=%s\n", operand_count, - operand_count == 1 ? "" : "s", cost_stringify(e->cost)); - - for (Exp *opd = e->operand_first; NULL != opd; opd = opd->operand_next) - { - prt_exp_mem(opd, i+2); - } - } - else - { - printf(" %s%s%c cost=%s\n", - e->multi ? "@" : "", - e->condesc ? e->condesc->string : "(condesc=(null))", - e->dir, cost_stringify(e->cost)); - } -} #endif /* DEBUG */ diff --git a/link-grammar/prepare/build-disjuncts.h b/link-grammar/prepare/build-disjuncts.h index da01f67446..6e0732db4a 100644 --- a/link-grammar/prepare/build-disjuncts.h +++ b/link-grammar/prepare/build-disjuncts.h @@ -20,10 +20,4 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp *, const char *, const gword_set *, double cost_cutoff, Parse_Options opts); - -#ifdef DEBUG -void prt_exp(Exp *, int); -void prt_exp_mem(Exp *, int); -#endif /* DEBUG */ - #endif /* _LINKGRAMMAR_BUILD_DISJUNCTS_H */ diff --git a/link-grammar/print/print.c b/link-grammar/print/print.c index 4945be5ccf..5bb5d8e608 100644 --- a/link-grammar/print/print.c +++ b/link-grammar/print/print.c @@ -1258,24 +1258,24 @@ static const char * header(bool print_ps_header) /** * Print elements of the 2D-word-array produced for the parsers. * - * - print_sentence_word_alternatives(s, sent, false, NULL, tokenpos) + * - print_sentence_word_alternatives(s, sent, false, NULL, NULL, tokenpos) * If a pointer to struct "tokenpos" is given, return through it the index of * the first occurrence in the sentence of the given token. 
This is used to * prevent duplicate information display for repeated morphemes (if there are * multiples splits, each of several morphemes, otherwise some of them may * repeat). * - * - print_sentence_word_alternatives(s, sent, true, NULL, NULL) + * - print_sentence_word_alternatives(s, sent, true, NULL, NULL, NULL) * If debugprint is "true", this is a debug printout of the sentence. (The * debug printouts are with level 0 because this function is invoked for debug * on certain positive level.) * * - * - print_sentence_word_alternatives(s, sent, false, display_func, NULL) + * - print_sentence_word_alternatives(s, sent, false, display_func, arg, NULL) * Iterate over the sentence words and their alternatives. Handle each - * alternative using the display_func function if it is supplied, or else (if it - * is NULL) just print them. It is used to display disjunct information when - * command !!word is used. + * alternative using display_func(..., arg) if it is supplied, or else (if + * display_func is NULL) just print them. It is used to display disjunct + * information when command !!word is used. * FIXME In the current version (using Wordgraph) the "alternatives" in the * word-array don't necessarily consist of real word alternatives. 
* @@ -1288,8 +1288,10 @@ struct tokenpos /* First position of the given token - to prevent duplicates */ size_t ai; }; -void print_sentence_word_alternatives(dyn_str *s, Sentence sent, bool debugprint, - char * (*display)(Dictionary, const char *), struct tokenpos * tokenpos) +void print_sentence_word_alternatives(dyn_str *s, Sentence sent, + bool debugprint, + char * (*display)(Dictionary, const char *, const void **), + const void **arg, struct tokenpos *tokenpos) { size_t wi; /* Internal sentence word index */ size_t ai; /* Index of a word alternative */ @@ -1312,11 +1314,12 @@ void print_sentence_word_alternatives(dyn_str *s, Sentence sent, bool debugprint { /* For analyzing words we need to ignore the left/right walls */ if (dict->left_wall_defined && - (0 == strcmp(sent->word[0].unsplit_word, LEFT_WALL_WORD))) + ((NULL != sent->word[0].alternatives[0])) && + (0 == strcmp(sent->word[0].alternatives[0], LEFT_WALL_WORD))) first_sentence_word = 1; if (dict->right_wall_defined && - ((NULL != sent->word[sentlen-1].unsplit_word)) && - (0 == strcmp(sent->word[sentlen-1].unsplit_word, RIGHT_WALL_WORD))) + ((NULL != sent->word[sentlen-1].alternatives[0])) && + (0 == strcmp(sent->word[sentlen-1].alternatives[0], RIGHT_WALL_WORD))) sentlen--; /* Find if a word got split. This is indicated by: @@ -1431,7 +1434,7 @@ void print_sentence_word_alternatives(dyn_str *s, Sentence sent, bool debugprint { struct tokenpos firstpos = { wt }; - print_sentence_word_alternatives(s, sent, false, NULL, &firstpos); + print_sentence_word_alternatives(s, sent, false, NULL, NULL, &firstpos); if (((firstpos.wi != wi) || (firstpos.ai != ai)) && firstpos.wi >= first_sentence_word) // allow !!LEFT_WORD { @@ -1462,7 +1465,7 @@ void print_sentence_word_alternatives(dyn_str *s, Sentence sent, bool debugprint * Display the features of the token. 
*/ if ((NULL == tokenpos) && (NULL != display)) { - char *info = display(sent->dict, wt); + char *info = display(sent->dict, wt, arg); if (NULL == info) return; append_string(s, "Token \"%s\" ", wt); diff --git a/link-grammar/print/print.h b/link-grammar/print/print.h index 83f206e63c..a54e646e36 100644 --- a/link-grammar/print/print.h +++ b/link-grammar/print/print.h @@ -22,7 +22,8 @@ void print_disjunct_counts(Sentence sent); struct tokenpos; void print_sentence_word_alternatives(dyn_str *, Sentence, bool, - char * (*)(Dictionary, const char *), struct tokenpos *); + char * (*)(Dictionary, const char *, const void **), const void **arg, + struct tokenpos *); // Used for debug/error printing void print_sentence_context(Sentence, dyn_str*); diff --git a/link-grammar/tokenize/tok-structures.h b/link-grammar/tokenize/tok-structures.h index be337d26b2..b96c2f91ac 100644 --- a/link-grammar/tokenize/tok-structures.h +++ b/link-grammar/tokenize/tok-structures.h @@ -183,5 +183,4 @@ struct Wordgraph_pathpos_s /* Only for sane_morphism(). */ const Gword **path; /* Linkage candidate wordgraph path */ }; - #endif diff --git a/link-grammar/tokenize/tokenize.c b/link-grammar/tokenize/tokenize.c index 08322c83e2..cf0e6f96f0 100644 --- a/link-grammar/tokenize/tokenize.c +++ b/link-grammar/tokenize/tokenize.c @@ -2971,6 +2971,14 @@ static Word *word_new(Sentence sent) return &sent->word[len]; } +/* Used only by display_word_split() for words that shouldn't get split: calls word_new(), appends \p w to the alternatives of sent->word[0] and returns what setup_dialect() returns. */ +bool word0_set(Sentence sent, char *w, Parse_Options opts) +{ + word_new(sent); + altappend(sent, &sent->word[0].alternatives, w); + return setup_dialect(sent->dict, opts); +} + /** * build_word_expressions() -- build list of expressions for a word. 
* @@ -3395,7 +3403,7 @@ bool flatten_wordgraph(Sentence sent, Parse_Options opts) if (verbosity_level(D_SW)) { dyn_str *s = dyn_str_new(); - print_sentence_word_alternatives(s, sent, true, NULL, NULL); + print_sentence_word_alternatives(s, sent, true, NULL, NULL, NULL); char *out = dyn_str_take(s); prt_error("Debug: Sentence words and alternatives:\n%s", out); free(out); diff --git a/link-grammar/tokenize/tokenize.h b/link-grammar/tokenize/tokenize.h index 204bcc4dee..5a54013044 100644 --- a/link-grammar/tokenize/tokenize.h +++ b/link-grammar/tokenize/tokenize.h @@ -23,6 +23,7 @@ void wordgraph_delete(Sentence); void tokenization_done(Sentence, Gword *); void altappend(Sentence, const char ***, const char *); +bool word0_set(Sentence, char *, Parse_Options); Gword *issue_word_alternative(Sentence sent, Gword *unsplit_word, const char *label, diff --git a/link-parser/command-line.c b/link-parser/command-line.c index 76b6ebac92..54d50db227 100644 --- a/link-parser/command-line.c +++ b/link-parser/command-line.c @@ -62,6 +62,7 @@ static int variables_cmd(const Switch*, int); static int file_cmd(const Switch*, int); static int help_cmd(const Switch*, int); static int exit_cmd(const Switch*, int); +static int info_cmd(const Switch*, int); Switch default_switches[] = { @@ -103,6 +104,7 @@ Switch default_switches[] = {"help", Cmd, "List the commands and what they do", help_cmd}, {"quit", Cmd, UNDOC "Exit the program", exit_cmd}, {"variables", Cmd, "List user-settable variables and their functions", variables_cmd}, + {"!", Cmd, UNDOC "Print information on dictionary words", info_cmd}, {NULL, Cmd, NULL, NULL} }; @@ -579,6 +581,7 @@ static int help_cmd(const Switch *uc, int n) printf("\n"); printf(" !! Print all the dictionary words that match .\n"); printf(" A wildcard * may be used to find multiple matches.\n"); + printf(" Issue \"!help !\" for more details.\n"); printf("\n"); printf(" ! 
Toggle the specified Boolean variable.\n"); printf(" != Assign that value to that variable.\n"); @@ -619,6 +622,13 @@ static int file_cmd(const Switch *uc, int n) return 'f'; } +static int info_cmd(const Switch *uc, int n) +{ + /* Dummy definition - the work is done in + * x_issue_special_command() (see '!' there); it is the handler listed for the ! command in default_switches. */ + return 'c'; +} + static int x_issue_special_command(char * line, Command_Options *copts, Dictionary dict) { char *s, *x, *y; @@ -629,8 +639,9 @@ static int x_issue_special_command(char * line, Command_Options *copts, Dictiona /* Handle a request for a particular command help. */ if (NULL != dict) { + char *dupline = strdup(line); /* If we are here, it is not a command-line parameter. */ - s = strtok(line, WHITESPACE); + s = strtok(dupline, WHITESPACE); if ((s != NULL) && strncasecmp(s, "help", strlen(s)) == 0) { s = strtok(NULL, WHITESPACE); @@ -652,6 +663,7 @@ static int x_issue_special_command(char * line, Command_Options *copts, Dictiona if (count == 1) { + free(dupline); display_help(&as[j], copts); return 'c'; } @@ -661,13 +673,44 @@ static int x_issue_special_command(char * line, Command_Options *copts, Dictiona else prt_error("Undefined command: \"%s\". 
%s\n", s, helpmsg); + free(dupline); return -1; } } + free(dupline); } - clean_up_string(line); s = line; + if (s[0] == '!') + { + Parse_Options opts = copts->popts; + char *out; + + out = dict_display_word_info(dict, s+1, opts); + if (NULL != out) + { + printf("%s\n", out); + free(out); + out = dict_display_word_expr(dict, s+1, opts); + if (NULL != out) + { + printf("%s", out); + free(out); + } + else + { + prt_error("Error: '%s': Internal Error: Missing expression.\n", s+1); + } + } + else + { + printf("Token \"%s\" matches nothing in the dictionary.\n", s+1); + } + + return 'c'; + } + + clean_up_string(line); j = -1; count = 0; @@ -706,35 +749,6 @@ static int x_issue_special_command(char * line, Command_Options *copts, Dictiona return ((int (*)(const Switch*, int)) (as[j].ptr))(as, j); } - if (s[0] == '!') - { - Parse_Options opts = copts->popts; - char *out; - - out = dict_display_word_info(dict, s+1, opts); - if (NULL != out) - { - printf("%s\n", out); - free(out); - out = dict_display_word_expr(dict, s+1, opts); - if (NULL != out) - { - printf("%s", out); - free(out); - } - else - { - prt_error("Error: '%s': Internal Error: Missing expression.\n", s+1); - } - } - else - { - printf("Token \"%s\" matches nothing in the dictionary.\n", s+1); - } - - return 'c'; - } - /* Test here for an equation i.e. does the command line hold an equals sign? */ for (x=s; (*x != '=') && (*x != '\0') ; x++) ;