From 0b096aae6a2946fcbbc97c2e1fdeda622fe9c930 Mon Sep 17 00:00:00 2001 From: ampli Date: Mon, 20 Jan 2020 15:47:26 +0200 Subject: [PATCH 01/17] make_expression(): Fix comparing to the wrong Exp field --- link-grammar/dict-file/read-dict.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index 36752e2010..cf943263fc 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -1182,7 +1182,7 @@ static Exp *make_expression(Dictionary dict) badchar); return NULL; } - if (nl->tag_id != Exptag_none) + if (nl->tag_type != Exptag_none) { nl = make_unary_node(dict->Exp_pool, nl); } From c021b627f4b8825892de2288c43af225d040bcea Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 5 Jan 2020 14:13:37 +0200 Subject: [PATCH 02/17] link-parser: Fix command truncation after white space --- link-parser/command-line.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/link-parser/command-line.c b/link-parser/command-line.c index 76b6ebac92..b01a0b71aa 100644 --- a/link-parser/command-line.c +++ b/link-parser/command-line.c @@ -629,8 +629,9 @@ static int x_issue_special_command(char * line, Command_Options *copts, Dictiona /* Handle a request for a particular command help. */ if (NULL != dict) { + char *dupline = strdup(line); /* If we are here, it is not a command-line parameter. */ - s = strtok(line, WHITESPACE); + s = strtok(dupline, WHITESPACE); if ((s != NULL) && strncasecmp(s, "help", strlen(s)) == 0) { s = strtok(NULL, WHITESPACE); @@ -652,6 +653,7 @@ static int x_issue_special_command(char * line, Command_Options *copts, Dictiona if (count == 1) { + free(dupline); display_help(&as[j], copts); return 'c'; } @@ -661,9 +663,11 @@ static int x_issue_special_command(char * line, Command_Options *copts, Dictiona else prt_error("Undefined command: \"%s\". 
%s\n", s, helpmsg); + free(dupline); return -1; } } + free(dupline); } clean_up_string(line); From 227677f264344d1dd14c08d6a1a62f18a0a1ef43 Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 5 Jan 2020 14:14:48 +0200 Subject: [PATCH 03/17] link-parser: Allow white space in !! command --- link-parser/command-line.c | 60 +++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/link-parser/command-line.c b/link-parser/command-line.c index b01a0b71aa..1d5b577b0e 100644 --- a/link-parser/command-line.c +++ b/link-parser/command-line.c @@ -670,8 +670,37 @@ static int x_issue_special_command(char * line, Command_Options *copts, Dictiona free(dupline); } - clean_up_string(line); s = line; + if (s[0] == '!') + { + Parse_Options opts = copts->popts; + char *out; + + out = dict_display_word_info(dict, s+1, opts); + if (NULL != out) + { + printf("%s\n", out); + free(out); + out = dict_display_word_expr(dict, s+1, opts); + if (NULL != out) + { + printf("%s", out); + free(out); + } + else + { + prt_error("Error: '%s': Internal Error: Missing expression.\n", s+1); + } + } + else + { + printf("Token \"%s\" matches nothing in the dictionary.\n", s+1); + } + + return 'c'; + } + + clean_up_string(line); j = -1; count = 0; @@ -710,35 +739,6 @@ static int x_issue_special_command(char * line, Command_Options *copts, Dictiona return ((int (*)(const Switch*, int)) (as[j].ptr))(as, j); } - if (s[0] == '!') - { - Parse_Options opts = copts->popts; - char *out; - - out = dict_display_word_info(dict, s+1, opts); - if (NULL != out) - { - printf("%s\n", out); - free(out); - out = dict_display_word_expr(dict, s+1, opts); - if (NULL != out) - { - printf("%s", out); - free(out); - } - else - { - prt_error("Error: '%s': Internal Error: Missing expression.\n", s+1); - } - } - else - { - printf("Token \"%s\" matches nothing in the dictionary.\n", s+1); - } - - return 'c'; - } - /* Test here for an equation i.e. does the command line hold an equals sign? 
*/ for (x=s; (*x != '=') && (*x != '\0') ; x++) ; From fff71783ea6504a42cf1f06cffc3db5b81fc65bb Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 5 Jan 2020 14:57:06 +0200 Subject: [PATCH 04/17] build_disjunct(): Reduce indentation --- link-grammar/prepare/build-disjuncts.c | 50 +++++++++++++------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/link-grammar/prepare/build-disjuncts.c b/link-grammar/prepare/build-disjuncts.c index 90ea419d63..0c0abbec32 100644 --- a/link-grammar/prepare/build-disjuncts.c +++ b/link-grammar/prepare/build-disjuncts.c @@ -261,37 +261,35 @@ build_disjunct(Sentence sent, Clause * cl, const char * string, for (; cl != NULL; cl = cl->next) { if (NULL == cl->c) continue; /* no connectors */ + if (cl->maxcost > cost_cutoff) continue; - if (cl->maxcost <= cost_cutoff) + if (NULL == sent) /* For the SAT-parser, until fixed. */ { - if (NULL == sent) /* For the SAT-parser, until fixed. */ - { - ndis = xalloc(sizeof(Disjunct)); - } - else - { - ndis = pool_alloc(sent->Disjunct_pool); - connector_pool = sent->Connector_pool; - } - ndis->left = ndis->right = NULL; - - /* Build a list of connectors from the Tconnectors. */ - for (Tconnector *t = cl->c; t != NULL; t = t->next) - { - Connector *n = connector_new(connector_pool, t->e->condesc, opts); - Connector **loc = ('-' == t->e->dir) ? &ndis->left : &ndis->right; + ndis = xalloc(sizeof(Disjunct)); + } + else + { + ndis = pool_alloc(sent->Disjunct_pool); + connector_pool = sent->Connector_pool; + } + ndis->left = ndis->right = NULL; - n->multi = t->e->multi; - n->next = *loc; /* prepend the connector to the current list */ - *loc = n; /* update the connector list */ - } + /* Build a list of connectors from the Tconnectors. */ + for (Tconnector *t = cl->c; t != NULL; t = t->next) + { + Connector *n = connector_new(connector_pool, t->e->condesc, opts); + Connector **loc = ('-' == t->e->dir) ? 
&ndis->left : &ndis->right; - ndis->word_string = string; - ndis->cost = cl->cost; - ndis->originating_gword = (gword_set*)gs; /* XXX remove constness */ - ndis->next = dis; - dis = ndis; + n->multi = t->e->multi; + n->next = *loc; /* prepend the connector to the current list */ + *loc = n; /* update the connector list */ } + + ndis->word_string = string; + ndis->cost = cl->cost; + ndis->originating_gword = (gword_set*)gs; /* XXX remove constness */ + ndis->next = dis; + dis = ndis; } return dis; } From 3aed015bde4b8c94a8e9ad1341c394246c152b25 Mon Sep 17 00:00:00 2001 From: ampli Date: Sat, 18 Jan 2020 00:11:49 +0200 Subject: [PATCH 05/17] print_sentence_word_alternatives: Make it more robust --- link-grammar/print/print.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/link-grammar/print/print.c b/link-grammar/print/print.c index 4945be5ccf..ce6547017e 100644 --- a/link-grammar/print/print.c +++ b/link-grammar/print/print.c @@ -1312,11 +1312,12 @@ void print_sentence_word_alternatives(dyn_str *s, Sentence sent, bool debugprint { /* For analyzing words we need to ignore the left/right walls */ if (dict->left_wall_defined && - (0 == strcmp(sent->word[0].unsplit_word, LEFT_WALL_WORD))) + ((NULL != sent->word[0].alternatives[0])) && + (0 == strcmp(sent->word[0].alternatives[0], LEFT_WALL_WORD))) first_sentence_word = 1; if (dict->right_wall_defined && - ((NULL != sent->word[sentlen-1].unsplit_word)) && - (0 == strcmp(sent->word[sentlen-1].unsplit_word, RIGHT_WALL_WORD))) + ((NULL != sent->word[sentlen-1].alternatives[0])) && + (0 == strcmp(sent->word[sentlen-1].alternatives[0], RIGHT_WALL_WORD))) sentlen--; /* Find if a word got split. 
This is indicated by: From dbf503a307d48f1aa5030ca4b0144b988b6d6cb8 Mon Sep 17 00:00:00 2001 From: ampli Date: Mon, 6 Jan 2020 17:29:38 +0200 Subject: [PATCH 06/17] disjunct-utils.[ch]: Use dyn_str for printing disjuncts/connectors --- link-grammar/disjunct-utils.c | 113 +++++++++++++++++++++++----------- link-grammar/disjunct-utils.h | 9 ++- 2 files changed, 83 insertions(+), 39 deletions(-) diff --git a/link-grammar/disjunct-utils.c b/link-grammar/disjunct-utils.c index 671a0369eb..3631d76027 100644 --- a/link-grammar/disjunct-utils.c +++ b/link-grammar/disjunct-utils.c @@ -413,41 +413,74 @@ void count_disjuncts_and_connectors(Sentence sent, unsigned int *dca, } /* ================ Print disjuncts and connectors ============== */ +static bool is_flag(uint32_t flags, char flag) +{ + return (flags>>(flag-'a')) & 1; +} + +static uint32_t make_flag(char flag) +{ + return 1<<(flag-'a'); +} + /* Print one connector with all the details. - * mCnameD(nearest_word, length_limit)x - * optional m: "@" for multi (else nothing) - * Cname: Connector name - * Optional D: "-" / "+" (if dir != -1) - * Optional : tracon_id (if not 0) - * Optional (nearest_word, length_limit): if both are not 0 + * mCnameD<tracon_id>{refcount}(nearest_word, length_limit)x + * Optional m: "@" for multi (else nothing). + * Cname: Connector name. + * Optional D: "-" / "+" (if dir != -1). + * Optional <tracon_id>: (flag 't'). Optional {refcount}: (flag 'r'). + * Optional (nearest_word, length_limit or farthest_word): (flag 'l'). + * x: Shallow/deep indication as "s" / "d" (if shallow != -1) */ -void print_one_connector(Connector * e, int dir, int shallow) +void dyn_print_one_connector(dyn_str *s, Connector *e, int dir, int shallow, + uint32_t flags) { - printf("%s%s", e->multi ? 
"@" : "", connector_string(e)); - if (-1 != dir) printf("%c", "-+"[dir]); - if (e->tracon_id) - { - if ((-1 != shallow) && e->refcount) - printf("<%d,%d>", e->tracon_id, e->refcount); - else - printf("<%d>", e->tracon_id); - } - printf("(%d,%d)", e->nearest_word, e->length_limit); + if (e->multi) + dyn_strcat(s, "@"); + dyn_strcat(s, connector_string(e)); + if (-1 != dir) dyn_strcat(s, &"-+"[dir]); + if (is_flag(flags, 't') && e->tracon_id) + append_string(s, "<%d>", e->tracon_id); + if (is_flag(flags, 'r') && e->refcount) + append_string(s, "{%d}",e->refcount); + if (is_flag(flags, 'l')) + append_string(s, "(%d,%d)", e->nearest_word, e->length_limit); if (-1 != shallow) - printf("%c", (0 == shallow) ? 'd' : 's'); + dyn_strcat(s, (0 == shallow) ? "d" : "s"); +} + +void print_one_connector(Connector *e, int dir, int shallow, uint32_t flags) +{ + dyn_str *s = dyn_str_new(); + + dyn_print_one_connector(s, e, dir, shallow, flags); + + char *t = dyn_str_take(s); + puts(t); + free(t); } -void print_connector_list(Connector * e) +void dyn_print_connector_list(dyn_str *s, Connector *e, uint32_t flags) { - for (;e != NULL; e=e->next) + for (;e != NULL; e = e->next) { - print_one_connector(e, /*dir*/-1, /*shallow*/-1); - if (e->next != NULL) printf(" "); + dyn_print_one_connector(s, e, /*dir*/-1, /*shallow*/-1, flags); + if (e->next != NULL) dyn_strcat(s, " "); } } -void print_disjunct_list(Disjunct * dj) +void print_connector_list(Connector *e, uint32_t flags) +{ + dyn_str *s = dyn_str_new(); + + dyn_print_connector_list(s, e, flags); + + char *t = dyn_str_take(s); + puts(t); + free(t); +} + +void dyn_print_disjunct_list(dyn_str *s, Disjunct *dj, uint32_t flags) { int i = 0; char word[MAX_WORD + 32]; @@ -458,26 +491,34 @@ void print_disjunct_list(Disjunct * dj) lg_strlcpy(word, dj->word_string, sizeof(word)); patch_subscript_mark(word); - printf("%16s", word); - if (print_disjunct_address) printf("(%p)", dj); - printf(": "); + append_string(s, "%16s", word); + if 
(print_disjunct_address) append_string(s, "(%p)", dj); + dyn_strcat(s, ": "); - printf("[%d](%s) ", i++, cost_stringify(dj->cost)); + append_string(s, "[%d]%s= ", i++, cost_stringify(dj->cost)); - print_connector_list(dj->left); - printf(" <--> "); - print_connector_list(dj->right); - printf("\n"); + dyn_print_connector_list(s, dj->left, flags); + dyn_strcat(s, " <--> "); + dyn_print_connector_list(s, dj->right, flags); + dyn_strcat(s, "\n"); } } void print_all_disjuncts(Sentence sent) { - for (WordIdx w = 0; w < sent->length; w++) - { - printf("Word %zu:\n", w); - print_disjunct_list(sent->word[w].d); - } + dyn_str *s = dyn_str_new(); + uint32_t flags = make_flag('l') | make_flag('t'); + + for (WordIdx w = 0; w < sent->length; w++) + { + append_string(s, "Word %zu:\n", w); + dyn_print_disjunct_list(s, sent->word[w].d, flags); + + } + + char *t = dyn_str_take(s); + puts(t); + free(t); } /* ============= Connector encoding, sharing and packing ============= */ diff --git a/link-grammar/disjunct-utils.h b/link-grammar/disjunct-utils.h index b76af9be9c..d51866be3e 100644 --- a/link-grammar/disjunct-utils.h +++ b/link-grammar/disjunct-utils.h @@ -72,9 +72,12 @@ Tracon_sharing *pack_sentence_for_parsing(Sentence); void free_tracon_sharing(Tracon_sharing *); void count_disjuncts_and_connectors(Sentence, unsigned int *, unsigned int *); -void print_one_connector(Connector *, int, int); -void print_connector_list(Connector *); -void print_disjunct_list(Disjunct *); +void print_one_connector(Connector *, int, int, uint32_t); +void dyn_print_one_connector(dyn_str *s, Connector *, int, int, uint32_t); +void print_connector_list(Connector *, uint32_t); +void dyn_print_connector_list(dyn_str *s, Connector *, uint32_t); +void print_disjunct_list(Disjunct *, uint32_t); +void dyn_print_disjunct_list(dyn_str *s, Disjunct *, uint32_t); void print_all_disjuncts(Sentence); /* Save and restore sentence disjuncts */ From 6c21c75df241537aa28b776d97c9903ecec8f796 Mon Sep 17 00:00:00 2001 
From: ampli Date: Sun, 5 Jan 2020 17:29:46 +0200 Subject: [PATCH 07/17] Implement !!word/regex/ for disjunct display --- link-grammar/dict-common/print-dict.c | 162 ++++++++++++++++++++++---- link-grammar/disjunct-utils.c | 109 ++++++++++++++++- link-grammar/disjunct-utils.h | 1 + link-grammar/print/print.c | 22 ++-- link-grammar/print/print.h | 3 +- link-grammar/tokenize/tokenize.c | 2 +- 6 files changed, 263 insertions(+), 36 deletions(-) diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index 0832207493..2002108197 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -18,6 +18,8 @@ #include "dict-defines.h" #include "dict-file/word-file.h" #include "dict-file/read-dict.h" +#include "dict-utils.h" // copy_Exp +#include "disjunct-utils.h" #include "print/print.h" #include "print/print-util.h" #include "regex-morph.h" @@ -223,8 +225,9 @@ const char *lg_exp_stringify(const Exp *n) return lg_exp_stringify_with_tags(NULL, n); } +/* ================ Display word expressions / disjuncts ================= */ -/* ======================================================================= */ +const char do_display_expr; /* a sentinel to request an expression display */ /** * Display the information about the given word. @@ -236,9 +239,9 @@ const char *lg_exp_stringify(const Exp *n) * In this case no split is done. */ static char *display_word_split(Dictionary dict, - const char * word, - Parse_Options opts, - char * (*display)(Dictionary, const char *)) + const char * word, Parse_Options opts, + char * (*display)(Dictionary, const char *, const void **), + const char **arg) { Sentence sent; @@ -259,9 +262,45 @@ static char *display_word_split(Dictionary dict, if (0 == sentence_split(sent, opts)) { /* List the splits */ - print_sentence_word_alternatives(s, sent, false, NULL, NULL); - /* List the disjuncts information. 
*/ - print_sentence_word_alternatives(s, sent, false, display, NULL); + print_sentence_word_alternatives(s, sent, false, NULL, NULL, NULL); + /* List the expression / disjunct information */ + + /* Initialize the callback arguments */ + const void *carg[3] = { /*regex*/NULL, /*flags*/NULL, opts }; + + Regex_node *rn = NULL; + if (arg != NULL) + { + if (arg[0] == &do_display_expr) + { + carg[0] = &do_display_expr; + } + else if (arg[0] != NULL) + { + /* A regex is specified, which means displaying disjuncts. */ + carg[1] = arg[1]; /* flags */ + + if (arg[0][0] != '\0') + { + rn = malloc(sizeof(Regex_node)); + rn->name = strdup("Disjunct regex"); + rn->pattern = strdup(arg[0]); + rn->re = NULL; + rn->neg = false; + rn->next = NULL; + + if (compile_regexs(rn, NULL) != 0) + { + prt_error("Error: Failed to compile regex \"%s\".\n", arg[0]); + return strdup(""); /* not NULL (NULL means no dict entry) */ + } + + carg[0] = rn; + } + } + } + print_sentence_word_alternatives(s, sent, false, display, carg, NULL); + if (rn != NULL) free_regexs(rn); } sentence_delete(sent); parse_options_set_spell_guess(opts, spell_option); @@ -328,7 +367,7 @@ static char *display_counts(const char *word, Dict_node *dn) { dyn_str *s = dyn_str_new(); - append_string(s, "matches:\n"); + dyn_strcat(s, "matches:\n"); for (; dn != NULL; dn = dn->right) { append_string(s, " %-*s %8u disjuncts", @@ -339,32 +378,51 @@ static char *display_counts(const char *word, Dict_node *dn) { append_string(s, " <%s>", dn->file->file); } - append_string(s, "\n\n"); + dyn_strcat(s, "\n\n"); } return dyn_str_take(s); } /** - * Display the number of disjuncts associated with this dict node + * Display the expressions associated with this dict node. 
*/ -static char *display_expr(Dictionary dict, const char *word, Dict_node *dn) +static char *display_expr(Dictionary dict, const char *word, Dict_node *dn, + const void **arg) { - dyn_str *s = dyn_str_new(); + const Parse_Options opts = (Parse_Options)arg[2]; + + /* copy_Exp() needs an Exp memory pool. */ + Pool_desc *Exp_pool = pool_new(__func__, "Exp", /*num_elements*/256, + sizeof(Exp), /*zero_out*/false, + /*align*/false, /*exact*/false); - append_string(s, "expressions:\n"); + dyn_str *s = dyn_str_new(); + dyn_strcat(s, "expressions:\n"); for (; dn != NULL; dn = dn->right) { - const char *expstr = lg_exp_stringify_with_tags(dict, dn->exp); + Exp *e = copy_Exp(dn->exp, Exp_pool, opts); /* assign dialect costs */ + pool_reuse(Exp_pool); + + const char *expstr = lg_exp_stringify_with_tags(dict, e); append_string(s, " %-*s %s", display_width(DJ_COL_WIDTH, dn->string), dn->string, expstr); - append_string(s, "\n\n"); + dyn_strcat(s, "\n\n"); } + + if (Exp_pool != NULL) pool_delete(Exp_pool); return dyn_str_take(s); } -static char *display_word_info(Dictionary dict, const char * word) +/** + * A callback function to display the number of disjuncts and the file name of \p word. + * + * @param arg Callback args (unused). + * @return String to display. Must be freed by the caller. + */ +static char *display_word_info(Dictionary dict, const char *word, + const void **arg) { const char * regex_name; Dict_node *dn_head; @@ -381,21 +439,38 @@ static char *display_word_info(Dictionary dict, const char * word) regex_name = match_regex(dict->regex_root, word); if (regex_name) { - return display_word_info(dict, regex_name); + return display_word_info(dict, regex_name, arg); } return NULL; } -static char *display_word_expr(Dictionary dict, const char * word) +/** + * A callback function to display \p word expressions or disjuncts. + * @param arg Callback data as follows: + * arg[0]: &do_display_expr or disjunct selection regex. 
+ * arg[1]: flags + * arg[2]: Parse_Options + * @return String to display. Must be freed by the caller. + */ +static char *display_word_expr(Dictionary dict, const char *word, + const void **arg) { const char * regex_name; Dict_node *dn_head; + char *out = NULL; dn_head = dictionary_lookup_wild(dict, word); if (dn_head) { - char *out = display_expr(dict, word, dn_head); + if (arg[0] == &do_display_expr) + { + out = display_expr(dict, word, dn_head, arg); + } + else + { + out = display_disjuncts(dict, dn_head, arg); + } free_lookup_list(dict, dn_head); return out; } @@ -404,19 +479,53 @@ static char *display_word_expr(Dictionary dict, const char * word) regex_name = match_regex(dict->regex_root, word); if (regex_name) { - return display_word_expr(dict, regex_name); + return display_word_expr(dict, regex_name, arg); } return NULL; } +/** + * Break word/re/flags into components. + * /regex/ and flags are optional. + * \p re and \p flags may both be NULL. + * @param[out] re the regex component, unless \c NULL. + * @param[out] flags the flags component, unless \c NULL. + * @return The word component. + */ +static const char *display_word_extract(char *word, const char **re, + const char **flags) +{ + if (re != NULL) *re = NULL; + if (flags != NULL) *flags = NULL; + + char *r = strchr(word, '/'); + if (r == NULL) return word; + *r = '\0'; + + if (re != NULL) + { + *re = r + 1; + char *f = strchr(*re, '/'); + if (f != NULL) + { + *f = '\0'; + *flags = f + 1; + } + } + return word; +} + /** * dict_display_word_info() - display the information about the given word. 
*/ -char *dict_display_word_info(Dictionary dict, const char * word, +char *dict_display_word_info(Dictionary dict, const char *word, Parse_Options opts) { - return display_word_split(dict, word, opts, display_word_info); + char *wordbuf = strdupa(word); + word = display_word_extract(wordbuf, NULL, NULL); + + return display_word_split(dict, word, opts, display_word_info, NULL); } /** @@ -424,5 +533,12 @@ char *dict_display_word_info(Dictionary dict, const char * word, */ char *dict_display_word_expr(Dictionary dict, const char * word, Parse_Options opts) { - return display_word_split(dict, word, opts, display_word_expr); + const char *arg[2]; + char *wordbuf = strdupa(word); + word = display_word_extract(wordbuf, &arg[0], &arg[1]); + + /* If no regex component, then it's a request to display expressions. */ + if (arg[0] == NULL) arg[0] = &do_display_expr; + + return display_word_split(dict, word, opts, display_word_expr, arg); } diff --git a/link-grammar/disjunct-utils.c b/link-grammar/disjunct-utils.c index 3631d76027..3bbd132e48 100644 --- a/link-grammar/disjunct-utils.c +++ b/link-grammar/disjunct-utils.c @@ -1,7 +1,7 @@ /*************************************************************************/ /* Copyright (c) 2004 */ /* Daniel Sleator, David Temperley, and John Lafferty */ -/* Copyright 2018, 2019, Amir Plivatsky */ +/* Copyright 2018-2020, Amir Plivatsky */ /* All rights reserved */ /* */ /* Use of the link grammar parsing system is subject to the terms of the */ @@ -14,8 +14,12 @@ #include "api-structures.h" // Sentence #include "connectors.h" +#include "dict-common/dict-structures.h" +#include "dict-common/dict-utils.h" // copy_Exp +#include "dict-common/regex-morph.h" // match_regex #include "disjunct-utils.h" #include "memory-pool.h" +#include "prepare/build-disjuncts.h" #include "print/print-util.h" #include "tokenize/tok-structures.h" // XXX TODO provide gword access methods! 
#include "tokenize/word-structures.h" @@ -521,6 +525,109 @@ void print_all_disjuncts(Sentence sent) free(t); } +/** + * Display the disjuncts of expressions in \p dn. + */ +char *display_disjuncts(Dictionary dict, const Dict_node *dn, const void **arg) +{ + const void *rn = arg[0]; + const char *flags = arg[1]; + const Parse_Options opts = (Parse_Options)arg[2]; + double max_cost = opts->disjunct_cost; + + uint32_t int_flags = 0; + if (flags != NULL) + { + for (const char *f = flags; *f != '\0'; f++) + int_flags |= make_flag(*f); + } + + /* build_disjuncts_for_exp() needs memory pools for efficiency. */ + Sentence dummy_sent = sentence_create("", dict); /* For memory pools. */ + dummy_sent->Disjunct_pool = pool_new(__func__, "Disjunct", + /*num_elements*/8192, sizeof(Disjunct), + /*zero_out*/false, /*align*/false, false); + dummy_sent->Connector_pool = pool_new(__func__, "Connector", + /*num_elements*/65536, sizeof(Connector), + /*zero_out*/true, /*align*/false, false); + + /* copy_Exp() needs an Exp memory pool. */ + Pool_desc *Exp_pool = pool_new(__func__, "Exp", /*num_elements*/256, + sizeof(Exp), /*zero_out*/false, + /*align*/false, /*exact*/false); + + dyn_str *s = dyn_str_new(); + dyn_strcat(s, "disjuncts:\n"); + for (; dn != NULL; dn = dn->right) + { + /* Use copy_Exp() to assign dialect cost. */ + Exp *e = copy_Exp(dn->exp, Exp_pool, opts); + Disjunct *d = build_disjuncts_for_exp(dummy_sent, e, dn->string, NULL, + max_cost, NULL); + unsigned int dnum0 = count_disjuncts(d); + d = eliminate_duplicate_disjuncts(d); + unsigned int dnum1 = count_disjuncts(d); + + dyn_str *dyn_pdl = dyn_str_new(); + dyn_print_disjunct_list(dyn_pdl, d, int_flags); + char *dliststr = dyn_str_take(dyn_pdl); + + pool_reuse(Exp_pool); + pool_reuse(dummy_sent->Disjunct_pool); + pool_reuse(dummy_sent->Connector_pool); + + /* Count number of disjuncts with tunnel connectors. 
*/ + unsigned int tnum = 0; + for (const char *p = dliststr; *p != '\0'; p++) + if ((p[0] == ' ') && (p[1] == 'x')) tnum++; + + unsigned int dnum_selected = 0; + dyn_str *selected = NULL; + char *dstr = dliststr; + char *end; + if (rn != NULL) + { + selected = dyn_str_new(); + + do + { + end = strchr(dstr, '\n'); + *end = '\0'; + if (match_regex(rn , dstr) != NULL) + { + dyn_strcat(selected, dstr); + dyn_strcat(selected, "\n"); + dnum_selected++; + } + + dstr = end + 1; + } while (*dstr != '\0'); + + free(dliststr); + dliststr = dyn_str_take(selected); + } + + append_string(s, " %s %u/%u disjuncts", dn->string, dnum1, dnum0); + if (tnum != 0) append_string(s, " (%u tunnels)", tnum); + dyn_strcat(s, "\n"); + dyn_strcat(s, dliststr); + dyn_strcat(s, "\n"); + free(dliststr); + + if (rn != NULL) + { + if (dnum_selected == dnum1) + dyn_strcat(s, "(all the disjuncts matched)\n\n"); + else + append_string(s, "(%u disjuncts matched)\n\n", dnum_selected); + } + } + pool_delete(Exp_pool); + sentence_delete(dummy_sent); + + return dyn_str_take(s); +} + /* ============= Connector encoding, sharing and packing ============= */ /* diff --git a/link-grammar/disjunct-utils.h b/link-grammar/disjunct-utils.h index d51866be3e..463785a3c2 100644 --- a/link-grammar/disjunct-utils.h +++ b/link-grammar/disjunct-utils.h @@ -79,6 +79,7 @@ void dyn_print_connector_list(dyn_str *s, Connector *, uint32_t); void print_disjunct_list(Disjunct *, uint32_t); void dyn_print_disjunct_list(dyn_str *s, Disjunct *, uint32_t); void print_all_disjuncts(Sentence); +char *display_disjuncts(Dictionary, const Dict_node *, const void **); /* Save and restore sentence disjuncts */ typedef struct diff --git a/link-grammar/print/print.c b/link-grammar/print/print.c index ce6547017e..5bb5d8e608 100644 --- a/link-grammar/print/print.c +++ b/link-grammar/print/print.c @@ -1258,24 +1258,24 @@ static const char * header(bool print_ps_header) /** * Print elements of the 2D-word-array produced for the parsers. 
* - * - print_sentence_word_alternatives(s, sent, false, NULL, tokenpos) + * - print_sentence_word_alternatives(s, sent, false, NULL, NULL, tokenpos) * If a pointer to struct "tokenpos" is given, return through it the index of * the first occurrence in the sentence of the given token. This is used to * prevent duplicate information display for repeated morphemes (if there are * multiples splits, each of several morphemes, otherwise some of them may * repeat). * - * - print_sentence_word_alternatives(s, sent, true, NULL, NULL) + * - print_sentence_word_alternatives(s, sent, true, NULL, NULL, NULL) * If debugprint is "true", this is a debug printout of the sentence. (The * debug printouts are with level 0 because this function is invoked for debug * on certain positive level.) * * - * - print_sentence_word_alternatives(s, sent, false, display_func, NULL) + * - print_sentence_word_alternatives(s, sent, false, display_func, arg, NULL) * Iterate over the sentence words and their alternatives. Handle each - * alternative using the display_func function if it is supplied, or else (if it - * is NULL) just print them. It is used to display disjunct information when - * command !!word is used. + * alternative using display_func(..., arg) if it is supplied, or else (if + * display_func is NULL) just print them. It is used to display disjunct + * information when command !!word is used. * FIXME In the current version (using Wordgraph) the "alternatives" in the * word-array don't necessarily consist of real word alternatives. 
* @@ -1288,8 +1288,10 @@ struct tokenpos /* First position of the given token - to prevent duplicates */ size_t ai; }; -void print_sentence_word_alternatives(dyn_str *s, Sentence sent, bool debugprint, - char * (*display)(Dictionary, const char *), struct tokenpos * tokenpos) +void print_sentence_word_alternatives(dyn_str *s, Sentence sent, + bool debugprint, + char * (*display)(Dictionary, const char *, const void **), + const void **arg, struct tokenpos *tokenpos) { size_t wi; /* Internal sentence word index */ size_t ai; /* Index of a word alternative */ @@ -1432,7 +1434,7 @@ void print_sentence_word_alternatives(dyn_str *s, Sentence sent, bool debugprint { struct tokenpos firstpos = { wt }; - print_sentence_word_alternatives(s, sent, false, NULL, &firstpos); + print_sentence_word_alternatives(s, sent, false, NULL, NULL, &firstpos); if (((firstpos.wi != wi) || (firstpos.ai != ai)) && firstpos.wi >= first_sentence_word) // allow !!LEFT_WORD { @@ -1463,7 +1465,7 @@ void print_sentence_word_alternatives(dyn_str *s, Sentence sent, bool debugprint * Display the features of the token. 
*/ if ((NULL == tokenpos) && (NULL != display)) { - char *info = display(sent->dict, wt); + char *info = display(sent->dict, wt, arg); if (NULL == info) return; append_string(s, "Token \"%s\" ", wt); diff --git a/link-grammar/print/print.h b/link-grammar/print/print.h index 83f206e63c..a54e646e36 100644 --- a/link-grammar/print/print.h +++ b/link-grammar/print/print.h @@ -22,7 +22,8 @@ void print_disjunct_counts(Sentence sent); struct tokenpos; void print_sentence_word_alternatives(dyn_str *, Sentence, bool, - char * (*)(Dictionary, const char *), struct tokenpos *); + char * (*)(Dictionary, const char *, const void **), const void **arg, + struct tokenpos *); // Used for debug/error printing void print_sentence_context(Sentence, dyn_str*); diff --git a/link-grammar/tokenize/tokenize.c b/link-grammar/tokenize/tokenize.c index 08322c83e2..1486e57fe1 100644 --- a/link-grammar/tokenize/tokenize.c +++ b/link-grammar/tokenize/tokenize.c @@ -3395,7 +3395,7 @@ bool flatten_wordgraph(Sentence sent, Parse_Options opts) if (verbosity_level(D_SW)) { dyn_str *s = dyn_str_new(); - print_sentence_word_alternatives(s, sent, true, NULL, NULL); + print_sentence_word_alternatives(s, sent, true, NULL, NULL, NULL); char *out = dyn_str_take(s); prt_error("Debug: Sentence words and alternatives:\n%s", out); free(out); From c5a75cdb95bff72b932a70da7955bef51cb05a37 Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 19 Jan 2020 04:15:14 +0200 Subject: [PATCH 08/17] print_expression_parens(): Rewrite --- link-grammar/dict-common/print-dict.c | 179 ++++++++++---------------- 1 file changed, 71 insertions(+), 108 deletions(-) diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index 2002108197..85224334f8 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -52,155 +52,117 @@ static void print_expression_tag(Dictionary dict, dyn_str *e, const Exp *n) dyn_strcat(e, dict->dialect_tag.name[n->tag_id]); } -/** - * 
print the expression, in infix-style - */ -static dyn_str *print_expression_parens(Dictionary dict, dyn_str *e, - const Exp * n, int need_parens) +static void get_expression_cost(const Exp *e, unsigned int *icost, double *dcost) { - Exp *operand; - int i, icost; - double dcost; - - if (n == NULL) - { - dyn_strcat(e, "NULL expression"); - return e; - } - - if (n->cost < -cost_epsilon) + if (e->cost < -cost_epsilon) { - icost = 1; - dcost = n->cost; + *icost = 1; + *dcost = e->cost; } - else if (cost_eq(n->cost, 0.0)) + else if (cost_eq(e->cost, 0.0)) { /* avoid [X+]-0.00 */ - icost = 0; - dcost = 0; + *icost = 0; + *dcost = 0; } else { - icost = (int) (n->cost); - dcost = n->cost - icost; - if (dcost > cost_epsilon) + *icost = (int) (e->cost); + *dcost = e->cost - *icost; + if (*dcost > cost_epsilon) { - dcost = n->cost; - icost = 1; + *dcost = e->cost; + *icost = 1; } else { - if (icost > 4) + if (*icost > 4) { /* don't print too many [] levels */ - dcost = icost; - icost = 1; + *dcost = *icost; + *icost = 1; } else { - dcost = 0; + *dcost = 0; } } } +} +static bool is_expression_optional(const Exp *e) +{ + Exp *o = e->operand_first; + + return (e->type == OR_type) && (o != NULL) && (o->type == AND_type) && + (NULL == o->operand_first) && (o->cost == 0) && + (o->tag_type = Exptag_none); +} + +static void print_expression_parens(Dictionary dict, dyn_str *e, + const Exp *n, bool need_parens) +{ + unsigned int icost; + double dcost; + get_expression_cost(n, &icost, &dcost); + for (unsigned int i = 0; i < icost; i++) dyn_strcat(e, "["); if (Exptag_none != n->tag_type) dyn_strcat(e, "["); - /* print the connector only */ + const char *opr = NULL; + Exp *opd = n->operand_first; + if (n->type == CONNECTOR_type) { - for (i=0; imulti) dyn_strcat(e, "@"); - append_string(e, "%s%c", n->condesc?n->condesc->string:"(null)", n->dir); - for (i=0; icondesc ? 
n->condesc->string : "error-null-connector"); + dyn_strcat(e, (const char []){ n->dir, '\0' }); } - - operand = n->operand_first; - if (operand == NULL) - { - for (i=0; itype == OR_type) && operand && (operand->type == AND_type) && - operand->cost == 0 && (NULL == operand->operand_first)) + else if (is_expression_optional(n)) { dyn_strcat(e, "{"); - if (NULL == operand->operand_next) dyn_strcat(e, "error-no-next"); - else print_expression_parens(dict, e, operand->operand_next, false); - dyn_strcat(e, "}"); - for (i=0; itype == AND_type) && (operand->operand_next == NULL)) - { - for (i=0; itype == AND_type) dyn_strcat(e, " & "); - if (n->type == OR_type) dyn_strcat(e, " or "); - - /* print right side of binary expr */ - operand = operand->operand_next; - if (operand == NULL) - { - if (n->type == OR_type) - dyn_strcat(e, "error-no-next"); + if (NULL == opd->operand_next) + dyn_strcat(e, "error-no-next"); /* unary OR */ else - dyn_strcat(e, "()"); + print_expression_parens(dict, e, opd->operand_next, false); + dyn_strcat(e, "}"); } else { - do + if (n->type == AND_type) + opr = " & "; + else if (n->type == OR_type) + opr = " or "; + else + append_string(e, "error-exp-type-%d", (int)n->type); + + if (opr != NULL) { - if (operand->type == n->type) - { - print_expression_parens(dict, e, operand, false); - } - else - { - print_expression_parens(dict, e, operand, true); - } + /* (opd == NULL) means this is a null expression. 
*/ + if (((icost == 0) && need_parens) || (opd == NULL)) dyn_strcat(e, "("); + + if ((opd == NULL) && (n->type == OR_type)) + dyn_strcat(e, "error-zeroary-or"); - operand = operand->operand_next; - if (operand != NULL) + for (Exp *l = opd; l != NULL; l = l->operand_next) { - if (n->type == AND_type) dyn_strcat(e, " & "); - if (n->type == OR_type) dyn_strcat(e, " or "); + print_expression_parens(dict, e, l, true); + + if (l->operand_next != NULL) + dyn_strcat(e, opr); + else if ((n->type == OR_type) && (l == n->operand_first)) + dyn_strcat(e, " or error-no-next"); /* unary OR */ } - } while (operand != NULL); - } - for (i=0; i Date: Sun, 19 Jan 2020 05:53:21 +0200 Subject: [PATCH 09/17] !!word: Display expressions with macro tags --- link-grammar/dict-common/dict-common.c | 2 + link-grammar/dict-common/dict-common.h | 1 + link-grammar/dict-common/dict-structures.h | 2 +- link-grammar/dict-common/print-dict.c | 67 ++++++++++++++++++---- link-grammar/dict-file/dictionary.c | 6 ++ link-grammar/dict-file/read-dict.c | 27 ++++++++- link-grammar/prepare/build-disjuncts.c | 62 +++++++++++++++++--- link-grammar/prepare/build-disjuncts.h | 2 +- 8 files changed, 147 insertions(+), 22 deletions(-) diff --git a/link-grammar/dict-common/dict-common.c b/link-grammar/dict-common/dict-common.c index 1af1af61ae..01ed00a42a 100644 --- a/link-grammar/dict-common/dict-common.c +++ b/link-grammar/dict-common/dict-common.c @@ -303,6 +303,8 @@ void dictionary_delete(Dictionary dict) free_dialect(dict->dialect); free(dict->dialect_tag.name); string_id_delete(dict->dialect_tag.set); + if (dict->macro_tag != NULL) free(dict->macro_tag->name); + free(dict->macro_tag); free((void *)dict->suppress_warning); free_regexs(dict->regex_root); diff --git a/link-grammar/dict-common/dict-common.h b/link-grammar/dict-common/dict-common.h index 89c3a5722b..dac23cf4df 100644 --- a/link-grammar/dict-common/dict-common.h +++ b/link-grammar/dict-common/dict-common.h @@ -94,6 +94,7 @@ struct Dictionary_s 
Dialect *dialect; /* "4.0.dialect" info */ expression_tag dialect_tag; /* Expression dialect tag info */ + expression_tag *macro_tag; /* Macro tags for expression debug */ /* Affixes are used during the tokenization stage. */ Dictionary affix_table; diff --git a/link-grammar/dict-common/dict-structures.h b/link-grammar/dict-common/dict-structures.h index 25970ef683..8074589528 100644 --- a/link-grammar/dict-common/dict-structures.h +++ b/link-grammar/dict-common/dict-structures.h @@ -38,7 +38,7 @@ static const int cost_max_dec_places = 3; static const double cost_epsilon = 1E-5; #define EXPTAG_SZ 100 /* Initial size for the Exptag array. */ -typedef enum { Exptag_none=0, Exptag_dialect } Exptag_type; +typedef enum { Exptag_none=0, Exptag_dialect, Exptag_macro } Exptag_type; /** * The Exp structure defined below comprises the expression trees that are diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index 85224334f8..106b603e58 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -44,12 +44,55 @@ const char *cost_stringify(double cost) return buf; } -static void print_expression_tag(Dictionary dict, dyn_str *e, const Exp *n) +#define MACRO_INDENTATION 4 + +static void print_expression_tag_start(Dictionary dict, dyn_str *e, const Exp *n, + int *indent) { - if ((NULL == dict) || (Exptag_none == n->tag_type)) return; + switch (n->tag_type) + { + case Exptag_none: + break; + case Exptag_dialect: + dyn_strcat(e, "["); + break; + case Exptag_macro: + dyn_strcat(e, "\n"); + for(int i = 0; i < *indent; i++) dyn_strcat(e, " "); + dyn_strcat(e, dict->macro_tag->name[n->tag_id]); + dyn_strcat(e, ": "); + *(indent) += MACRO_INDENTATION; + break; + default: + for(int i = 0; i < *indent; i++) dyn_strcat(e, " "); + append_string(e, "Unknown tag type %d: ", (int)n->tag_type); + *(indent) += MACRO_INDENTATION; + } +} - dyn_strcat(e, "]"); - dyn_strcat(e, dict->dialect_tag.name[n->tag_id]); +static 
void print_expression_tag_end(Dictionary dict, dyn_str *e, const Exp *n, + int *indent) +{ + if (NULL == dict) return; + + switch (n->tag_type) + { + case Exptag_none: + break; + case Exptag_dialect: + dyn_strcat(e, "]"); + dyn_strcat(e, dict->dialect_tag.name[n->tag_id]); + break; + case Exptag_macro: + dyn_strcat(e, "\n"); + for(int i = 0; i < *indent - MACRO_INDENTATION/2; i++) + dyn_strcat(e, " "); + (*indent) -= MACRO_INDENTATION; + break; + default: + /* Handled in print_expression_tag_start(). */ + ; + } } static void get_expression_cost(const Exp *e, unsigned int *icost, double *dcost) @@ -99,14 +142,15 @@ static bool is_expression_optional(const Exp *e) (o->tag_type = Exptag_none); } -static void print_expression_parens(Dictionary dict, dyn_str *e, - const Exp *n, bool need_parens) +static void print_expression_parens(Dictionary dict, dyn_str *e, const Exp *n, + bool need_parens, int *indent) + { unsigned int icost; double dcost; get_expression_cost(n, &icost, &dcost); for (unsigned int i = 0; i < icost; i++) dyn_strcat(e, "["); - if (Exptag_none != n->tag_type) dyn_strcat(e, "["); + print_expression_tag_start(dict, e, n, indent); const char *opr = NULL; Exp *opd = n->operand_first; @@ -123,7 +167,7 @@ static void print_expression_parens(Dictionary dict, dyn_str *e, if (NULL == opd->operand_next) dyn_strcat(e, "error-no-next"); /* unary OR */ else - print_expression_parens(dict, e, opd->operand_next, false); + print_expression_parens(dict, e, opd->operand_next, false, indent); dyn_strcat(e, "}"); } else @@ -145,7 +189,7 @@ static void print_expression_parens(Dictionary dict, dyn_str *e, for (Exp *l = opd; l != NULL; l = l->operand_next) { - print_expression_parens(dict, e, l, true); + print_expression_parens(dict, e, l, true, indent); if (l->operand_next != NULL) dyn_strcat(e, opr); @@ -159,13 +203,14 @@ static void print_expression_parens(Dictionary dict, dyn_str *e, for (unsigned int i = 0; i < icost; i++) dyn_strcat(e, "]"); if (dcost != 0) 
dyn_strcat(e, cost_stringify(dcost)); - print_expression_tag(dict, e, n); + print_expression_tag_end(dict, e, n, indent); } static const char *lg_exp_stringify_with_tags(Dictionary dict, const Exp *n) { static TLS char *e_str; + int indent = 0; if (e_str != NULL) free(e_str); if (n == NULL) @@ -175,7 +220,7 @@ static const char *lg_exp_stringify_with_tags(Dictionary dict, const Exp *n) } dyn_str *e = dyn_str_new(); - print_expression_parens(dict, e, n, false); + print_expression_parens(dict, e, n, false, &indent); e_str = dyn_str_take(e); return e_str; } diff --git a/link-grammar/dict-file/dictionary.c b/link-grammar/dict-file/dictionary.c index 7d84a81cfc..05cdd94922 100644 --- a/link-grammar/dict-file/dictionary.c +++ b/link-grammar/dict-file/dictionary.c @@ -141,6 +141,12 @@ dictionary_six_str(const char * lang, dict->dialect_tag.set = string_id_create(); condesc_init(dict, 1<<13); Exp_pool_size = 1<<13; + + if (test_enabled("macro-tag")) + { + dict->macro_tag = malloc(sizeof(*dict->macro_tag)); + memset(dict->macro_tag, 0, sizeof(*dict->macro_tag)); + } } else { diff --git a/link-grammar/dict-file/read-dict.c b/link-grammar/dict-file/read-dict.c index cf943263fc..43c36c082b 100644 --- a/link-grammar/dict-file/read-dict.c +++ b/link-grammar/dict-file/read-dict.c @@ -937,6 +937,29 @@ static Exp * make_dir_connector(Dictionary dict, int i) } /* ======================================================================== */ +/** + * Add an optional macro/word tag, for expression debugging. + * Enabled by !test="macro-tag". This tag is used only in expression printing. 
+ */ +static unsigned int exptag_macro_add(Dictionary dict, const char *tag) +{ + expression_tag *mt = dict->macro_tag; + if (mt == NULL) return 0; + + if (mt->num == mt->size) + { + if (mt->num == 0) + mt->size = 128; + else + mt->size *= 2; + + mt->name = realloc(mt->name, mt->size * sizeof(*mt->name)); + } + mt->name[mt->num] = tag; + + return mt->num++; +} + /** * make_connector() -- make a node for a connector or dictionary word. * @@ -970,8 +993,10 @@ static Exp * make_connector(Dictionary dict) return NULL; } - /* Wrap it in a unary node as a placeholder for a cost if needed. */ + /* Wrap it in a unary node as a placeholder for a macro tag and cost. */ n = make_unary_node(dict->Exp_pool, dn->exp); + n->tag_id = exptag_macro_add(dict, dn->string); + if (n->tag_id != 0) n->tag_type = Exptag_macro; file_free_lookup(dn_head); } diff --git a/link-grammar/prepare/build-disjuncts.c b/link-grammar/prepare/build-disjuncts.c index 0c0abbec32..183c6a443e 100644 --- a/link-grammar/prepare/build-disjuncts.c +++ b/link-grammar/prepare/build-disjuncts.c @@ -16,6 +16,7 @@ #include "build-disjuncts.h" #include "connectors.h" #include "dict-common/dict-structures.h" // Exp_struct, lg_exp_stringify +#include "dict-common/dict-common.h" // Dictionary #include "disjunct-utils.h" #include "utilities.h" @@ -375,7 +376,7 @@ static const char *stringify_Exp_type(Exp_type type) } else { - snprintf(unknown_type, sizeof(unknown_type)-1, "unknown_type-%d", + snprintf(unknown_type, sizeof(unknown_type), "unknown_type-%d", (int)(type)); type_str = unknown_type; } @@ -383,6 +384,46 @@ static const char *stringify_Exp_type(Exp_type type) return type_str; } +static const char *stringify_Exp_tag(Exp *e, Dictionary dict) +{ + static TLS char tag_info[64]; + + switch (e->tag_type) + { + case Exptag_none: + return ""; + case Exptag_dialect: + if (dict == NULL) + { + snprintf(tag_info, sizeof(tag_info), " dialect_tag=%u", + e->tag_id); + } + else + { + snprintf(tag_info, sizeof(tag_info), " 
dialect_tag=%s", + dict->dialect_tag.name[e->tag_id]); + } + break; + case Exptag_macro: + if (dict == NULL) + { + snprintf(tag_info, sizeof(tag_info), " macro_tag"); + } + else + { + snprintf(tag_info, sizeof(tag_info), " macro_tag=%s", + dict->macro_tag->name[e->tag_id]); + } + break; + default: + snprintf(tag_info, sizeof(tag_info), " unknown_tag_type-%d", + (int)(e->tag_type)); + ; + } + + return tag_info; +} + static bool is_ASAN_uninitialized(uintptr_t a) { static const uintptr_t asan_uninitialized = (uintptr_t)0xbebebebebebebebeULL; @@ -390,7 +431,7 @@ static bool is_ASAN_uninitialized(uintptr_t a) return (a == asan_uninitialized); } -GNUC_UNUSED void prt_exp_mem(Exp *e, int i) +GNUC_UNUSED void prt_exp_all(Exp *e, int i, Dictionary dict) { if (is_ASAN_uninitialized((uintptr_t)e)) { @@ -419,20 +460,25 @@ GNUC_UNUSED void prt_exp_mem(Exp *e, int i) return; } } - printf(" (%d operand%s) cost=%s\n", operand_count, - operand_count == 1 ? "" : "s", cost_stringify(e->cost)); - + printf(" (%d operand%s) cost=%s%s\n", operand_count, + operand_count == 1 ? "" : "s", cost_stringify(e->cost), + stringify_Exp_tag(e, dict)); for (Exp *opd = e->operand_first; NULL != opd; opd = opd->operand_next) { - prt_exp_mem(opd, i+2); + prt_exp_all(opd, i+2, dict); } } else { - printf(" %s%s%c cost=%s\n", + printf(" %s%s%c cost=%s%s\n", e->multi ? "@" : "", e->condesc ? 
e->condesc->string : "(condesc=(null))", - e->dir, cost_stringify(e->cost)); + e->dir, cost_stringify(e->cost), stringify_Exp_tag(e, dict)); } } + +GNUC_UNUSED static void prt_exp_mem(Exp *e) +{ + prt_exp_all(e, 0, NULL); +} #endif /* DEBUG */ diff --git a/link-grammar/prepare/build-disjuncts.h b/link-grammar/prepare/build-disjuncts.h index da01f67446..5b82ed98cd 100644 --- a/link-grammar/prepare/build-disjuncts.h +++ b/link-grammar/prepare/build-disjuncts.h @@ -23,7 +23,7 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp *, const char *, #ifdef DEBUG void prt_exp(Exp *, int); -void prt_exp_mem(Exp *, int); +void prt_exp_all(Exp *, int, Dictionary); #endif /* DEBUG */ #endif /* _LINKGRAMMAR_BUILD_DISJUNCTS_H */ From 420c155cff56099c06fa6119ada3d9dc90445c0c Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 19 Jan 2020 07:03:48 +0200 Subject: [PATCH 10/17] Load dict macros by default; use !!/word/m to show expression macros --- link-grammar/dict-common/print-dict.c | 32 +++++++++++++++++---------- link-grammar/dict-file/dictionary.c | 2 +- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index 106b603e58..1987da54e2 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -57,6 +57,7 @@ static void print_expression_tag_start(Dictionary dict, dyn_str *e, const Exp *n dyn_strcat(e, "["); break; case Exptag_macro: + if (*indent < 0) break; dyn_strcat(e, "\n"); for(int i = 0; i < *indent; i++) dyn_strcat(e, " "); dyn_strcat(e, dict->macro_tag->name[n->tag_id]); @@ -84,6 +85,7 @@ static void print_expression_tag_end(Dictionary dict, dyn_str *e, const Exp *n, dyn_strcat(e, dict->dialect_tag.name[n->tag_id]); break; case Exptag_macro: + if (*indent < 0) break; dyn_strcat(e, "\n"); for(int i = 0; i < *indent - MACRO_INDENTATION/2; i++) dyn_strcat(e, " "); @@ -207,10 +209,11 @@ static void print_expression_parens(Dictionary dict, dyn_str 
*e, const Exp *n, } -static const char *lg_exp_stringify_with_tags(Dictionary dict, const Exp *n) +static const char *lg_exp_stringify_with_tags(Dictionary dict, const Exp *n, + bool show_macros) { static TLS char *e_str; - int indent = 0; + int indent = show_macros ? 0 : -1; if (e_str != NULL) free(e_str); if (n == NULL) @@ -230,7 +233,7 @@ static const char *lg_exp_stringify_with_tags(Dictionary dict, const Exp *n) */ const char *lg_exp_stringify(const Exp *n) { - return lg_exp_stringify_with_tags(NULL, n); + return lg_exp_stringify_with_tags(NULL, n, false); } /* ================ Display word expressions / disjuncts ================= */ @@ -279,6 +282,7 @@ static char *display_word_split(Dictionary dict, Regex_node *rn = NULL; if (arg != NULL) { + carg[1] = arg[1]; /* flags */ if (arg[0] == &do_display_expr) { carg[0] = &do_display_expr; @@ -286,8 +290,6 @@ static char *display_word_split(Dictionary dict, else if (arg[0] != NULL) { /* A regex is specified, which means displaying disjuncts. */ - carg[1] = arg[1]; /* flags */ - if (arg[0][0] != '\0') { rn = malloc(sizeof(Regex_node)); @@ -397,7 +399,9 @@ static char *display_counts(const char *word, Dict_node *dn) static char *display_expr(Dictionary dict, const char *word, Dict_node *dn, const void **arg) { + const char *flags = arg[1]; const Parse_Options opts = (Parse_Options)arg[2]; + bool show_macros = ((flags != NULL) && (strchr(flags, 'm') != NULL)); /* copy_Exp() needs an Exp memory pool. 
*/ Pool_desc *Exp_pool = pool_new(__func__, "Exp", /*num_elements*/256, @@ -411,7 +415,7 @@ static char *display_expr(Dictionary dict, const char *word, Dict_node *dn, Exp *e = copy_Exp(dn->exp, Exp_pool, opts); /* assign dialect costs */ pool_reuse(Exp_pool); - const char *expstr = lg_exp_stringify_with_tags(dict, e); + const char *expstr = lg_exp_stringify_with_tags(dict, e, show_macros); append_string(s, " %-*s %s", display_width(DJ_COL_WIDTH, dn->string), dn->string, @@ -494,9 +498,9 @@ static char *display_word_expr(Dictionary dict, const char *word, } /** - * Break word/re/flags into components. - * /regex/ and flags are optional. - * \p re and \p flags can be both NULL; + * Break "word", "word/flags" or "word/regex/flags" into components. + * "regex" and "flags" are optional. "word/" means an empty regex. + * \p re and \p flags can be both NULL. * @param re[out] the regex component, unless \c NULL. * @param flags[out] the flags component, unless \c NULL. * @return The word component. 
@@ -513,12 +517,16 @@ static const char *display_word_extract(char *word, const char **re, if (re != NULL) { - *re = r + 1; - char *f = strchr(*re, '/'); + char *f = strchr(r + 1, '/'); if (f != NULL) { + *re = r + 1; *f = '\0'; - *flags = f + 1; + *flags = f + 1; /* disjunct display flags */ + } + else + { + *flags = r + 1; /* expression display flags */ } } return word; diff --git a/link-grammar/dict-file/dictionary.c b/link-grammar/dict-file/dictionary.c index 05cdd94922..dbf035ece7 100644 --- a/link-grammar/dict-file/dictionary.c +++ b/link-grammar/dict-file/dictionary.c @@ -142,7 +142,7 @@ dictionary_six_str(const char * lang, condesc_init(dict, 1<<13); Exp_pool_size = 1<<13; - if (test_enabled("macro-tag")) + if (!test_enabled("no-macro-tag")) { dict->macro_tag = malloc(sizeof(*dict->macro_tag)); memset(dict->macro_tag, 0, sizeof(*dict->macro_tag)); From c4e1247cf31ce7f32ca9250001e28981c7752397 Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 19 Jan 2020 07:22:06 +0200 Subject: [PATCH 11/17] prt_exp_mem(): Move it and related functions to print-dict.c --- link-grammar/dict-common/dict-utils.h | 4 + link-grammar/dict-common/print-dict.c | 140 +++++++++++++++++++++++++ link-grammar/prepare/build-disjuncts.c | 137 ------------------------ link-grammar/prepare/build-disjuncts.h | 6 -- 4 files changed, 144 insertions(+), 143 deletions(-) diff --git a/link-grammar/dict-common/dict-utils.h b/link-grammar/dict-common/dict-utils.h index e94bd1eb0a..c395bbfd4b 100644 --- a/link-grammar/dict-common/dict-utils.h +++ b/link-grammar/dict-common/dict-utils.h @@ -21,6 +21,10 @@ void free_Exp(Exp *); int size_of_expression(Exp *); Exp * copy_Exp(Exp *, Pool_desc *, Parse_Options); bool is_exp_like_empty_word(Dictionary dict, Exp *); +void prt_exp_all(Exp *, int, Dictionary); +#ifdef DEBUG +void prt_exp(Exp *, int); +#endif /* DEBUG */ /* X_node utilities ... 
*/ X_node * catenate_X_nodes(X_node *, X_node *); diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index 1987da54e2..dfd6c8c57e 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -23,6 +23,7 @@ #include "print/print.h" #include "print/print-util.h" #include "regex-morph.h" +#include "utilities.h" // GNU_UNUSED /* ======================================================================== */ @@ -236,6 +237,145 @@ const char *lg_exp_stringify(const Exp *n) return lg_exp_stringify_with_tags(NULL, n, false); } +#ifdef DEBUG +/* There is a much better lg_exp_stringify() elsewhere + * This one is for low-level debug. */ +GNUC_UNUSED void prt_exp(Exp *e, int i) +{ + if (e == NULL) return; + + for(int j =0; jtype, e->dir, e->multi, cost_stringify(e->cost)); + if (e->type != CONNECTOR_type) + { + for (e = e->operand_next; e != NULL; e = e->operand_next) prt_exp(e, i+2); + } + else + { + for(int j =0; jcondesc->string); + } +} +#endif + +static const char *stringify_Exp_type(Exp_type type) +{ + static TLS char unknown_type[32] = ""; + const char *type_str; + + if (type > 0 && type <= 3) + { + type_str = ((const char *[]) {"OR", "AND", "CONNECTOR"}) [type-1]; + } + else + { + snprintf(unknown_type, sizeof(unknown_type), "unknown_type-%d", + (int)(type)); + type_str = unknown_type; + } + + return type_str; +} + +static const char *stringify_Exp_tag(Exp *e, Dictionary dict) +{ + static TLS char tag_info[64]; + + switch (e->tag_type) + { + case Exptag_none: + return ""; + case Exptag_dialect: + if (dict == NULL) + { + snprintf(tag_info, sizeof(tag_info), " dialect_tag=%u", + e->tag_id); + } + else + { + snprintf(tag_info, sizeof(tag_info), " dialect_tag=%s", + dict->dialect_tag.name[e->tag_id]); + } + break; + case Exptag_macro: + if (dict == NULL) + { + snprintf(tag_info, sizeof(tag_info), " macro_tag"); + } + else + { + snprintf(tag_info, sizeof(tag_info), " macro_tag=%s", + 
dict->macro_tag->name[e->tag_id]); + } + break; + default: + snprintf(tag_info, sizeof(tag_info), " unknown_tag_type-%d", + (int)(e->tag_type)); + ; + } + + return tag_info; +} + +static bool is_ASAN_uninitialized(uintptr_t a) +{ + static const uintptr_t asan_uninitialized = (uintptr_t)0xbebebebebebebebeULL; + + return (a == asan_uninitialized); +} + +void prt_exp_all(Exp *e, int i, Dictionary dict) +{ + if (is_ASAN_uninitialized((uintptr_t)e)) + { + printf ("e=UNINITIALIZED\n"); + return; + } + if (e == NULL) return; + + for(int j =0; jtype)); + + if (is_ASAN_uninitialized((uintptr_t)e->operand_first)) + printf(" (UNINITIALIZED operand_first)"); + if (is_ASAN_uninitialized((uintptr_t)e->operand_next)) + printf(" (UNINITIALIZED operand_next)"); + + if (e->type != CONNECTOR_type) + { + int operand_count = 0; + for (Exp *opd = e->operand_first; NULL != opd; opd = opd->operand_next) + { + operand_count++; + if (is_ASAN_uninitialized((uintptr_t)opd->operand_next)) + { + printf(" (operand %d: UNINITIALIZED operand_next)\n", operand_count); + return; + } + } + printf(" (%d operand%s) cost=%s%s\n", operand_count, + operand_count == 1 ? "" : "s", cost_stringify(e->cost), + stringify_Exp_tag(e, dict)); + for (Exp *opd = e->operand_first; NULL != opd; opd = opd->operand_next) + { + prt_exp_all(opd, i+2, dict); + } + } + else + { + printf(" %s%s%c cost=%s%s\n", + e->multi ? "@" : "", + e->condesc ? 
e->condesc->string : "(condesc=(null))", + e->dir, cost_stringify(e->cost), stringify_Exp_tag(e, dict)); + } +} + +GNUC_UNUSED static void prt_exp_mem(Exp *e) +{ + prt_exp_all(e, 0, NULL); +} + /* ================ Display word expressions / disjuncts ================= */ const char do_display_expr; /* a sentinel to request an expression display */ diff --git a/link-grammar/prepare/build-disjuncts.c b/link-grammar/prepare/build-disjuncts.c index 183c6a443e..f75fa813dc 100644 --- a/link-grammar/prepare/build-disjuncts.c +++ b/link-grammar/prepare/build-disjuncts.c @@ -344,141 +344,4 @@ GNUC_UNUSED static void print_clause_list(Clause * c) printf("\n"); } } - -/* There is a much better lg_exp_stringify() elsewhere - * This one is for low-level debug. */ -GNUC_UNUSED void prt_exp(Exp *e, int i) -{ - if (e == NULL) return; - - for(int j =0; jtype, e->dir, e->multi, cost_stringify(e->cost)); - if (e->type != CONNECTOR_type) - { - for (e = e->operand_next; e != NULL; e = e->operand_next) prt_exp(e, i+2); - } - else - { - for(int j =0; jcondesc->string); - } -} - -static const char *stringify_Exp_type(Exp_type type) -{ - static TLS char unknown_type[32] = ""; - const char *type_str; - - if (type > 0 && type <= 3) - { - type_str = ((const char *[]) {"OR", "AND", "CONNECTOR"}) [type-1]; - } - else - { - snprintf(unknown_type, sizeof(unknown_type), "unknown_type-%d", - (int)(type)); - type_str = unknown_type; - } - - return type_str; -} - -static const char *stringify_Exp_tag(Exp *e, Dictionary dict) -{ - static TLS char tag_info[64]; - - switch (e->tag_type) - { - case Exptag_none: - return ""; - case Exptag_dialect: - if (dict == NULL) - { - snprintf(tag_info, sizeof(tag_info), " dialect_tag=%u", - e->tag_id); - } - else - { - snprintf(tag_info, sizeof(tag_info), " dialect_tag=%s", - dict->dialect_tag.name[e->tag_id]); - } - break; - case Exptag_macro: - if (dict == NULL) - { - snprintf(tag_info, sizeof(tag_info), " macro_tag"); - } - else - { - snprintf(tag_info, 
sizeof(tag_info), " macro_tag=%s", - dict->macro_tag->name[e->tag_id]); - } - break; - default: - snprintf(tag_info, sizeof(tag_info), " unknown_tag_type-%d", - (int)(e->tag_type)); - ; - } - - return tag_info; -} - -static bool is_ASAN_uninitialized(uintptr_t a) -{ - static const uintptr_t asan_uninitialized = (uintptr_t)0xbebebebebebebebeULL; - - return (a == asan_uninitialized); -} - -GNUC_UNUSED void prt_exp_all(Exp *e, int i, Dictionary dict) -{ - if (is_ASAN_uninitialized((uintptr_t)e)) - { - printf ("e=UNINITIALIZED\n"); - return; - } - if (e == NULL) return; - - for(int j =0; jtype)); - - if (is_ASAN_uninitialized((uintptr_t)e->operand_first)) - printf(" (UNINITIALIZED operand_first)"); - if (is_ASAN_uninitialized((uintptr_t)e->operand_next)) - printf(" (UNINITIALIZED operand_next)"); - - if (e->type != CONNECTOR_type) - { - int operand_count = 0; - for (Exp *opd = e->operand_first; NULL != opd; opd = opd->operand_next) - { - operand_count++; - if (is_ASAN_uninitialized((uintptr_t)opd->operand_next)) - { - printf(" (operand %d: UNINITIALIZED operand_next)\n", operand_count); - return; - } - } - printf(" (%d operand%s) cost=%s%s\n", operand_count, - operand_count == 1 ? "" : "s", cost_stringify(e->cost), - stringify_Exp_tag(e, dict)); - for (Exp *opd = e->operand_first; NULL != opd; opd = opd->operand_next) - { - prt_exp_all(opd, i+2, dict); - } - } - else - { - printf(" %s%s%c cost=%s%s\n", - e->multi ? "@" : "", - e->condesc ? 
e->condesc->string : "(condesc=(null))", - e->dir, cost_stringify(e->cost), stringify_Exp_tag(e, dict)); - } -} - -GNUC_UNUSED static void prt_exp_mem(Exp *e) -{ - prt_exp_all(e, 0, NULL); -} #endif /* DEBUG */ diff --git a/link-grammar/prepare/build-disjuncts.h b/link-grammar/prepare/build-disjuncts.h index 5b82ed98cd..6e0732db4a 100644 --- a/link-grammar/prepare/build-disjuncts.h +++ b/link-grammar/prepare/build-disjuncts.h @@ -20,10 +20,4 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp *, const char *, const gword_set *, double cost_cutoff, Parse_Options opts); - -#ifdef DEBUG -void prt_exp(Exp *, int); -void prt_exp_all(Exp *, int, Dictionary); -#endif /* DEBUG */ - #endif /* _LINKGRAMMAR_BUILD_DISJUNCTS_H */ From 49ab28c2a4a0598f688abe2d362fc9be8a81cee2 Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 19 Jan 2020 07:35:11 +0200 Subject: [PATCH 12/17] prt_exp_mem(): Convert to use dyn_str --- link-grammar/dict-common/dict-utils.h | 3 ++- link-grammar/dict-common/print-dict.c | 35 ++++++++++++++++----------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/link-grammar/dict-common/dict-utils.h b/link-grammar/dict-common/dict-utils.h index c395bbfd4b..43a9828091 100644 --- a/link-grammar/dict-common/dict-utils.h +++ b/link-grammar/dict-common/dict-utils.h @@ -15,13 +15,14 @@ #define _DICT_UTILS_H_ #include "dict-common.h" +#include "utilities.h" // dyn_str /* Exp utilities ... 
*/ void free_Exp(Exp *); int size_of_expression(Exp *); Exp * copy_Exp(Exp *, Pool_desc *, Parse_Options); bool is_exp_like_empty_word(Dictionary dict, Exp *); -void prt_exp_all(Exp *, int, Dictionary); +void prt_exp_all(dyn_str *,Exp *, int, Dictionary); #ifdef DEBUG void prt_exp(Exp *, int); #endif /* DEBUG */ diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index dfd6c8c57e..f5152b0ed2 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -325,22 +325,22 @@ static bool is_ASAN_uninitialized(uintptr_t a) return (a == asan_uninitialized); } -void prt_exp_all(Exp *e, int i, Dictionary dict) +void prt_exp_all(dyn_str *s, Exp *e, int i, Dictionary dict) { if (is_ASAN_uninitialized((uintptr_t)e)) { - printf ("e=UNINITIALIZED\n"); + dyn_strcat(s, "e=UNINITIALIZED\n"); return; } if (e == NULL) return; - for(int j =0; jtype)); + for(int j =0; jtype)); if (is_ASAN_uninitialized((uintptr_t)e->operand_first)) - printf(" (UNINITIALIZED operand_first)"); + dyn_strcat(s, " (UNINITIALIZED operand_first)"); if (is_ASAN_uninitialized((uintptr_t)e->operand_next)) - printf(" (UNINITIALIZED operand_next)"); + dyn_strcat(s, " (UNINITIALIZED operand_next)"); if (e->type != CONNECTOR_type) { @@ -350,30 +350,37 @@ void prt_exp_all(Exp *e, int i, Dictionary dict) operand_count++; if (is_ASAN_uninitialized((uintptr_t)opd->operand_next)) { - printf(" (operand %d: UNINITIALIZED operand_next)\n", operand_count); + append_string(s, " (operand %d: UNINITIALIZED operand_next)\n", + operand_count); return; } } - printf(" (%d operand%s) cost=%s%s\n", operand_count, + append_string(s, " (%d operand%s) cost=%s%s\n", operand_count, operand_count == 1 ? "" : "s", cost_stringify(e->cost), stringify_Exp_tag(e, dict)); for (Exp *opd = e->operand_first; NULL != opd; opd = opd->operand_next) { - prt_exp_all(opd, i+2, dict); + prt_exp_all(s, opd, i+2, dict); } } else { - printf(" %s%s%c cost=%s%s\n", - e->multi ? 
"@" : "", - e->condesc ? e->condesc->string : "(condesc=(null))", - e->dir, cost_stringify(e->cost), stringify_Exp_tag(e, dict)); + append_string(s, " %s%s%c cost=%s%s\n", + e->multi ? "@" : "", + e->condesc ? e->condesc->string : "(condesc=(null))", + e->dir, cost_stringify(e->cost), + stringify_Exp_tag(e, dict)); } } GNUC_UNUSED static void prt_exp_mem(Exp *e) { - prt_exp_all(e, 0, NULL); + dyn_str *s = dyn_str_new(); + + prt_exp_all(s, e, 0, NULL); + char *e_str = dyn_str_take(s); + printf("%s", e_str); + free(e_str); } /* ================ Display word expressions / disjuncts ================= */ From 88531a92b06fc0ad0a6f447c3694aad7fddbca7d Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 19 Jan 2020 08:06:30 +0200 Subject: [PATCH 13/17] !!word/l: Print low-level expression memory --- link-grammar/dict-common/print-dict.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index f5152b0ed2..c793520708 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -549,6 +549,7 @@ static char *display_expr(Dictionary dict, const char *word, Dict_node *dn, const char *flags = arg[1]; const Parse_Options opts = (Parse_Options)arg[2]; bool show_macros = ((flags != NULL) && (strchr(flags, 'm') != NULL)); + bool low_level = ((flags != NULL) && (strchr(flags, 'l') != NULL)); /* copy_Exp() needs an Exp memory pool. 
*/ Pool_desc *Exp_pool = pool_new(__func__, "Exp", /*num_elements*/256, @@ -562,6 +563,13 @@ static char *display_expr(Dictionary dict, const char *word, Dict_node *dn, Exp *e = copy_Exp(dn->exp, Exp_pool, opts); /* assign dialect costs */ pool_reuse(Exp_pool); + if (low_level) + { + append_string(s, " %s\n", dn->string); + prt_exp_all(s, e, 0, dict); + dyn_strcat(s, "\n\n"); + } + const char *expstr = lg_exp_stringify_with_tags(dict, e, show_macros); append_string(s, " %-*s %s", From e02be1ed849b1d307ce965b4f2b90c0a36a8d6c0 Mon Sep 17 00:00:00 2001 From: ampli Date: Mon, 6 Jan 2020 17:58:02 +0200 Subject: [PATCH 14/17] ChangeLog: Update on adding !!word/ --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 8df5e19674..e4ff641ed2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,7 @@ Version 5.8.0 (XXX 2020) * English dict: support for archaic/poetic abbreviations * English dict: introduce OH link for vocatives/invocations. * English dict: improved parsing of imperatives. + * Add !!word/ link-parser command for displaying extended word dict info. Version 5.7.0 (13 Sept 2019) * Minor efficiency improvements to the SQL-backed dictionary. From f30b5ca3c0ebc55661409b8d8205f58f24e4fb24 Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 19 Jan 2020 09:15:45 +0200 Subject: [PATCH 15/17] command-help-en.txt: Add help info for the !! command --- data/command-help-en.txt | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/data/command-help-en.txt b/data/command-help-en.txt index 11e11aed0c..2338123643 100644 --- a/data/command-help-en.txt +++ b/data/command-help-en.txt @@ -305,3 +305,61 @@ Examples: !dialect=irish !dialect=irish,headline !dialect=instructions,bad-spelling:2.2 + +[!] +This command is for debugging the dictionary or the library. +It gets as an argument a word, and optionally a regex and flags. 
+It splits the given word into tokens according to the current language, +and for each token it prints its matching dictionary words along with its +expression or disjunct list. The word may include a wildcard * to find +multiple matches, and a subscript can be used to limit the matches to this +subscript only. + +Examples ("test.n" is an example word): + +Show the expression: + !!test.n + +Show the expression using macro tags: + !!test.n/m +Each macro tag is followed by its content on the same line. +The other lines are direct expression components (before and after a macro). + +Show also low-level memory details of the expression: + !!test.n/l + +Show the disjuncts (without duplicates): + !!test.n// + +Show selected disjuncts according to the supplied regex: + !!test.n/ Wd .*<-->.*@M\b/ + +Display all the words that start with "test": + !!test* + +Display all the words that start with "test" and have subscript ".q": + !!test*.q + +A sample output of a disjunct-list display: + Token "test.n" matches: + test.n 8509 disjuncts + + Token "test.n" disjuncts: + test.n 4273/4501 disjuncts + + ... + test.n: [4070]1.500= Wd @hCO Ds**c <--> Ss*s @M NM + ... + +In this sample output: + 8509 Number of disjuncts in the dictionary expression. + 4501 Number of disjuncts after applying cost-max. + 4273 Number of disjuncts w/o duplicates. + 4070 Disjunct ordinal number. + 1.500 Disjunct cost. + = A separator to enable regex anchoring. + <--> A separator of the "-" (LHS) and "+" (RHS) connector lists. + +These variables affect the output: +Disjuncts, expressions: !dialect +Disjuncts only: !cost-max From b9de7bdc3519796dbe52e46c06bb56f6b5b0b4ee Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 19 Jan 2020 09:31:48 +0200 Subject: [PATCH 16/17] command-line.c: Add '!'
as a command and add !help text --- link-parser/command-line.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/link-parser/command-line.c b/link-parser/command-line.c index 1d5b577b0e..54d50db227 100644 --- a/link-parser/command-line.c +++ b/link-parser/command-line.c @@ -62,6 +62,7 @@ static int variables_cmd(const Switch*, int); static int file_cmd(const Switch*, int); static int help_cmd(const Switch*, int); static int exit_cmd(const Switch*, int); +static int info_cmd(const Switch*, int); Switch default_switches[] = { @@ -103,6 +104,7 @@ Switch default_switches[] = {"help", Cmd, "List the commands and what they do", help_cmd}, {"quit", Cmd, UNDOC "Exit the program", exit_cmd}, {"variables", Cmd, "List user-settable variables and their functions", variables_cmd}, + {"!", Cmd, UNDOC "Print information on dictionary words", info_cmd}, {NULL, Cmd, NULL, NULL} }; @@ -579,6 +581,7 @@ static int help_cmd(const Switch *uc, int n) printf("\n"); printf(" !! Print all the dictionary words that match .\n"); printf(" A wildcard * may be used to find multiple matches.\n"); + printf(" Issue \"!help !\" for more details.\n"); printf("\n"); printf(" ! Toggle the specified Boolean variable.\n"); printf(" != Assign that value to that variable.\n"); @@ -619,6 +622,13 @@ static int file_cmd(const Switch *uc, int n) return 'f'; } +static int info_cmd(const Switch *uc, int n) +{ + /* Dummy definition - the work is done in + * x_issue_special_command() (see '!' there). */ + return 'c'; +} + static int x_issue_special_command(char * line, Command_Options *copts, Dictionary dict) { char *s, *x, *y; From 2e78a98d8d9d40807ac3d657ee7bc3c88ba9c649 Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 19 Jan 2020 09:54:15 +0200 Subject: [PATCH 17/17] !!: A hack for a clean display Don't split it.
--- link-grammar/dict-common/print-dict.c | 75 +++++++++++++++----------- link-grammar/tokenize/tok-structures.h | 1 - link-grammar/tokenize/tokenize.c | 8 +++ link-grammar/tokenize/tokenize.h | 1 + 4 files changed, 52 insertions(+), 33 deletions(-) diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index c793520708..1ecd6916a6 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -23,6 +23,7 @@ #include "print/print.h" #include "print/print-util.h" #include "regex-morph.h" +#include "tokenize/tokenize.h" // word_add #include "utilities.h" // GNU_UNUSED /* ======================================================================== */ @@ -417,48 +418,58 @@ static char *display_word_split(Dictionary dict, int spell_option = parse_options_get_spell_guess(opts); parse_options_set_spell_guess(opts, 0); sent = sentence_create(pword, dict); - if (0 == sentence_split(sent, opts)) + + if (pword[0] == '<' && pword[strlen(pword)-1] == '>') + { + /* Dictionary macro - don't split. 
*/ + if (!word0_set(sent, pword, opts)) goto display_word_split_error; + } + else { - /* List the splits */ - print_sentence_word_alternatives(s, sent, false, NULL, NULL, NULL); - /* List the expression / disjunct information */ + if (0 != sentence_split(sent, opts)) goto display_word_split_error; + } + + /* List the splits */ + print_sentence_word_alternatives(s, sent, false, NULL, NULL, NULL); + /* List the expression / disjunct information */ - /* Initialize the callback arguments */ - const void *carg[3] = { /*regex*/NULL, /*flags*/NULL, opts }; + /* Initialize the callback arguments */ + const void *carg[3] = { /*regex*/NULL, /*flags*/NULL, opts }; - Regex_node *rn = NULL; - if (arg != NULL) + Regex_node *rn = NULL; + if (arg != NULL) + { + carg[1] = arg[1]; /* flags */ + if (arg[0] == &do_display_expr) { - carg[1] = arg[1]; /* flags */ - if (arg[0] == &do_display_expr) - { - carg[0] = &do_display_expr; - } - else if (arg[0] != NULL) + carg[0] = &do_display_expr; + } + else if (arg[0] != NULL) + { + /* A regex is specified, which means displaying disjuncts. */ + if (arg[0][0] != '\0') { - /* A regex is specified, which means displaying disjuncts. 
*/ - if (arg[0][0] != '\0') + rn = malloc(sizeof(Regex_node)); + rn->name = strdup("Disjunct regex"); + rn->pattern = strdup(arg[0]); + rn->re = NULL; + rn->neg = false; + rn->next = NULL; + + if (compile_regexs(rn, NULL) != 0) { - rn = malloc(sizeof(Regex_node)); - rn->name = strdup("Disjunct regex"); - rn->pattern = strdup(arg[0]); - rn->re = NULL; - rn->neg = false; - rn->next = NULL; - - if (compile_regexs(rn, NULL) != 0) - { - prt_error("Error: Failed to compile regex \"%s\".\n", arg[0]); - return strdup(""); /* not NULL (NULL means no dict entry) */ - } - - carg[0] = rn; + prt_error("Error: Failed to compile regex \"%s\".\n", arg[0]); + return strdup(""); /* not NULL (NULL means no dict entry) */ } + + carg[0] = rn; } } - print_sentence_word_alternatives(s, sent, false, display, carg, NULL); - if (rn != NULL) free_regexs(rn); } + print_sentence_word_alternatives(s, sent, false, display, carg, NULL); + if (rn != NULL) free_regexs(rn); + +display_word_split_error: sentence_delete(sent); parse_options_set_spell_guess(opts, spell_option); diff --git a/link-grammar/tokenize/tok-structures.h b/link-grammar/tokenize/tok-structures.h index be337d26b2..b96c2f91ac 100644 --- a/link-grammar/tokenize/tok-structures.h +++ b/link-grammar/tokenize/tok-structures.h @@ -183,5 +183,4 @@ struct Wordgraph_pathpos_s /* Only for sane_morphism(). */ const Gword **path; /* Linkage candidate wordgraph path */ }; - #endif diff --git a/link-grammar/tokenize/tokenize.c b/link-grammar/tokenize/tokenize.c index 1486e57fe1..cf0e6f96f0 100644 --- a/link-grammar/tokenize/tokenize.c +++ b/link-grammar/tokenize/tokenize.c @@ -2971,6 +2971,14 @@ static Word *word_new(Sentence sent) return &sent->word[len]; } +/* Used only by display_word_split() for words that shouldn't get split. 
*/ +bool word0_set(Sentence sent, char *w, Parse_Options opts) +{ + word_new(sent); + altappend(sent, &sent->word[0].alternatives, w); + return setup_dialect(sent->dict, opts); +} + /** * build_word_expressions() -- build list of expressions for a word. * diff --git a/link-grammar/tokenize/tokenize.h b/link-grammar/tokenize/tokenize.h index 204bcc4dee..5a54013044 100644 --- a/link-grammar/tokenize/tokenize.h +++ b/link-grammar/tokenize/tokenize.h @@ -23,6 +23,7 @@ void wordgraph_delete(Sentence); void tokenization_done(Sentence, Gword *); void altappend(Sentence, const char ***, const char *); +bool word0_set(Sentence, char *, Parse_Options); Gword *issue_word_alternative(Sentence sent, Gword *unsplit_word, const char *label,