diff --git a/data/command-help-en.txt b/data/command-help-en.txt index 2338123643..81c708df2e 100644 --- a/data/command-help-en.txt +++ b/data/command-help-en.txt @@ -308,7 +308,7 @@ Examples: [!] This command is for debugging the dictionary or the library. -It gets as an argument a word, and optionally a regex and flags. +It gets as an argument a word, and optionally a regex and/or flags. It splits the given word to tokens according to the current language, and for each token it prints its matching dictionary words along with its expression or disjunct list. The word may include a wildcard * to find @@ -331,8 +331,20 @@ Show also low-level memory details of the expression: Show the disjuncts (without duplicates): !!test.n// +Show disjunct connector expression source macros: + !!test.n//m + +The above command is more useful for a single disjunct (1234 is an example +for a disjunct number, see below for disjunct print format): + !!test.n/1234/m + Show selected disjuncts according to the supplied regex: - !!test.n/ Wd .*<-->.*@M\b/ + !!test.n/ Wd-.*<-->.*@M\+/ + !!test.n/ J[sk]- D[\w*]+c\-/ + +Show selected disjuncts according to the supplied string (supposing the regex +engine is PCRE, which supports "\Q"): + !!test.n/\Q Ds**x+/ Display all the words that start with "test": !!test* @@ -340,6 +352,7 @@ Display all the words that start with "test": Display all the words that start with "test" and have subscript ".q": !!test*.q + A sample output of a disjunct-list display: Token "test.n" matches: test.n 8509 disjuncts @@ -348,15 +361,15 @@ A sample output of a disjunct-list display: test.n 4273/4501 disjuncts ... - test.n: [4070]1.500= Wd @hCO Ds**c <--> Ss*s @M NM + test.n: [3493]2.600= @AN- @A- Ds**x- <--> NM+ R+ Bs+ Bsm+ ... In this sample output: 8509 Number of disjuncts in the dictionary expression. 4501 Number of disjuncts after applying cost-max. 4273 Number of disjuncts w/o duplicates. - 4070 Disjunct ordinal number. - 1.500 Disjunct cost. 
+ 3493 Disjunct ordinal number. + 2.600 Disjunct cost. = A separator to enable regex anchoring. <--> A separator of the "-" (LHS) and "+" (RHS) connector lists. diff --git a/link-grammar/connectors.h b/link-grammar/connectors.h index 67055ca171..a1b6c7b1b2 100644 --- a/link-grammar/connectors.h +++ b/link-grammar/connectors.h @@ -120,16 +120,17 @@ struct Connector_struct the power pruning. */ uint8_t prune_pass; /* Prune pass number (one bit could be enough) */ bool multi; /* TRUE if this is a multi-connector */ - int tracon_id; /* Tracon identifier (see disjunct-utils.c) */ + int32_t tracon_id; /* Tracon identifier (see disjunct-utils.c) */ const condesc_t *desc; Connector *next; union { const gword_set *originating_gword; /* Used while and after parsing */ - /* For pruning use only */ struct { - int refcount; /* Memory-sharing reference count */ + int32_t refcount;/* Memory-sharing reference count - for pruning. */ + uint16_t exp_pos; /* The position in the originating expression, + currently used only for debugging dict macros. */ bool shallow; /* TRUE if this is a shallow connector. * A connectors is shallow if it is the first in * its list on its disjunct. 
(It is deep if it is diff --git a/link-grammar/dict-common/print-dict.c b/link-grammar/dict-common/print-dict.c index 8464a65c62..56b6dceed6 100644 --- a/link-grammar/dict-common/print-dict.c +++ b/link-grammar/dict-common/print-dict.c @@ -20,12 +20,13 @@ #include "dict-file/read-dict.h" #include "dict-utils.h" // copy_Exp #include "disjunct-utils.h" +#include "prepare/build-disjuncts.h" // build_disjuncts_for_exp #include "print/print.h" #include "print/print-util.h" #include "regex-morph.h" #include "tokenize/tokenize.h" // word_add +#include "tokenize/word-structures.h" // Word_struct #include "utilities.h" // GNU_UNUSED - /* ======================================================================== */ bool cost_eq(double cost1, double cost2) @@ -88,7 +89,15 @@ static void print_expression_tag_end(Dictionary dict, dyn_str *e, const Exp *n, break; case Exptag_macro: if (*indent < 0) break; - dyn_strcat(e, "\n"); + /* The sole purpose of the checks before issuing "\n" is to prevent + * empty lines when printing connector macros w/o introducing a + * separate version of this function for connector macro printing. */ + if (dyn_strlen(e) > 0) + { + dyn_trimback(e); + if ((dyn_str_value(e)[dyn_strlen(e)-1]) != '\n') + dyn_strcat(e, "\n"); + } for(int i = 0; i < *indent - MACRO_INDENTATION/2; i++) dyn_strcat(e, " "); (*indent) -= MACRO_INDENTATION; @@ -211,6 +220,100 @@ static void print_expression_parens(Dictionary dict, dyn_str *e, const Exp *n, } +/** + * Find if the given connector is in the given expression. + * @param e Expression. + * @param find_pos Connector position to search in \p e. + * @param pos Temporary connector position while the search advances. + * Return \c true iff the connector is found. 
+ */ +static bool exp_contains_connector(const Exp *e, int *pos, int find_pos) +{ + if (NULL == e) return false; + + if (CONNECTOR_type == e->type) + { +#if 0 + printf("exp_contains_connector: pos=%d C=%s%s%c %s\n", + *pos,e->multi?"@":"",e->condesc->string,e->dir, + (find_pos == *pos) ? "FOUND" : ""); +#endif + return (find_pos == (*pos)++); + } + + for(Exp *opd = e->operand_first; opd != NULL; opd = opd->operand_next) + { + if (exp_contains_connector(opd, pos, find_pos)) + return true; + } + + return false; +} + +typedef struct +{ + Dictionary dict; + dyn_str *e; + int indent; + int pos; /* current connector position in expression */ + int *find_pos; /* disjunct expression connectors positions */ + bool is_after_connector; /* an indicators for printing '&' */ +} cmacro_context; + +/** + * Print nested macros for each desired connector. + * The desired connector positions are in find_pos[] (ascending sorted, + * -1 terminated). Its first element holds the next position of the next + * connector to print, and is chopped after the connector is printed. + */ +static void print_connector_macros(cmacro_context *cmc, const Exp *n) +{ + if ((cmc->find_pos)[0] == -1) + return; /* fast termination when nothing more to do */ + + bool macro_started = false; + int current_pos = cmc->pos; + if ((Exptag_macro == n->tag_type) && + exp_contains_connector(n, ¤t_pos, (cmc->find_pos)[0])) + { + if (cmc->is_after_connector) + { + dyn_strcat(cmc->e, " & "); + cmc->is_after_connector = false; + } + print_expression_tag_start(cmc->dict, cmc->e, n, &cmc->indent); + macro_started = true; + } + + Exp *opd = n->operand_first; + + if (n->type == CONNECTOR_type) + { + if ((cmc->find_pos)[0] == cmc->pos) + { + if (cmc->is_after_connector) dyn_strcat(cmc->e, " & "); + cmc->is_after_connector = true; + if (n->multi) dyn_strcat(cmc->e, "@"); + dyn_strcat(cmc->e, + n->condesc ? 
/* ================ Print disjuncts and connectors ============== */

/**
 * Check whether the bit of flag character \p flag is set in \p flags.
 *
 * A flag character maps to bit number (flag - 'a'). Characters that map
 * outside bits 0..31 are never set; they are rejected before shifting,
 * because a shift by a negative or too large count is undefined.
 *
 * @param flags Bit set built with make_flag().
 * @param flag Flag character to test.
 * @return \c true iff the bit of \p flag is set in \p flags.
 */
static bool is_flag(uint32_t flags, char flag)
{
	/* A negative difference wraps to a huge unsigned value and is rejected. */
	const unsigned int bit = (unsigned int)(flag - 'a');

	if (bit >= 32) return false;
	return ((flags >> bit) & 1u) != 0;
}

/**
 * Convert flag character \p flag to its bit mask.
 *
 * @param flag Flag character; it maps to bit number (flag - 'a').
 * @return The single-bit mask of \p flag, or 0 if it maps outside bits 0..31.
 */
static uint32_t make_flag(char flag)
{
	const unsigned int bit = (unsigned int)(flag - 'a');

	if (bit >= 32) return 0;
	return (uint32_t)1 << bit; /* unsigned, so that bit 31 is well defined */
}
"d" : "s"); +} + +GNUC_UNUSED static void print_one_connector(Connector *e, int dir, int shallow, + uint32_t flags) +{ + dyn_str *s = dyn_str_new(); + + dyn_print_one_connector(s, e, dir, shallow, flags); + + char *t = dyn_str_take(s); + puts(t); + free(t); +} + +static void dyn_print_connector_list(dyn_str *s, Connector *e, int dir, uint32_t flags) +{ + + if (e == NULL) return; + dyn_print_connector_list(s, e->next, dir, flags); + if (e->next != NULL) dyn_strcat(s, " "); + dyn_print_one_connector(s, e, dir, /*shallow*/-1, flags); +} + +void print_connector_list(Connector *e, uint32_t flags) +{ + dyn_str *s = dyn_str_new(); + + dyn_print_connector_list(s, e, /*dir*/-1, flags); + + char *t = dyn_str_take(s); + puts(t); + free(t); +} + +/* Ascending sort of connector positions. */ +static int ascending_int(const void *a, const void *b) +{ + const int a1 = *(const int *)a; + const int b1 = *(const int *)b; + + if (a1 < b1) return -1; + if (a1 == b1) return 0; + return 1; +} + +typedef struct +{ + const void *regex; + Exp *exp; + Dictionary dict; + unsigned int num_selected; + unsigned int num_tunnels; +} select_data; + +static void dyn_print_disjunct_list(dyn_str *s, Disjunct *dj, uint32_t flags, + bool (* select)(const char *dj_str, select_data *criterion), + select_data *criterion) +{ + int djn = 0; + char word[MAX_WORD + 32]; + bool print_disjunct_address = test_enabled("disjunct-address"); + + for (;dj != NULL; dj=dj->next) + { + lg_strlcpy(word, dj->word_string, sizeof(word)); + patch_subscript_mark(word); + dyn_str *l = dyn_str_new(); + + append_string(l, "%16s", word); + if (print_disjunct_address) append_string(s, "(%p)", dj); + dyn_strcat(l, ": "); + + append_string(l, "[%d]%s= ", djn++, cost_stringify(dj->cost)); + dyn_print_connector_list(l, dj->left, /*dir*/0, flags); + dyn_strcat(l, " <--> "); + dyn_print_connector_list(l, dj->right, /*dir*/1, flags); + + char *ls = dyn_str_take(l); + if ((NULL == select) || select(ls, criterion)) + { + dyn_strcat(s, ls); 
+ dyn_strcat(s, "\n"); + + if (criterion->exp != NULL) + { + int ccnt = 1; + for (Connector *c = dj->left; c != NULL; c = c->next) + ccnt++; + for (Connector *c = dj->right; c != NULL; c = c->next) + ccnt++; + + int *exp_pos = alloca(ccnt * sizeof(int)); + int *i = exp_pos; + for (Connector *c = dj->left; c != NULL; c = c->next) + *i++ = c->exp_pos; + for (Connector *c = dj->right; c != NULL; c = c->next) + *i++ = c->exp_pos; + *i = -1; + + qsort(exp_pos, ccnt-1, sizeof(int), ascending_int); + + cmacro_context cmc = { + .dict = criterion->dict, + .e = s, + .find_pos = exp_pos, + }; + print_connector_macros(&cmc, criterion->exp); + dyn_strcat(s, "\n\n"); + } + } + free(ls); + } +} + +void print_all_disjuncts(Sentence sent) +{ + dyn_str *s = dyn_str_new(); + uint32_t flags = make_flag('l') | make_flag('t'); + + for (WordIdx w = 0; w < sent->length; w++) + { + append_string(s, "Word %zu:\n", w); + dyn_print_disjunct_list(s, sent->word[w].d, flags, NULL, NULL); + + } + + char *t = dyn_str_take(s); + puts(t); + free(t); +} + /* ================ Display word expressions / disjuncts ================= */ +#define DJ_COL_WIDTH sizeof(" ") + +static bool select_disjunct(const char *dj_str, select_data *criterion) +{ + /* Count number of disjuncts with tunnel connectors. */ + for (const char *p = dj_str; *p != '\0'; p++) + { + if ((p[0] == ' ') && (p[1] == 'x')) + { + criterion->num_tunnels++; + break; + } + } + + /* Select desired disjuncts. */ + if (match_regex(criterion->regex , dj_str) == NULL) return false; + criterion->num_selected++; + return true; +} + +/** + * Display the disjuncts of expressions in \p dn. 
+ */ +static char *display_disjuncts(Dictionary dict, const Dict_node *dn, + const void **arg) +{ + const void *rn = arg[0]; + const char *flags = arg[1]; + const Parse_Options opts = (Parse_Options)arg[2]; + double max_cost = opts->disjunct_cost; + + uint32_t int_flags = 0; + if (flags != NULL) + { + for (const char *f = flags; *f != '\0'; f++) + int_flags |= make_flag(*f); + } + + /* build_disjuncts_for_exp() needs memory pools for efficiency. */ + Sentence dummy_sent = sentence_create("", dict); /* For memory pools. */ + dummy_sent->Disjunct_pool = pool_new(__func__, "Disjunct", + /*num_elements*/8192, sizeof(Disjunct), + /*zero_out*/false, /*align*/false, false); + dummy_sent->Connector_pool = pool_new(__func__, "Connector", + /*num_elements*/65536, sizeof(Connector), + /*zero_out*/true, /*align*/false, false); + + /* copy_Exp() needs an Exp memory pool. */ + Pool_desc *Exp_pool = pool_new(__func__, "Exp", /*num_elements*/256, + sizeof(Exp), /*zero_out*/false, + /*align*/false, /*exact*/false); + + select_data criterion = { .regex = rn }; + void *select = (rn == NULL) ? NULL : select_disjunct; + + dyn_str *s = dyn_str_new(); + dyn_strcat(s, "disjuncts:\n"); + for (; dn != NULL; dn = dn->right) + { + /* Use copy_Exp() to assign dialect cost. 
*/ + Exp *e = copy_Exp(dn->exp, Exp_pool, opts); + Disjunct *d = build_disjuncts_for_exp(dummy_sent, e, dn->string, NULL, + max_cost, NULL); + + unsigned int dnum0 = count_disjuncts(d); + d = eliminate_duplicate_disjuncts(d); + unsigned int dnum1 = count_disjuncts(d); + + if ((flags != NULL) && (strchr(flags, 'm') != NULL)) + { + criterion.exp = e; + criterion.dict = dict; + } + criterion.num_selected = 0; + dyn_str *dyn_pdl = dyn_str_new(); + dyn_print_disjunct_list(dyn_pdl, d, int_flags, select, &criterion); + char *dliststr = dyn_str_take(dyn_pdl); + + pool_reuse(Exp_pool); + pool_reuse(dummy_sent->Disjunct_pool); + pool_reuse(dummy_sent->Connector_pool); + + append_string(s, " %-*s %8u/%u disjuncts", + display_width(DJ_COL_WIDTH, dn->string), dn->string, + dnum1, dnum0); + if (criterion.num_tunnels != 0) + append_string(s, " (%u tunnels)", criterion.num_tunnels); + dyn_strcat(s, "\n\n"); + dyn_strcat(s, dliststr); + dyn_strcat(s, "\n"); + free(dliststr); + + if (rn != NULL) + { + if (criterion.num_selected == dnum1) + dyn_strcat(s, "(all the disjuncts matched)\n\n"); + else + append_string(s, "(%u disjunct%s matched)\n\n", + criterion.num_selected, + criterion.num_selected == 1 ? "" : "s"); + } + } + pool_delete(Exp_pool); + sentence_delete(dummy_sent); + + return dyn_str_take(s); +} const char do_display_expr; /* a sentinel to request an expression display */ +static size_t unknown_flag(const char *display_type, const char *flags) +{ + const char *known_flags; + + if (&do_display_expr == display_type) + known_flags = "lm"; + else + known_flags = "m"; + + return strspn(flags, known_flags); +} + /** * Display the information about the given word. * If the word can split, display the information about each part. 
@@ -439,7 +830,20 @@ static char *display_word_split(Dictionary dict, Regex_node *rn = NULL; if (arg != NULL) { + if (NULL != arg[1]) + { + size_t unknown_flag_pos = unknown_flag(arg[0], arg[1]); + if (arg[1][unknown_flag_pos] != '\0') + { + prt_error("Error: Token display: Unknown flag \"%c\".\n", + arg[1][unknown_flag_pos]); + dyn_strcat(s, " "); /* avoid a no-match error */ + goto display_word_split_error; + } + } + carg[1] = arg[1]; /* flags */ + if (arg[0] == &do_display_expr) { carg[0] = &do_display_expr; @@ -451,11 +855,22 @@ static char *display_word_split(Dictionary dict, { rn = malloc(sizeof(Regex_node)); rn->name = strdup("Disjunct regex"); - rn->pattern = strdup(arg[0]); rn->re = NULL; rn->neg = false; rn->next = NULL; + if (arg[0][strspn(arg[0], "0123456789")] != '\0') + { + rn->pattern = strdup(arg[0]); + } + else + { + rn->pattern = malloc(strlen(arg[0]) + 4); /* \ [ ] \0 */ + strcpy(rn->pattern, "\\["); + strcat(rn->pattern, arg[0]); + strcat(rn->pattern, "]"); + } + if (compile_regexs(rn, NULL) != 0) { prt_error("Error: Failed to compile regex \"%s\".\n", arg[0]); @@ -526,8 +941,6 @@ static unsigned int count_disjunct_for_dict_node(Dict_node *dn) return (NULL == dn) ? 0 : count_clause(dn->exp); } -#define DJ_COL_WIDTH sizeof(" ") - /** * Display the number of disjuncts associated with this dict node */ diff --git a/link-grammar/disjunct-utils.c b/link-grammar/disjunct-utils.c index 3bbd132e48..5c1be6c852 100644 --- a/link-grammar/disjunct-utils.c +++ b/link-grammar/disjunct-utils.c @@ -416,218 +416,6 @@ void count_disjuncts_and_connectors(Sentence sent, unsigned int *dca, *dca = dcnt; } -/* ================ Print disjuncts and connectors ============== */ -static bool is_flag(uint32_t flags, char flag) -{ - return (flags>>(flag-'a')) & 1; -} - -static uint32_t make_flag(char flag) -{ - return 1<<(flag-'a'); -} - -/* Print one connector with all the details. 
- * mCnameD{refcount}(nearest_word, length_limit)x - * Optional m: "@" for multi (else nothing). - * Cname: Connector name. - * Optional D: "-" / "+" (if dir != -1). - * Optional : (flag 't'). - * Optional [nearest_word, length_limit or farthest_word]: (flag 'l'). - * x: Shallow/deep indication as "s" / "d" (if shallow != -1) - */ -void dyn_print_one_connector(dyn_str *s, Connector *e, int dir, int shallow, - uint32_t flags) -{ - if (e->multi) - dyn_strcat(s, "@"); - dyn_strcat(s, connector_string(e)); - if (-1 != dir) dyn_strcat(s, &"-+"[dir]); - if (is_flag(flags, 't') && e->tracon_id) - append_string(s, "<%d>", e->tracon_id); - if (is_flag(flags, 'r') && e->refcount) - append_string(s, "{%d}",e->refcount); - if (is_flag(flags, 'l')) - append_string(s, "(%d,%d)", e->nearest_word, e->length_limit); - if (-1 != shallow) - dyn_strcat(s, (0 == shallow) ? "d" : "s"); -} - -void print_one_connector(Connector *e, int dir, int shallow, uint32_t flags) -{ - dyn_str *s = dyn_str_new(); - - dyn_print_one_connector(s, e, dir, shallow, flags); - - char *t = dyn_str_take(s); - puts(t); - free(t); -} - -void dyn_print_connector_list(dyn_str *s, Connector *e, uint32_t flags) -{ - for (;e != NULL; e = e->next) - { - dyn_print_one_connector(s, e, /*dir*/-1, /*shallow*/-1, flags); - if (e->next != NULL) dyn_strcat(s, " "); - } -} - -void print_connector_list(Connector *e, uint32_t flags) -{ - dyn_str *s = dyn_str_new(); - - dyn_print_connector_list(s, e, flags); - - char *t = dyn_str_take(s); - puts(t); - free(t); -} - -void dyn_print_disjunct_list(dyn_str *s, Disjunct *dj, uint32_t flags) -{ - int i = 0; - char word[MAX_WORD + 32]; - bool print_disjunct_address = test_enabled("disjunct-address"); - - for (;dj != NULL; dj=dj->next) - { - lg_strlcpy(word, dj->word_string, sizeof(word)); - patch_subscript_mark(word); - - append_string(s, "%16s", word); - if (print_disjunct_address) append_string(s, "(%p)", dj); - dyn_strcat(s, ": "); - - append_string(s, "[%d]%s= ", i++, 
cost_stringify(dj->cost)); - - dyn_print_connector_list(s, dj->left, flags); - dyn_strcat(s, " <--> "); - dyn_print_connector_list(s, dj->right, flags); - dyn_strcat(s, "\n"); - } -} - -void print_all_disjuncts(Sentence sent) -{ - dyn_str *s = dyn_str_new(); - uint32_t flags = make_flag('l') | make_flag('t'); - - for (WordIdx w = 0; w < sent->length; w++) - { - append_string(s, "Word %zu:\n", w); - dyn_print_disjunct_list(s, sent->word[w].d, flags); - - } - - char *t = dyn_str_take(s); - puts(t); - free(t); -} - -/** - * Display the disjuncts of expressions in \p dn. - */ -char *display_disjuncts(Dictionary dict, const Dict_node *dn, const void **arg) -{ - const void *rn = arg[0]; - const char *flags = arg[1]; - const Parse_Options opts = (Parse_Options)arg[2]; - double max_cost = opts->disjunct_cost; - - uint32_t int_flags = 0; - if (flags != NULL) - { - for (const char *f = flags; *f != '\0'; f++) - int_flags |= make_flag(*f); - } - - /* build_disjuncts_for_exp() needs memory pools for efficiency. */ - Sentence dummy_sent = sentence_create("", dict); /* For memory pools. */ - dummy_sent->Disjunct_pool = pool_new(__func__, "Disjunct", - /*num_elements*/8192, sizeof(Disjunct), - /*zero_out*/false, /*align*/false, false); - dummy_sent->Connector_pool = pool_new(__func__, "Connector", - /*num_elements*/65536, sizeof(Connector), - /*zero_out*/true, /*align*/false, false); - - /* copy_Exp() needs an Exp memory pool. */ - Pool_desc *Exp_pool = pool_new(__func__, "Exp", /*num_elements*/256, - sizeof(Exp), /*zero_out*/false, - /*align*/false, /*exact*/false); - - dyn_str *s = dyn_str_new(); - dyn_strcat(s, "disjuncts:\n"); - for (; dn != NULL; dn = dn->right) - { - /* Use copy_Exp() to assign dialect cost. 
*/ - Exp *e = copy_Exp(dn->exp, Exp_pool, opts); - Disjunct *d = build_disjuncts_for_exp(dummy_sent, e, dn->string, NULL, - max_cost, NULL); - unsigned int dnum0 = count_disjuncts(d); - d = eliminate_duplicate_disjuncts(d); - unsigned int dnum1 = count_disjuncts(d); - - dyn_str *dyn_pdl = dyn_str_new(); - dyn_print_disjunct_list(dyn_pdl, d, int_flags); - char *dliststr = dyn_str_take(dyn_pdl); - - pool_reuse(Exp_pool); - pool_reuse(dummy_sent->Disjunct_pool); - pool_reuse(dummy_sent->Connector_pool); - - /* Count number of disjuncts with tunnel connectors. */ - unsigned int tnum = 0; - for (const char *p = dliststr; *p != '\0'; p++) - if ((p[0] == ' ') && (p[1] == 'x')) tnum++; - - unsigned int dnum_selected = 0; - dyn_str *selected = NULL; - char *dstr = dliststr; - char *end; - if (rn != NULL) - { - selected = dyn_str_new(); - - do - { - end = strchr(dstr, '\n'); - *end = '\0'; - if (match_regex(rn , dstr) != NULL) - { - dyn_strcat(selected, dstr); - dyn_strcat(selected, "\n"); - dnum_selected++; - } - - dstr = end + 1; - } while (*dstr != '\0'); - - free(dliststr); - dliststr = dyn_str_take(selected); - } - - append_string(s, " %s %u/%u disjuncts", dn->string, dnum1, dnum0); - if (tnum != 0) append_string(s, " (%u tunnels)", tnum); - dyn_strcat(s, "\n"); - dyn_strcat(s, dliststr); - dyn_strcat(s, "\n"); - free(dliststr); - - if (rn != NULL) - { - if (dnum_selected == dnum1) - dyn_strcat(s, "(all the disjuncts matched)\n\n"); - else - append_string(s, "(%u disjuncts matched)\n\n", dnum_selected); - } - } - pool_delete(Exp_pool); - sentence_delete(dummy_sent); - - return dyn_str_take(s); -} - /* ============= Connector encoding, sharing and packing ============= */ /* diff --git a/link-grammar/disjunct-utils.h b/link-grammar/disjunct-utils.h index 463785a3c2..91003b2800 100644 --- a/link-grammar/disjunct-utils.h +++ b/link-grammar/disjunct-utils.h @@ -72,14 +72,9 @@ Tracon_sharing *pack_sentence_for_parsing(Sentence); void free_tracon_sharing(Tracon_sharing *); 
void count_disjuncts_and_connectors(Sentence, unsigned int *, unsigned int *); -void print_one_connector(Connector *, int, int, uint32_t); -void dyn_print_one_connector(dyn_str *s, Connector *, int, int, uint32_t); void print_connector_list(Connector *, uint32_t); -void dyn_print_connector_list(dyn_str *s, Connector *, uint32_t); void print_disjunct_list(Disjunct *, uint32_t); -void dyn_print_disjunct_list(dyn_str *s, Disjunct *, uint32_t); void print_all_disjuncts(Sentence); -char *display_disjuncts(Dictionary, const Dict_node *, const void **); /* Save and restore sentence disjuncts */ typedef struct diff --git a/link-grammar/prepare/build-disjuncts.c b/link-grammar/prepare/build-disjuncts.c index f75fa813dc..4882bc3ae0 100644 --- a/link-grammar/prepare/build-disjuncts.c +++ b/link-grammar/prepare/build-disjuncts.c @@ -26,6 +26,7 @@ struct Tconnector_struct { Tconnector * next; const Exp *e; /* a CONNECTOR_type element from which to get the connector */ + int exp_pos; /* the position in the originating expression */ }; typedef struct clause_struct Clause; @@ -42,6 +43,7 @@ typedef struct double cost_cutoff; Pool_desc *Tconnector_pool; Pool_desc *Clause_pool; + int exp_pos; } clause_context; #ifdef DEBUG @@ -141,11 +143,12 @@ static Tconnector * catenate(Tconnector * e1, Tconnector * e2, Pool_desc *tp) /** * build the connector for the terminal node n */ -static Tconnector * build_terminal(Exp *e, Pool_desc *tp) +static Tconnector * build_terminal(Exp *e, clause_context *ct) { - Tconnector *c = pool_alloc(tp); + Tconnector *c = pool_alloc(ct->Tconnector_pool); c->e = e; c->next = NULL; + c->exp_pos = ct->exp_pos++; return c; } @@ -219,7 +222,7 @@ static Clause * build_clause(Exp *e, clause_context *ct) else if (e->type == CONNECTOR_type) { c = pool_alloc(ct->Clause_pool); - c->c = build_terminal(e, ct->Tconnector_pool); + c->c = build_terminal(e, ct); c->cost = 0.0; c->maxcost = 0.0; c->next = NULL; @@ -281,6 +284,7 @@ build_disjunct(Sentence sent, Clause * cl, 
const char * string, Connector *n = connector_new(connector_pool, t->e->condesc, opts); Connector **loc = ('-' == t->e->dir) ? &ndis->left : &ndis->right; + n->exp_pos = t->exp_pos; n->multi = t->e->multi; n->next = *loc; /* prepend the connector to the current list */ *loc = n; /* update the connector list */ diff --git a/link-grammar/utilities.c b/link-grammar/utilities.c index c65381cc34..e5dfb69e5e 100644 --- a/link-grammar/utilities.c +++ b/link-grammar/utilities.c @@ -561,6 +561,11 @@ const char * dyn_str_value(dyn_str* s) return s->str; } +/* Return the "end" offset of s; print-dict.c uses it as the current length of the string held by s. */ size_t dyn_strlen(dyn_str* s) +{ + return s->end; +} + /* ======================================================== */ /* Locale routines */ diff --git a/link-grammar/utilities.h b/link-grammar/utilities.h index 21a566ea02..711273c77c 100644 --- a/link-grammar/utilities.h +++ b/link-grammar/utilities.h @@ -480,6 +480,7 @@ void dyn_strcat(dyn_str*, const char*); void dyn_trimback(dyn_str*); char * dyn_str_take(dyn_str*); const char * dyn_str_value(dyn_str*); +size_t dyn_strlen(dyn_str*); size_t altlen(const char **);