From 922ab93234a2052e1733644ed698b9671276f2f7 Mon Sep 17 00:00:00 2001 From: ampli Date: Sat, 2 Jun 2018 22:17:51 +0300 Subject: [PATCH 1/9] WordTag::insert_connectors(): Improve debugging --- link-grammar/sat-solver/word-tag.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/link-grammar/sat-solver/word-tag.cpp b/link-grammar/sat-solver/word-tag.cpp index 56647407a6..55acfb91f6 100644 --- a/link-grammar/sat-solver/word-tag.cpp +++ b/link-grammar/sat-solver/word-tag.cpp @@ -22,10 +22,16 @@ void WordTag::insert_connectors(Exp* exp, int& dfs_position, double cost = parent_cost + exp->cost; #ifdef DEBUG - if (0 && verbosity_level(+D_IC)) { // Extreme debug - printf("Expression type %d for Word%d, var %s:\n", exp->type, _word, var); - printf("parent_exp: "); print_expression(parent_exp); - printf("exp: "); print_expression(exp); + if (0 && verbosity_level(+D_IC)) // Extreme debug + //if (_word == 2) + { + const char*type = + ((const char *[]) {"OR_type", "AND_type", "CONNECTOR_type"}) [exp->type-1]; + printf("Expression type %s for Word%d, var %s:\n", type, _word, var); + //printf("parent_exp: "); print_expression(parent_exp); + printf("exp(%s) e=%.2f pc=%.2f ", word_xnode->string,exp->cost, parent_cost); + print_expression(exp); + if (exp->cost > 0 || root) prt_exp_mem(exp, 0); } #endif From c2fc967ebc61bf6ee455d646ea096adc76109a88 Mon Sep 17 00:00:00 2001 From: ampli Date: Sat, 2 Jun 2018 22:38:19 +0300 Subject: [PATCH 2/9] SAT-parser: Fix connector cost calculation When distributing over AND_type, attribute the cost to the first and'ed term only (instead of to each of them). The cost calculation in generate_satisfaction_for_expression() is left intact for now, because changing it interferes with the cost cutoff calculation. --- link-grammar/sat-solver/sat-encoder.cpp | 1 + link-grammar/sat-solver/word-tag.cpp | 7 ++++--- link-grammar/sat-solver/word-tag.hpp | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/link-grammar/sat-solver/sat-encoder.cpp b/link-grammar/sat-solver/sat-encoder.cpp index 8dd47a072f..a245f966ee 100644 --- a/link-grammar/sat-solver/sat-encoder.cpp +++ b/link-grammar/sat-solver/sat-encoder.cpp @@ -416,6 +416,7 @@ void SATEncoder::generate_satisfaction_for_expression(int w, int& dfs_position, *s++ = 'c'; fast_sprintf(s, i); + /* if (i != 0) total_cost = 0; */ // This interferes with the cost cutoff generate_satisfaction_for_expression(w, dfs_position, l->e, new_var, total_cost); } } diff --git a/link-grammar/sat-solver/word-tag.cpp b/link-grammar/sat-solver/word-tag.cpp index 55acfb91f6..70933ca210 100644 --- a/link-grammar/sat-solver/word-tag.cpp +++ b/link-grammar/sat-solver/word-tag.cpp @@ -44,7 +44,7 @@ void WordTag::insert_connectors(Exp* exp, int& dfs_position, _dir.push_back('+'); _right_connectors.push_back( PositionConnector(parent_exp, exp, '+', _word, dfs_position, - cost, leading_right, false, + cost, parent_cost, leading_right, false, eps_right, eps_left, word_xnode, _opts)); leading_right = false; break; @@ -53,7 +53,7 @@ void WordTag::insert_connectors(Exp* exp, int& dfs_position, _dir.push_back('-'); _left_connectors.push_back( PositionConnector(parent_exp, exp, '-', _word, dfs_position, - cost, false, leading_left, + cost, parent_cost, false, leading_left, eps_right, eps_left, word_xnode, _opts)); leading_left = false; break; @@ -85,8 +85,9 @@ void WordTag::insert_connectors(Exp* exp, int& dfs_position, *s++ = 'c'; fast_sprintf(s, i); + double and_cost = (i == 0) ? cost : 0; insert_connectors(l->e, dfs_position, leading_right, leading_left, - eps_right, eps_left, new_var, false, cost, parent_exp, word_xnode); + eps_right, eps_left, new_var, false, and_cost, parent_exp, word_xnode); if (leading_right) { eps_right.push_back(_variables->epsilon(new_var, '+')); diff --git a/link-grammar/sat-solver/word-tag.hpp b/link-grammar/sat-solver/word-tag.hpp index 61c4655ffb..94ef86d21c 100644 --- a/link-grammar/sat-solver/word-tag.hpp +++ b/link-grammar/sat-solver/word-tag.hpp @@ -17,10 +17,10 @@ extern "C" { struct PositionConnector { PositionConnector(Exp* pe, Exp* e, char d, int w, int p, - double pcst, bool lr, bool ll, + double cst, double pcst, bool lr, bool ll, const std::vector& er, const std::vector& el, const X_node *w_xnode, Parse_Options opts) : exp(pe), dir(d), word(w), position(p), - cost(e->cost), parent_cost(pcst), + cost(cst), parent_cost(pcst), leading_right(lr), leading_left(ll), eps_right(er), eps_left(el), word_xnode(w_xnode) { From 7daae3254503607f63b205440c916e88a48219e5 Mon Sep 17 00:00:00 2001 From: ampli Date: Sat, 2 Jun 2018 22:43:29 +0300 Subject: [PATCH 3/9] WordTag::insert_connectors(): Record costly-null expressions There cost is to be recovered on linkage extraction if they happen to reside on a participating disjunct. --- link-grammar/sat-solver/word-tag.cpp | 8 +++++++- link-grammar/sat-solver/word-tag.hpp | 13 +++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/link-grammar/sat-solver/word-tag.cpp b/link-grammar/sat-solver/word-tag.cpp index 70933ca210..df8a65c5a6 100644 --- a/link-grammar/sat-solver/word-tag.cpp +++ b/link-grammar/sat-solver/word-tag.cpp @@ -11,7 +11,7 @@ extern "C" { #include "utilities.h" } -#define D_IC 6 +#define D_IC 8 void WordTag::insert_connectors(Exp* exp, int& dfs_position, bool& leading_right, bool& leading_left, std::vector& eps_right, @@ -63,6 +63,12 @@ void WordTag::insert_connectors(Exp* exp, int& dfs_position, } else if (exp->type == AND_type) { if (exp->u.l == NULL) { /* zeroary and */ + if (cost != 0) + { + lgdebug(+D_IC, "EmptyConnector var=%s(%d) cost %.2f pcost %.2f\n", + var, _variables->string(var), cost, parent_cost); + _empty_connectors.push_back(EmptyConnector(_variables->string(var),cost)); + } } else if (exp->u.l->next == NULL) { /* unary and - skip */ diff --git a/link-grammar/sat-solver/word-tag.hpp b/link-grammar/sat-solver/word-tag.hpp index 94ef86d21c..3e5199b32f 100644 --- a/link-grammar/sat-solver/word-tag.hpp +++ b/link-grammar/sat-solver/word-tag.hpp @@ -84,6 +84,14 @@ struct PositionConnector }; +struct EmptyConnector { + EmptyConnector(int var, double cst) + : ec_var(var), ec_cost(cst) + { + } + int ec_var; + double ec_cost; +}; // XXX TODO: Hash connectors for faster matching @@ -92,6 +100,7 @@ class WordTag private: std::vector _left_connectors; std::vector _right_connectors; + std::vector _empty_connectors; std::vector _dir; std::vector _position; @@ -133,6 +142,10 @@ class WordTag return _right_connectors; } + const std::vector& get_empty_connectors() const { + return _empty_connectors; + } + PositionConnector* get(int dfs_position) { switch (_dir[dfs_position - 1]) { From 509679a2dde98486da1b6c9495497cf02dd0e2bc Mon Sep 17 00:00:00 2001 From: ampli Date: Sat, 2 Jun 2018 22:54:07 +0300 Subject: [PATCH 4/9] SAT-parser: Extract costs due to costly-null expressions --- link-grammar/api.c | 1 - link-grammar/linkage/score.c | 9 +-------- link-grammar/sat-solver/sat-encoder.cpp | 13 +++++++++++++ link-grammar/sat-solver/word-tag.hpp | 5 +++++ 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/link-grammar/api.c b/link-grammar/api.c index a750edc28b..995f1950ff 100644 --- a/link-grammar/api.c +++ b/link-grammar/api.c @@ -573,7 +573,6 @@ double sentence_disjunct_cost(Sentence sent, LinkageIdx i) { if (!sent) return 0.0; - /* The sat solver (currently) fails to fill in link_info */ if (!sent->lnkages) return 0.0; if (sent->num_linkages_alloced <= i) return 0.0; /* bounds check */ return sent->lnkages[i].lifo.disjunct_cost; diff --git a/link-grammar/linkage/score.c b/link-grammar/linkage/score.c index 6f2b30ca40..67d6640eae 100644 --- a/link-grammar/linkage/score.c +++ b/link-grammar/linkage/score.c @@ -71,14 +71,7 @@ static double compute_disjunct_cost(Linkage lkg) void linkage_score(Linkage lkg, Parse_Options opts) { lkg->lifo.unused_word_cost = unused_word_cost(lkg); - if (opts->use_sat_solver) - { - lkg->lifo.disjunct_cost = 0.0; - } - else - { - lkg->lifo.disjunct_cost = compute_disjunct_cost(lkg); - } + lkg->lifo.disjunct_cost = compute_disjunct_cost(lkg); lkg->lifo.link_cost = compute_link_cost(lkg); lkg->lifo.corpus_cost = -1.0; diff --git a/link-grammar/sat-solver/sat-encoder.cpp b/link-grammar/sat-solver/sat-encoder.cpp index a245f966ee..66cf22bdb8 100644 --- a/link-grammar/sat-solver/sat-encoder.cpp +++ b/link-grammar/sat-solver/sat-encoder.cpp @@ -1782,6 +1782,7 @@ Exp* SATEncoderConjunctionFreeSentences::PositionConnector2exp(const PositionCon return e; } +#define D_SEL 8 bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg) { Disjunct *d; @@ -1875,6 +1876,17 @@ bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg) #endif d = build_disjuncts_for_exp(de, xnode_word[wi]->string, MAX_CONNECTOR_COST, _opts); word_record_in_disjunct(xnode_word[wi]->word, d); + + /* Recover cost of costly-nulls. */ + const vector& ec = _word_tags[wi].get_empty_connectors(); + for (vector::const_iterator j = ec.begin(); j < ec.end(); j++) + { + lgdebug(+D_SEL, "Word %zu: Costly-null var=%d, found=%d cost=%.2f\n", + wi, j->ec_var, _solver->model[j->ec_var] == l_True, j->ec_cost); + if (_solver->model[j->ec_var] == l_True) + d->cost += j->ec_cost; + } + lkg->chosen_disjuncts[wi] = d; free_Exp(de); } @@ -1884,6 +1896,7 @@ bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg) DEBUG_print("Total: ." << lkg->num_links << "." << endl); return false; } +#undef D_SEL /** * Main entry point into the SAT parser. diff --git a/link-grammar/sat-solver/word-tag.hpp b/link-grammar/sat-solver/word-tag.hpp index 3e5199b32f..dfb36dd670 100644 --- a/link-grammar/sat-solver/word-tag.hpp +++ b/link-grammar/sat-solver/word-tag.hpp @@ -84,6 +84,11 @@ struct PositionConnector }; +/* + * Record the SAT variable and cost of costly-null expressions. + * Their cost is recovered (in sat_extract_links()) if + * they happen to reside on a participating disjunct. + */ struct EmptyConnector { EmptyConnector(int var, double cst) : ec_var(var), ec_cost(cst) From b403ec2095a4650f0f4904bdaa0857542165050b Mon Sep 17 00:00:00 2001 From: ampli Date: Sat, 2 Jun 2018 22:56:27 +0300 Subject: [PATCH 5/9] sentence_link_cost(): Remove old comment The SAT-parser already fills in this info. --- link-grammar/api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/link-grammar/api.c b/link-grammar/api.c index 995f1950ff..7a79fed50f 100644 --- a/link-grammar/api.c +++ b/link-grammar/api.c @@ -582,7 +582,6 @@ int sentence_link_cost(Sentence sent, LinkageIdx i) { if (!sent) return 0; - /* The sat solver (currently) fails to fill in link_info */ if (!sent->lnkages) return 0; if (sent->num_linkages_alloced <= i) return 0; /* bounds check */ return sent->lnkages[i].lifo.link_cost; From 57e24b01f72ceba27ce9e846f896e1eec119f3c5 Mon Sep 17 00:00:00 2001 From: ampli Date: Sat, 2 Jun 2018 23:10:35 +0300 Subject: [PATCH 6/9] sat_extract_links(): Improve debug messages --- link-grammar/sat-solver/sat-encoder.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/link-grammar/sat-solver/sat-encoder.cpp b/link-grammar/sat-solver/sat-encoder.cpp index 66cf22bdb8..b3c8eb1f21 100644 --- a/link-grammar/sat-solver/sat-encoder.cpp +++ b/link-grammar/sat-solver/sat-encoder.cpp @@ -1862,13 +1862,16 @@ bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg) if (xnode_word[wi] == NULL) { if (!_sent->word[wi].optional) - prt_error("Warning: Non-optional word %zu has no linkage\n", wi); + { + de = null_exp(); + prt_error("Error: Internal error: Non-optional word %zu has no linkage\n", wi); + } continue; } if (de == NULL) { de = null_exp(); - lgdebug(+0, "Warning: No expression for word %zu\n", wi); + prt_error("Error: Internal error: No expression for word %zu\n", wi); } #ifndef MAX_CONNECTOR_COST @@ -1893,7 +1896,7 @@ bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg) lkg->num_links = current_link; - DEBUG_print("Total: ." << lkg->num_links << "." << endl); + DEBUG_print("Total links: ." << lkg->num_links << "." << endl); return false; } #undef D_SEL From fe6e901450e2f92e5badfebbd0964063be148ccc Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 3 Jun 2018 01:52:41 +0300 Subject: [PATCH 7/9] SAT-parser: Allow get_next_linkage to fail This fix allows to discard an unintended linkage. It is to be used to discard linkages with cost > cost_max (not actually done, at least for now). In this opportunity an malloc() of the linkage struct is eliminated by allocating it on the stack. Also discard the FIXME on create_linkage, because compute_chosen_disjuncts() doesn't exists now and its replacement process_linkages() is too different. --- link-grammar/sat-solver/sat-encoder.cpp | 51 ++++++++++++------------- link-grammar/sat-solver/sat-encoder.hpp | 2 +- 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/link-grammar/sat-solver/sat-encoder.cpp b/link-grammar/sat-solver/sat-encoder.cpp index b3c8eb1f21..00028a40ae 100644 --- a/link-grammar/sat-solver/sat-encoder.cpp +++ b/link-grammar/sat-solver/sat-encoder.cpp @@ -1529,21 +1529,14 @@ void SATEncoder::pp_prune() /** * Create the next linkage. - * This is very similar to compute_chosen_disjuncts(), except that - * sat_extract_links() is called, instead of normal extract_links(). - * It would be good to refactor this and the other to make them even - * more similar, because else, its confusing ... - * FIXME? Use a shared function for the code here. + * Return true iff the linkage can be created. */ -Linkage SATEncoder::create_linkage() +bool SATEncoder::create_linkage(Linkage linkage) { - Linkage linkage = (Linkage) malloc(sizeof(struct Linkage_s)); - memset(linkage, 0, sizeof(struct Linkage_s)); - partial_init_linkage(_sent, linkage, _sent->length); - sat_extract_links(linkage); + if (!sat_extract_links(linkage)) return false; compute_link_names(linkage, _sent->string_set); - return linkage; + return true; } void SATEncoder::generate_linkage_prohibiting() @@ -1563,15 +1556,16 @@ void SATEncoder::generate_linkage_prohibiting() Linkage SATEncoder::get_next_linkage() { - Linkage linkage = NULL; + Linkage_s linkage; bool connected; - bool sane = true; bool display_linkage_disconnected = false; + /* Loop until a good linkage is found. * Insane (mixed alternatives) linkages are always ignored. * Disconnected linkages are normally ignored, unless * !test=linkage-disconnected is used (and they are sane) */ + bool linkage_ok; do { if (!_solver->solve()) return NULL; @@ -1587,16 +1581,24 @@ Linkage SATEncoder::get_next_linkage() generate_linkage_prohibiting(); } + linkage_ok = false; if (connected || display_linkage_disconnected) { - linkage = create_linkage(); - sane = sane_linkage_morphism(_sent, linkage, _opts); - if (!sane) { - free_linkage_connectors_and_disjuncts(linkage); - free_linkage(linkage); - free(linkage); + memset(&linkage, 0, sizeof(struct Linkage_s)); + + linkage_ok = create_linkage(&linkage); + if (linkage_ok) + linkage_ok = sane_linkage_morphism(_sent, &linkage, _opts); + if (!linkage_ok) { + /* We cannot elegantly add this linkage to sent->linkges[] - + * to be freed in sentence_delete(), since insane linkages + * must be there with index > num_linkages_post_processed - so + * they remain hidden, but num_linkages_post_processed is an + * arbitrary number. So we must free it here. */ + free_linkage_connectors_and_disjuncts(&linkage); + free_linkage(&linkage); continue; // skip this linkage } - remove_empty_words(linkage); /* Discard optional words. */ + remove_empty_words(&linkage); /* Discard optional words. */ } if (!connected) { @@ -1606,9 +1608,7 @@ Linkage SATEncoder::get_next_linkage() lgdebug(+D_SAT, "Linkage DISCONNECTED (skipped)\n"); } } - } while (!sane || !(connected || display_linkage_disconnected)); - - assert(linkage, "No linkage"); + } while (!linkage_ok); /* We cannot expand the linkage array on demand, since the API uses * linkage pointers, and they would become invalid if realloc() changes @@ -1624,8 +1624,7 @@ Linkage SATEncoder::get_next_linkage() Linkage lkg = &_sent->lnkages[_next_linkage_index]; _next_linkage_index++; - *lkg = *linkage; /* copy en-mass */ - free(linkage); + *lkg = linkage; /* copy en-mass */ /* The link-parser code checks the next linkage for num_violations * (to save calls to linkage_create()). Allow for that practice. */ @@ -1897,7 +1896,7 @@ bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg) lkg->num_links = current_link; DEBUG_print("Total links: ." << lkg->num_links << "." << endl); - return false; + return true; } #undef D_SEL diff --git a/link-grammar/sat-solver/sat-encoder.hpp b/link-grammar/sat-solver/sat-encoder.hpp index 272a7edb26..89b3f5f00e 100644 --- a/link-grammar/sat-solver/sat-encoder.hpp +++ b/link-grammar/sat-solver/sat-encoder.hpp @@ -278,7 +278,7 @@ class SATEncoder virtual bool sat_extract_links(Linkage) = 0; // Create linkage from a propositional model - Linkage create_linkage(); + bool create_linkage(Linkage); // Generate clause that prohibits the current model void generate_linkage_prohibiting(); From a7b09040cf23a7f35b1abe0e1bd17656f89b9c70 Mon Sep 17 00:00:00 2001 From: ampli Date: Sun, 3 Jun 2018 02:05:17 +0300 Subject: [PATCH 8/9] sat_extract_links(): Prepare for after-linkage cost-cutoff Not actually used because it is currently incompatible to the classic parser (in which a total linkage cost can be greater than cost_cutoff). --- link-grammar/sat-solver/sat-encoder.cpp | 40 ++++++++++++++++++++----- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/link-grammar/sat-solver/sat-encoder.cpp b/link-grammar/sat-solver/sat-encoder.cpp index 00028a40ae..5bbdd7b360 100644 --- a/link-grammar/sat-solver/sat-encoder.cpp +++ b/link-grammar/sat-solver/sat-encoder.cpp @@ -1851,6 +1851,8 @@ bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg) xnode_word[var->right_word] = right_xnode; } + lkg->num_links = current_link; + // Now build the disjuncts. // This is needed so that compute_chosen_word works correctly. // Just in case there is no expression for a disjunct, a null one is used. @@ -1873,27 +1875,51 @@ bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg) prt_error("Error: Internal error: No expression for word %zu\n", wi); } -#ifndef MAX_CONNECTOR_COST -#define MAX_CONNECTOR_COST 1000.0f + double cost_cutoff; +#if LIMIT_TOTAL_LINKAGE_COST // Undefined - incompatible to the classic parser. + cost_cutoff = _opts->disjunct_cost; +#else + cost_cutoff = 1000.0; +#endif // LIMIT_TOTAL_LINKAGE_COST + d = build_disjuncts_for_exp(de, xnode_word[wi]->string, cost_cutoff, _opts); + free_Exp(de); + + if (d == NULL) + { + lgdebug(+D_SEL, "Debug: Word %zu: Disjunct cost > cost_cutoff %.2f\n", + wi, cost_cutoff); +#ifdef DEBUG + if (!test_enabled("SAT-cost")) #endif - d = build_disjuncts_for_exp(de, xnode_word[wi]->string, MAX_CONNECTOR_COST, _opts); + return false; + } + word_record_in_disjunct(xnode_word[wi]->word, d); /* Recover cost of costly-nulls. */ const vector& ec = _word_tags[wi].get_empty_connectors(); for (vector::const_iterator j = ec.begin(); j < ec.end(); j++) { - lgdebug(+D_SEL, "Word %zu: Costly-null var=%d, found=%d cost=%.2f\n", + lgdebug(+D_SEL, "Debug: Word %zu: Costly-null var=%d, found=%d cost=%.2f\n", wi, j->ec_var, _solver->model[j->ec_var] == l_True, j->ec_cost); if (_solver->model[j->ec_var] == l_True) d->cost += j->ec_cost; } lkg->chosen_disjuncts[wi] = d; - free_Exp(de); - } - lkg->num_links = current_link; +#if LIMIT_TOTAL_LINKAGE_COST + if (d->cost > cost_cutoff) + { + lgdebug(+D_SEL, "Word %zu: Disjunct cost %.2f > cost_cutoff %.2f\n", + wi, d->cost, cost_cutoff); +#ifdef DEBUG + if (!test_enabled("SAT-cost")) +#endif + return false; + } +#endif // LIMIT_TOTAL_LINKAGE_COST + } DEBUG_print("Total links: ." << lkg->num_links << "." << endl); return true; From 23e9f27d8a0a130a685d55774230bc867fe95156 Mon Sep 17 00:00:00 2001 From: ampli Date: Tue, 2 Oct 2018 20:13:02 +0300 Subject: [PATCH 9/9] ChangeLog: Notify about the SAT parser disjunct cost modification --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 83c9d43fc5..f22f1fb815 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,7 @@ Version 5.5.2 (XXX 2018) * Major documentation update for building with Cygwin. * Revise the manpage. * Remove the experimental Viterbi code. + * Modify the SAT parser disjunct cost metric to be like in the classic parser. Version 5.5.1 (27 July 2018) * Fix broken Java bindings build.