Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SAT solver cost #820

Merged
merged 9 commits into from
Oct 2, 2018
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Version 5.5.2 (XXX 2018)
* Major documentation update for building with Cygwin.
* Revise the manpage.
* Remove the experimental Viterbi code.
* Modify the SAT parser disjunct cost metric to be like in the classic parser.

Version 5.5.1 (27 July 2018)
* Fix broken Java bindings build.
Expand Down
2 changes: 0 additions & 2 deletions link-grammar/api.c
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,6 @@ double sentence_disjunct_cost(Sentence sent, LinkageIdx i)
{
if (!sent) return 0.0;

/* The sat solver (currently) fails to fill in link_info */
if (!sent->lnkages) return 0.0;
if (sent->num_linkages_alloced <= i) return 0.0; /* bounds check */
return sent->lnkages[i].lifo.disjunct_cost;
Expand All @@ -583,7 +582,6 @@ int sentence_link_cost(Sentence sent, LinkageIdx i)
{
if (!sent) return 0;

/* The sat solver (currently) fails to fill in link_info */
if (!sent->lnkages) return 0;
if (sent->num_linkages_alloced <= i) return 0; /* bounds check */
return sent->lnkages[i].lifo.link_cost;
Expand Down
9 changes: 1 addition & 8 deletions link-grammar/linkage/score.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,7 @@ static double compute_disjunct_cost(Linkage lkg)
void linkage_score(Linkage lkg, Parse_Options opts)
{
lkg->lifo.unused_word_cost = unused_word_cost(lkg);
if (opts->use_sat_solver)
{
lkg->lifo.disjunct_cost = 0.0;
}
else
{
lkg->lifo.disjunct_cost = compute_disjunct_cost(lkg);
}
lkg->lifo.disjunct_cost = compute_disjunct_cost(lkg);
lkg->lifo.link_cost = compute_link_cost(lkg);
lkg->lifo.corpus_cost = -1.0;

Expand Down
112 changes: 77 additions & 35 deletions link-grammar/sat-solver/sat-encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,7 @@ void SATEncoder::generate_satisfaction_for_expression(int w, int& dfs_position,
*s++ = 'c';
fast_sprintf(s, i);

/* if (i != 0) total_cost = 0; */ // This interferes with the cost cutoff
generate_satisfaction_for_expression(w, dfs_position, l->e, new_var, total_cost);
}
}
Expand Down Expand Up @@ -1528,21 +1529,14 @@ void SATEncoder::pp_prune()

/**
* Create the next linkage.
* This is very similar to compute_chosen_disjuncts(), except that
* sat_extract_links() is called, instead of normal extract_links().
* It would be good to refactor this and the other to make them even
* more similar, because else, its confusing ...
* FIXME? Use a shared function for the code here.
* Return true iff the linkage can be created.
*/
Linkage SATEncoder::create_linkage()
bool SATEncoder::create_linkage(Linkage linkage)
{
Linkage linkage = (Linkage) malloc(sizeof(struct Linkage_s));
memset(linkage, 0, sizeof(struct Linkage_s));

partial_init_linkage(_sent, linkage, _sent->length);
sat_extract_links(linkage);
if (!sat_extract_links(linkage)) return false;
compute_link_names(linkage, _sent->string_set);
return linkage;
return true;
}

void SATEncoder::generate_linkage_prohibiting()
Expand All @@ -1562,15 +1556,16 @@ void SATEncoder::generate_linkage_prohibiting()

Linkage SATEncoder::get_next_linkage()
{
Linkage linkage = NULL;
Linkage_s linkage;
bool connected;
bool sane = true;
bool display_linkage_disconnected = false;


/* Loop until a good linkage is found.
* Insane (mixed alternatives) linkages are always ignored.
* Disconnected linkages are normally ignored, unless
* !test=linkage-disconnected is used (and they are sane) */
bool linkage_ok;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should almost certainly initialize here, e.g. linkage_ok = false.

do {
if (!_solver->solve()) return NULL;

Expand All @@ -1586,16 +1581,24 @@ Linkage SATEncoder::get_next_linkage()
generate_linkage_prohibiting();
}

linkage_ok = false;
if (connected || display_linkage_disconnected) {
linkage = create_linkage();
sane = sane_linkage_morphism(_sent, linkage, _opts);
if (!sane) {
free_linkage_connectors_and_disjuncts(linkage);
free_linkage(linkage);
free(linkage);
memset(&linkage, 0, sizeof(struct Linkage_s));

linkage_ok = create_linkage(&linkage);
if (linkage_ok)
linkage_ok = sane_linkage_morphism(_sent, &linkage, _opts);
if (!linkage_ok) {
/* We cannot elegantly add this linkage to sent->linkges[] -
* to be freed in sentence_delete(), since insane linkages
* must be there with index > num_linkages_post_processed - so
* they remain hidden, but num_linkages_post_processed is an
* arbitrary number. So we must free it here. */
free_linkage_connectors_and_disjuncts(&linkage);
free_linkage(&linkage);
continue; // skip this linkage
}
remove_empty_words(linkage); /* Discard optional words. */
remove_empty_words(&linkage); /* Discard optional words. */
}

if (!connected) {
Expand All @@ -1605,9 +1608,7 @@ Linkage SATEncoder::get_next_linkage()
lgdebug(+D_SAT, "Linkage DISCONNECTED (skipped)\n");
}
}
} while (!sane || !(connected || display_linkage_disconnected));

assert(linkage, "No linkage");
} while (!linkage_ok);

/* We cannot expand the linkage array on demand, since the API uses
* linkage pointers, and they would become invalid if realloc() changes
Expand All @@ -1623,8 +1624,7 @@ Linkage SATEncoder::get_next_linkage()

Linkage lkg = &_sent->lnkages[_next_linkage_index];
_next_linkage_index++;
*lkg = *linkage; /* copy en-mass */
free(linkage);
*lkg = linkage; /* copy en-mass */

/* The link-parser code checks the next linkage for num_violations
* (to save calls to linkage_create()). Allow for that practice. */
Expand Down Expand Up @@ -1781,6 +1781,7 @@ Exp* SATEncoderConjunctionFreeSentences::PositionConnector2exp(const PositionCon
return e;
}

#define D_SEL 8
bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg)
{
Disjunct *d;
Expand Down Expand Up @@ -1850,6 +1851,8 @@ bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg)
xnode_word[var->right_word] = right_xnode;
}

lkg->num_links = current_link;

// Now build the disjuncts.
// This is needed so that compute_chosen_word works correctly.
// Just in case there is no expression for a disjunct, a null one is used.
Expand All @@ -1860,29 +1863,68 @@ bool SATEncoderConjunctionFreeSentences::sat_extract_links(Linkage lkg)
if (xnode_word[wi] == NULL)
{
if (!_sent->word[wi].optional)
prt_error("Warning: Non-optional word %zu has no linkage\n", wi);
{
de = null_exp();
prt_error("Error: Internal error: Non-optional word %zu has no linkage\n", wi);
}
continue;
}

if (de == NULL) {
de = null_exp();
lgdebug(+0, "Warning: No expression for word %zu\n", wi);
prt_error("Error: Internal error: No expression for word %zu\n", wi);
}

#ifndef MAX_CONNECTOR_COST
#define MAX_CONNECTOR_COST 1000.0f
double cost_cutoff;
#if LIMIT_TOTAL_LINKAGE_COST // Undefined - incompatible to the classic parser.
cost_cutoff = _opts->disjunct_cost;
#else
cost_cutoff = 1000.0;
#endif // LIMIT_TOTAL_LINKAGE_COST
d = build_disjuncts_for_exp(de, xnode_word[wi]->string, cost_cutoff, _opts);
free_Exp(de);

if (d == NULL)
{
lgdebug(+D_SEL, "Debug: Word %zu: Disjunct cost > cost_cutoff %.2f\n",
wi, cost_cutoff);
#ifdef DEBUG
if (!test_enabled("SAT-cost"))
#endif
d = build_disjuncts_for_exp(de, xnode_word[wi]->string, MAX_CONNECTOR_COST, _opts);
return false;
}

word_record_in_disjunct(xnode_word[wi]->word, d);

/* Recover cost of costly-nulls. */
const vector<EmptyConnector>& ec = _word_tags[wi].get_empty_connectors();
for (vector<EmptyConnector>::const_iterator j = ec.begin(); j < ec.end(); j++)
{
lgdebug(+D_SEL, "Debug: Word %zu: Costly-null var=%d, found=%d cost=%.2f\n",
wi, j->ec_var, _solver->model[j->ec_var] == l_True, j->ec_cost);
if (_solver->model[j->ec_var] == l_True)
d->cost += j->ec_cost;
}

lkg->chosen_disjuncts[wi] = d;
free_Exp(de);
}

lkg->num_links = current_link;
#if LIMIT_TOTAL_LINKAGE_COST
if (d->cost > cost_cutoff)
{
lgdebug(+D_SEL, "Word %zu: Disjunct cost %.2f > cost_cutoff %.2f\n",
wi, d->cost, cost_cutoff);
#ifdef DEBUG
if (!test_enabled("SAT-cost"))
#endif
return false;
}
#endif // LIMIT_TOTAL_LINKAGE_COST
}

DEBUG_print("Total: ." << lkg->num_links << "." << endl);
return false;
DEBUG_print("Total links: ." << lkg->num_links << "." << endl);
return true;
}
#undef D_SEL

/**
* Main entry point into the SAT parser.
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/sat-solver/sat-encoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ class SATEncoder
virtual bool sat_extract_links(Linkage) = 0;

// Create linkage from a propositional model
Linkage create_linkage();
bool create_linkage(Linkage);

// Generate clause that prohibits the current model
void generate_linkage_prohibiting();
Expand Down
29 changes: 21 additions & 8 deletions link-grammar/sat-solver/word-tag.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ extern "C" {
#include "utilities.h"
}

#define D_IC 6
#define D_IC 8
void WordTag::insert_connectors(Exp* exp, int& dfs_position,
bool& leading_right, bool& leading_left,
std::vector<int>& eps_right,
Expand All @@ -22,10 +22,16 @@ void WordTag::insert_connectors(Exp* exp, int& dfs_position,
double cost = parent_cost + exp->cost;

#ifdef DEBUG
if (0 && verbosity_level(+D_IC)) { // Extreme debug
printf("Expression type %d for Word%d, var %s:\n", exp->type, _word, var);
printf("parent_exp: "); print_expression(parent_exp);
printf("exp: "); print_expression(exp);
if (0 && verbosity_level(+D_IC)) // Extreme debug
//if (_word == 2)
{
const char*type =
((const char *[]) {"OR_type", "AND_type", "CONNECTOR_type"}) [exp->type-1];
printf("Expression type %s for Word%d, var %s:\n", type, _word, var);
//printf("parent_exp: "); print_expression(parent_exp);
printf("exp(%s) e=%.2f pc=%.2f ", word_xnode->string,exp->cost, parent_cost);
print_expression(exp);
if (exp->cost > 0 || root) prt_exp_mem(exp, 0);
}
#endif

Expand All @@ -38,7 +44,7 @@ void WordTag::insert_connectors(Exp* exp, int& dfs_position,
_dir.push_back('+');
_right_connectors.push_back(
PositionConnector(parent_exp, exp, '+', _word, dfs_position,
cost, leading_right, false,
cost, parent_cost, leading_right, false,
eps_right, eps_left, word_xnode, _opts));
leading_right = false;
break;
Expand All @@ -47,7 +53,7 @@ void WordTag::insert_connectors(Exp* exp, int& dfs_position,
_dir.push_back('-');
_left_connectors.push_back(
PositionConnector(parent_exp, exp, '-', _word, dfs_position,
cost, false, leading_left,
cost, parent_cost, false, leading_left,
eps_right, eps_left, word_xnode, _opts));
leading_left = false;
break;
Expand All @@ -57,6 +63,12 @@ void WordTag::insert_connectors(Exp* exp, int& dfs_position,
} else if (exp->type == AND_type) {
if (exp->u.l == NULL) {
/* zeroary and */
if (cost != 0)
{
lgdebug(+D_IC, "EmptyConnector var=%s(%d) cost %.2f pcost %.2f\n",
var, _variables->string(var), cost, parent_cost);
_empty_connectors.push_back(EmptyConnector(_variables->string(var),cost));
}
} else
if (exp->u.l->next == NULL) {
/* unary and - skip */
Expand All @@ -79,8 +91,9 @@ void WordTag::insert_connectors(Exp* exp, int& dfs_position,
*s++ = 'c';
fast_sprintf(s, i);

double and_cost = (i == 0) ? cost : 0;
insert_connectors(l->e, dfs_position, leading_right, leading_left,
eps_right, eps_left, new_var, false, cost, parent_exp, word_xnode);
eps_right, eps_left, new_var, false, and_cost, parent_exp, word_xnode);

if (leading_right) {
eps_right.push_back(_variables->epsilon(new_var, '+'));
Expand Down
22 changes: 20 additions & 2 deletions link-grammar/sat-solver/word-tag.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ extern "C" {
struct PositionConnector
{
PositionConnector(Exp* pe, Exp* e, char d, int w, int p,
double pcst, bool lr, bool ll,
double cst, double pcst, bool lr, bool ll,
const std::vector<int>& er, const std::vector<int>& el, const X_node *w_xnode, Parse_Options opts)
: exp(pe), dir(d), word(w), position(p),
cost(e->cost), parent_cost(pcst),
cost(cst), parent_cost(pcst),
leading_right(lr), leading_left(ll),
eps_right(er), eps_left(el), word_xnode(w_xnode)
{
Expand Down Expand Up @@ -84,6 +84,19 @@ struct PositionConnector

};

/*
* Record the SAT variable and cost of costly-null expressions.
* Their cost is recovered (in sat_extract_links()) if
* they happen to reside on a participating disjunct.
*/
struct EmptyConnector {
EmptyConnector(int var, double cst)
: ec_var(var), ec_cost(cst)
{
}
int ec_var;
double ec_cost;
};

// XXX TODO: Hash connectors for faster matching

Expand All @@ -92,6 +105,7 @@ class WordTag
private:
std::vector<PositionConnector> _left_connectors;
std::vector<PositionConnector> _right_connectors;
std::vector<EmptyConnector> _empty_connectors;

std::vector<char> _dir;
std::vector<int> _position;
Expand Down Expand Up @@ -133,6 +147,10 @@ class WordTag
return _right_connectors;
}

const std::vector<EmptyConnector>& get_empty_connectors() const {
return _empty_connectors;
}

PositionConnector* get(int dfs_position)
{
switch (_dir[dfs_position - 1]) {
Expand Down