Skip to content

Commit

Permalink
Merge pull request #632 from pshipton/harfbuzz1
Browse files Browse the repository at this point in the history
Port harfbuzz - Avoid O(n^2) behavior in mark-attachment
  • Loading branch information
keithc-ca authored Mar 22, 2023
2 parents 41f52b5 + 3319f31 commit 451b504
Show file tree
Hide file tree
Showing 3 changed files with 152 additions and 88 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -89,54 +89,78 @@ struct MarkBasePosFormat1

const Coverage &get_coverage () const { return this+markCoverage; }

static inline bool accept (hb_buffer_t *buffer, unsigned idx)
{
/* We only want to attach to the first of a MultipleSubst sequence.
* https://github.com/harfbuzz/harfbuzz/issues/740
* Reject others...
* ...but stop if we find a mark in the MultipleSubst sequence:
* https://github.com/harfbuzz/harfbuzz/issues/1020 */
return !_hb_glyph_info_multiplied (&buffer->info[idx]) ||
0 == _hb_glyph_info_get_lig_comp (&buffer->info[idx]) ||
(idx == 0 ||
_hb_glyph_info_is_mark (&buffer->info[idx - 1]) ||
!_hb_glyph_info_multiplied (&buffer->info[idx - 1]) ||
_hb_glyph_info_get_lig_id (&buffer->info[idx]) !=
_hb_glyph_info_get_lig_id (&buffer->info[idx - 1]) ||
_hb_glyph_info_get_lig_comp (&buffer->info[idx]) !=
_hb_glyph_info_get_lig_comp (&buffer->info[idx - 1]) + 1
);
}

bool apply (hb_ot_apply_context_t *c) const
{
TRACE_APPLY (this);
hb_buffer_t *buffer = c->buffer;
unsigned int mark_index = (this+markCoverage).get_coverage (buffer->cur().codepoint);
if (likely (mark_index == NOT_COVERED)) return_trace (false);

/* Now we search backwards for a non-mark glyph */
/* Now we search backwards for a non-mark glyph.
* We don't use skippy_iter.prev() to avoid O(n^2) behavior. */

hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
skippy_iter.reset (buffer->idx, 1);
skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
do {
unsigned unsafe_from;
if (!skippy_iter.prev (&unsafe_from))

if (c->last_base_until > buffer->idx)
{
c->last_base_until = 0;
c->last_base = -1;
}
unsigned j;
for (j = buffer->idx; j > c->last_base_until; j--)
{
auto match = skippy_iter.match (buffer->info[j - 1]);
if (match == skippy_iter.MATCH)
{
if (!accept (buffer, j - 1))
match = skippy_iter.SKIP;
}
if (match == skippy_iter.MATCH)
{
buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
return_trace (false);
c->last_base = (signed) j - 1;
break;
}
}
c->last_base_until = buffer->idx;
if (c->last_base == -1)
{
buffer->unsafe_to_concat_from_outbuffer (0, buffer->idx + 1);
return_trace (false);
}

/* We only want to attach to the first of a MultipleSubst sequence.
* https://github.com/harfbuzz/harfbuzz/issues/740
* Reject others...
* ...but stop if we find a mark in the MultipleSubst sequence:
* https://github.com/harfbuzz/harfbuzz/issues/1020 */
if (!_hb_glyph_info_multiplied (&buffer->info[skippy_iter.idx]) ||
0 == _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) ||
(skippy_iter.idx == 0 ||
_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx - 1]) ||
_hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx]) !=
_hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx - 1]) ||
_hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) !=
_hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx - 1]) + 1
))
break;
skippy_iter.reject ();
} while (true);
unsigned idx = (unsigned) c->last_base;

/* Checking that matched glyph is actually a base glyph by GDEF is too strong; disabled */
//if (!_hb_glyph_info_is_base_glyph (&buffer->info[skippy_iter.idx])) { return_trace (false); }
//if (!_hb_glyph_info_is_base_glyph (&buffer->info[idx])) { return_trace (false); }

unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[skippy_iter.idx].codepoint);
unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[idx].codepoint);
if (base_index == NOT_COVERED)
{
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
buffer->unsafe_to_concat_from_outbuffer (idx, buffer->idx + 1);
return_trace (false);
}

return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, skippy_iter.idx));
return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, idx));
}

bool subset (hb_subset_context_t *c) const
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,24 +138,41 @@ struct MarkLigPosFormat1
if (likely (mark_index == NOT_COVERED)) return_trace (false);

/* Now we search backwards for a non-mark glyph */

hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input;
skippy_iter.reset (buffer->idx, 1);
skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks);
unsigned unsafe_from;
if (!skippy_iter.prev (&unsafe_from))

if (c->last_base_until > buffer->idx)
{
buffer->unsafe_to_concat_from_outbuffer (unsafe_from, buffer->idx + 1);
c->last_base_until = 0;
c->last_base = -1;
}
unsigned j;
for (j = buffer->idx; j > c->last_base_until; j--)
{
auto match = skippy_iter.match (buffer->info[j - 1]);
if (match == skippy_iter.MATCH)
{
c->last_base = (signed) j - 1;
break;
}
}
c->last_base_until = buffer->idx;
if (c->last_base == -1)
{
buffer->unsafe_to_concat_from_outbuffer (0, buffer->idx + 1);
return_trace (false);
}

unsigned idx = (unsigned) c->last_base;

/* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */
//if (!_hb_glyph_info_is_ligature (&buffer->info[skippy_iter.idx])) { return_trace (false); }
//if (!_hb_glyph_info_is_ligature (&buffer->info[idx])) { return_trace (false); }

unsigned int j = skippy_iter.idx;
unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[j].codepoint);
unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[idx].codepoint);
if (lig_index == NOT_COVERED)
{
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
buffer->unsafe_to_concat_from_outbuffer (idx, buffer->idx + 1);
return_trace (false);
}

Expand All @@ -166,7 +183,7 @@ struct MarkLigPosFormat1
unsigned int comp_count = lig_attach.rows;
if (unlikely (!comp_count))
{
buffer->unsafe_to_concat_from_outbuffer (skippy_iter.idx, buffer->idx + 1);
buffer->unsafe_to_concat_from_outbuffer (idx, buffer->idx + 1);
return_trace (false);
}

Expand All @@ -175,15 +192,15 @@ struct MarkLigPosFormat1
* can directly use the component index. If not, we attach the mark
* glyph to the last component of the ligature. */
unsigned int comp_index;
unsigned int lig_id = _hb_glyph_info_get_lig_id (&buffer->info[j]);
unsigned int lig_id = _hb_glyph_info_get_lig_id (&buffer->info[idx]);
unsigned int mark_id = _hb_glyph_info_get_lig_id (&buffer->cur());
unsigned int mark_comp = _hb_glyph_info_get_lig_comp (&buffer->cur());
if (lig_id && lig_id == mark_id && mark_comp > 0)
comp_index = hb_min (comp_count, _hb_glyph_info_get_lig_comp (&buffer->cur())) - 1;
else
comp_index = comp_count - 1;

return_trace ((this+markArray).apply (c, mark_index, comp_index, lig_attach, classCount, j));
return_trace ((this+markArray).apply (c, mark_index, comp_index, lig_attach, classCount, idx));
}

bool subset (hb_subset_context_t *c) const
Expand Down
121 changes: 72 additions & 49 deletions src/java.desktop/share/native/libharfbuzz/hb-ot-layout-gsubgpos.hh
Original file line number Diff line number Diff line change
Expand Up @@ -517,34 +517,56 @@ struct hb_ot_apply_context_t :
may_skip (const hb_glyph_info_t &info) const
{ return matcher.may_skip (c, info); }

bool next (unsigned *unsafe_to = nullptr)
enum match_t {
MATCH,
NOT_MATCH,
SKIP
};

match_t match (hb_glyph_info_t &info)
{
assert (num_items > 0);
while (idx + num_items < end)
{
idx++;
hb_glyph_info_t &info = c->buffer->info[idx];
matcher_t::may_skip_t skip = matcher.may_skip (c, info);
if (unlikely (skip == matcher_t::SKIP_YES))
return SKIP;

matcher_t::may_skip_t skip = matcher.may_skip (c, info);
if (unlikely (skip == matcher_t::SKIP_YES))
continue;
matcher_t::may_match_t match = matcher.may_match (info, match_glyph_data);
if (match == matcher_t::MATCH_YES ||
(match == matcher_t::MATCH_MAYBE &&
skip == matcher_t::SKIP_NO))
return MATCH;

matcher_t::may_match_t match = matcher.may_match (info, match_glyph_data);
if (match == matcher_t::MATCH_YES ||
(match == matcher_t::MATCH_MAYBE &&
skip == matcher_t::SKIP_NO))
{
num_items--;
if (match_glyph_data) match_glyph_data++;
return true;
}
if (skip == matcher_t::SKIP_NO)
return NOT_MATCH;

if (skip == matcher_t::SKIP_NO)
{
if (unsafe_to)
*unsafe_to = idx + 1;
return false;
}
return SKIP;
}

bool next (unsigned *unsafe_to = nullptr)
{
assert (num_items > 0);
/* The alternate condition below is faster at string boundaries,
* but produces subpar "unsafe-to-concat" values. */
signed stop = (signed) end - (signed) num_items;
while ((signed) idx < stop)
{
idx++;
switch (match (c->buffer->info[idx]))
{
case MATCH:
{
num_items--;
if (match_glyph_data) match_glyph_data++;
return true;
}
case NOT_MATCH:
{
if (unsafe_to)
*unsafe_to = idx + 1;
return false;
}
case SKIP:
continue;
}
}
if (unsafe_to)
*unsafe_to = end;
Expand All @@ -553,31 +575,29 @@ struct hb_ot_apply_context_t :
bool prev (unsigned *unsafe_from = nullptr)
{
assert (num_items > 0);
while (idx > num_items - 1)
/* The alternate condition below is faster at string boundaries,
* but produces subpar "unsafe-to-concat" values. */
unsigned stop = num_items - 1;
while (idx > stop)
{
idx--;
hb_glyph_info_t &info = c->buffer->out_info[idx];

matcher_t::may_skip_t skip = matcher.may_skip (c, info);
if (unlikely (skip == matcher_t::SKIP_YES))
continue;

matcher_t::may_match_t match = matcher.may_match (info, match_glyph_data);
if (match == matcher_t::MATCH_YES ||
(match == matcher_t::MATCH_MAYBE &&
skip == matcher_t::SKIP_NO))
{
num_items--;
if (match_glyph_data) match_glyph_data++;
return true;
}

if (skip == matcher_t::SKIP_NO)
{
if (unsafe_from)
*unsafe_from = hb_max (1u, idx) - 1u;
return false;
}
idx--;
switch (match (c->buffer->out_info[idx]))
{
case MATCH:
{
num_items--;
if (match_glyph_data) match_glyph_data++;
return true;
}
case NOT_MATCH:
{
if (unsafe_from)
*unsafe_from = hb_max (1u, idx) - 1u;
return false;
}
case SKIP:
continue;
}
}
if (unsafe_from)
*unsafe_from = 0;
Expand Down Expand Up @@ -640,6 +660,9 @@ struct hb_ot_apply_context_t :
uint32_t random_state = 1;
unsigned new_syllables = (unsigned) -1;

signed last_base = -1; // GPOS uses
unsigned last_base_until = 0; // GPOS uses

hb_ot_apply_context_t (unsigned int table_index_,
hb_font_t *font_,
hb_buffer_t *buffer_) :
Expand Down Expand Up @@ -677,7 +700,7 @@ struct hb_ot_apply_context_t :
iter_context.init (this, true);
}

void set_lookup_mask (hb_mask_t mask) { lookup_mask = mask; init_iters (); }
void set_lookup_mask (hb_mask_t mask) { lookup_mask = mask; last_base = -1; last_base_until = 0; init_iters (); }
void set_auto_zwj (bool auto_zwj_) { auto_zwj = auto_zwj_; init_iters (); }
void set_auto_zwnj (bool auto_zwnj_) { auto_zwnj = auto_zwnj_; init_iters (); }
void set_per_syllable (bool per_syllable_) { per_syllable = per_syllable_; init_iters (); }
Expand Down

0 comments on commit 451b504

Please sign in to comment.