diff --git a/lib/Caxy/HtmlDiff/AbstractDiff.php b/lib/Caxy/HtmlDiff/AbstractDiff.php index 39cbbac..47d4199 100644 --- a/lib/Caxy/HtmlDiff/AbstractDiff.php +++ b/lib/Caxy/HtmlDiff/AbstractDiff.php @@ -19,6 +19,7 @@ abstract class AbstractDiff protected $specialCaseTags; protected $specialCaseChars; protected $groupDiffs; + protected $matchThreshold = 80; public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null) { @@ -39,6 +40,28 @@ public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCas $this->setSpecialCaseChars(static::$defaultSpecialCaseChars); } + /** + * @return int Similarity percentage (80 unless changed) that list items are compared against when they are matched. + */ + public function getMatchThreshold() + { + return $this->matchThreshold; + } + + /** + * @param int $matchThreshold New threshold; stored as given, without validation. + * + * @return AbstractDiff This instance, to allow chaining. + */ + public function setMatchThreshold($matchThreshold) + { + $this->matchThreshold = $matchThreshold; + + return $this; + } + + + public function setSpecialCaseChars(array $chars) { $this->specialCaseChars = $chars; diff --git a/lib/Caxy/HtmlDiff/HtmlDiff.php b/lib/Caxy/HtmlDiff/HtmlDiff.php index 394c59b..915a331 100644 --- a/lib/Caxy/HtmlDiff/HtmlDiff.php +++ b/lib/Caxy/HtmlDiff/HtmlDiff.php @@ -215,7 +215,9 @@ protected function diffElements($oldText, $newText, $stripWrappingTags = true) protected function diffList($oldText, $newText) { - $diff = new ListDiff($oldText, $newText, $this->encoding, $this->specialCaseTags, $this->groupDiffs); + $diff = new ListDiffNew($oldText, $newText, $this->encoding, $this->specialCaseTags, $this->groupDiffs); + $diff->setMatchThreshold($this->matchThreshold); + return $diff->build(); } diff --git a/lib/Caxy/HtmlDiff/ListDiff/DiffList.php b/lib/Caxy/HtmlDiff/ListDiff/DiffList.php new file mode 100644 index 0000000..bed9eb8 --- /dev/null +++ b/lib/Caxy/HtmlDiff/ListDiff/DiffList.php @@ -0,0 +1,102 @@ +listType = $listType; + $this->startTag = $startTag; + $this->endTag = $endTag; + $this->listItems = $listItems; + $this->attributes
= $attributes; + } + + /** + * @return mixed List type as stored by the constructor or setListType(). + */ + public function getListType() + { + return $this->listType; + } + + /** + * @param mixed $listType + * + * @return DiffList This instance, to allow chaining. + */ + public function setListType($listType) + { + $this->listType = $listType; + + return $this; + } + + /** + * @return mixed Opening tag of the list as stored by the constructor or setStartTag(). + */ + public function getStartTag() + { + return $this->startTag; + } + + /** Returns the opening tag with ' class="$class"' inserted before each '>' it contains. */ public function getStartTagWithDiffClass($class = 'diff-list') + { + return str_replace('>', ' class="'.$class.'">', $this->startTag); + } + + /** + * @param mixed $startTag New opening tag; note this setter returns nothing, unlike setListType(). + */ + public function setStartTag($startTag) + { + $this->startTag = $startTag; + } + + /** + * @return mixed Closing tag of the list as stored by the constructor or setEndTag(). + */ + public function getEndTag() + { + return $this->endTag; + } + + /** + * @param mixed $endTag New closing tag; note this setter returns nothing, unlike setListType(). + */ + public function setEndTag($endTag) + { + $this->endTag = $endTag; + } + + /** + * @return mixed List contents as stored by the constructor or setListItems(). + */ + public function getListItems() + { + return $this->listItems; + } + + /** + * @param mixed $listItems + * + * @return DiffList This instance, to allow chaining. + */ + public function setListItems($listItems) + { + $this->listItems = $listItems; + + return $this; + } +} \ No newline at end of file diff --git a/lib/Caxy/HtmlDiff/ListDiff/DiffListItem.php b/lib/Caxy/HtmlDiff/ListDiff/DiffListItem.php new file mode 100644 index 0000000..da7272f --- /dev/null +++ b/lib/Caxy/HtmlDiff/ListDiff/DiffListItem.php @@ -0,0 +1,124 @@ +text = $text; + $this->attributes = $attributes; + $this->startTag = $startTag; + $this->endTag = $endTag; + } + + /** + * @return array Attributes as stored by the constructor or setAttributes(). + */ + public function getAttributes() + { + return $this->attributes; + } + + /** + * @param array $attributes + * + * @return DiffListItem This instance, to allow chaining. + */ + public function setAttributes($attributes) + { + $this->attributes = $attributes; + + return $this; + } + + /** + * @return mixed Content of the item as stored by the constructor or setText(); getInnerHtml() implode()s it, so it is expected to be an array of strings. + */ + public function getText() + { + return $this->text; + } + + /** + * @param mixed $text + * + * @return DiffListItem This instance, to allow chaining. + */ + public function setText($text) + { + $this->text = $text; + + return $this; + } + + 
/** + * @return mixed Opening tag of this item, as stored by the constructor or setStartTag(). + */ + public function getStartTag() + { + return $this->startTag; + } + + /** Returns the opening tag with ' class="$class"' inserted before each '>' it contains. */ public function getStartTagWithDiffClass($class = 'normal') + { + return str_replace('>', ' class="'.$class.'">', $this->startTag); + } + + /** + * @param mixed $startTag + * + * @return DiffListItem This instance, to allow chaining. + */ + public function setStartTag($startTag) + { + $this->startTag = $startTag; + + return $this; + } + + /** + * @return mixed Closing tag of this item, as stored by the constructor or setEndTag(). + */ + public function getEndTag() + { + return $this->endTag; + } + + /** + * @param mixed $endTag + * + * @return DiffListItem This instance, to allow chaining. + */ + public function setEndTag($endTag) + { + $this->endTag = $endTag; + + return $this; + } + + /** Renders the item: class-tagged start tag, inner HTML optionally wrapped in <$wrapTag>...</$wrapTag>, then the end tag. */ public function getHtml($class = 'normal', $wrapTag = null) + { + $startWrap = $wrapTag ? sprintf('<%s>', $wrapTag) : ''; + $endWrap = $wrapTag ? sprintf('</%s>', $wrapTag) : ''; // closes the wrapper opened by $startWrap + return sprintf('%s%s%s%s%s', $this->getStartTagWithDiffClass($class), $startWrap, $this->getInnerHtml(), $endWrap, $this->endTag); + } + + /** Concatenates the stored words of the item into one HTML string. */ public function getInnerHtml() + { + return implode('', $this->text); + } + + public function __toString() + { + return $this->getHtml(); + } +} \ No newline at end of file diff --git a/lib/Caxy/HtmlDiff/ListDiffNew.php b/lib/Caxy/HtmlDiff/ListDiffNew.php new file mode 100644 index 0000000..c3cc57c --- /dev/null +++ b/lib/Caxy/HtmlDiff/ListDiffNew.php @@ -0,0 +1,266 @@ +splitInputsToWords(); + + return $this->diffLists( + $this->buildDiffList($this->oldWords), + $this->buildDiffList($this->newWords) + ); + } + + /** + * Builds the diffed list markup by walking the DiffListItem entries of the new list and pairing each one with an entry of the old list. + * + * Every old/new pair is first scored with similar_text(). A pair is diffed with HtmlDiff when its score exceeds $this->matchThreshold or both cursors are at the same position; otherwise items are emitted whole, wrapped in del (old) or ins (new). Old items left over at the end are emitted as removed. + * + * @param DiffList $oldList + * @param DiffList $newList + * + * @return string The new list's start tag (with the default diff class), the item output, and the new list's end tag. + */ + protected function diffLists(DiffList $oldList, DiffList $newList) + { + $oldMatchData = array(); + $newMatchData = array(); + $oldListIndices = array(); + $newListIndices = array(); + $oldListItems = array(); + $newListItems = array(); + + foreach ($oldList->getListItems() as $oldIndex => $oldListItem) { + if ($oldListItem instanceof DiffListItem) { + $oldListItems[$oldIndex] = $oldListItem; + + $oldListIndices[] = $oldIndex; + $oldMatchData[$oldIndex] = array(); + + // Get match percentages + foreach
($newList->getListItems() as $newIndex => $newListItem) { + if ($newListItem instanceof DiffListItem) { + if (!in_array($newListItem, $newListItems, true)) { + $newListItems[$newIndex] = $newListItem; + } + if (!in_array($newIndex, $newListIndices, true)) { + $newListIndices[] = $newIndex; + } + if (!array_key_exists($newIndex, $newMatchData)) { + $newMatchData[$newIndex] = array(); + } + + $oldText = implode('', $oldListItem->getText()); + $newText = implode('', $newListItem->getText()); + + // similar_text() reports the similarity as a percentage through its third, by-reference argument. + $percentage = null; + similar_text($oldText, $newText, $percentage); + + $oldMatchData[$oldIndex][$newIndex] = $percentage; + $newMatchData[$newIndex][$oldIndex] = $percentage; + } + } + } + } + + $currentIndexInOld = 0; + $currentIndexInNew = 0; + $oldCount = count($oldListIndices); + $newCount = count($newListIndices); + $difference = max($oldCount, $newCount) - min($oldCount, $newCount); + + $diffOutput = ''; + + foreach ($newList->getListItems() as $newIndex => $newListItem) { + if ($newListItem instanceof DiffListItem) { + $operation = null; + + $oldListIndex = array_key_exists($currentIndexInOld, $oldListIndices) ? $oldListIndices[$currentIndexInOld] : null; + $class = 'normal'; + + if (null !== $oldListIndex && array_key_exists($oldListIndex, $oldMatchData)) { + // Check percentage matches of upcoming list items in old. + $matchPercentage = $oldMatchData[$oldListIndex][$newIndex]; + + // does the old list item match better? + $otherMatchBetter = false; + foreach ($oldMatchData[$oldListIndex] as $index => $percentage) { + if ($index > $newIndex && $percentage > $matchPercentage) { + $otherMatchBetter = $index; + } + } + + if (false !== $otherMatchBetter && $newCount > $oldCount && $difference > 0) { + $diffOutput .= sprintf('%s', $newListItem->getHtml('normal new', 'ins')); + $currentIndexInNew++; + $difference--; + + continue; + } + + $nextOldListIndex = array_key_exists($currentIndexInOld + 1, $oldListIndices) ?
$oldListIndices[$currentIndexInOld + 1] : null; + + $replacement = false; + + if ($nextOldListIndex !== null && $oldMatchData[$nextOldListIndex][$newIndex] > $matchPercentage && $oldMatchData[$nextOldListIndex][$newIndex] > $this->matchThreshold) { + // Following list item in old is better match, use that. + $diffOutput .= sprintf('%s', $oldListItems[$oldListIndex]->getHtml('removed', 'del')); + + $currentIndexInOld++; + $oldListIndex = $nextOldListIndex; + // Keep the promoted old item's score against this new item (a number), not its whole score row (an array). + $matchPercentage = $oldMatchData[$oldListIndex][$newIndex]; + $replacement = true; + } + + if ($matchPercentage > $this->matchThreshold || $currentIndexInNew === $currentIndexInOld) { + // Diff the two lists. + $htmlDiff = new HtmlDiff($oldListItems[$oldListIndex]->getInnerHtml(), $newListItem->getInnerHtml(), $this->encoding, $this->specialCaseTags, $this->groupDiffs); + $diffContent = $htmlDiff->build(); + + $diffOutput .= sprintf('%s%s%s', $newListItem->getStartTagWithDiffClass($replacement ? 'replacement' : 'normal'), $diffContent, $newListItem->getEndTag()); + + } else { + $diffOutput .= sprintf('%s', $oldListItems[$oldListIndex]->getHtml('removed', 'del')); + $diffOutput .= sprintf('%s', $newListItem->getHtml('replacement', 'ins')); + } + $currentIndexInOld++; + } else { + $diffOutput .= sprintf('%s', $newListItem->getHtml('normal new', 'ins')); + } + + $currentIndexInNew++; + } + } + + // Output any additional list items + while (array_key_exists($currentIndexInOld, $oldListIndices)) { + $oldListIndex = $oldListIndices[$currentIndexInOld]; + $diffOutput .= sprintf('%s', $oldListItems[$oldListIndex]->getHtml('removed', 'del')); + $currentIndexInOld++; + } + + return sprintf('%s%s%s', $newList->getStartTagWithDiffClass(), $diffOutput, $newList->getEndTag()); + } + + /** + * Groups a flat word list into a DiffList: the outermost list tags become its start and end tag, and each top-level list item becomes a DiffListItem holding the words between its tags. + * + * Depth counters track nested lists and items; nested tags and plain words go into the currently open item, or into the list itself when no item is open. + * + * @param array $words Word and tag tokens (called with $this->oldWords and $this->newWords). + * + * @return DiffList + */ + protected function buildDiffList($words) + { + $listType = null; + $listStartTag = null; + $listEndTag = null; + $attributes = array(); + $openLists = 0; + $openListItems = 0; + $list = array(); + $currentListItem = null; + $listItemType = null; +
$listItemStart = null; + $listItemEnd = null; + + foreach ($words as $i => $word) { + if ($this->isOpeningListTag($word, $listType)) { + if ($openLists > 0) { + if ($openListItems > 0) { + $currentListItem[] = $word; + } else { + $list[] = $word; + } + } else { + $listType = substr($word, 1, 2); + $listStartTag = $word; + } + + $openLists++; + } elseif ($this->isClosingListTag($word, $listType)) { + if ($openLists > 1) { + if ($openListItems > 0) { + $currentListItem[] = $word; + } else { + $list[] = $word; + } + } else { + $listEndTag = $word; + } + + $openLists--; + } elseif ($this->isOpeningListItemTag($word, $listItemType)) { + if ($openListItems === 0) { + // New top-level list item + $currentListItem = array(); + $listItemType = substr($word, 1, 2); + $listItemStart = $word; + } else { + $currentListItem[] = $word; + } + + $openListItems++; + } elseif ($this->isClosingListItemTag($word, $listItemType)) { + if ($openListItems === 1) { + $listItemEnd = $word; + $listItem = new DiffListItem($currentListItem, array(), $listItemStart, $listItemEnd); + $list[] = $listItem; + $currentListItem = null; + } else { + $currentListItem[] = $word; + } + + $openListItems--; + } else { + if ($openListItems > 0) { + $currentListItem[] = $word; + } else { + $list[] = $word; + } + } + } + + $diffList = new DiffList($listType, $listStartTag, $listEndTag, $list, $attributes); + + return $diffList; + } + + protected function isOpeningListTag($word, $type = null) + { + $filter = $type !== null ? array('<' . $type) : array('