Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow editing of simplified/traditional scripts in Chinese sentences #2189

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 63 additions & 28 deletions src/Lib/Autotranscription.php
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,37 @@ private function _unpack_grouped_furigana(&$group)
return $formatted;
}

/**
 * Append an error to $errors when $transcr is not identical to $sentence.
 *
 * The message either names the first character of $transcr at which the
 * two strings diverge, or states that the provided text is shorter when
 * nothing is left in $transcr at the point of divergence.
 *
 * @param array  $errors   Error messages; appended to in place.
 * @param string $sentence Reference ("original") text.
 * @param string $transcr  Provided text compared against $sentence.
 */
private function _errorIfNonEqual(&$errors, $sentence, $transcr) {
    if ($sentence === $transcr) {
        return;
    }

    /* Byte-wise XOR yields "\0" wherever both strings agree, so the length
       of the leading "\0" run is the byte offset of the first difference
       (or the length of the shorter string when one is a prefix). */
    $offset = strspn($transcr ^ $sentence, "\0");

    /* mb_strcut() realigns the byte offset to the start of the character
       it falls in; mb_substr() then keeps just that one character. */
    $character = mb_substr(mb_strcut($transcr, $offset), 0, 1);

    /* Compare with '' explicitly: a bare truthiness test treats the
       character "0" as empty and would report the wrong error. */
    if ((string)$character !== '') {
        $errors[] = format(
            __(
                'The provided sentence differs from the original one '.
                'near “{character}”.',
                true),
            compact('character')
        );
    } else {
        $errors[] = format(
            __(
                'The provided sentence is shorter than the '.
                'original one.',
                true),
            compact('character')
        );
    }
}

/**
* Convert Japanese text into furigana.
*/
Expand Down Expand Up @@ -221,34 +252,7 @@ public function jpn_Jpan_to_Hrkt_validate($sentenceText, $transcr, &$errors) {
$tokenizeFuriRegex = '/\[([^|]+)\|([\p{Hiragana}\p{Katakana}ー|]*)\]/u';

$withoutFuri = preg_replace($tokenizeFuriRegex, '$1', $transcr);
if ($sentenceText !== $withoutFuri) {
/* Find the first character that differs */
$character = mb_substr(
mb_strcut(
$withoutFuri,
strspn($withoutFuri ^ $sentenceText, "\0")
),
0,
1
);
if ($character) {
$errors[] = format(
__(
'The provided sentence differs from the original one '.
'near “{character}”.',
true),
compact('character')
);
} else {
$errors[] = format(
__(
'The provided sentence is shorter than the '.
'original one.',
true),
compact('character')
);
}
}
$this->_errorIfNonEqual($errors, $sentenceText, $withoutFuri);

$withFuri = preg_replace('/\[([^|]+)\|+\]/u', '$1', $transcr);
$withFuri = preg_replace($tokenizeFuriRegex, '$2', $withFuri);
Expand Down Expand Up @@ -333,6 +337,37 @@ private function _basic_pinyin_cleanup($text) {
return $text;
}

/**
 * Validate a provided alternate-script version of a Chinese sentence.
 *
 * Characters are compared position by position. Where both the original
 * and the provided character are Han characters, any difference is
 * accepted; every other position must match exactly.
 *
 * @param string $sentence Original sentence text.
 * @param string $transcr  Provided text to validate.
 * @param array  $errors   Error messages; appended to in place.
 *
 * @return bool True when this call added no error to $errors.
 */
public function cmn_Hans_to_Hant_validate($sentence, $transcr, &$errors) {
    if (mb_strlen($sentence) < mb_strlen($transcr)) {
        $errors[] = __('The provided sentence is longer than '
                      .'the original one.');
        return false;
    }

    /* Remember how many errors were already present so the result reflects
       only this validation, and so count() is never called on a non-array
       when the caller passed an uninitialised variable. */
    $errorsBefore = is_array($errors) ? count($errors) : 0;

    // Compare $sentence with $transcr while ignoring Han chars
    $sentenceA = preg_split("//u", $sentence, -1, PREG_SPLIT_NO_EMPTY);
    $transcrA = preg_split("//u", $transcr, -1, PREG_SPLIT_NO_EMPTY);
    $length = min(count($transcrA), count($sentenceA));
    $transcrComp = '';
    for ($i = 0; $i < $length; $i++) {
        $charS = $sentenceA[$i];
        $charT = $transcrA[$i];
        $charS_isHan = preg_match('/\p{Han}/u', $charS) === 1;
        $charT_isHan = preg_match('/\p{Han}/u', $charT) === 1;
        if ($charS_isHan && $charT_isHan) {
            // Han-for-Han substitutions are allowed: keep the original
            // character so this position never counts as a difference.
            $transcrComp .= $charS;
        } else {
            $transcrComp .= $charT;
        }
    }
    $this->_errorIfNonEqual($errors, $sentence, $transcrComp);

    $errorsAfter = is_array($errors) ? count($errors) : 0;

    return $errorsAfter === $errorsBefore;
}

/**
 * Validation entry point for the Hant to Hans direction.
 *
 * Delegates to cmn_Hans_to_Hant_validate() with the same arguments and
 * returns its result unchanged.
 */
public function cmn_Hant_to_Hans_validate($sentence, $transcr, &$errors) {
    $isValid = $this->cmn_Hans_to_Hant_validate($sentence, $transcr, $errors);

    return $isValid;
}

public function cmn_Hant_to_Latn_generate($text, &$needsReview) {
$pinyin = $this->_call_sinoparserd('pinyin', $text);
$pinyin = $this->_basic_pinyin_cleanup($pinyin);
Expand Down
2 changes: 0 additions & 2 deletions src/Model/Entity/Transcription.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,13 @@ class Transcription extends Entity
'cmn-Hans' => array(
'Hant' => array(
'type' => 'altscript',
'readonly' => true,
),
'Latn' => array(
),
),
'cmn-Hant' => array(
'Hans' => array(
'type' => 'altscript',
'readonly' => true,
),
'Latn' => array(
),
Expand Down
2 changes: 0 additions & 2 deletions src/Model/Table/TranscriptionsTable.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,13 @@ class TranscriptionsTable extends Table
'cmn-Hans' => array(
'Hant' => array(
'type' => 'altscript',
'readonly' => true,
),
'Latn' => array(
),
),
'cmn-Hant' => array(
'Hans' => array(
'type' => 'altscript',
'readonly' => true,
),
'Latn' => array(
),
Expand Down
13 changes: 13 additions & 0 deletions tests/Fixture/SentencesFixture.php
Original file line number Diff line number Diff line change
Expand Up @@ -765,5 +765,18 @@ class SentencesFixture extends TestFixture {
'license' => 'CC BY 2.0 FR',
'based_on_id' => '55',
),
array(
'id' => '58',
'lang' => 'uzb',
'text' => 'Ишингни қил!',
'correctness' => '0',
'user_id' => '7',
'created' => '2020-01-22 22:22:22',
'modified' => '2020-01-22 22:22:22',
'script' => 'Cyrl',
'hash' => "rjskda\0\0\0\0\0\0\0\0\0\0",
'license' => 'CC BY 2.0 FR',
'based_on_id' => '0',
),
);
}
10 changes: 10 additions & 0 deletions tests/Fixture/TranscriptionsFixture.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,15 @@ class TranscriptionsFixture extends TestFixture {
'created' => '2014-10-18 17:43:32',
'modified' => '2014-10-18 17:43:32'
),
array(
'id' => 4,
'sentence_id' => 58,
'script' => 'Latn',
'text' => 'Ishingni qil!',
'user_id' => null,
'needsReview' => 0,
'created' => '2020-01-22 22:22:22',
'modified' => '2020-01-22 22:22:22'
),
);
}
22 changes: 22 additions & 0 deletions tests/TestCase/Lib/AutotranscriptionTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,28 @@ function testPinyin() {
$this->assertInvalidTranscriptions('cmn', 'Hant', 'Latn', $testBad);
}

/**
 * Simplified/traditional validation must accept Han-for-Han substitutions
 * and reject any change in length or in non-Han characters.
 */
function testHansHantValidation() {
    // The sentence and the valid transcriptions end with the full-width
    // question mark "？"; with an ASCII "?" everywhere, the same string
    // would be listed as both valid and invalid.
    $testGood = array(
        '門開著嗎？' => array(
            '门开着吗？',
            '門開著嗎？',
        ),
    );
    $testBad = array(
        '門開著嗎？' => array(
            '门开着',       // too short
            '门开着吗',     // punctuation missing
            '门开着吗吗',   // Han character where punctuation is expected
            '门开着吗?',    // ASCII "?" instead of full-width "？"
            '门开着吗？啊', // too long
        ),
    );
    // The same cases are checked in both conversion directions.
    foreach (array('Hans' => 'Hant', 'Hant' => 'Hans') as $script => $oppositeScript) {
        $this->assertValidTranscriptions('cmn', $script, $oppositeScript, $testGood);
        $this->assertInvalidTranscriptions('cmn', $script, $oppositeScript, $testBad);
    }
}

function _mockHttpClient($body) {
$response = $this->getMockBuilder(Cake\Http\Response::class)
->setMethods(['isOk', 'getStringBody'])
Expand Down
8 changes: 4 additions & 4 deletions tests/TestCase/Model/Table/TranscriptionsTableTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -426,10 +426,10 @@ function testCanCreateReadonlyTranscriptions() {

/**
 * Saving new text over transcription 4 (the Latn transcription of
 * sentence 58) must be refused.
 */
function testCannotUpdateReadonlyTranscriptions() {
    // Each key appears once: duplicate keys in an array literal are
    // silently overridden by the last occurrence, so the earlier
    // id 2 / 'Hant' entries never took effect.
    $result = (bool)$this->Transcription->saveTranscription(array(
        'id' => 4,
        'sentence_id' => 58,
        'script' => 'Latn',
        'text' => 'Ishingni qqq!',
    ));
    $this->assertFalse($result);
}
Expand Down