From 58d57570d804ab0524357ee81704eea720a05555 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=99=82=E6=B5=81?=
Date: Tue, 10 Mar 2020 02:00:55 +0800
Subject: [PATCH 1/3] Enable edition of simplified/traditional Chinese

Refs #2007.
---
 src/Model/Entity/Transcription.php      | 2 --
 src/Model/Table/TranscriptionsTable.php | 2 --
 2 files changed, 4 deletions(-)

diff --git a/src/Model/Entity/Transcription.php b/src/Model/Entity/Transcription.php
index e555ed3382..04cd733a1e 100644
--- a/src/Model/Entity/Transcription.php
+++ b/src/Model/Entity/Transcription.php
@@ -37,7 +37,6 @@ class Transcription extends Entity
         'cmn-Hans' => array(
             'Hant' => array(
                 'type' => 'altscript',
-                'readonly' => true,
             ),
             'Latn' => array(
             ),
@@ -45,7 +44,6 @@ class Transcription extends Entity
         'cmn-Hant' => array(
             'Hans' => array(
                 'type' => 'altscript',
-                'readonly' => true,
             ),
             'Latn' => array(
             ),
diff --git a/src/Model/Table/TranscriptionsTable.php b/src/Model/Table/TranscriptionsTable.php
index 097c1a92ee..322d713c69 100644
--- a/src/Model/Table/TranscriptionsTable.php
+++ b/src/Model/Table/TranscriptionsTable.php
@@ -47,7 +47,6 @@ class TranscriptionsTable extends Table
         'cmn-Hans' => array(
             'Hant' => array(
                 'type' => 'altscript',
-                'readonly' => true,
             ),
             'Latn' => array(
             ),
@@ -55,7 +54,6 @@ class TranscriptionsTable extends Table
         'cmn-Hant' => array(
             'Hans' => array(
                 'type' => 'altscript',
-                'readonly' => true,
             ),
             'Latn' => array(
             ),
From d4db2eab586b81f8fba856afb0fedaa9e1d3e6f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=99=82=E6=B5=81?=
Date: Tue, 10 Mar 2020 02:02:37 +0800
Subject: [PATCH 2/3] Validate edited simplified/traditional Chinese

---
 src/Lib/Autotranscription.php                | 91 ++++++++++++++------
 tests/TestCase/Lib/AutotranscriptionTest.php | 22 +++++
 2 files changed, 85 insertions(+), 28 deletions(-)

diff --git a/src/Lib/Autotranscription.php b/src/Lib/Autotranscription.php
index fd0b36172d..6e47550eb3 100644
--- a/src/Lib/Autotranscription.php
+++ b/src/Lib/Autotranscription.php
@@ -163,6 +163,37 @@ private function _unpack_grouped_furigana(&$group)
         return $formatted;
     }
 
+    /**
+     * Append an error to $errors unless $transcr is identical to $sentence,
+     * quoting the first character of $transcr that differs (if there is one).
+     */
+    private function _errorIfNonEqual(&$errors, $sentence, $transcr) {
+        if ($sentence === $transcr) {
+            return;
+        }
+        /* XOR yields "\0" where bytes agree: strspn() = common prefix size */
+        $offset = strspn($transcr ^ $sentence, "\0");
+        $character = mb_substr(mb_strcut($transcr, $offset), 0, 1);
+        /* Test for emptiness explicitly: a lone "0" is falsy in PHP */
+        if (strlen($character) > 0) {
+            $errors[] = format(
+                __(
+                    'The provided sentence differs from the original one '.
+                    'near “{character}”.',
+                    true),
+                compact('character')
+            );
+        } else {
+            $errors[] = format(
+                __(
+                    'The provided sentence is shorter than the '.
+                    'original one.',
+                    true),
+                compact('character')
+            );
+        }
+    }
+
     /**
      * Convert Japanese text into furigana.
      */
@@ -221,34 +252,7 @@ public function jpn_Jpan_to_Hrkt_validate($sentenceText, $transcr, &$errors) {
         $tokenizeFuriRegex = '/\[([^|]+)\|([\p{Hiragana}\p{Katakana}ー|]*)\]/u';
         $withoutFuri = preg_replace($tokenizeFuriRegex, '$1', $transcr);
 
-        if ($sentenceText !== $withoutFuri) {
-            /* Find the first character that differs */
-            $character = mb_substr(
-                mb_strcut(
-                    $withoutFuri,
-                    strspn($withoutFuri ^ $sentenceText, "\0")
-                ),
-                0,
-                1
-            );
-            if ($character) {
-                $errors[] = format(
-                    __(
-                        'The provided sentence differs from the original one '.
-                        'near “{character}”.',
-                        true),
-                    compact('character')
-                );
-            } else {
-                $errors[] = format(
-                    __(
-                        'The provided sentence is shorter than the '.
-                        'original one.',
-                        true),
-                    compact('character')
-                );
-            }
-        }
+        $this->_errorIfNonEqual($errors, $sentenceText, $withoutFuri);
 
         $withFuri = preg_replace('/\[([^|]+)\|+\]/u', '$1', $transcr);
         $withFuri = preg_replace($tokenizeFuriRegex, '$2', $withFuri);
@@ -333,6 +337,37 @@ private function _basic_pinyin_cleanup($text) {
         return $text;
     }
 
+    /**
+     * Validate an edited Hans/Hant transcription: it must equal $sentence
+     * except that any Han character may be swapped for another Han one.
+     * Returns true when $errors is empty afterwards.
+     */
+    public function cmn_Hans_to_Hant_validate($sentence, $transcr, &$errors) {
+        if (mb_strlen($sentence) < mb_strlen($transcr)) {
+            $errors[] = __('The provided sentence is longer than '
+                          .'the original one.');
+            return false;
+        }
+
+        // Rebuild $transcr with Han-for-Han swaps undone, then compare
+        $sentenceA = preg_split("//u", $sentence, -1, PREG_SPLIT_NO_EMPTY);
+        $transcrA = preg_split("//u", $transcr, -1, PREG_SPLIT_NO_EMPTY);
+        $transcrComp = '';
+        foreach ($transcrA as $i => $charT) {
+            $bothHan = preg_match('/\p{Han}/u', $sentenceA[$i]) === 1
+                && preg_match('/\p{Han}/u', $charT) === 1;
+            $transcrComp .= $bothHan ? $sentenceA[$i] : $charT;
+        }
+        $this->_errorIfNonEqual($errors, $sentence, $transcrComp);
+
+        return count($errors) === 0;
+    }
+
+    /** Hant-to-Hans uses the same rule, which is symmetric. */
+    public function cmn_Hant_to_Hans_validate($sentence, $transcr, &$errors) {
+        return $this->cmn_Hans_to_Hant_validate($sentence, $transcr, $errors);
+    }
+
     public function cmn_Hant_to_Latn_generate($text, &$needsReview) {
         $pinyin = $this->_call_sinoparserd('pinyin', $text);
         $pinyin = $this->_basic_pinyin_cleanup($pinyin);
diff --git a/tests/TestCase/Lib/AutotranscriptionTest.php b/tests/TestCase/Lib/AutotranscriptionTest.php
index 5d7a2bba40..11c0d92fe8 100644
--- a/tests/TestCase/Lib/AutotranscriptionTest.php
+++ b/tests/TestCase/Lib/AutotranscriptionTest.php
@@ -109,6 +109,28 @@ function testPinyin() {
         $this->assertInvalidTranscriptions('cmn', 'Hant', 'Latn', $testBad);
     }
 
+    function testHansHantValidation() {
+        $testGood = array(
+            '門開著嗎？' => array(
+                '门开着吗？',
+                '門開著嗎？',
+            ),
+        );
+        $testBad = array(
+            '門開著嗎？' => array(
+                '门开着',
+                '门开着吗',
+                '门开着吗吗',
+                '门开着吗?',
+                '门开着吗？啊',
+            ),
+        );
+        foreach (array('Hans' => 'Hant', 'Hant' => 'Hans') as $script => $oppositeScript) {
+            $this->assertValidTranscriptions('cmn', $script, $oppositeScript, $testGood);
+            $this->assertInvalidTranscriptions('cmn', $script, $oppositeScript, $testBad);
+        }
+    }
+
     function _mockHttpClient($body) {
         $response = $this->getMockBuilder(Cake\Http\Response::class)
             ->setMethods(['isOk', 'getStringBody'])
From 17d5128d326d054124725f8bd2beb5c43c2d7953 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=99=82=E6=B5=81?=
Date: Tue, 10 Mar 2020 14:28:13 +0800
Subject: [PATCH 3/3] Fix test about readonly transcriptions

---
 tests/Fixture/SentencesFixture.php                  | 13 +++++++++++++
 tests/Fixture/TranscriptionsFixture.php             | 10 ++++++++++
 .../Model/Table/TranscriptionsTableTest.php         |  8 ++++----
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/tests/Fixture/SentencesFixture.php b/tests/Fixture/SentencesFixture.php
index bc45594376..a836d9e569 100644
--- a/tests/Fixture/SentencesFixture.php
+++ b/tests/Fixture/SentencesFixture.php
@@ -765,5 +765,18 @@ class SentencesFixture extends TestFixture {
             'license' => 'CC BY 2.0 FR',
             'based_on_id' => '55',
         ),
+        array(
+            'id' => '58',
+            'lang' => 'uzb',
+            'text' => 'Ишингни қил!',
+            'correctness' => '0',
+            'user_id' => '7',
+            'created' => '2020-01-22 22:22:22',
+            'modified' => '2020-01-22 22:22:22',
+            'script' => 'Cyrl',
+            'hash' => "rjskda\0\0\0\0\0\0\0\0\0\0",
+            'license' => 'CC BY 2.0 FR',
+            'based_on_id' => '0',
+        ),
     );
 }
diff --git a/tests/Fixture/TranscriptionsFixture.php b/tests/Fixture/TranscriptionsFixture.php
index 75d5a3096f..46997cc2d7 100644
--- a/tests/Fixture/TranscriptionsFixture.php
+++ b/tests/Fixture/TranscriptionsFixture.php
@@ -51,5 +51,15 @@ class TranscriptionsFixture extends TestFixture {
             'created' => '2014-10-18 17:43:32',
             'modified' => '2014-10-18 17:43:32'
         ),
+        array(
+            'id' => 4,
+            'sentence_id' => 58,
+            'script' => 'Latn',
+            'text' => 'Ishingni qil!',
+            'user_id' => null,
+            'needsReview' => 0,
+            'created' => '2020-01-22 22:22:22',
+            'modified' => '2020-01-22 22:22:22'
+        ),
     );
 }
diff --git a/tests/TestCase/Model/Table/TranscriptionsTableTest.php b/tests/TestCase/Model/Table/TranscriptionsTableTest.php
index c1373f6bb7..628c0da2b4 100644
--- a/tests/TestCase/Model/Table/TranscriptionsTableTest.php
+++ b/tests/TestCase/Model/Table/TranscriptionsTableTest.php
@@ -426,10 +426,10 @@ function testCanCreateReadonlyTranscriptions() {
 
     function testCannotUpdateReadonlyTranscriptions() {
         $result = (bool)$this->Transcription->saveTranscription(array(
-            'id' => 2,
-            'sentence_id' => 2,
-            'script' => 'Hant',
-            'text' => '問題的根源是,在當今世界,愚人充滿了自信,而智者充滿了懷疑。',
+            'id' => 4,
+            'sentence_id' => 58,
+            'script' => 'Latn',
+            'text' => 'Ishingni qqq!',
         ));
         $this->assertFalse($result);
     }