From ace53d3cb267142cd3eb4d32f017171929b1c7ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=BCck?= Date: Sun, 18 Jul 2021 21:16:51 +0200 Subject: [PATCH] Support optional `charset` parameter for full UTF-8 support (`utf8mb4`) --- README.md | 20 +++++++++ src/Commands/AuthenticateCommand.php | 43 ++++++++++++++++++- src/Factory.php | 49 +++++++++++++++++----- src/Io/Query.php | 9 ++++ tests/Commands/AuthenticateCommandTest.php | 28 +++++++++++++ tests/FactoryTest.php | 13 ++++++ tests/ResultQueryTest.php | 33 +++++++++++++++ 7 files changed, 182 insertions(+), 13 deletions(-) create mode 100644 tests/Commands/AuthenticateCommandTest.php diff --git a/README.md b/README.md index f2e83a8..1452170 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,16 @@ authentication. You can explicitly pass a custom timeout value in seconds $factory->createConnection('localhost?timeout=0.5'); ``` +By default, the connection uses the `utf8` charset encoding. Note that +MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now +known as UTF-8 and for historical reasons doesn't support emojis and +other characters. If you want full UTF-8 support, you can pass the +charset encoding like this: + +```php +$factory->createConnection('localhost?charset=utf8mb4'); +``` + #### createLazyConnection() Creates a new connection. @@ -274,6 +284,16 @@ timeout) like this: $factory->createLazyConnection('localhost?idle=0.1'); ``` +By default, the connection uses the `utf8` charset encoding. Note that +MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now +known as UTF-8 and for historical reasons doesn't support emojis and +other characters. 
If you want full UTF-8 support, you can pass the +charset encoding like this: + +```php +$factory->createLazyConnection('localhost?charset=utf8mb4'); +``` + ### ConnectionInterface The `ConnectionInterface` represents a connection that is responsible for diff --git a/src/Commands/AuthenticateCommand.php b/src/Commands/AuthenticateCommand.php index 4f78024..1a6b64e 100644 --- a/src/Commands/AuthenticateCommand.php +++ b/src/Commands/AuthenticateCommand.php @@ -7,6 +7,7 @@ /** * @internal + * @link https://dev.mysql.com/doc/internals/en/connection-phase-packets.html#packet-Protocol::HandshakeResponse */ class AuthenticateCommand extends AbstractCommand { @@ -15,13 +16,51 @@ class AuthenticateCommand extends AbstractCommand private $dbname; private $maxPacketSize = 0x1000000; - private $charsetNumber = 0x21; - public function __construct($user, $passwd, $dbname) + /** + * @var int + * @link https://dev.mysql.com/doc/internals/en/character-set.html#packet-Protocol::CharacterSet + */ + private $charsetNumber; + + /** + * Mapping from charset name to internal charset ID + * + * Note that this map currently only contains ASCII-compatible charset encodings + * because of quoting rules as defined in the `Query` class. 
+ * + * @var array + * @see self::$charsetNumber + * @see \React\MySQL\Io\Query::$escapeChars + */ + private static $charsetMap = array( + 'latin1' => 8, + 'latin2' => 9, + 'ascii' => 11, + 'latin5' => 30, + 'utf8' => 33, + 'latin7' => 41, + 'utf8mb4' => 45, + 'binary' => 63 + ); + + /** + * @param string $user + * @param string $passwd + * @param string $dbname + * @param string $charset + * @throws \InvalidArgumentException for invalid/unknown charset name + */ + public function __construct($user, $passwd, $dbname, $charset) { + if (!isset(self::$charsetMap[$charset])) { + throw new \InvalidArgumentException('Unsupported charset selected'); + } + $this->user = $user; $this->passwd = $passwd; $this->dbname = $dbname; + $this->charsetNumber = self::$charsetMap[$charset]; } public function getId() diff --git a/src/Factory.php b/src/Factory.php index ed5e248..55f71c5 100644 --- a/src/Factory.php +++ b/src/Factory.php @@ -143,6 +143,16 @@ public function __construct(LoopInterface $loop = null, ConnectorInterface $conn * $factory->createConnection('localhost?timeout=0.5'); * ``` * + * By default, the connection uses the `utf8` charset encoding. Note that + * MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now + * known as UTF-8 and for historical reasons doesn't support emojis and + * other characters. If you want full UTF-8 support, you can pass the + * charset encoding like this: + * + * ```php + * $factory->createConnection('localhost?charset=utf8mb4'); + * ``` + * * @param string $uri * @return PromiseInterface Promise */ @@ -153,6 +163,22 @@ public function createConnection($uri) return \React\Promise\reject(new \InvalidArgumentException('Invalid connect uri given')); } + $args = []; + if (isset($parts['query'])) { + parse_str($parts['query'], $args); + } + + try { + $authCommand = new AuthenticateCommand( + isset($parts['user']) ? rawurldecode($parts['user']) : 'root', + isset($parts['pass']) ? 
rawurldecode($parts['pass']) : '', + isset($parts['path']) ? rawurldecode(ltrim($parts['path'], '/')) : '', + isset($args['charset']) ? $args['charset'] : 'utf8' + ); + } catch (\InvalidArgumentException $e) { + return \React\Promise\reject($e); + } + $connecting = $this->connector->connect( $parts['host'] . ':' . (isset($parts['port']) ? $parts['port'] : 3306) ); @@ -168,16 +194,12 @@ public function createConnection($uri) $connecting->cancel(); }); - $connecting->then(function (SocketConnectionInterface $stream) use ($parts, $deferred) { + $connecting->then(function (SocketConnectionInterface $stream) use ($authCommand, $deferred) { $executor = new Executor(); $parser = new Parser($stream, $executor); $connection = new Connection($stream, $executor); - $command = $executor->enqueue(new AuthenticateCommand( - isset($parts['user']) ? rawurldecode($parts['user']) : 'root', - isset($parts['pass']) ? rawurldecode($parts['pass']) : '', - isset($parts['path']) ? rawurldecode(ltrim($parts['path'], '/')) : '' - )); + $command = $executor->enqueue($authCommand); $parser->start(); $command->on('success', function () use ($deferred, $connection) { @@ -191,11 +213,6 @@ public function createConnection($uri) $deferred->reject(new \RuntimeException('Unable to connect to database server', 0, $error)); }); - $args = []; - if (isset($parts['query'])) { - parse_str($parts['query'], $args); - } - // use timeout from explicit ?timeout=x parameter or default to PHP's default_socket_timeout (60) $timeout = (float) isset($args['timeout']) ? $args['timeout'] : ini_get("default_socket_timeout"); if ($timeout < 0) { @@ -317,6 +334,16 @@ public function createConnection($uri) * $factory->createLazyConnection('localhost?idle=0.1'); * ``` * + * By default, the connection uses the `utf8` charset encoding. Note that + * MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now + * known as UTF-8 and for historical reasons doesn't support emojis and + * other characters. 
If you want full UTF-8 support, you can pass the + * charset encoding like this: + * + * ```php + * $factory->createLazyConnection('localhost?charset=utf8mb4'); + * ``` + * * @param string $uri * @return ConnectionInterface */ diff --git a/src/Io/Query.php b/src/Io/Query.php index 4297efa..f17513a 100644 --- a/src/Io/Query.php +++ b/src/Io/Query.php @@ -13,6 +13,15 @@ class Query private $params = []; + /** + * Mapping from byte/character to escaped character string + * + * Note that this mapping assumes an ASCII-compatible charset encoding such + * as UTF-8, ISO 8859 and others. + * + * @var array + * @see \React\MySQL\Commands\AuthenticateCommand::$charsetMap + */ private $escapeChars = array( "\x00" => "\\0", "\r" => "\\r", diff --git a/tests/Commands/AuthenticateCommandTest.php b/tests/Commands/AuthenticateCommandTest.php new file mode 100644 index 0000000..a351cd7 --- /dev/null +++ b/tests/Commands/AuthenticateCommandTest.php @@ -0,0 +1,28 @@ +expectException('InvalidArgumentException'); + } else { + // legacy PHPUnit < 5.2 + $this->setExpectedException('InvalidArgumentException'); + } + new AuthenticateCommand('Alice', 'secret', '', 'utf16'); + } +} diff --git a/tests/FactoryTest.php b/tests/FactoryTest.php index b7d3bf7..538062b 100644 --- a/tests/FactoryTest.php +++ b/tests/FactoryTest.php @@ -89,6 +89,19 @@ public function testConnectWithInvalidUriWillRejectWithoutConnecting() $promise->then(null, $this->expectCallableOnce()); } + public function testConnectWithInvalidCharsetWillRejectWithoutConnecting() + { + $loop = $this->getMockBuilder('React\EventLoop\LoopInterface')->getMock(); + $connector = $this->getMockBuilder('React\Socket\ConnectorInterface')->getMock(); + $connector->expects($this->never())->method('connect'); + + $factory = new Factory($loop, $connector); + $promise = $factory->createConnection('localhost?charset=unknown'); + + $this->assertInstanceof('React\Promise\PromiseInterface', $promise); + $promise->then(null, 
$this->expectCallableOnce()); + } + public function testConnectWithInvalidHostRejectsWithConnectionError() { $loop = \React\EventLoop\Factory::create(); diff --git a/tests/ResultQueryTest.php b/tests/ResultQueryTest.php index 80e82d5..7dbf993 100644 --- a/tests/ResultQueryTest.php +++ b/tests/ResultQueryTest.php @@ -340,6 +340,39 @@ public function testSelectStaticTextTwoColumnsWithSameNameOverwritesValue() $loop->run(); } + public function testSelectCharsetDefaultsToUtf8() + { + $loop = \React\EventLoop\Factory::create(); + $connection = $this->createConnection($loop); + + $connection->query('SELECT @@character_set_client')->then(function (QueryResult $command) { + $this->assertCount(1, $command->resultRows); + $this->assertCount(1, $command->resultRows[0]); + $this->assertSame('utf8', reset($command->resultRows[0])); + }); + + $connection->quit(); + $loop->run(); + } + + public function testSelectWithExplicitCharsetReturnsCharset() + { + $loop = \React\EventLoop\Factory::create(); + $factory = new Factory($loop); + + $uri = $this->getConnectionString() . '?charset=latin1'; + $connection = $factory->createLazyConnection($uri); + + $connection->query('SELECT @@character_set_client')->then(function (QueryResult $command) { + $this->assertCount(1, $command->resultRows); + $this->assertCount(1, $command->resultRows[0]); + $this->assertSame('latin1', reset($command->resultRows[0])); + }); + + $connection->quit(); + $loop->run(); + } + public function testSimpleSelect() { $loop = \React\EventLoop\Factory::create();