Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support optional charset parameter for full UTF-8 support (utf8mb4) #135

Merged
merged 1 commit into from
Jul 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,16 @@ authentication. You can explicitly pass a custom timeout value in seconds
$factory->createConnection('localhost?timeout=0.5');
```

By default, the connection uses the `utf8` charset encoding. Note that
MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now
known as UTF-8 and, for historical reasons, stores at most three bytes
per character, so it doesn't support emojis and other characters outside
the Basic Multilingual Plane. If you want full UTF-8 support, you can
pass the `utf8mb4` charset encoding like this:

```php
$factory->createConnection('localhost?charset=utf8mb4');
```

#### createLazyConnection()

Creates a new connection.
Expand Down Expand Up @@ -274,6 +284,16 @@ timeout) like this:
$factory->createLazyConnection('localhost?idle=0.1');
```

By default, the connection uses the `utf8` charset encoding. Note that
MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now
known as UTF-8 and, for historical reasons, stores at most three bytes
per character, so it doesn't support emojis and other characters outside
the Basic Multilingual Plane. If you want full UTF-8 support, you can
pass the `utf8mb4` charset encoding like this:

```php
$factory->createLazyConnection('localhost?charset=utf8mb4');
```

### ConnectionInterface

The `ConnectionInterface` represents a connection that is responsible for
Expand Down
43 changes: 41 additions & 2 deletions src/Commands/AuthenticateCommand.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

/**
* @internal
* @link https://dev.mysql.com/doc/internals/en/connection-phase-packets.html#packet-Protocol::HandshakeResponse
*/
class AuthenticateCommand extends AbstractCommand
{
Expand All @@ -15,13 +16,51 @@ class AuthenticateCommand extends AbstractCommand
private $dbname;

private $maxPacketSize = 0x1000000;
private $charsetNumber = 0x21;

public function __construct($user, $passwd, $dbname)
/**
* @var int
* @link https://dev.mysql.com/doc/internals/en/character-set.html#packet-Protocol::CharacterSet
*/
private $charsetNumber;

/**
* Mapping from charset name to internal charset ID
*
* The constructor looks up the requested charset name in this map, rejects
* names that are not listed and stores the matching ID in `$charsetNumber`.
*
* Note that this map currently only contains ASCII-compatible charset encodings
* because of quoting rules as defined in the `Query` class.
*
* NOTE(review): the IDs are presumably the collation IDs from the MySQL
* character set table linked on `$charsetNumber` - confirm before adding entries.
*
* @var array<string,int>
* @see self::$charsetNumber
* @see \React\MySQL\Io\Query::$escapeChars
*/
private static $charsetMap = array(
'latin1' => 8,
'latin2' => 9,
'ascii' => 11,
'latin5' => 30,
'utf8' => 33,
'latin7' => 41,
'utf8mb4' => 45,
'binary' => 63
);

/**
 * Creates the authentication command for the given credentials and charset.
 *
 * The charset name is validated against `self::$charsetMap` before any
 * property is assigned, so an unsupported value never leaves a partially
 * initialized command behind.
 *
 * @param string $user
 * @param string $passwd
 * @param string $dbname
 * @param string $charset charset name, must be a key of `self::$charsetMap`
 * @throws \InvalidArgumentException for invalid/unknown charset name
 */
public function __construct($user, $passwd, $dbname, $charset)
{
    // Reject non-string values up front: an array (e.g. parsed from a
    // `?charset[]=utf8` query string) used as array offset inside isset()
    // raises a warning or TypeError depending on the PHP version instead of
    // the documented InvalidArgumentException that callers catch.
    if (!\is_string($charset) || !isset(self::$charsetMap[$charset])) {
        throw new \InvalidArgumentException('Unsupported charset selected');
    }

    $this->user = $user;
    $this->passwd = $passwd;
    $this->dbname = $dbname;
    $this->charsetNumber = self::$charsetMap[$charset];
}

public function getId()
Expand Down
49 changes: 38 additions & 11 deletions src/Factory.php
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,16 @@ public function __construct(LoopInterface $loop = null, ConnectorInterface $conn
* $factory->createConnection('localhost?timeout=0.5');
* ```
*
* By default, the connection uses the `utf8` charset encoding. Note that
* MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now
* known as UTF-8 and, for historical reasons, stores at most three bytes
* per character, so it doesn't support emojis and other characters outside
* the Basic Multilingual Plane. If you want full UTF-8 support, you can
* pass the `utf8mb4` charset encoding like this:
*
* ```php
* $factory->createConnection('localhost?charset=utf8mb4');
* ```
*
* @param string $uri
* @return PromiseInterface Promise<ConnectionInterface, Exception>
*/
Expand All @@ -153,6 +163,22 @@ public function createConnection($uri)
return \React\Promise\reject(new \InvalidArgumentException('Invalid connect uri given'));
}

$args = [];
if (isset($parts['query'])) {
parse_str($parts['query'], $args);
}

try {
$authCommand = new AuthenticateCommand(
isset($parts['user']) ? rawurldecode($parts['user']) : 'root',
isset($parts['pass']) ? rawurldecode($parts['pass']) : '',
isset($parts['path']) ? rawurldecode(ltrim($parts['path'], '/')) : '',
isset($args['charset']) ? $args['charset'] : 'utf8'
);
} catch (\InvalidArgumentException $e) {
return \React\Promise\reject($e);
}

$connecting = $this->connector->connect(
$parts['host'] . ':' . (isset($parts['port']) ? $parts['port'] : 3306)
);
Expand All @@ -168,16 +194,12 @@ public function createConnection($uri)
$connecting->cancel();
});

$connecting->then(function (SocketConnectionInterface $stream) use ($parts, $deferred) {
$connecting->then(function (SocketConnectionInterface $stream) use ($authCommand, $deferred) {
$executor = new Executor();
$parser = new Parser($stream, $executor);

$connection = new Connection($stream, $executor);
$command = $executor->enqueue(new AuthenticateCommand(
isset($parts['user']) ? rawurldecode($parts['user']) : 'root',
isset($parts['pass']) ? rawurldecode($parts['pass']) : '',
isset($parts['path']) ? rawurldecode(ltrim($parts['path'], '/')) : ''
));
$command = $executor->enqueue($authCommand);
$parser->start();

$command->on('success', function () use ($deferred, $connection) {
Expand All @@ -191,11 +213,6 @@ public function createConnection($uri)
$deferred->reject(new \RuntimeException('Unable to connect to database server', 0, $error));
});

$args = [];
if (isset($parts['query'])) {
parse_str($parts['query'], $args);
}

// use timeout from explicit ?timeout=x parameter or default to PHP's default_socket_timeout (60)
$timeout = (float) isset($args['timeout']) ? $args['timeout'] : ini_get("default_socket_timeout");
if ($timeout < 0) {
Expand Down Expand Up @@ -317,6 +334,16 @@ public function createConnection($uri)
* $factory->createLazyConnection('localhost?idle=0.1');
* ```
*
* By default, the connection uses the `utf8` charset encoding. Note that
* MySQL's `utf8` encoding (also known as `utf8mb3`) predates what is now
* known as UTF-8 and, for historical reasons, stores at most three bytes
* per character, so it doesn't support emojis and other characters outside
* the Basic Multilingual Plane. If you want full UTF-8 support, you can
* pass the `utf8mb4` charset encoding like this:
*
* ```php
* $factory->createLazyConnection('localhost?charset=utf8mb4');
* ```
*
* @param string $uri
* @return ConnectionInterface
*/
Expand Down
9 changes: 9 additions & 0 deletions src/Io/Query.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@ class Query

private $params = [];

/**
* Mapping from byte/character to escaped character string
*
* Note that this mapping assumes an ASCII-compatible charset encoding such
* as UTF-8, ISO 8859 and others.
*
* @var array<string,string>
* @see \React\MySQL\Commands\AuthenticateCommand::$charsetMap
*/
private $escapeChars = array(
"\x00" => "\\0",
"\r" => "\\r",
Expand Down
28 changes: 28 additions & 0 deletions tests/Commands/AuthenticateCommandTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php

namespace React\Tests\MySQL\Commands;

use PHPUnit\Framework\TestCase;
use React\MySQL\Commands\AuthenticateCommand;

class AuthenticateCommandTest extends TestCase
{
    /**
     * Constructing the command with a charset name from the supported map
     * must complete without throwing.
     *
     * @doesNotPerformAssertions
     */
    public function testCtorWithKnownCharset()
    {
        new AuthenticateCommand('Alice', 'secret', '', 'utf8');
    }

    /**
     * Constructing the command with a charset name that is not in the
     * supported map must throw an InvalidArgumentException.
     */
    public function testCtorWithUnknownCharsetThrows()
    {
        // legacy PHPUnit < 5.2 only provides setExpectedException()
        $expectMethod = method_exists($this, 'expectException')
            ? 'expectException'
            : 'setExpectedException';
        $this->$expectMethod('InvalidArgumentException');

        new AuthenticateCommand('Alice', 'secret', '', 'utf16');
    }
}
13 changes: 13 additions & 0 deletions tests/FactoryTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,19 @@ public function testConnectWithInvalidUriWillRejectWithoutConnecting()
$promise->then(null, $this->expectCallableOnce());
}

/**
 * An unknown `?charset=` value must yield a rejected promise while the
 * connector's connect() method is never invoked.
 */
public function testConnectWithInvalidCharsetWillRejectWithoutConnecting()
{
    $connector = $this->getMockBuilder('React\Socket\ConnectorInterface')->getMock();
    $connector->expects($this->never())->method('connect');
    $loop = $this->getMockBuilder('React\EventLoop\LoopInterface')->getMock();

    $factory = new Factory($loop, $connector);
    $rejected = $factory->createConnection('localhost?charset=unknown');

    $this->assertInstanceOf('React\Promise\PromiseInterface', $rejected);

    $onRejected = $this->expectCallableOnce();
    $rejected->then(null, $onRejected);
}

public function testConnectWithInvalidHostRejectsWithConnectionError()
{
$loop = \React\EventLoop\Factory::create();
Expand Down
33 changes: 33 additions & 0 deletions tests/ResultQueryTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,39 @@ public function testSelectStaticTextTwoColumnsWithSameNameOverwritesValue()
$loop->run();
}

/**
 * Without an explicit charset parameter, the server must report `utf8`
 * as the client character set of the connection.
 */
public function testSelectCharsetDefaultsToUtf8()
{
    $loop = \React\EventLoop\Factory::create();
    $connection = $this->createConnection($loop);

    $promise = $connection->query('SELECT @@character_set_client');
    $promise->then(function (QueryResult $result) {
        // expect exactly one row with exactly one column holding the charset name
        $rows = $result->resultRows;
        $this->assertCount(1, $rows);
        $this->assertCount(1, $rows[0]);
        $this->assertSame('utf8', reset($rows[0]));
    });

    $connection->quit();
    $loop->run();
}

/**
 * With an explicit `?charset=latin1` parameter on a lazy connection, the
 * server must report `latin1` as the client character set.
 */
public function testSelectWithExplcitCharsetReturnsCharset()
{
    $loop = \React\EventLoop\Factory::create();

    $factory = new Factory($loop);
    $connection = $factory->createLazyConnection($this->getConnectionString() . '?charset=latin1');

    $promise = $connection->query('SELECT @@character_set_client');
    $promise->then(function (QueryResult $result) {
        // expect exactly one row with exactly one column holding the charset name
        $rows = $result->resultRows;
        $this->assertCount(1, $rows);
        $this->assertCount(1, $rows[0]);
        $this->assertSame('latin1', reset($rows[0]));
    });

    $connection->quit();
    $loop->run();
}

public function testSimpleSelect()
{
$loop = \React\EventLoop\Factory::create();
Expand Down