Skip to content

Commit

Permalink
misc: replace regex-based type parser with character-based one
Browse files Browse the repository at this point in the history
This commit introduces a complete rewrite of the first layer of the type
parser. The previous one used a regex to split a raw type into tokens,
but that led to limitations — mostly concerning quoted strings — that
are now fixed.

Examples of previous limitations, now solved:

```php
// Union of strings containing space chars
(new MapperBuilder())
    ->mapper()
    ->map(
        "'foo bar'|'baz fiz'",
        'baz fiz'
    );

// Shaped array with special chars in the key
(new MapperBuilder())
    ->mapper()
    ->map(
        "array{'some & key': string}",
        ['some & key' => 'value']
    );
```
  • Loading branch information
romm committed Aug 16, 2023
1 parent 1964d41 commit a5f6e96
Show file tree
Hide file tree
Showing 11 changed files with 664 additions and 492 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use CuyZ\Valinor\Definition\PropertyDefinition;
use CuyZ\Valinor\Definition\Repository\AttributesRepository;
use CuyZ\Valinor\Definition\Repository\ClassDefinitionRepository;
use CuyZ\Valinor\Type\ClassType;
use CuyZ\Valinor\Type\GenericType;
use CuyZ\Valinor\Type\Parser\Exception\InvalidType;
use CuyZ\Valinor\Type\Parser\Factory\Specifications\AliasSpecification;
Expand All @@ -23,11 +24,11 @@
use CuyZ\Valinor\Type\Parser\Factory\TypeParserFactory;
use CuyZ\Valinor\Type\Parser\TypeParser;
use CuyZ\Valinor\Type\Type;
use CuyZ\Valinor\Type\ClassType;
use CuyZ\Valinor\Type\Types\UnresolvableType;
use CuyZ\Valinor\Utility\Reflection\Reflection;
use ReflectionMethod;
use ReflectionProperty;
use CuyZ\Valinor\Utility\Reflection\DocParser;

use function array_filter;
use function array_keys;
Expand Down Expand Up @@ -156,7 +157,7 @@ private function typeResolver(ClassType $type, string $targetClass): ReflectionT
private function localTypeAliases(ClassType $type): array
{
$reflection = Reflection::class($type->className());
$rawTypes = Reflection::localTypeAliases($reflection);
$rawTypes = DocParser::localTypeAliases($reflection);

$typeParser = $this->typeParser($type);

Expand All @@ -181,7 +182,7 @@ private function localTypeAliases(ClassType $type): array
private function importedTypeAliases(ClassType $type): array
{
$reflection = Reflection::class($type->className());
$importedTypesRaw = Reflection::importedTypeAliases($reflection);
$importedTypesRaw = DocParser::importedTypeAliases($reflection);

$typeParser = $this->typeParser($type);

Expand Down
28 changes: 21 additions & 7 deletions src/Definition/Repository/Reflection/ReflectionTypeResolver.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use CuyZ\Valinor\Type\Type;
use CuyZ\Valinor\Type\Types\MixedType;
use CuyZ\Valinor\Type\Types\UnresolvableType;
use CuyZ\Valinor\Utility\Reflection\DocParser;
use CuyZ\Valinor\Utility\Reflection\Reflection;
use ReflectionFunctionAbstract;
use ReflectionParameter;
Expand All @@ -23,7 +24,7 @@ public function __construct(
private TypeParser $advancedParser
) {}

public function resolveType(\ReflectionProperty|\ReflectionParameter|\ReflectionFunctionAbstract $reflection): Type
public function resolveType(ReflectionProperty|ReflectionParameter|ReflectionFunctionAbstract $reflection): Type
{
$nativeType = $this->nativeType($reflection);
$typeFromDocBlock = $this->typeFromDocBlock($reflection);
Expand Down Expand Up @@ -51,11 +52,24 @@ public function resolveType(\ReflectionProperty|\ReflectionParameter|\Reflection
return $typeFromDocBlock;
}

private function typeFromDocBlock(\ReflectionProperty|\ReflectionParameter|\ReflectionFunctionAbstract $reflection): ?Type
private function typeFromDocBlock(ReflectionProperty|ReflectionParameter|ReflectionFunctionAbstract $reflection): ?Type
{
$type = $reflection instanceof ReflectionFunctionAbstract
? Reflection::docBlockReturnType($reflection)
: Reflection::docBlockType($reflection);
if ($reflection instanceof ReflectionFunctionAbstract) {
$type = DocParser::functionReturnType($reflection);
} elseif ($reflection instanceof ReflectionProperty) {
$type = DocParser::propertyType($reflection);
} else {
$type = null;

if ($reflection->isPromoted()) {
// @phpstan-ignore-next-line / parameter is promoted so class exists for sure
$type = DocParser::propertyType($reflection->getDeclaringClass()->getProperty($reflection->name));
}

if ($type === null) {
$type = DocParser::parameterType($reflection);
}
}

if ($type === null) {
return null;
Expand All @@ -64,7 +78,7 @@ private function typeFromDocBlock(\ReflectionProperty|\ReflectionParameter|\Refl
return $this->parseType($type, $reflection, $this->advancedParser);
}

private function nativeType(\ReflectionProperty|\ReflectionParameter|\ReflectionFunctionAbstract $reflection): ?Type
private function nativeType(ReflectionProperty|ReflectionParameter|ReflectionFunctionAbstract $reflection): ?Type
{
$reflectionType = $reflection instanceof ReflectionFunctionAbstract
? $reflection->getReturnType()
Expand All @@ -83,7 +97,7 @@ private function nativeType(\ReflectionProperty|\ReflectionParameter|\Reflection
return $this->parseType($type, $reflection, $this->nativeParser);
}

private function parseType(string $raw, \ReflectionProperty|\ReflectionParameter|\ReflectionFunctionAbstract $reflection, TypeParser $parser): Type
private function parseType(string $raw, ReflectionProperty|ReflectionParameter|ReflectionFunctionAbstract $reflection, TypeParser $parser): Type
{
try {
return $parser->parse($raw);
Expand Down
4 changes: 2 additions & 2 deletions src/Type/Parser/Factory/LexingTypeParserFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ private function nativeParser(): TypeParser
{
$lexer = new NativeLexer();
$lexer = new AdvancedClassLexer($lexer, $this, $this->templateParser);
$lexer = new LexingParser($lexer);
$parser = new LexingParser($lexer);

return new CachedParser($lexer);
return new CachedParser($parser);
}
}
3 changes: 2 additions & 1 deletion src/Type/Parser/Lexer/Token/AdvancedClassNameToken.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
use CuyZ\Valinor\Type\Types\ArrayKeyType;
use CuyZ\Valinor\Type\ClassType;
use CuyZ\Valinor\Type\Types\NativeClassType;
use CuyZ\Valinor\Utility\Reflection\DocParser;
use CuyZ\Valinor\Utility\Reflection\Reflection;
use ReflectionClass;

Expand Down Expand Up @@ -182,7 +183,7 @@ private function assignGenerics(string $className, array $templates, array $gene
*/
private function parentType(ReflectionClass $reflection, ReflectionClass $parentReflection, TypeParser $typeParser): NativeClassType
{
$extendedClass = Reflection::extendedClassAnnotation($reflection);
$extendedClass = DocParser::classExtendsTypes($reflection);

if (count($extendedClass) > 1) {
throw new SeveralExtendTagsFound($reflection);
Expand Down
106 changes: 75 additions & 31 deletions src/Type/Parser/LexingParser.php
Original file line number Diff line number Diff line change
@@ -1,26 +1,82 @@
<?php

declare(strict_types=1);

namespace CuyZ\Valinor\Type\Parser;

use CuyZ\Valinor\Type\Parser\Lexer\TokenStream;
use CuyZ\Valinor\Type\Parser\Lexer\TypeLexer;
use CuyZ\Valinor\Type\Type;

use function array_filter;
use function array_map;
use function preg_split;
use function str_contains;
use function array_splice;
use function count;
use function in_array;
use function str_split;

/** @internal */
final class LexingParser implements TypeParser
class LexingParser implements TypeParser
{
public function __construct(private TypeLexer $lexer) {}

public function parse(string $raw): Type
{
$symbols = $this->splitTokens($raw);
$separators = [' ', '|', '&', '<', '>', '[', ']', '{', '}', ':', '?', ','];

$symbols = [];
$current = null;
$quote = null;

foreach (str_split($raw) as $char) {
if ($quote !== null) {
if ($char === $quote) {
if ($current !== null) {
$symbols[] = $current;
$current = null;
}

$symbols[] = $char;
$quote = null;
} else {
$current .= $char;
}

continue;
}

if ($char === '"' || $char === "'") {
if ($current !== null) {
$symbols[] = $current;
$current = null;
}

$quote = $char;
$symbols[] = $char;
continue;
}

if (in_array($char, $separators, true)) {
$count = count($symbols);

if ($char === ':' && $current === null && $count > 0 && $symbols[$count - 1] === ':') {
$symbols[$count - 1] = '::';
continue;
}

if ($current !== null) {
$symbols[] = $current;
}

$symbols[] = $char;
$current = null;
} else {
$current .= $char;
}
}

if ($current !== null) {
$symbols[] = $current;
}

$symbols = $this->detectAnonymousClass($symbols);

$symbols = array_map('trim', $symbols);
$symbols = array_filter($symbols, static fn ($value) => $value !== '');

Expand All @@ -33,33 +89,21 @@ public function parse(string $raw): Type
}

/**
* @return string[]
*/
private function splitTokens(string $raw): array
{
if (str_contains($raw, "@anonymous\0")) {
return $this->splitTokensContainingAnonymousClass($raw);
}

/** @phpstan-ignore-next-line */
return preg_split('/(::|[\s?|&<>,\[\]{}:\'"])/', $raw, -1, PREG_SPLIT_DELIM_CAPTURE);
}

/**
* @return string[]
* @param list<string> $symbols
* @return list<string>
*/
private function splitTokensContainingAnonymousClass(string $raw): array
private function detectAnonymousClass(array $symbols): array
{
/** @var string[] $splits */
$splits = preg_split('/([a-zA-Z_\x7f-\xff][\\\\\w\x7f-\xff]*+@anonymous\x00.*?\.php(?:0x?|:\d++\$)[\da-fA-F]++)/', $raw, -1, PREG_SPLIT_DELIM_CAPTURE);
$symbols = [];
foreach ($symbols as $key => $symbol) {
if (! str_contains($symbol, "@anonymous\0")) {
continue;
}

foreach ($splits as $symbol) {
if (str_contains($symbol, "@anonymous\0")) {
$symbols[] = $symbol;
} else {
$symbols = [...$symbols, ...$this->splitTokens($symbol)];
if (count($symbols) >= $key + 3) {
$symbols[$key] = $symbol . $symbols[$key + 1] . $symbols[$key + 2];
}

array_splice($symbols, $key + 1, 2);
}

return $symbols;
Expand Down
Loading

0 comments on commit a5f6e96

Please sign in to comment.