Skip to content

Commit

Permalink
Merge pull request #8 from clue-labs/escapes
Browse files Browse the repository at this point in the history
Support interpreting hex and octal escape sequences
  • Loading branch information
clue authored Dec 18, 2016
2 parents 6fcbecb + e7c8dcd commit 9c7c4e4
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 20 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,10 @@ way more sophisticated. It also supports parsing the following:
* Single quoted strings (`'hello world'`) which preserve any whitespace characters and
only accept escape sequences `\\` and `\'`, e.g. `'let\'s go'`.
* Double quoted strings (`"hello world"`) which preserve any whitespace characters and
support common escape sequences such as `\t\r\n` etc., e.g. `"hi there\nworld!"`.
support common escape sequences such as `\t\r\n` etc., hex escape sequences such as `\x20`
and octal escape sequences such as `\040`, e.g. `"hi there\nworld!"`.
* Unquoted strings are terminated at the next (unescaped) whitespace character and
support common escape sequences such as `\t\r\n` etc., e.g. `hi\ there\nworld!`.
support common escape sequences just like double quoted strings, e.g. `hi\ there\nworld!`.
* Ignores excessive whitespace around arguments, such as trailing whitespace or
multiple spaces between arguments.
* Makes no assumptions about your input encoding, so this works with binary data
Expand Down
39 changes: 21 additions & 18 deletions src/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,6 @@
*/
function split($command)
{
// map of escaped characters and their replacement
static $escapes = array(
'n' => "\n",
'r' => "\r",
't' => "\t",
);

// whitespace characters count as argument separators
static $ws = array(
' ',
Expand All @@ -41,6 +34,7 @@ function split($command)

$inQuote = null;
$argument = '';
$part = '';

// read a single argument
for (; isset($command[$i]); ++$i) {
Expand All @@ -50,48 +44,57 @@ function split($command)
// we're within a 'single quoted' string
if ($c === '\\' && isset($command[$i + 1]) && ($command[$i + 1] === "'" || $command[$i + 1] === '\\')) {
// escaped single quote or backslash ends up as char in argument
$argument .= $command[++$i];
$part .= $command[++$i];
continue;
} elseif ($c === "'") {
// single quote ends
$inQuote = null;
$argument .= $part;
$part = '';
continue;
}
} else {
// we're not within any quotes or within a "double quoted" string
if ($c === '\\' && isset($command[$i + 1])) {
// any escaped character will be processed
$c = $command[++$i];
if (isset($escapes[$c])) {
// apply mapped character if applicable
$argument .= $escapes[$c];
} else {
// pass through original character otherwise
$argument .= $c;
}
// escaped characters will be interpreted when part is complete
$part .= $command[$i] . $command[$i + 1];
++$i;
continue;
} elseif ($inQuote === '"' && $c === '"') {
// double quote ends
$inQuote = null;

// previous double quoted part should be interpreted
$argument .= stripcslashes($part);
$part = '';
continue;
} elseif ($inQuote === null && ($c === '"' || $c === "'")) {
// start of quotes found
$inQuote = $c;

// previous unquoted part should be interpreted
$argument .= stripcslashes($part);
$part = '';
continue;
} elseif ($inQuote === null && in_array($c, $ws)) {
// whitespace character terminates unquoted argument
break;
}
}

$argument .= $c;
$part .= $c;
}

// end of argument reached. Still in quotes is a parse error.
if ($inQuote !== null) {
throw new \RuntimeException('Still in quotes (' . $inQuote . ')');
}

// add remaining part to current argument
if ($part !== '') {
$argument .= stripcslashes($part);
}

$args []= $argument;
}

Expand Down
82 changes: 82 additions & 0 deletions tests/SplitTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,86 @@ public function testSingleStringWithSingleQuotesAndInterpretedEscapes()

$this->assertEquals(array("echo", "let's go"), $args);
}

public function testSingleStringWithInterpretedEscapes()
{
    // the input ends in an escaped backslash, which must end up as one literal backslash
    $expected = array("hello\\");
    $actual = Arguments\split('hello\\\\');

    $this->assertEquals($expected, $actual);
}

public function testSingleStringWithInterpretedIncompleteEscapes()
{
    // a lone trailing backslash has nothing to escape and is expected to be kept as-is
    $this->assertEquals(
        array("hello\\"),
        Arguments\split('hello\\')
    );
}

public function testSingleStringWithInterpretedHexEscapes()
{
    // the hex escape \x20 is expected to be turned into a space inside a single argument
    $input = 'hello\x20world';
    $result = Arguments\split($input);

    $this->assertEquals(array("hello world"), $result);
}

public function testSingleStringWithInterpretedIncompleteHexEscapesEnd()
{
    // a hex escape with only one digit at the very end of the input
    // is expected to yield a tab character
    $expected = array("hello\t");

    $this->assertEquals($expected, Arguments\split('hello\x9'));
}

public function testSingleStringWithInterpretedIncompleteHexEscapesMiddle()
{
    // a single-digit hex escape followed by a non-hex character ("w")
    // is expected to yield a tab character followed by the remaining text
    $actual = Arguments\split('hello\x9world');
    $expected = array("hello\tworld");

    $this->assertEquals($expected, $actual);
}

public function testSingleStringWithInterpretedOctalEscapes()
{
    // the three-digit octal escape \040 is expected to be turned into a space
    $this->assertEquals(
        array("hello world"),
        Arguments\split('hello\040world')
    );
}

public function testSingleStringWithInterpretedShortOctalEscapes()
{
    // the two-digit octal escape \40 is expected to behave like \040 (a space)
    $input = 'hello\40world';
    $expected = array("hello world");

    $this->assertEquals($expected, Arguments\split($input));
}

public function testSingleStringWithUninterpretedNumberIsNotAnOctalEscape()
{
    // the input contains a single backslash followed by "999"; this is expected
    // to pass the digits through unchanged and only drop the backslash
    $result = Arguments\split('hello\\999world');

    $this->assertEquals(array("hello999world"), $result);
}

// "\n"\n"\n"
public function testSingleStringWithCombinedDoubleQuotedPartsWithInterpretedEscapes()
{
    // two double quoted parts with an unquoted part in between form one argument;
    // the \n escape is expected to be interpreted in all three parts
    $input = '"\n"\n"\n"';
    $expected = array("\n\n\n");

    $this->assertEquals($expected, Arguments\split($input));
}

// '\n'\n'\n'
public function testSingleStringWithCombinedSingleQuotedPartsWithInterpretedEscapesOnlyInInnerUnquotedPart()
{
    // four single quotes joined by a literal backslash-n build the input '\n'\n'\n'
    $quote = "'";
    $input = implode('\n', array($quote, $quote, $quote, $quote));

    // the single quoted parts are expected to keep \n literally,
    // only the unquoted part in the middle becomes a newline
    $expected = array("\\n\n\\n");

    $this->assertEquals($expected, Arguments\split($input));
}

// \n'\n'\n
public function testSingleStringWithCombinedSingleQuotedPartsWithInterpretedEscapesOnlyInOuterUnquotedParts()
{
    // three literal backslash-n sequences joined by single quotes build the input \n'\n'\n
    $quote = "'";
    $input = implode($quote, array('\n', '\n', '\n'));

    // the unquoted outer parts are expected to become newlines,
    // the single quoted part in the middle keeps \n literally
    $expected = array("\n\\n\n");

    $this->assertEquals($expected, Arguments\split($input));
}
}

0 comments on commit 9c7c4e4

Please sign in to comment.