diff --git a/README.md b/README.md index 5074a36..3e0a68a 100644 --- a/README.md +++ b/README.md @@ -98,9 +98,10 @@ way more sophisticated. It also supports parsing the following: * Single quoted strings (`'hello world'`) which preserve any whitespace characters and only accept escape sequences `\\` and `\'`, e.g. `'let\'s go'`. * Double quoted strings (`"hello world"`) which preserve any whitespace characters and - support common escape sequences such as `\t\r\n` etc., e.g. `"hi there\nworld!"`. + support common escape sequences such as `\t\r\n` etc., hex escape sequences such as `\x20` + and octal escape sequences such as `\040`, e.g. `"hi there\nworld!"`. * Unquoted strings are terminated at the next (unescaped) whitespace character and - support common escape sequences such as `\t\r\n` etc., e.g. `hi\ there\nworld!`. + support common escape sequences just like double quoted strings, e.g. `hi\ there\nworld!`. * Ignores excessive whitespace around arguments, such as trailing whitespace or multiple spaces between arguments. * Makes no assumptions about your input encoding, so this works with binary data diff --git a/src/functions.php b/src/functions.php index 3759044..56f82a2 100644 --- a/src/functions.php +++ b/src/functions.php @@ -11,13 +11,6 @@ */ function split($command) { - // map of escaped characters and their replacement - static $escapes = array( - 'n' => "\n", - 'r' => "\r", - 't' => "\t", - ); - // whitespace characters count as argument separators static $ws = array( ' ', @@ -41,6 +34,7 @@ function split($command) $inQuote = null; $argument = ''; + $part = ''; // read a single argument for (; isset($command[$i]); ++$i) { @@ -50,33 +44,37 @@ function split($command) // we're within a 'single quoted' string if ($c === '\\' && isset($command[$i + 1]) && ($command[$i + 1] === "'" || $command[$i + 1] === '\\')) { // escaped single quote or backslash ends up as char in argument - $argument .= $command[++$i]; + $part .= $command[++$i]; continue; } elseif ($c === "'") { // single quote ends $inQuote = null; + $argument .= $part; + $part = ''; continue; } } else { // we're not within any quotes or within a "double quoted" string if ($c === '\\' && isset($command[$i + 1])) { - // any escaped character will be processed - $c = $command[++$i]; - if (isset($escapes[$c])) { - // apply mapped character if applicable - $argument .= $escapes[$c]; - } else { - // pass through original character otherwise - $argument .= $c; - } + // escaped characters will be interpreted when part is complete + $part .= $command[$i] . $command[$i + 1]; + ++$i; continue; } elseif ($inQuote === '"' && $c === '"') { // double quote ends $inQuote = null; + + // previous double quoted part should be interpreted + $argument .= stripcslashes($part); + $part = ''; continue; } elseif ($inQuote === null && ($c === '"' || $c === "'")) { // start of quotes found $inQuote = $c; + + // previous unquoted part should be interpreted + $argument .= stripcslashes($part); + $part = ''; continue; } elseif ($inQuote === null && in_array($c, $ws)) { // whitespace character terminates unquoted argument @@ -84,7 +82,7 @@ function split($command) } } - $argument .= $c; + $part .= $c; } // end of argument reached. Still in quotes is a parse error. @@ -92,6 +90,11 @@ function split($command) throw new \RuntimeException('Still in quotes (' . $inQuote . ')'); } + // add remaining part to current argument + if ($part !== '') { + $argument .= stripcslashes($part); + } + $args []= $argument; } diff --git a/tests/SplitTest.php b/tests/SplitTest.php index f304cb0..89cfdc3 100644 --- a/tests/SplitTest.php +++ b/tests/SplitTest.php @@ -181,4 +181,86 @@ public function testSingleStringWithSingleQuotesAndInterpretedEscapes() $this->assertEquals(array("echo", "let's go"), $args); } + + public function testSingleStringWithInterpretedEscapes() + { + $args = Arguments\split('hello\\\\'); + + $this->assertEquals(array("hello\\"), $args); + } + + public function testSingleStringWithInterpretedIncompleteEscapes() + { + $args = Arguments\split('hello\\'); + + $this->assertEquals(array("hello\\"), $args); + } + + public function testSingleStringWithInterpretedHexEscapes() + { + $args = Arguments\split('hello\x20world'); + + $this->assertEquals(array("hello world"), $args); + } + + public function testSingleStringWithInterpretedIncompleteHexEscapesEnd() + { + $args = Arguments\split('hello\x9'); + + $this->assertEquals(array("hello\t"), $args); + } + + public function testSingleStringWithInterpretedIncompleteHexEscapesMiddle() + { + $args = Arguments\split('hello\x9world'); + + $this->assertEquals(array("hello\tworld"), $args); + } + + public function testSingleStringWithInterpretedOctalEscapes() + { + $args = Arguments\split('hello\040world'); + + $this->assertEquals(array("hello world"), $args); + } + + public function testSingleStringWithInterpretedShortOctalEscapes() + { + $args = Arguments\split('hello\40world'); + + $this->assertEquals(array("hello world"), $args); + } + + public function testSingleStringWithUninterpretedNumberIsNotAnOctalEscape() + { + $args = Arguments\split('hello\\999world'); + + $this->assertEquals(array("hello999world"), $args); + } + + // "\n"\n"\n" + public function testSingleStringWithCombinedDoubleQuotedPartsWithInterpretedEscapes() + { + $args = Arguments\split('"\n"\n"\n"'); + + $this->assertEquals(array("\n\n\n"), $args); + } + + // '\n'\n'\n' + public function testSingleStringWithCombinedSingleQuotedPartsWithInterpretedEscapesOnlyInInnerUnquotedPart() + { + $s = "'"; + $args = Arguments\split($s . '\n' . $s . '\n' . $s . '\n' . $s); + + $this->assertEquals(array("\\n\n\\n"), $args); + } + + // \n'\n'\n + public function testSingleStringWithCombinedSingleQuotedPartsWithInterpretedEscapesOnlyInOuterUnquotedParts() + { + $s = "'"; + $args = Arguments\split('\n' . $s . '\n' . $s . '\n'); + + $this->assertEquals(array("\n\\n\n"), $args); + } }