Skip to content

Commit

Permalink
[osh-language] Compatibility for DQ inside backticks inside DQ
Browse files Browse the repository at this point in the history
The Oil parsing model is smart enough to parse double quotes inside
backticks inside double quotes:

    echo "x `echo "hi"`"

Other shells aren't, and require this syntax instead:

    echo "x `echo \"hi\"`"

This is issue #1449.
  • Loading branch information
Andy C committed Jan 14, 2023
1 parent 6cb28a9 commit 805bffc
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 23 deletions.
2 changes: 1 addition & 1 deletion frontend/id_kind_def.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def AddKinds(spec):

# For recognizing \` and \" and \\ within backticks. There's an extra layer
# of backslash quoting.
spec.AddKind('Backtick', ['Right', 'Quoted', 'Other'])
spec.AddKind('Backtick', ['Right', 'Quoted', 'DoubleQuote', 'Other'])

spec.AddKind('History', ['Op', 'Num', 'Search', 'Other'])

Expand Down
8 changes: 5 additions & 3 deletions frontend/lexer_def.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,11 @@ def R(pat, tok_type):
# Preprocessing before ShCommand
LEXER_DEF[lex_mode_e.Backtick] = [
C(r'`', Id.Backtick_Right),
# A backslash, and then one of the SAME FOUR escaped chars in the DQ mode.
R(r'\\[$`"\\]', Id.Backtick_Quoted),
R(r'[^`\\\0]+', Id.Backtick_Other), # contiguous run of literals
# A backslash, and then $ or ` or \
R(r'\\[$`\\]', Id.Backtick_Quoted),
# \" treated specially, depending on whether backticks are double-quoted!
R(r'\\"', Id.Backtick_DoubleQuote),
R(r'[^`\\\0]+', Id.Backtick_Other), # contiguous run of literals
R(r'[^\0]', Id.Backtick_Other), # anything else
]

Expand Down
38 changes: 28 additions & 10 deletions osh/word_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ def _ParseVarExpr(self, arg_lex_mode, allow_query=False):
def ReadBracedVarSub(self, left_token):
# type: (Token) -> Tuple[braced_var_sub, Token]
""" For Oil expressions like var x = ${x:-"default"}. """
part = self._ReadBracedVarSub(left_token, False) # not quoted
part = self._ReadBracedVarSub(left_token, d_quoted=False)
last_token = self.cur_token
return part, last_token

Expand Down Expand Up @@ -680,10 +680,10 @@ def _ReadDoubleQuotedLeftParts(self):
# type: () -> word_part_t
"""Read substitution parts in a double quoted context."""
if self.token_type in (Id.Left_DollarParen, Id.Left_Backtick):
return self._ReadCommandSub(self.token_type)
return self._ReadCommandSub(self.token_type, d_quoted=True)

if self.token_type == Id.Left_DollarBrace:
return self._ReadBracedVarSub(self.cur_token, True) # DQ
return self._ReadBracedVarSub(self.cur_token, d_quoted=True)

if self.token_type == Id.Left_DollarDParen:
return self._ReadArithSub()
Expand Down Expand Up @@ -741,10 +741,10 @@ def _ReadUnquotedLeftParts(self, try_triple_quote, triple_out):
if self.token_type in (
Id.Left_DollarParen, Id.Left_Backtick, Id.Left_ProcSubIn,
Id.Left_ProcSubOut):
return self._ReadCommandSub(self.token_type)
return self._ReadCommandSub(self.token_type, d_quoted=False)

if self.token_type == Id.Left_DollarBrace:
return self._ReadBracedVarSub(self.cur_token, False) # not DQ
return self._ReadBracedVarSub(self.cur_token, d_quoted=False)

if self.token_type == Id.Left_DollarDParen:
return self._ReadArithSub()
Expand Down Expand Up @@ -920,8 +920,8 @@ def ReadDoubleQuoted(self, left_token, parts):
self._ReadLikeDQ(left_token, True, parts)
return self.cur_token

def _ReadCommandSub(self, left_id):
# type: (Id_t) -> command_sub
def _ReadCommandSub(self, left_id, d_quoted=False):
# type: (Id_t, bool) -> command_sub
"""
NOTE: This is not in the grammar, because word parts aren't in the grammar!
Expand Down Expand Up @@ -968,22 +968,40 @@ def _ReadCommandSub(self, left_id):

self._Next(lex_mode_e.Backtick) # advance past `

log("d_quoted %s", d_quoted)

parts = [] # type: List[str]
while True:
self._Peek()
#print(self.cur_token)
#log("TOK %s", self.cur_token)

if self.token_type == Id.Backtick_Quoted:
parts.append(self.cur_token.val[1:]) # remove leading \
parts.append(self.cur_token.val[1:]) # Remove leading \

elif self.token_type == Id.Backtick_DoubleQuote:
# Compatibility: If backticks are double quoted, then double quotes
# within them have to be \"
# Shells aren't smart enough to match nested " and ` quotes (but OSH
# is)
if d_quoted:
parts.append(self.cur_token.val[1:]) # Remove leading \
else:
parts.append(self.cur_token.val)

elif self.token_type == Id.Backtick_Other:
parts.append(self.cur_token.val)

elif self.token_type == Id.Backtick_Right:
break

elif self.token_type == Id.Eof_Real:
# Note: this parse error is in the ORIGINAL context. No code_str yet.
p_die('Unexpected EOF while looking for closing backtick',
token=left_token)

else:
raise AssertionError(self.cur_token)

self._Next(lex_mode_e.Backtick)

# Calculate right SPID on CommandSub BEFORE re-parsing.
Expand Down Expand Up @@ -1529,7 +1547,7 @@ def _ReadCompoundWord3(self, lex_mode, eof_type, empty_ok):
# Users can also use look at ,(*.py|*.sh)
if (self.parse_opts.parse_at() and self.token_type == Id.ExtGlob_At and
num_parts == 0):
cs_part = self._ReadCommandSub(Id.Left_AtParen)
cs_part = self._ReadCommandSub(Id.Left_AtParen, d_quoted=False)
# RARE mutation of tok.id!
cs_part.left_token.id = Id.Left_AtParen
part = cs_part # for type safety
Expand Down
18 changes: 10 additions & 8 deletions spec/command-sub.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,16 @@ cat $file
"
## END
#### Quoting " within ``
echo 1 `echo \"`
#echo 2 `echo \\"`
#echo 3 `echo \\\"`
#echo 4 `echo \\\\"`
## STDOUT:
1 "
## END
#### Quoting $ within ``
echo 1 `echo $`
echo 2 `echo \$`
Expand Down Expand Up @@ -232,14 +242,6 @@ echo `echo \\"foo\\"`
"foo"
## END
# Documented in doc/known-differences.md (and Morbig paper brought up the same
# issue)
## OK osh STDOUT:
"foo"
foo
"foo"
## END
#### More levels of double quotes in backticks
# Shells don't agree here, some of them give you form feeds!
# There are two levels of processing I don't understand.
Expand Down
2 changes: 1 addition & 1 deletion test/spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ osh-only() {

# Regress bugs
bugs() {
sh-spec spec/bugs.test.sh --osh-failures-allowed 3 \
sh-spec spec/bugs.test.sh --osh-failures-allowed 2 \
${REF_SHELLS[@]} $ZSH $BUSYBOX_ASH $OSH_LIST "$@"
}

Expand Down

0 comments on commit 805bffc

Please sign in to comment.