Hook up the new functions to parse assignments.

a[i]=x now works.

- Update the spec test numbers.  Down to 92 failures!
- Add a hard test case with alias and LHS array assignment.  bash and mksh
  both pass.

Addresses issue #175.

Also:

- Notes about the parser architecture.
oils-for-unix · Sep 16, 2018 · 5f79c1d · 5f79c1d
1 parent ea80189
commit 5f79c1d
Show file tree

Hide file tree

Showing 7 changed files with 106 additions and 159 deletions.
diff --git a/core/word.py b/core/word.py
@@ -378,23 +378,14 @@ def IsVarLike(w):
   return _LiteralPartId(w.parts[0]) == Id.Lit_VarLike
 
 
-def DetectAssignment_OLD(w):
-  """Tests whether a word looks like FOO=bar or FOO[x]=bar.
+def DetectAssignment(w):
+  """Detects whether a word looks like FOO=bar or FOO[x]=bar.
 
   Returns:
-    (string, op, CompoundWord) if it looks like FOO=bar
-    False                      if it doesn't
-
-
-  (token left,   # Lit_VarLike, Lit_ArrayLhsOpen, or Undefined_Tok
-   token? right, # Lit_ArrayLhsClose if it was detected
-   part_offset)  # where to start the token, 0
-
-  TODO: could use assign_parse
-  Or (spid, k, (spid1, spid2), op, v)
-  spid1 and spid2 are [ and ]
-
-  Or do we reparse right here?  Create our own ArithParser.
+    left_token,          # Lit_VarLike or Lit_ArrayLhsOpen, or None if it's not
+                         # an assignment
+    close_token,         # Lit_ArrayLhsClose if it was detected, or None
+    part_offset          # where to start the value word, 0 if not an assignment
 
   Cases:
 
@@ -410,44 +401,6 @@ def DetectAssignment_OLD(w):
             # can't be nested.
   """
   assert w.tag == word_e.CompoundWord
-  if len(w.parts) == 0:
-    return False
-
-  part0 = w.parts[0]
-  if _LiteralPartId(part0) != Id.Lit_VarLike:
-    return False
-
-  # TODO:
-  # if id0 == Id.Lit_ArrayLhsOpen:
-  # Look through for Id.Lit_ArrayLhsClose
-  # If you find it, then it is an array
-
-  s = part0.token.val
-  assert s.endswith('=')
-  if s[-2] == '+':
-    op = assign_op_e.PlusEqual
-    name = s[:-2]
-  else:
-    op = assign_op_e.Equal
-    name = s[:-1]
-
-  rhs = ast.CompoundWord()
-  if len(w.parts) == 1:
-    # This is necessary so that EmptyUnquoted elision isn't applied.  EMPTY= is
-    # like EMPTY=''.
-    # TODO: Change to EmptyWord
-    rhs.parts.append(ast.EmptyPart())
-  else:
-    for p in w.parts[1:]:
-      rhs.parts.append(p)
-
-  return name, op, rhs
-
-
-def DetectAssignment(w):
-  """
-  """
-  assert w.tag == word_e.CompoundWord
   n = len(w.parts)
   if n == 0:
     return None, None, 0

diff --git a/core/word_eval.py b/core/word_eval.py
@@ -16,14 +16,15 @@
 from core import util
 from osh.meta import ast
 
-part_value_e = runtime.part_value_e
-value_e = runtime.value_e
-effect_e = runtime.effect_e
-
+word_e = ast.word_e
 bracket_op_e = ast.bracket_op_e
 suffix_op_e = ast.suffix_op_e
 word_part_e = ast.word_part_e
 
+part_value_e = runtime.part_value_e
+value_e = runtime.value_e
+effect_e = runtime.effect_e
+
 log = util.log
 e_die = util.e_die
 
@@ -804,6 +805,9 @@ def EvalWordToString(self, word, do_fnmatch=False):
       "$pat") echo 'equal to glob string' ;;  # must be glob escaped
     esac
     """
+    if word.tag == word_e.EmptyWord:
+      return runtime.Str('')
+
     part_vals = []
     for p in word.parts:
       self._EvalWordPart(p, part_vals, quoted=False)
@@ -844,11 +848,14 @@ def EvalRhsWord(self, word):
     Used for RHS of assignment.  There is no splitting.
 
     Args:
-      word.CompoundWord
+      ast.word_t
 
     Returns:
-      value
+      runtime.value_t
     """
+    if word.tag == word_e.EmptyWord:
+      return runtime.Str('')
+
     # Special case for a=(1 2).  ArrayLiteralPart won't appear in words that
     # don't look like assignments.
     if (len(word.parts) == 1 and

diff --git a/doc/architecture-notes.md b/doc/architecture-notes.md
@@ -1,6 +1,8 @@
 Notes on OSH Architecture
 -------------------------
 
+# Parse Time
+
 ## Where we (unfortunately) must re-parse previously parsed text
 
 - alias expansion
@@ -10,10 +12,19 @@ Notes on OSH Architecture
 Each of these cases has implications for translation, because we break the
 "arena invariant".
 
+### Extra Passes Over the LST
+
+These are handled up front, but not in a single pass.
+
+- Assignment / Env detection: `FOO=bar declare a[x]=1`
+  - s=1 doesn't cause reparsing, but a[x+1]=y does.
+- Brace Detection in a few places: `echo {a,b}`
+- Tilde Detection: `echo ~bob`, `home=~bob`
+
 ## Parser Lookahead
 
-- `func() { echo hi; }` vs.
-- `func=()  # an array`
+- `func() { echo hi; }` vs.  `func=()  # an array`
+- precedence parsing?  I think this also needs only a single token of lookahead.
 
 ## Where the arena invariant is broken
 
@@ -31,6 +42,8 @@ This isn't re-parsing, but it's re-reading.
 
 - See `osh/parse_lib.py` and its callers.
 
+# Runtime
+
 ## Where code strings are evaluated
 
 - source and eval
@@ -59,6 +72,7 @@ This isn't re-parsing, but it's re-reading.
 - expr and $(( )) (expr not in shell)
 - later: find and our own language
 
+# Build Time
 
 ## Dependencies