-
Notifications
You must be signed in to change notification settings - Fork 464
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This commit adds some helper infrastructure to parse a multi-line string into a hierarchy based on indentation.
- Loading branch information
1 parent
3831e2b
commit 85602f4
Showing
3 changed files
with
293 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use | ||
// of this source code is governed by a BSD-style license that can be found in | ||
// the LICENSE file. | ||
|
||
// Package indenttree implements a simple text processor which parses a | ||
// hierarchy defined using indentation; see Parse. | ||
package indenttree | ||
|
||
import ( | ||
"slices" | ||
"strings" | ||
|
||
"github.com/cockroachdb/errors" | ||
) | ||
|
||
// Parse a multi-line input string into trees of nodes. For example: | ||
// | ||
// a | ||
// a1 | ||
// a11 | ||
// a2 | ||
// b | ||
// b1 | ||
// | ||
// is parsed into two Nodes (a and b). Node a has two children (a1, a2), and a2 | ||
// has one child (a11); node b has one child (b1). | ||
// | ||
// The indentation level is arbitrary but it must be consistent. across nodes. For example, the following is not valid: | ||
// | ||
// a | ||
// a1 | ||
// b | ||
// b1 | ||
// | ||
// Tabs cannot be used for indentation (they can cause confusion if editor | ||
// settings vary). Nodes cannot be skipped, for example the following is not | ||
// valid: | ||
// | ||
// a | ||
// a1 | ||
// a11 | ||
// b | ||
// b12 | ||
func Parse(input string) ([]Node, error) { | ||
input = strings.TrimSuffix(input, "\n") | ||
if input == "" { | ||
return nil, errors.Errorf("empty input") | ||
} | ||
lines := strings.Split(input, "\n") | ||
indentLevel := make([]int, len(lines)) | ||
for i, line := range lines { | ||
level := 0 | ||
for strings.HasPrefix(line[level:], " ") { | ||
level++ | ||
} | ||
if len(line) == level { | ||
return nil, errors.Errorf("empty line in input:\n%s", input) | ||
} | ||
if line[level] == '\t' { | ||
return nil, errors.Errorf("tab indentation in input:\n%s", input) | ||
} | ||
indentLevel[i] = level | ||
} | ||
levels := slices.Clone(indentLevel) | ||
slices.Sort(levels) | ||
levels = slices.Compact(levels) | ||
|
||
var parse func(levelIdx, startLineIdx, endLineIdx int) ([]Node, error) | ||
parse = func(levelIdx, startLineIdx, endLineIdx int) ([]Node, error) { | ||
if startLineIdx > endLineIdx { | ||
return nil, nil | ||
} | ||
level := levels[levelIdx] | ||
if indentLevel[startLineIdx] != level { | ||
return nil, errors.Errorf("inconsistent indentation in input:\n%s", input) | ||
} | ||
nextNode := startLineIdx + 1 | ||
for ; nextNode <= endLineIdx; nextNode++ { | ||
if indentLevel[nextNode] <= level { | ||
break | ||
} | ||
} | ||
node := Node{value: lines[startLineIdx][level:]} | ||
var err error | ||
node.children, err = parse(levelIdx+1, startLineIdx+1, nextNode-1) | ||
if err != nil { | ||
return nil, err | ||
} | ||
otherNodes, err := parse(levelIdx, nextNode, endLineIdx) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return append([]Node{node}, otherNodes...), nil | ||
} | ||
return parse(0, 0, len(lines)-1) | ||
} | ||
|
||
// Node is one entry in a hierarchy returned by Parse: the text of a single
// input line plus the nodes nested beneath it.
type Node struct {
	// value is the line's text with its leading indentation removed.
	value string
	// children are the directly nested nodes; nil when there are none.
	children []Node
}

// Value reports the contents of the line this node was built from, without
// the indentation.
func (nd *Node) Value() string {
	return nd.value
}

// Children reports the nodes nested directly under this one, if any.
func (nd *Node) Children() []Node {
	return nd.children
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use | ||
// of this source code is governed by a BSD-style license that can be found in | ||
// the LICENSE file. | ||
|
||
package indenttree | ||
|
||
import ( | ||
"fmt" | ||
"testing" | ||
|
||
"github.com/cockroachdb/datadriven" | ||
"github.com/cockroachdb/pebble/internal/treeprinter" | ||
) | ||
|
||
func TestIndentTree(t *testing.T) { | ||
datadriven.RunTest(t, "testdata", func(t *testing.T, d *datadriven.TestData) string { | ||
switch d.Cmd { | ||
case "parse": | ||
nodes, err := Parse(d.Input) | ||
if err != nil { | ||
return fmt.Sprintf("error: %s", err) | ||
} | ||
tp := treeprinter.New() | ||
root := tp.Child("<root>") | ||
var dfs func(n Node, tp treeprinter.Node) | ||
dfs = func(n Node, parent treeprinter.Node) { | ||
child := parent.Child(n.Value()) | ||
for _, c := range n.Children() { | ||
dfs(c, child) | ||
} | ||
} | ||
for _, c := range nodes { | ||
dfs(c, root) | ||
} | ||
return tp.String() | ||
|
||
default: | ||
t.Fatalf("unknown command: %s", d.Cmd) | ||
return "" | ||
} | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
parse | ||
a | ||
---- | ||
<root> | ||
└── a | ||
|
||
parse | ||
a | ||
b | ||
---- | ||
<root> | ||
├── a | ||
└── b | ||
|
||
parse | ||
a | ||
a1 | ||
a2 | ||
b | ||
b1 | ||
---- | ||
<root> | ||
├── a | ||
│ ├── a1 | ||
│ └── a2 | ||
└── b | ||
└── b1 | ||
|
||
parse | ||
a | ||
a1 | ||
a2 | ||
a21 | ||
a22 | ||
b | ||
b1 | ||
b2 | ||
b3 | ||
b31 | ||
b311 | ||
---- | ||
<root> | ||
├── a | ||
│ ├── a1 | ||
│ └── a2 | ||
│ ├── a21 | ||
│ └── a22 | ||
└── b | ||
├── b1 | ||
├── b2 | ||
└── b3 | ||
└── b31 | ||
└── b311 | ||
|
||
parse | ||
a | ||
a1 | ||
a2 | ||
a21 | ||
a211 | ||
b | ||
b1 | ||
b2 | ||
b211 | ||
b3 | ||
---- | ||
<root> | ||
├── a | ||
│ ├── a1 | ||
│ ├── a2 | ||
│ └── a21 | ||
│ └── a211 | ||
└── b | ||
├── b1 | ||
├── b2 | ||
│ └── b211 | ||
└── b3 | ||
|
||
parse | ||
a | ||
a1 | ||
b | ||
b1 | ||
---- | ||
<root> | ||
├── a | ||
│ └── a1 | ||
└── b | ||
└── b1 | ||
|
||
|
||
# Error cases. | ||
|
||
parse | ||
---- | ||
error: empty input | ||
|
||
parse | ||
a | ||
a1 | ||
b | ||
b1 | ||
---- | ||
error: inconsistent indentation in input: | ||
a | ||
a1 | ||
b | ||
b1 | ||
|
||
parse | ||
a | ||
a1 | ||
a2 | ||
a21 | ||
b | ||
b11 | ||
---- | ||
error: inconsistent indentation in input: | ||
a | ||
a1 | ||
a2 | ||
a21 | ||
b | ||
b11 | ||
|
||
parse | ||
a | ||
a1 | ||
a11 | ||
b | ||
b12 | ||
---- | ||
error: inconsistent indentation in input: | ||
a | ||
a1 | ||
a11 | ||
b | ||
b12 |