Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added token index #4

Merged
merged 1 commit into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions shlex.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type lexerState int
// Token is a single unit produced by the tokenizer: its classification,
// its literal text, and the position in the input where it began.
type Token struct {
tokenType TokenType // classification of the token (e.g. WordToken, CommentToken)
value string // literal text of the token, with quoting/escaping already resolved
index int // 0-based rune offset in the input stream of the token's first rune (set from t.index - 1 in scanStream)
}

// Equal reports whether tokens a and b are equal.
Expand All @@ -70,7 +71,7 @@ func (a *Token) Equal(b *Token) bool {
if a.tokenType != b.tokenType {
return false
}
return a.value == b.value
return a.value == b.value && a.index == b.index
}

// Named classes of UTF-8 runes
Expand Down Expand Up @@ -169,6 +170,7 @@ func (l *Lexer) Next() (string, error) {
// Tokenizer turns an input stream into a sequence of typed tokens.
type Tokenizer struct {
input bufio.Reader // buffered source the tokenizer reads runes from
classifier tokenClassifier // maps each rune to its character class
index int // running count of runes read so far; incremented once per ReadRune in scanStream
}

// NewTokenizer creates a new tokenizer from an input stream.
Expand All @@ -184,6 +186,7 @@ func NewTokenizer(r io.Reader) *Tokenizer {
// It will panic if it encounters a rune which it does not know how to handle.
func (t *Tokenizer) scanStream() (*Token, error) {
state := startState
tokenIndex := 0
var tokenType TokenType
var value []rune
var nextRune rune
Expand All @@ -192,6 +195,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {

for {
nextRune, _, err = t.input.ReadRune()
t.index += 1
nextRuneType = t.classifier.ClassifyRune(nextRune)

if err == io.EOF {
Expand All @@ -204,6 +208,9 @@ func (t *Tokenizer) scanStream() (*Token, error) {
switch state {
case startState: // no runes read yet
{
if nextRuneType != spaceRuneClass {
tokenIndex = t.index - 1
}
switch nextRuneType {
case eofRuneClass:
{
Expand Down Expand Up @@ -247,14 +254,16 @@ func (t *Tokenizer) scanStream() (*Token, error) {
{
token := &Token{
tokenType: tokenType,
value: string(value)}
value: string(value),
index: tokenIndex}
return token, err
}
case spaceRuneClass:
{
token := &Token{
tokenType: tokenType,
value: string(value)}
value: string(value),
index: tokenIndex}
return token, err
}
case escapingQuoteRuneClass:
Expand Down Expand Up @@ -283,7 +292,8 @@ func (t *Tokenizer) scanStream() (*Token, error) {
err = fmt.Errorf("EOF found after escape character")
token := &Token{
tokenType: tokenType,
value: string(value)}
value: string(value),
index: tokenIndex}
return token, err
}
default:
Expand All @@ -301,7 +311,8 @@ func (t *Tokenizer) scanStream() (*Token, error) {
err = fmt.Errorf("EOF found after escape character")
token := &Token{
tokenType: tokenType,
value: string(value)}
value: string(value),
index: tokenIndex}
return token, err
}
default:
Expand All @@ -319,7 +330,8 @@ func (t *Tokenizer) scanStream() (*Token, error) {
err = fmt.Errorf("EOF found when expecting closing quote")
token := &Token{
tokenType: tokenType,
value: string(value)}
value: string(value),
index: tokenIndex}
return token, err
}
case escapingQuoteRuneClass:
Expand All @@ -344,7 +356,8 @@ func (t *Tokenizer) scanStream() (*Token, error) {
err = fmt.Errorf("EOF found when expecting closing quote")
token := &Token{
tokenType: tokenType,
value: string(value)}
value: string(value),
index: tokenIndex}
return token, err
}
case nonEscapingQuoteRuneClass:
Expand All @@ -364,7 +377,8 @@ func (t *Tokenizer) scanStream() (*Token, error) {
{
token := &Token{
tokenType: tokenType,
value: string(value)}
value: string(value),
index: tokenIndex}
return token, err
}
case spaceRuneClass:
Expand All @@ -373,7 +387,8 @@ func (t *Tokenizer) scanStream() (*Token, error) {
state = startState
token := &Token{
tokenType: tokenType,
value: string(value)}
value: string(value),
index: tokenIndex}
return token, err
} else {
value = append(value, nextRune)
Expand Down
20 changes: 10 additions & 10 deletions shlex_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,16 @@ func TestClassifier(t *testing.T) {
func TestTokenizer(t *testing.T) {
testInput := strings.NewReader(testString)
expectedTokens := []*Token{
{WordToken, "one"},
{WordToken, "two"},
{WordToken, "three four"},
{WordToken, "five \"six\""},
{WordToken, "seven#eight"},
{CommentToken, " nine # ten"},
{WordToken, "eleven"},
{WordToken, "twelve\\"},
{WordToken, "thirteen=13"},
{WordToken, "fourteen/14"}}
{WordToken, "one", 0},
{WordToken, "two", 4},
{WordToken, "three four", 8},
{WordToken, "five \"six\"", 21},
{WordToken, "seven#eight", 36},
{CommentToken, " nine # ten", 48},
{WordToken, "eleven", 62},
{WordToken, "twelve\\", 69},
{WordToken, "thirteen=13", 79},
{WordToken, "fourteen/14", 91}}

tokenizer := NewTokenizer(testInput)
for i, want := range expectedTokens {
Expand Down