Skip to content

Commit

Permalink
feat: implement CharRefTokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
kkebo committed Nov 2, 2023
1 parent d7548c9 commit 022d9e2
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 2 deletions.
45 changes: 44 additions & 1 deletion Sources/Tokenizer/CharRefTokenizer.swift
Original file line number Diff line number Diff line change
@@ -1,3 +1,46 @@
/// States of the state machine stored in `CharRefTokenizer.state`.
///
/// `CharRefTokenizer.step` currently dispatches only `.begin` to a handler;
/// every other state traps with `fatalError("not implemented")`.
private enum CharRefState {
/// Initial state; `CharRefTokenizer.init()` starts here.
case begin
// NOTE(review): presumably entered after a `#` has been seen — confirm once implemented.
case octothorpe
/// Carries a numeric base in `base`.
// NOTE(review): presumably 10 or 16 for decimal/hex references — confirm.
case numeric(base: UInt32)
// NOTE(review): presumably the point where a terminating `;` is expected — confirm.
case numericSemicolon
// NOTE(review): presumably matching a named reference — confirm.
case named
// NOTE(review): presumably recovery after an unknown name — confirm.
case bogusName
}

/// Payload carried by `CharRefStatus.done` and handed to `Tokenizer.processCharRef(_:)`.
struct CharRef {
/// A pair of characters.
// NOTE(review): presumably the character(s) the reference decodes to — confirm.
var chars: (Character, Character)
// NOTE(review): presumably how many entries of `chars` are meaningful — confirm.
var num: UInt8
}

/// Outcome of a single `CharRefTokenizer.step` call.
///
/// Declared `~Copyable`, so values of this type cannot be implicitly copied.
enum CharRefStatus: ~Copyable {
/// `Tokenizer.tokenizeCharRef` reacts by saving the sub-tokenizer and returning `.suspend`.
case stuck
/// `Tokenizer.tokenizeCharRef` reacts by calling `step` again.
case progress
/// A result is available; `Tokenizer.tokenizeCharRef` passes it to `processCharRef(_:)`.
case done(CharRef)
}

/// Step-wise tokenizer for a character reference.
///
/// The owner calls `step(tokenizer:input:)` repeatedly and reacts to the
/// returned `CharRefStatus`.
struct CharRefTokenizer {
    /// Current state of the state machine; starts at `.begin`.
    private var state: CharRefState = .begin

    /// Cached result. While non-nil, `step` returns it immediately as `.done`.
    private var result: CharRef? = nil

    /// Creates a tokenizer in the `.begin` state with no result.
    init() {}

    /// Empty method; performs no work on `input`.
    // NOTE(review): looks like a leftover from before `step` existed — confirm
    // whether any caller still needs it before removing.
    mutating func tokenize(_ input: inout String.Iterator) {}

    /// Advances the state machine by one step.
    ///
    /// - Parameters:
    ///   - tokenizer: The owning tokenizer, forwarded to the state handler.
    ///   - input: The character source, forwarded to the state handler.
    /// - Returns: `.done` with the cached result if one exists; otherwise
    ///   whatever the handler for the current state returns.
    mutating func step(tokenizer: inout Tokenizer<some TokenSink>, input: inout String.Iterator) -> CharRefStatus {
        if let finished = self.result {
            return .done(finished)
        }

        // One case per state so each can be filled in independently.
        switch self.state {
        case .begin:
            return self.doBegin(tokenizer: &tokenizer, input: &input)
        case .octothorpe:
            fatalError("not implemented")
        case .numeric:
            fatalError("not implemented")
        case .numericSemicolon:
            fatalError("not implemented")
        case .named:
            fatalError("not implemented")
        case .bogusName:
            fatalError("not implemented")
        }
    }

    /// Handler for the `.begin` state. Not implemented yet: always traps.
    mutating func doBegin(tokenizer: inout Tokenizer<some TokenSink>, input: inout String.Iterator) -> CharRefStatus {
        fatalError("not implemented")
    }
}
26 changes: 25 additions & 1 deletion Sources/Tokenizer/Tokenizer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,9 @@ public struct Tokenizer<Sink: TokenSink>: ~Copyable {

// swift-format-ignore
private mutating func step(_ input: inout String.Iterator) -> ProcessResult {
self.charRefTokenizer?.tokenize(&input)
if case .some = self.charRefTokenizer {
return self.tokenizeCharRef(&input)
}

switch self.state {
case .data: while true {
Expand Down Expand Up @@ -844,6 +846,28 @@ public struct Tokenizer<Sink: TokenSink>: ~Copyable {
}
}

/// Drives the pending character-reference tokenizer until it finishes or stalls.
///
/// - Parameter input: Character source forwarded to every `step` call.
/// - Returns: `.continue` once a finished reference has been passed to
///   `processCharRef(_:)` and `charRefTokenizer` has been cleared; `.suspend`
///   when the sub-tokenizer reports `.stuck` (it is stored back first).
/// - Precondition: `charRefTokenizer` is non-nil; otherwise this traps.
@inline(__always)
private mutating func tokenizeCharRef(_ input: inout String.Iterator) -> ProcessResult {
    // The sub-tokenizer is copied into a local while `self` is passed `inout`
    // to each step; the copy is only written back when it gets stuck.
    guard var pending = self.charRefTokenizer else { preconditionFailure("unreachable") }
    while true {
        switch pending.step(tokenizer: &self, input: &input) {
        case .progress:
            // Nothing to report yet; take another step.
            continue
        case .stuck:
            self.charRefTokenizer = pending
            return .suspend
        case .done(let charRef):
            self.processCharRef(charRef)
            self.charRefTokenizer = nil
            return .continue
        }
    }
}

/// Handles a completed character reference. Not implemented yet: always traps via `fatalError`.
///
/// - Parameter charRef: The finished reference; `tokenizeCharRef` passes it in
///   when the sub-tokenizer returns `.done`.
@inline(__always)
private mutating func processCharRef(_ charRef: CharRef) {
fatalError("not implemented")
}

@inline(__always)
private mutating func getChar(from input: inout String.Iterator) -> Character? {
guard let reconsumeChar else {
Expand Down

0 comments on commit 022d9e2

Please sign in to comment.