diff --git a/private/model/api/docstring.go b/private/model/api/docstring.go
index f5889978fa8..0c87438447e 100644
--- a/private/model/api/docstring.go
+++ b/private/model/api/docstring.go
@@ -3,15 +3,17 @@ package api
 
 import (
-    "bytes"
+    "bufio"
     "encoding/json"
     "fmt"
     "html"
+    "io"
     "os"
     "regexp"
     "strings"
 
     xhtml "golang.org/x/net/html"
+    "golang.org/x/net/html/atom"
 )
 
 type apiDocumentation struct {
@@ -113,7 +115,6 @@ func docstring(doc string) string {
     doc = html.UnescapeString(doc)
 
     // Replace doc with full name if doc is empty.
-    doc = strings.TrimSpace(doc)
     if len(doc) == 0 {
         doc = fullname
     }
@@ -125,17 +126,6 @@ const (
     indent = "   "
 )
 
-// style is what we want to prefix a string with.
-// For instance, <ul><li>Foo</li><li>Bar</li></ul>, will generate
-// * Foo
-// * Bar
-var style = map[string]string{
-    "ul":   indent + "* ",
-    "li":   indent + "* ",
-    "code": indent,
-    "pre":  indent,
-}
-
 // commentify converts a string to a Go comment
 func commentify(doc string) string {
     if len(doc) == 0 {
@@ -160,257 +150,379 @@ func commentify(doc string) string {
     return ""
 }
 
-// wrap returns a rewritten version of text to have line breaks
-// at approximately length characters. Line breaks will only be
-// inserted into whitespace.
-func wrap(text string, length int, isIndented bool) string {
-    var buf bytes.Buffer
-    var last rune
-    var lastNL bool
-    var col int
-
-    for _, c := range text {
-        switch c {
-        case '\r': // ignore this
-            continue // and also don't track `last`
-        case '\n': // ignore this too, but reset col
-            if col >= length || last == '\n' {
-                buf.WriteString("\n")
-            }
-            buf.WriteString("\n")
-            col = 0
-        case ' ', '\t': // opportunity to split
-            if col >= length {
-                buf.WriteByte('\n')
-                col = 0
-                if isIndented {
-                    buf.WriteString(indent)
-                    col += 3
-                }
-            } else {
-                // We only want to write a leading space if the col is greater than zero.
-                // This will provide the proper spacing for documentation.
-                buf.WriteRune(c)
-                col++ // count column
-            }
-        default:
-            buf.WriteRune(c)
-            col++
+func wrap(text string, length int) string {
+    var b strings.Builder
+
+    s := bufio.NewScanner(strings.NewReader(text))
+    for s.Scan() {
+        splitLine(&b, strings.TrimRight(s.Text(), " \t"), length)
+    }
+
+    return strings.TrimRight(b.String(), "\n")
+}
+
+func splitLine(w stringWriter, line string, length int) {
+    leading := getLeadingWhitespace(line)
+
+    line = line[len(leading):]
+    length -= len(leading)
+
+    for len(line) > length {
+        // Find the next whitespace after the length
+        idx := strings.IndexAny(line[length:], " \t")
+        if idx == -1 {
+            break
         }
-        lastNL = c == '\n'
-        _ = lastNL
-        last = c
+        offset := length + idx
+
+        w.WriteString(leading)
+        w.WriteString(line[:offset])
+        w.WriteByte('\n')
+        line = line[offset+1:]
+    }
+
+    if len(line) > 0 {
+        w.WriteString(leading)
+        w.WriteString(line)
     }
-    return buf.String()
+    // Add the newline back in that was stripped out by scanner.
+    w.WriteByte('\n')
 }
 
-type tagInfo struct {
-    tag        string
-    key        string
-    val        string
-    txt        string
-    raw        string
-    closingTag bool
+func getLeadingWhitespace(v string) string {
+    var o strings.Builder
+    for _, c := range v {
+        if c == ' ' || c == '\t' {
+            o.WriteRune(c)
+        } else {
+            break
+        }
+    }
+
+    return o.String()
 }
 
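The new wrap/splitLine pair above works a line at a time: each scanned line keeps its leading whitespace, and a break is only inserted at the first space or tab found past the target column. Below is a minimal, self-contained sketch of that strategy; wrapAt and the sample text are illustrative names only, not SDK code.

// wrapdemo: mirrors the scanning/wrapping strategy of the new wrap and
// splitLine helpers. Long lines are split only at whitespace found after
// the target column, and each wrapped piece keeps the original line's
// leading indentation.
package main

import (
    "bufio"
    "fmt"
    "strings"
)

func wrapAt(text string, length int) string {
    var b strings.Builder

    s := bufio.NewScanner(strings.NewReader(text))
    for s.Scan() {
        line := strings.TrimRight(s.Text(), " \t")

        // Split off the leading whitespace so it can be repeated on
        // every wrapped piece of this line.
        leading := line[:len(line)-len(strings.TrimLeft(line, " \t"))]
        body := line[len(leading):]
        limit := length - len(leading)

        for len(body) > limit {
            idx := strings.IndexAny(body[limit:], " \t")
            if idx == -1 {
                break // no whitespace after the limit; leave the long line alone
            }
            b.WriteString(leading + body[:limit+idx] + "\n")
            body = body[limit+idx+1:]
        }
        b.WriteString(leading + body + "\n")
    }

    return strings.TrimRight(b.String(), "\n")
}

func main() {
    fmt.Println(wrapAt("   an indented line that is long enough to be split across two pieces", 40))
    // Prints roughly:
    //    an indented line that is long enough to
    //    be split across two pieces
}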
 // generateDoc will generate the proper doc string for html encoded or plain text doc entries.
 func generateDoc(htmlSrc string) string {
     tokenizer := xhtml.NewTokenizer(strings.NewReader(htmlSrc))
-    tokens := buildTokenArray(tokenizer)
-    scopes := findScopes(tokens)
-    return walk(scopes)
-}
-
-func buildTokenArray(tokenizer *xhtml.Tokenizer) []tagInfo {
-    tokens := []tagInfo{}
-    for tt := tokenizer.Next(); tt != xhtml.ErrorToken; tt = tokenizer.Next() {
-        switch tt {
-        case xhtml.TextToken:
-            txt := string(tokenizer.Text())
-            if len(tokens) == 0 {
-                info := tagInfo{
-                    raw: txt,
-                }
-                tokens = append(tokens, info)
-            }
-            tn, _ := tokenizer.TagName()
-            key, val, _ := tokenizer.TagAttr()
-            info := tagInfo{
-                tag: string(tn),
-                key: string(key),
-                val: string(val),
-                txt: txt,
-            }
-            tokens = append(tokens, info)
-        case xhtml.StartTagToken:
-            tn, _ := tokenizer.TagName()
-            key, val, _ := tokenizer.TagAttr()
-            info := tagInfo{
-                tag: string(tn),
-                key: string(key),
-                val: string(val),
-            }
-            tokens = append(tokens, info)
-        case xhtml.SelfClosingTagToken, xhtml.EndTagToken:
-            tn, _ := tokenizer.TagName()
-            key, val, _ := tokenizer.TagAttr()
-            info := tagInfo{
-                tag:        string(tn),
-                key:        string(key),
-                val:        string(val),
-                closingTag: true,
-            }
-            tokens = append(tokens, info)
-        }
+    var builder strings.Builder
+    if err := encodeHTMLToText(&builder, tokenizer); err != nil {
+        panic(fmt.Sprintf("failed to generated docs, %v", err))
     }
-    return tokens
-}
-
-// walk is used to traverse each scoped block. These scoped
-// blocks will act as blocked text where we do most of our
-// text manipulation.
-func walk(scopes [][]tagInfo) string {
-    doc := ""
-    // Documentation will be chunked by scopes.
-    // Meaning, for each scope will be divided by one or more newlines.
-    for _, scope := range scopes {
-        indentStr, isIndented := priorityIndentation(scope)
-        block := ""
-        href := ""
-        after := false
-        level := 0
-        lastTag := ""
-        for _, token := range scope {
-            if token.closingTag {
-                endl := closeTag(token, level)
-                block += endl
-                level--
-                lastTag = ""
-            } else if token.txt == "" {
-                if token.val != "" {
-                    href, after = formatText(token, "")
-                }
-                if level == 1 && isIndented {
-                    block += indentStr
-                }
-                level++
-                lastTag = token.tag
-            } else {
-                if token.txt != " " {
-                    str, _ := formatText(token, lastTag)
-                    block += str
-                    if after {
-                        block += href
-                        after = false
-                    }
-                } else {
-                    fmt.Println(token.tag)
-                    str, _ := formatText(tagInfo{}, lastTag)
-                    block += str
-                }
+
+    return wrap(strings.Trim(builder.String(), "\n"), 72)
+}
+
+type stringWriter interface {
+    Write([]byte) (int, error)
+    WriteByte(byte) error
+    WriteRune(rune) (int, error)
+    WriteString(string) (int, error)
+}
+
+func encodeHTMLToText(w stringWriter, z *xhtml.Tokenizer) error {
+    encoder := newHTMLTokenEncoder(w)
+    defer encoder.Flush()
+
+    for {
+        tt := z.Next()
+        if tt == xhtml.ErrorToken {
+            if err := z.Err(); err == io.EOF {
+                return nil
+            } else if err != nil {
+                return err
             }
         }
-        if !isIndented {
-            block = strings.TrimPrefix(block, " ")
-        }
-        block = wrap(block, 72, isIndented)
-        doc += block
-    }
-    return doc
-}
-
-// closeTag will divide up the blocks of documentation to be formated properly.
-func closeTag(token tagInfo, level int) string {
-    switch token.tag {
-    case "pre", "li", "div":
-        return "\n"
-    case "p", "h1", "h2", "h3", "h4", "h5", "h6":
-        return "\n\n"
-    case "code":
-        // indented code is only at the 0th level.
-        if level == 0 {
-            return "\n"
+
+        if err := encoder.Encode(z.Token()); err != nil {
+            return err
         }
     }
-    return ""
+
+    return nil
 }
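encodeHTMLToText is driven by the golang.org/x/net/html tokenizer: pull tokens until ErrorToken, treat io.EOF as the normal end of input, and feed every other token to the encoder. The standalone sketch below shows just that loop; the fmt.Println calls stand in for the handler dispatch, and none of it is SDK code.

package main

import (
    "fmt"
    "io"
    "strings"

    xhtml "golang.org/x/net/html"
)

func main() {
    z := xhtml.NewTokenizer(strings.NewReader(
        `<p>Hello <a href="https://example.com">world</a></p>`))

    for {
        tt := z.Next()
        if tt == xhtml.ErrorToken {
            if err := z.Err(); err == io.EOF {
                return // clean end of input
            } else if err != nil {
                panic(err)
            }
        }

        // The real encoder passes this token to Encode; here we just
        // show what the tokenizer produces.
        tok := z.Token()
        switch tok.Type {
        case xhtml.StartTagToken:
            fmt.Println("start tag:", tok.Data)
        case xhtml.EndTagToken:
            fmt.Println("end tag:  ", tok.Data)
        case xhtml.TextToken:
            fmt.Printf("text:      %q\n", tok.Data)
        }
    }
}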
-// formatText will format any sort of text based off of a tag. It will also return
-// a boolean to add the string after the text token.
-func formatText(token tagInfo, lastTag string) (string, bool) {
-    switch token.tag {
-    case "a":
-        if token.val != "" {
-            return fmt.Sprintf(" (%s)", token.val), true
+type htmlTokenHandler interface {
+    OnStartTagToken(xhtml.Token) htmlTokenHandler
+    OnEndTagToken(xhtml.Token, bool)
+    OnSelfClosingTagToken(xhtml.Token)
+    OnTextTagToken(xhtml.Token)
+}
+
+type htmlTokenEncoder struct {
+    w           stringWriter
+    depth       int
+    handlers    []tokenHandlerItem
+    baseHandler tokenHandlerItem
+}
+
+type tokenHandlerItem struct {
+    handler htmlTokenHandler
+    depth   int
+}
+
+func newHTMLTokenEncoder(w stringWriter) *htmlTokenEncoder {
+    baseHandler := newBlockTokenHandler(w)
+    baseHandler.rootBlock = true
+
+    return &htmlTokenEncoder{
+        w: w,
+        baseHandler: tokenHandlerItem{
+            handler: baseHandler,
+        },
+    }
+}
+
+func (e *htmlTokenEncoder) Flush() error {
+    e.baseHandler.handler.OnEndTagToken(xhtml.Token{Type: xhtml.TextToken}, true)
+    return nil
+}
+
+func (e *htmlTokenEncoder) Encode(token xhtml.Token) error {
+    h := e.baseHandler
+    if len(e.handlers) != 0 {
+        h = e.handlers[len(e.handlers)-1]
+    }
+
+    switch token.Type {
+    case xhtml.StartTagToken:
+        e.depth++
+
+        next := h.handler.OnStartTagToken(token)
+        if next != nil {
+            e.handlers = append(e.handlers, tokenHandlerItem{
+                handler: next,
+                depth:   e.depth,
+            })
         }
+
+    case xhtml.EndTagToken:
+        handlerBlockClosing := e.depth == h.depth
+
+        h.handler.OnEndTagToken(token, handlerBlockClosing)
+
+        // Remove all but the root handler as the handler is no longer needed.
+        if handlerBlockClosing {
+            e.handlers = e.handlers[:len(e.handlers)-1]
+        }
+        e.depth--
+
+    case xhtml.SelfClosingTagToken:
+        h.handler.OnSelfClosingTagToken(token)
+
+    case xhtml.TextToken:
+        h.handler.OnTextTagToken(token)
     }
-    // We don't care about a single space nor no text.
-    if len(token.txt) == 0 || token.txt == " " {
-        return "", false
+    return nil
 }
+
+type baseTokenHandler struct {
+    w stringWriter
+}
+
+func (e *baseTokenHandler) OnStartTagToken(token xhtml.Token) htmlTokenHandler { return nil }
+func (e *baseTokenHandler) OnEndTagToken(token xhtml.Token, blockClosing bool) {}
+func (e *baseTokenHandler) OnSelfClosingTagToken(token xhtml.Token) {}
+func (e *baseTokenHandler) OnTextTagToken(token xhtml.Token) {
+    e.w.WriteString(token.Data)
+}
+
+type blockTokenHandler struct {
+    baseTokenHandler
+
+    rootBlock  bool
+    origWriter stringWriter
+    strBuilder *strings.Builder
+
+    started                bool
+    newlineBeforeNextBlock bool
+}
+
+func newBlockTokenHandler(w stringWriter) *blockTokenHandler {
+    strBuilder := &strings.Builder{}
+    return &blockTokenHandler{
+        origWriter: w,
+        strBuilder: strBuilder,
+        baseTokenHandler: baseTokenHandler{
+            w: strBuilder,
+        },
+    }
+}
+func (e *blockTokenHandler) OnStartTagToken(token xhtml.Token) htmlTokenHandler {
+    e.started = true
+    if e.newlineBeforeNextBlock {
+        e.w.WriteString("\n")
+        e.newlineBeforeNextBlock = false
     }
-    // Here we want to indent code blocks that are newlines
-    if lastTag == "code" {
-        // Greater than one, because we don't care about newlines in the beginning
-        block := ""
-        if lines := strings.Split(token.txt, "\n"); len(lines) > 1 {
-            for _, line := range lines {
-                block += indent + line
-            }
-            block += "\n"
-            return block, false
+    switch token.DataAtom {
+    case atom.A:
+        return newLinkTokenHandler(e.w, token)
+    case atom.Ul:
+        e.w.WriteString("\n")
+        e.newlineBeforeNextBlock = true
+        return newListTokenHandler(e.w)
+
+    case atom.Div, atom.Dt, atom.P, atom.H1, atom.H2, atom.H3, atom.H4, atom.H5, atom.H6:
+        e.w.WriteString("\n")
+        e.newlineBeforeNextBlock = true
+        return newBlockTokenHandler(e.w)
+
+    case atom.Pre, atom.Code:
+        if e.rootBlock {
+            e.w.WriteString("\n")
+            e.w.WriteString(indent)
+            e.newlineBeforeNextBlock = true
         }
+        return newBlockTokenHandler(e.w)
     }
-    return token.txt, false
+
+    return nil
 }
+func (e *blockTokenHandler) OnEndTagToken(token xhtml.Token, blockClosing bool) {
+    if !blockClosing {
+        return
+    }
-
-// This is a parser to check what type of indention is needed.
-func priorityIndentation(blocks []tagInfo) (string, bool) {
-    if len(blocks) == 0 {
-        return "", false
+    e.origWriter.WriteString(e.strBuilder.String())
+    if e.newlineBeforeNextBlock {
+        e.origWriter.WriteString("\n")
+        e.newlineBeforeNextBlock = false
    }
-    v, ok := style[blocks[0].tag]
-    return v, ok
+    e.strBuilder.Reset()
 }
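htmlTokenEncoder.Encode pushes a nested handler with the depth of its start tag and pops it only when an end tag brings the walk back to that same depth. The toy program below (not SDK code; it tracks only <li> tags) illustrates that depth-matching idea.

package main

import (
    "fmt"
    "io"
    "strings"

    xhtml "golang.org/x/net/html"
    "golang.org/x/net/html/atom"
)

func main() {
    z := xhtml.NewTokenizer(strings.NewReader(
        `<ul><li>Foo <b>bold</b></li><li>Bar</li></ul>`))

    depth := 0
    var stack []int // depths at which a "handler" was pushed

    for {
        switch z.Next() {
        case xhtml.ErrorToken:
            if z.Err() == io.EOF {
                return
            }
            panic(z.Err())
        case xhtml.StartTagToken:
            depth++
            if z.Token().DataAtom == atom.Li {
                stack = append(stack, depth)
                fmt.Println("push <li> handler at depth", depth)
            }
        case xhtml.EndTagToken:
            // Only the handler whose start tag opened at this depth is popped;
            // the </b> inside the first <li> does not end the <li> handler.
            if len(stack) > 0 && stack[len(stack)-1] == depth {
                stack = stack[:len(stack)-1]
                fmt.Println("pop handler at depth", depth)
            }
            depth--
        }
    }
}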
 
-// Divides into scopes based off levels.
-// For instance,
-// <pre>
-//    Testing123
-// </pre>
-// This has 2 scopes, the <pre> and