Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Follow git logic when parsing patch identities #44

Merged
merged 2 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 6 additions & 65 deletions gitdiff/patch_header.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,62 +68,6 @@ func (h *PatchHeader) Message() string {
return msg.String()
}

// PatchIdentity identifies a person who authored or committed a patch.
type PatchIdentity struct {
Name string
Email string
}

func (i PatchIdentity) String() string {
name := i.Name
if name == "" {
name = `""`
}
return fmt.Sprintf("%s <%s>", name, i.Email)
}

// ParsePatchIdentity parses a patch identity string. A valid string contains
// an optional name followed by an email address in angle brackets. The angle
// brackets must always exist, but may enclose an empty address. At least one
// of the name or the email address must be non-empty. If the string only
// contains an email address, that value is also used as the name.
//
// The name must not contain a left angle bracket, '<', and the email address
// must not contain a right angle bracket, '>'. Otherwise, there are no
// restrictions on the format of either field.
func ParsePatchIdentity(s string) (PatchIdentity, error) {
var emailStart, emailEnd int
for i, c := range s {
if c == '<' && emailStart == 0 {
emailStart = i + 1
}
if c == '>' && emailStart > 0 {
emailEnd = i
break
}
}
if emailStart > 0 && emailEnd == 0 {
return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s)
}

var name, email string
if emailStart > 0 {
name = strings.TrimSpace(s[:emailStart-1])
}
if emailStart > 0 && emailEnd > 0 {
email = strings.TrimSpace(s[emailStart:emailEnd])
}
if name == "" && email != "" {
name = email
}

if name == "" && email == "" {
return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s)
}

return PatchIdentity{Name: name, Email: email}, nil
}

// ParsePatchDate parses a patch date string. It returns the parsed time or an
// error if s has an unknown format. ParsePatchDate supports the iso, rfc,
// short, raw, unix, and default formats (with local variants) used by the
Expand Down Expand Up @@ -425,16 +369,13 @@ func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*Pa
}
}

addrs, err := msg.Header.AddressList("From")
if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) {
return nil, err
}
if len(addrs) > 0 {
addr := addrs[0]
if addr.Name == "" {
addr.Name = addr.Address
from := msg.Header.Get("From")
if from != "" {
u, err := ParsePatchIdentity(from)
if err != nil {
return nil, err
}
h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address}
h.Author = &u
}

date := msg.Header.Get("Date")
Expand Down
99 changes: 22 additions & 77 deletions gitdiff/patch_header_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,83 +5,6 @@ import (
"time"
)

func TestParsePatchIdentity(t *testing.T) {
tests := map[string]struct {
Input string
Output PatchIdentity
Err interface{}
}{
"simple": {
Input: "Morton Haypenny <[email protected]>",
Output: PatchIdentity{
Name: "Morton Haypenny",
Email: "[email protected]",
},
},
"extraWhitespace": {
Input: " Morton Haypenny <[email protected] > ",
Output: PatchIdentity{
Name: "Morton Haypenny",
Email: "[email protected]",
},
},
"trailingCharacters": {
Input: "Morton Haypenny <[email protected]> unrelated garbage",
Output: PatchIdentity{
Name: "Morton Haypenny",
Email: "[email protected]",
},
},
"onlyEmail": {
Input: "<[email protected]>",
Output: PatchIdentity{
Name: "[email protected]",
Email: "[email protected]",
},
},
"emptyEmail": {
Input: "Morton Haypenny <>",
Output: PatchIdentity{
Name: "Morton Haypenny",
Email: "",
},
},
"missingEmail": {
Input: "Morton Haypenny",
Err: "invalid identity",
},
"missingNameAndEmptyEmail": {
Input: "<>",
Err: "invalid identity",
},
"empty": {
Input: "",
Err: "invalid identity",
},
"unclosedEmail": {
Input: "Morton Haypenny <[email protected]",
Err: "unclosed email",
},
}

for name, test := range tests {
t.Run(name, func(t *testing.T) {
id, err := ParsePatchIdentity(test.Input)
if test.Err != nil {
assertError(t, test.Err, err, "parsing identity")
return
}
if err != nil {
t.Fatalf("unexpected error parsing identity: %v", err)
}

if test.Output != id {
t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id)
}
})
}
}

func TestParsePatchDate(t *testing.T) {
expected := time.Date(2020, 4, 9, 8, 7, 6, 0, time.UTC)

Expand Down Expand Up @@ -349,6 +272,28 @@ Another body line.
Body: expectedBody,
},
},
"mailboxRFC5322SpecialCharacters": {
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>
Date: Sat, 11 Apr 2020 15:21:23 -0700
Subject: [PATCH] A sample commit to test header parsing

The medium format shows the body, which
may wrap on to multiple lines.

Another body line.
`,
Header: PatchHeader{
SHA: expectedSHA,
Author: &PatchIdentity{
Name: "dependabot[bot]",
Email: "12345+dependabot[bot]@users.noreply.github.com",
},
AuthorDate: expectedDate,
Title: expectedTitle,
Body: expectedBody,
},
},
"mailboxAppendix": {
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
From: Morton Haypenny <[email protected]>
Expand Down
166 changes: 166 additions & 0 deletions gitdiff/patch_identity.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
package gitdiff

import (
"fmt"
"strings"
)

// PatchIdentity identifies a person who authored or committed a patch.
type PatchIdentity struct {
Name string
Email string
}

func (i PatchIdentity) String() string {
name := i.Name
if name == "" {
name = `""`
}
return fmt.Sprintf("%s <%s>", name, i.Email)
}

// ParsePatchIdentity parses a patch identity string. A patch identity contains
// an email address and an optional name in [RFC 5322] format. This is either a
// plain email adddress or a name followed by an address in angle brackets:
//
// [email protected]
// Author Name <[email protected]>
//
// If the input is not one of these formats, ParsePatchIdentity applies a
// heuristic to separate the name and email portions. If both the name and
// email are missing or empty, ParsePatchIdentity returns an error. It
// otherwise does not validate the result.
//
// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322
func ParsePatchIdentity(s string) (PatchIdentity, error) {
s = normalizeSpace(s)
s = unquotePairs(s)

var name, email string
if at := strings.IndexByte(s, '@'); at >= 0 {
start, end := at, at
for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' {
start--
}
for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' {
end++
}
email = s[start+1 : end]

// Adjust the boundaries so that we drop angle brackets, but keep
// spaces when removing the email to form the name.
if start < 0 || s[start] != '<' {
start++
}
if end >= len(s) || s[end] != '>' {
end--
}
name = s[:start] + s[end+1:]
} else {
start, end := 0, 0
for i := 0; i < len(s); i++ {
if s[i] == '<' && start == 0 {
start = i + 1
}
if s[i] == '>' && start > 0 {
end = i
break
}
}
if start > 0 && end >= start {
email = strings.TrimSpace(s[start:end])
name = s[:start-1]
}
}

// After extracting the email, the name might contain extra whitespace
// again and may be surrounded by comment characters. The git source gives
// these examples of when this can happen:
//
// "Name <email@domain>"
// "email@domain (Name)"
// "Name <email@domain> (Comment)"
//
name = normalizeSpace(name)
if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") {
name = name[1 : len(name)-1]
}
name = strings.TrimSpace(name)

// If the name is empty or contains email-like characters, use the email
// instead (assuming one exists)
if name == "" || strings.ContainsAny(name, "@<>") {
name = email
}

if name == "" && email == "" {
return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s)
}
return PatchIdentity{Name: name, Email: email}, nil
}

// unquotePairs process the RFC5322 tokens "quoted-string" and "comment" to
// remove any "quoted-pairs" (backslash-espaced characters). It also removes
// the quotes from any quoted strings, but leaves the comment delimiters.
func unquotePairs(s string) string {
quote := false
comments := 0
escaped := false

var out strings.Builder
for i := 0; i < len(s); i++ {
if escaped {
escaped = false
} else {
switch s[i] {
case '\\':
// quoted-pair is only allowed in quoted-string/comment
if quote || comments > 0 {
escaped = true
continue // drop '\' character
}

case '"':
if comments == 0 {
quote = !quote
continue // drop '"' character
}

case '(':
if !quote {
comments++
}
case ')':
if comments > 0 {
comments--
}
}
}
out.WriteByte(s[i])
}
return out.String()
}

// normalizeSpace trims leading and trailing whitespace from s and converts
// inner sequences of one or more whitespace characters to single spaces.
func normalizeSpace(s string) string {
var sb strings.Builder
for i := 0; i < len(s); i++ {
c := s[i]
if !isRFC5332Space(c) {
if sb.Len() > 0 && isRFC5332Space(s[i-1]) {
sb.WriteByte(' ')
}
sb.WriteByte(c)
}
}
return sb.String()
}

func isRFC5332Space(c byte) bool {
switch c {
case '\t', '\n', '\r', ' ':
return true
}
return false
}
Loading
Loading