Skip to content

Commit

Permalink
Subject mime decode functions and test
Browse files Browse the repository at this point in the history
  • Loading branch information
Mizuho32 committed Oct 15, 2024
1 parent e1eb8d4 commit e795893
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 0 deletions.
60 changes: 60 additions & 0 deletions store/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,21 @@ package store

import (
"bytes"
"encoding/base64"
"fmt"
"io"
"regexp"
"strings"
"unicode"
"unicode/utf8"

"github.com/mjl-/mox/message"
"github.com/mjl-/mox/mlog"

"golang.org/x/text/encoding"
"golang.org/x/text/encoding/japanese"
encUnicode "golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)

// WordSearch holds context for a search, with scratch buffers to prevent
Expand Down Expand Up @@ -193,3 +201,55 @@ func toLower(buf []byte) []byte {
}
return r
}

// rfc2047WordRE matches a single RFC 2047 encoded-word and captures its
// charset, encoding and encoded text, e.g. =?(iso-2022-jp)?(B)?(Rnc6...)?=
// The encoding token is matched in either case. Compiled once at package
// scope instead of on every call.
var rfc2047WordRE = regexp.MustCompile(`=\?([^?]+)\?([BbQq])\?([^?]+)\?=`)

// decodeRFC2047 decodes the RFC 2047 encoded-words found in encoded and
// returns the resulting UTF-8 text.
//
// Text outside of encoded-words is copied to the result unchanged, except that
// a run consisting only of spaces, tabs, CR and LF between two adjacent
// encoded-words is dropped. If encoded contains no encoded-word at all, it is
// returned as is.
//
// Both the "B" (base64) and "Q" encodings are handled. Only the charsets
// "iso-2022-jp" and "utf-8" (compared case-insensitively) are supported. An
// error is returned, together with an empty string, when an encoded-word has
// an unsupported charset or its payload cannot be decoded.
func decodeRFC2047(encoded string) (string, error) {
	locs := rfc2047WordRE.FindAllStringSubmatchIndex(encoded, -1)

	if len(locs) == 0 { // no match. Looks ASCII.
		return encoded, nil
	}

	var out strings.Builder
	prev := 0 // Offset just past the previous encoded-word.
	for i, loc := range locs {
		// Keep literal text preceding this encoded-word. Whitespace that only
		// separates two encoded-words is folding and is not part of the text.
		gap := encoded[prev:loc[0]]
		if i == 0 || strings.Trim(gap, " \t\r\n") != "" {
			out.WriteString(gap)
		}
		prev = loc[1]

		charset := encoded[loc[2]:loc[3]]
		encodingName := encoded[loc[4]:loc[5]]
		encodedText := encoded[loc[6]:loc[7]]

		// Decode Base64 or Quoted-Printable
		var decodedBytes []byte
		var err error
		switch encodingName {
		case "B", "b":
			decodedBytes, err = base64.StdEncoding.DecodeString(encodedText)
			if err != nil {
				return "", fmt.Errorf("Base64 decode error: %w", err)
			}
		case "Q", "q":
			decodedBytes, err = decodeRFC2047Q(encodedText)
			if err != nil {
				return "", fmt.Errorf("Quoted-Printable decode error: %w", err)
			}
		default:
			// Not reachable with the current pattern; kept so that widening
			// the pattern cannot silently pass undecoded bytes through.
			return "", fmt.Errorf("not supported encoding: %s", encodingName)
		}

		// Select charset
		var enc encoding.Encoding
		switch strings.ToLower(charset) {
		case "iso-2022-jp":
			enc = japanese.ISO2022JP
		case "utf-8":
			enc = encUnicode.UTF8
		default:
			return "", fmt.Errorf("not supported charset: %s", charset)
		}

		// Decode with charset
		reader := transform.NewReader(bytes.NewReader(decodedBytes), enc.NewDecoder())
		decodedText, err := io.ReadAll(reader)
		if err != nil {
			return "", err
		}
		out.Write(decodedText)
	}

	// Keep literal text following the last encoded-word.
	out.WriteString(encoded[prev:])

	return out.String(), nil
}

// decodeRFC2047Q decodes the payload of a "Q"-encoded word: "_" becomes a
// space, "=XX" becomes the byte with hexadecimal value XX, and every other
// byte is copied unchanged. It returns an error for a truncated or non-hex
// escape.
func decodeRFC2047Q(s string) ([]byte, error) {
	out := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		switch c := s[i]; c {
		case '_':
			out = append(out, ' ')
		case '=':
			if i+2 >= len(s) {
				return nil, fmt.Errorf("truncated escape at offset %d", i)
			}
			hi, okHi := unhexRFC2047(s[i+1])
			lo, okLo := unhexRFC2047(s[i+2])
			if !okHi || !okLo {
				return nil, fmt.Errorf("invalid escape %q", s[i:i+3])
			}
			out = append(out, hi<<4|lo)
			i += 2 // Skip the two hex digits just consumed.
		default:
			out = append(out, c)
		}
	}
	return out, nil
}

// unhexRFC2047 returns the value of the hexadecimal digit c, and false if c is
// not a hexadecimal digit. Lowercase digits are accepted as well.
func unhexRFC2047(c byte) (byte, bool) {
	switch {
	case c >= '0' && c <= '9':
		return c - '0', true
	case c >= 'A' && c <= 'F':
		return c - 'A' + 10, true
	case c >= 'a' && c <= 'f':
		return c - 'a' + 10, true
	}
	return 0, false
}
38 changes: 38 additions & 0 deletions store/search_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package store

import (
"fmt"
"testing"
)

// TestSubjectMatch feeds decodeRFC2047 subjects made of two encoded-words
// separated by folding whitespace, once in UTF-8 and once in ISO-2022-JP, plus
// a plain ASCII subject, and checks the decoded text of each.
func TestSubjectMatch(t *testing.T) {
	const (
		plain    = "test text Abc 123..."
		utf8Word = `=?UTF-8?B?44OG44K544OI44OG44Kt44K544OIIEFiYyAxMjMuLi4=?=`
		isoWord  = `=?iso-2022-jp?B?GyRCJUYlOSVIJUYlLSU5JUgbKEIgQWJjIDEyMy4uLg==?=`
		fold     = " \n "
	)

	// Each encoded subject repeats the same word twice, so the expected text
	// is the decoded word twice, with nothing in between.
	decodedOnce := `テストテキスト Abc 123...`
	decodedTwice := decodedOnce + decodedOnce

	cases := map[string]string{
		utf8Word + fold + utf8Word: decodedTwice,
		isoWord + fold + isoWord:   decodedTwice,
		plain:                      plain,
	}

	for input, want := range cases {
		got, err := decodeRFC2047(input)

		fmt.Printf("decoded text:%s\n", got)
		if err != nil {
			t.Fatalf("Decode error: %v", err)
		}

		if want != got {
			t.Fatalf("Decode mismatch %s != %s", want, got)
		}
	}
}

0 comments on commit e795893

Please sign in to comment.