Skip to content

Commit

Permalink
Use the net/textproto package to parse the message's Content-Type header
Browse files (browse the repository at this point in the history)
  • Loading branch information
mzimmerman committed Aug 1, 2017
1 parent a82395a commit 68bf596
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 24 deletions.
50 changes: 29 additions & 21 deletions scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"errors"
"io"
"net/mail"
"net/textproto"
"strings"
)

// ErrInvalidMboxFormat is the error returned by the Next method of type Mbox if
Expand All @@ -34,25 +36,6 @@ func scanMessage(data []byte, atEOF bool) (int, []byte, error) {
advanceExtra = 1
e = bytes.Index(data, []byte("\nFrom "))
}
if e > 0 {
if ctStart := bytes.Index(data[:e+1], []byte(boundarySep)); ctStart != -1 {
if ctEnd := bytes.Index(data[ctStart+len(boundarySep):], []byte("\"")); ctEnd != -1 {
//log.Printf("ctStart = %d, ctEnd = %d", ctStart, ctEnd)
boundary := []byte("\n--" + string(data[ctStart+len(boundarySep):ctStart+len(boundarySep)+ctEnd]) + "--\n")
//log.Printf("Looking for boundary -- %q", boundary)
if boundEnd := bytes.Index(data, boundary); boundEnd == -1 {
//log.Printf("Asking for more!")
return 0, nil, nil // ask for more! We don't see the end boundary yet
} else {
e = bytes.Index(data[boundEnd:], []byte("\nFrom "))
if e != -1 {
e += boundEnd
}
//log.Printf("oldE = %d, newE = %d", oldE, e)
}
}
}
}
if e == -1 && !atEOF {
// request more data
return advanceExtra, nil, nil
Expand All @@ -72,14 +55,39 @@ func scanMessage(data []byte, atEOF bool) (int, []byte, error) {
}
return len(data) + advanceExtra, data[n+1:], nil
}
tpr := textproto.NewReader(bufio.NewReader(bytes.NewReader(data[n+1:])))
header, err := tpr.ReadMIMEHeader()
if err != nil {
return 0, nil, err
}
cth := header.Get("Content-Type")
boundaryEnd := ""
if strings.Contains(cth, "multipart") {
splt := strings.Split(cth, "; ")
for _, v := range splt {
if strings.HasPrefix(v, "boundary=") {
c := strings.Index(v, "=") + 1
boundaryEnd = "--" + strings.Trim(v[c:], `"'`) + "--"
break
}
}
}
if boundaryEnd != "" {
b := bytes.Index(data, []byte(boundaryEnd))
if b == -1 {
return 0, nil, nil // need more data!
}
if e < b {
e = bytes.Index(data[b:], []byte("\nFrom "))
e += b
}
}
if data[e-1] != '\n' {
return e + 1 + advanceExtra, data[n+1 : e+1], nil
}
return e + 1 + advanceExtra, data[n+1 : e], nil
}

const boundarySep = "Content-Type: multipart/alternative; boundary=\""

// Scanner provides an interface to read a sequence of messages from an mbox.
// Calling the Next method steps through the messages. The current message can
// then be accessed by calling the Message method.
Expand Down
77 changes: 74 additions & 3 deletions scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,8 @@ From one place.
From: herp.derp at example.com (Herp Derp)
Date: Thu, 01 Jan 2015 00:00:01 +0100
Subject: Test
Content-Type: multipart/alternative; boundary="_----------=bestboundaryever"
Content-Type: multipart/alternative;
boundary=Apple-Mail-D55D9B1A-A379-4D5C-BDA9-00D35DF424A0
This is a test of boundaries. Don't accept a new email via \nFrom until the boundary is done!'
Expand All @@ -441,7 +442,7 @@ From Herp Derp with love.
From Herp Derp with love.
Bye.
--_----------=bestboundaryever--
--Apple-Mail-D55D9B1A-A379-4D5C-BDA9-00D35DF424A0--
From another!
From: herp.derp at example.com (Herp Derp)
Expand All @@ -451,7 +452,77 @@ Subject: Test
This is the second email in a test of boundaries.
`
expected := []string{
"This is a test of boundaries. Don't accept a new email via \\nFrom until the boundary is done!'\n\nAnd, by the way, this is how a \"From\" line is escaped in mboxo format:\nFrom Herp Derp with love.\n\nFrom Herp Derp with love.\n\nBye.\n--_----------=bestboundaryever--\n",
"This is a test of boundaries. Don't accept a new email via \\nFrom until the boundary is done!'\n\nAnd, by the way, this is how a \"From\" line is escaped in mboxo format:\nFrom Herp Derp with love.\n\nFrom Herp Derp with love.\n\nBye.\n--Apple-Mail-D55D9B1A-A379-4D5C-BDA9-00D35DF424A0--\n",
"This is the second email in a test of boundaries.\n",
}
b := bytes.NewBufferString(sourceData)
m := NewScanner(b)

for i := range expected {
if !m.Next() {
t.Errorf("Next() failed; pass %d", i)
}
if m.Err() != nil {
t.Errorf("Unexpected error after Next(): %v", m.Err())
}

msg := m.Message()
if msg == nil {
t.Errorf("message is nil; pass %d", i)
continue
}
body := new(bytes.Buffer)
_, err := body.ReadFrom(msg.Body)
if err != nil {
t.Errorf("%d - Unexpected error reading message body: %v", i, err)
continue
}
if body.String() != expected[i] {
t.Errorf("%d - Expected:\n %q\ngot\n%q", i, expected[i], body.String())
}
if m.Err() != nil {
t.Errorf("%d - Unexpected error after Message(): %v", i, m.Err())
}
}

if m.Next() {
t.Errorf("Next() succeeded")
}
if m.Err() != nil {
t.Errorf("Unexpected error after Next(): %v", m.Err())
}
if msg := m.Message(); msg != nil {
t.Errorf("message is not nil")
}
if m.Err() != nil {
t.Errorf("Unexpected error after Message(): %v", m.Err())
}
}

func TestScanMessageWithTextBoundary(t *testing.T) {
sourceData := `
From one place.
From: herp.derp at example.com (Herp Derp)
Date: Thu, 01 Jan 2015 00:00:01 +0100
Subject: Test
Content-Type: text/html; charset="utf-8";
boundary="monkey_d3df4dc8-da5e-47dd-be15-f19c5ed55194"
This is a test of boundaries. Don't accept a new email via \nFrom until the boundary is done!'
And, by the way, this is how a "From" line is escaped in mboxo format:
Bye.
From another!
From: herp.derp at example.com (Herp Derp)
Date: Thu, 01 Jan 2015 00:00:01 +0100
Subject: Test
This is the second email in a test of boundaries.
`
expected := []string{
"This is a test of boundaries. Don't accept a new email via \\nFrom until the boundary is done!'\n\nAnd, by the way, this is how a \"From\" line is escaped in mboxo format:\n\nBye.\n",
"This is the second email in a test of boundaries.\n",
}
b := bytes.NewBufferString(sourceData)
Expand Down

0 comments on commit 68bf596

Please sign in to comment.