diff options
author | Max Magorsch <arzano@gentoo.org> | 2020-06-22 20:48:50 +0000 |
---|---|---|
committer | Max Magorsch <arzano@gentoo.org> | 2020-06-22 20:48:50 +0000 |
commit | 247543b9ebade874d0278ac73d40d8ef86b6824f (patch) | |
tree | d43e4ea9cfcb5598175cb31e31ab3134d9bf84be | |
parent | Cache pages to improve the performance (diff) | |
download | archives-247543b9ebade874d0278ac73d40d8ef86b6824f.tar.gz archives-247543b9ebade874d0278ac73d40d8ef86b6824f.tar.bz2 archives-247543b9ebade874d0278ac73d40d8ef86b6824f.zip |
Improve the importer further
Wait until all goroutines have finished before
terminating the application. Also improve the
handling of nested multipart bodies.
Signed-off-by: Max Magorsch <arzano@gentoo.org>
-rw-r--r-- | archives.go | 1 | ||||
-rw-r--r-- | pkg/importer/utils.go | 87 |
2 files changed, 77 insertions, 11 deletions
diff --git a/archives.go b/archives.go index dd517ed..7cbba67 100644 --- a/archives.go +++ b/archives.go @@ -39,6 +39,7 @@ func main() { app.Serve() } + importer.WaitGroup.Wait() } // TODO this has to be solved differently diff --git a/pkg/importer/utils.go b/pkg/importer/utils.go index de5b27c..026ba41 100644 --- a/pkg/importer/utils.go +++ b/pkg/importer/utils.go @@ -14,6 +14,7 @@ import ( "os" "regexp" "strings" + "sync" "time" ) @@ -27,6 +28,9 @@ type MailIdentifier struct { var mails []*models.Message // TODO +var WaitGroup sync.WaitGroup + +// TODO func initImport(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -69,19 +73,21 @@ func importMail(path, filename string) error { return err } + WaitGroup.Add(1) go importIntoDatabase(path, filename, m) return nil } func importIntoDatabase(path, filename string, m *mail.Message) { + bodyContent, attachments := parseBody(m) msg := models.Message{ Id: m.Header.Get("X-Archives-Hash"), MessageId: m.Header.Get("Message-Id"), Filename: filename, From: m.Header.Get("From"), - To: strings.Split(m.Header.Get("To"), ","), - Cc: strings.Split(m.Header.Get("Cc"), ","), + To: parseAddressList(m.Header.Get("To")), + Cc: parseAddressList(m.Header.Get("Cc")), Subject: m.Header.Get("Subject"), List: getListName(path), @@ -90,8 +96,8 @@ func importIntoDatabase(path, filename string, m *mail.Message) { Date: getDate(m.Header), InReplyToId: getInReplyToMail(m.Header.Get("In-Reply-To"), m.Header.Get("From")), //References: getReferencesToMail(strings.Split(m.Header.Get("References"), ","), m.Header.Get("From")), - Body: getBody(m.Header, m.Body), - Attachments: getAttachments(m.Header, m.Body), + Body: bodyContent, + Attachments: attachments, StartsThread: m.Header.Get("In-Reply-To") == "" && m.Header.Get("References") == "", @@ -107,6 +113,15 @@ func importIntoDatabase(path, filename string, m *mail.Message) { insertReferencesToMail(strings.Split(m.Header.Get("References"), ","), 
m.Header.Get("X-Archives-Hash"), m.Header.Get("From")) + WaitGroup.Done() +} + +func parseAddressList(addressList string) []string { + result := strings.Split(addressList, ",") + if len(result) == 1 && strings.TrimSpace(result[0]) == "" { + return nil + } + return result } func getInReplyToMail(messageId, from string) string { @@ -162,21 +177,25 @@ func getDepth(path, maildirPath string) int { return strings.Count(strings.ReplaceAll(path, maildirPath, ""), "/") } -func getBody(header mail.Header, body io.Reader) string { +func parseBody(m *mail.Message) (string, []models.Attachment) { + header := m.Header + body := m.Body + foundPlainText := false if isMultipartMail(header) { + var attachments []models.Attachment boundary := regexp.MustCompile(`boundary="(.*?)"`). FindStringSubmatch( header.Get("Content-Type")) if len(boundary) != 2 { //err - return "" + return "", attachments } parsedBody := "" mr := multipart.NewReader(body, boundary[1]) for { p, err := mr.NextPart() if err != nil { - return parsedBody + return parsedBody, attachments } bodyContent, err := ioutil.ReadAll(p) if err != nil { @@ -185,15 +204,61 @@ func getBody(header mail.Header, body io.Reader) string { continue } if strings.Contains(p.Header.Get("Content-Type"), "text/plain") { - return string(bodyContent) - } else if strings.Contains(p.Header.Get("Content-Type"), "text/html") { parsedBody = string(bodyContent) + foundPlainText = true + } else if strings.Contains(p.Header.Get("Content-Type"), "text/html") { + if !foundPlainText { + parsedBody = string(bodyContent) + } + } else if strings.Contains(p.Header.Get("Content-Type"), "multipart") { + //////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////// + boundary := regexp.MustCompile(`boundary="(.*?)"`). 
+ FindStringSubmatch(p.Header.Get("Content-Type")) + if len(boundary) != 2 { + //err + continue + } + mr := multipart.NewReader(bytes.NewReader(bodyContent), boundary[1]) + for { + p, err := mr.NextPart() + if err != nil { + break + } + bodyContent, err := ioutil.ReadAll(p) + if err != nil { + fmt.Println("Error while reading the body:") + fmt.Println(err) + continue + } + if strings.Contains(p.Header.Get("Content-Type"), "text/plain") { + parsedBody = string(bodyContent) + } else if strings.Contains(p.Header.Get("Content-Type"), "text/html") { + if !foundPlainText { + parsedBody = string(bodyContent) + } + } else { + attachments = append(attachments, models.Attachment{ + Filename: getAttachmentFileName(p.Header.Get("Content-Type")), + Mime: p.Header.Get("Content-Type"), + Content: string(bodyContent), + }) + } + } + //////////////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////////////// + } else { + attachments = append(attachments, models.Attachment{ + Filename: getAttachmentFileName(p.Header.Get("Content-Type")), + Mime: p.Header.Get("Content-Type"), + Content: string(bodyContent), + }) } } - return parsedBody + return parsedBody, attachments } else { content, _ := ioutil.ReadAll(body) - return string(content) + return string(content), nil } } |