From b078f717e6bd9e35026390a43fcc876c3f60d31e Mon Sep 17 00:00:00 2001
From: Xavier ALT <xal@odoo.com>
Date: Wed, 2 Aug 2023 09:33:14 +0000
Subject: [PATCH] [FIX] mail: correctly parse body as html for pure-html email

Some external tools send emails as pure HTML (no multipart), and when
parsing such emails we end up with the raw HTML as the body (text).

This commit ensures we correctly parse and sanitize the body as HTML
for such emails.

closes odoo/odoo#130489

Task-id: 3451889
Signed-off-by: Thibault Delavallee (tde) <tde@openerp.com>
---
 addons/mail/models/mail_thread.py           |  3 +++
 addons/test_mail/data/test_mail_data.py     | 30 +++++++++++++++++++++
 addons/test_mail/tests/test_mail_gateway.py |  6 +++++
 3 files changed, 39 insertions(+)

diff --git a/addons/mail/models/mail_thread.py b/addons/mail/models/mail_thread.py
index e07e8f487ab6..43111c60a717 100644
--- a/addons/mail/models/mail_thread.py
+++ b/addons/mail/models/mail_thread.py
@@ -1275,6 +1275,9 @@ class MailThread(models.AbstractModel):
             if message.get_content_type() == 'text/plain':
                 # text/plain -> <pre/>
                 body = tools.append_content_to_html(u'', body, preserve=True)
+            elif message.get_content_type() == 'text/html':
+                # we only strip classes here; everything else will be done by the html field of mail.message
+                body = tools.html_sanitize(body, sanitize_tags=False, strip_classes=True)
         else:
             alternative = False
             mixed = False
diff --git a/addons/test_mail/data/test_mail_data.py b/addons/test_mail/data/test_mail_data.py
index 70e0f4af8a7d..341a18b0409a 100644
--- a/addons/test_mail/data/test_mail_data.py
+++ b/addons/test_mail/data/test_mail_data.py
@@ -105,6 +105,36 @@ Please call me as soon as possible this afternoon!
 Sylvie
 """
 
+MAIL_TEMPLATE_HTML = """Return-Path: {return_path}
+To: {to}
+cc: {cc}
+Received: by mail1.openerp.com (Postfix, from userid 10002)
+    id 5DF9ABFB2A; Fri, 10 Aug 2012 16:16:39 +0200 (CEST)
+From: {email_from}
+Subject: {subject}
+MIME-Version: 1.0
+Content-Type: text/html; charset=utf-8
+Content-Transfer-Encoding: quoted-printable
+Date: Fri, 10 Aug 2012 14:16:26 +0000
+Message-ID: {msg_id}
+{extra}
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+ <head>=20
+  <meta http-equiv=3D"Content-Type" content=3D"text/html; charset=3Dutf-8" />
+ </head>=20
+ <body style=3D"margin: 0; padding: 0; background: #ffffff;-webkit-text-size-adjust: 100%;">=20
+
+  <p>Please call me as soon as possible this afternoon!</p>
+
+  <p>--<br/>
+     Sylvie
+  <p>
+ </body>
+</html>
+"""
+
 MAIL_MULTIPART_MIXED = """Return-Path: <ignasse.carambar@gmail.com>
 X-Original-To: raoul@grosbedon.fr
 Delivered-To: raoul@grosbedon.fr
diff --git a/addons/test_mail/tests/test_mail_gateway.py b/addons/test_mail/tests/test_mail_gateway.py
index a6ce238086d2..4eb22aa18620 100644
--- a/addons/test_mail/tests/test_mail_gateway.py
+++ b/addons/test_mail/tests/test_mail_gateway.py
@@ -42,6 +42,12 @@ class TestEmailParsing(TestMailCommon):
         res = self.env['mail.thread'].message_parse(self.from_string(plaintext))
         self.assertIn('Please call me as soon as possible this afternoon!', res['body'])
 
+        # test pure html
+        html = self.format(test_mail_data.MAIL_TEMPLATE_HTML, email_from='"Sylvie Lelitre" <test.sylvie.lelitre@agrolait.com>')
+        res = self.env['mail.thread'].message_parse(self.from_string(html))
+        self.assertIn('<p>Please call me as soon as possible this afternoon!</p>', res['body'])
+        self.assertNotIn('<!DOCTYPE', res['body'])
+
         # test multipart / text and html -> html has priority
         multipart = self.format(MAIL_TEMPLATE, email_from='"Sylvie Lelitre" <test.sylvie.lelitre@agrolait.com>')
         res = self.env['mail.thread'].message_parse(self.from_string(multipart))
-- 
GitLab