From dc0ccc1916182e472f864909813f03ce422c0df7 Mon Sep 17 00:00:00 2001
From: miad-odoo <miad@odoo.com>
Date: Tue, 11 Apr 2023 14:41:42 +0200
Subject: [PATCH] [FIX] mass_mailing: fix finding duplicate mails

Before the commit, the _get_seen_list() function in the mass_mailing module was
not able to correctly identify all the duplicate email addresses in a given mass
mailing. This was because the function chose and used only one way to find an
email address for each record in the mailing list, even though there are many
ways to find an email address for a record.

For example, a crm.lead record might have an email address in its partner_id
field, but it might also have an email address in its email_normalized field.
This can vary from record to record.

To fix this issue, the _get_seen_list() function was updated to only look at the
email address to which emails have already been sent, rather than trying to
fetch it from the record itself. This ensures that all duplicate emails are
correctly identified and that no duplicate emails are sent in the mass mailing.

Task-3234378

closes odoo/odoo#118220

Signed-off-by: Thibault Delavallee (tde) <tde@openerp.com>
---
 addons/mass_mailing/models/mailing.py         | 31 +++--------------
 .../models/mailing_models.py                  | 10 ++++++
 .../security/ir.model.access.csv              |  2 ++
 .../test_mass_mailing/tests/test_mailing.py   | 33 ++++++++++++++++++-
 4 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/addons/mass_mailing/models/mailing.py b/addons/mass_mailing/models/mailing.py
index 13cd2e35b27a..d177f1d88fcc 100644
--- a/addons/mass_mailing/models/mailing.py
+++ b/addons/mass_mailing/models/mailing.py
@@ -470,36 +470,13 @@ class MassMailing(models.Model):
         self.ensure_one()
         target = self.env[self.mailing_model_real]
 
-        # avoid loading a large number of records in memory
-        # + use a basic heuristic for extracting emails
         query = """
-            SELECT lower(substring(t.%(mail_field)s, '([^ ,;<@]+@[^> ,;]+)'))
+            SELECT s.email
               FROM mailing_trace s
               JOIN %(target)s t ON (s.res_id = t.id)
-             WHERE substring(t.%(mail_field)s, '([^ ,;<@]+@[^> ,;]+)') IS NOT NULL
+             WHERE s.email IS NOT NULL
         """
 
-        # Apply same 'get email field' rule from mail_thread.message_get_default_recipients
-        if 'partner_id' in target._fields and target._fields['partner_id'].store:
-            mail_field = 'email'
-            query = """
-                SELECT lower(substring(p.%(mail_field)s, '([^ ,;<@]+@[^> ,;]+)'))
-                  FROM mailing_trace s
-                  JOIN %(target)s t ON (s.res_id = t.id)
-                  JOIN res_partner p ON (t.partner_id = p.id)
-                 WHERE substring(p.%(mail_field)s, '([^ ,;<@]+@[^> ,;]+)') IS NOT NULL
-            """
-        elif issubclass(type(target), self.pool['mail.thread.blacklist']):
-            mail_field = 'email_normalized'
-        elif 'email_from' in target._fields and target._fields['email_from'].store:
-            mail_field = 'email_from'
-        elif 'partner_email' in target._fields and target._fields['partner_email'].store:
-            mail_field = 'partner_email'
-        elif 'email' in target._fields and target._fields['email'].store:
-            mail_field = 'email'
-        else:
-            raise UserError(_("Unsupported mass mailing model %s", self.mailing_model_id.name))
-
         if self.unique_ab_testing:
             query += """
                AND s.campaign_id = %%(mailing_campaign_id)s;
@@ -509,8 +486,8 @@ class MassMailing(models.Model):
                AND s.mass_mailing_id = %%(mailing_id)s
                AND s.model = %%(target_model)s;
             """
-        query = query % {'target': target._table, 'mail_field': mail_field}
-        params = {'mailing_id': self.id, 'mailing_campaign_id': self.campaign_id.id, 'target_model': self.mailing_model_real}
+        query = query % {'target': target._table}
+        params = {'mailing_campaign_id': self.campaign_id.id, 'mailing_id': self.id, 'target_model': self.mailing_model_real}
         self._cr.execute(query, params)
         seen_list = set(m[0] for m in self._cr.fetchall())
         _logger.info(
diff --git a/addons/test_mass_mailing/models/mailing_models.py b/addons/test_mass_mailing/models/mailing_models.py
index 8748f4792110..2350e7aa1921 100644
--- a/addons/test_mass_mailing/models/mailing_models.py
+++ b/addons/test_mass_mailing/models/mailing_models.py
@@ -108,6 +108,16 @@ class MailingOptOut(models.Model):
                 }
         return default_recipients
 
+class MailingTestPartner(models.Model):
+    _description = 'Mailing Model with partner_id'
+    _name = 'mailing.test.partner'
+    _inherit = ['mail.thread.blacklist']
+    _primary_email = 'email_from'
+
+    name = fields.Char()
+    email_from = fields.Char()
+    partner_id = fields.Many2one('res.partner', 'Customer')
+
 
 class MailingPerformance(models.Model):
     """ A very simple model only inheriting from mail.thread to test pure mass
diff --git a/addons/test_mass_mailing/security/ir.model.access.csv b/addons/test_mass_mailing/security/ir.model.access.csv
index 617995ff0fac..0dd20b446d7c 100644
--- a/addons/test_mass_mailing/security/ir.model.access.csv
+++ b/addons/test_mass_mailing/security/ir.model.access.csv
@@ -11,6 +11,8 @@ access_mailing_performance_all,access.mailing.performance.all,model_mailing_perf
 access_mailing_performance_user,access.mailing.performance.user,model_mailing_performance,base.group_user,1,1,1,1
 access_mailing_performance_blacklist_all,access.mailing.performance.blacklist.all,model_mailing_performance_blacklist,,0,0,0,0
 access_mailing_performance_blacklist_user,access.mailing.performance.blacklist.user,model_mailing_performance_blacklist,base.group_user,1,1,1,1
+access_mailing_test_partner_all,access.mailing.test.partner.all,model_mailing_test_partner,,0,0,0,0
+access_mailing_test_partner_user,access.mailing.test.partner.user,model_mailing_test_partner,base.group_user,1,1,1,1
 access_mailing_test_partner_unstored_all,access.mailing.test.partner.unstored.all,model_mailing_test_partner_unstored,,0,0,0,0
 access_mailing_test_partner_unstored_user,access.mailing.test.partner.unstored.user,model_mailing_test_partner_unstored,base.group_user,1,1,1,1
 access_mailing_test_utm_all,access.mailing.test.utm.all,model_mailing_test_utm,,0,0,0,0
diff --git a/addons/test_mass_mailing/tests/test_mailing.py b/addons/test_mass_mailing/tests/test_mailing.py
index 34dabd4b451b..56fd2c5f92ec 100644
--- a/addons/test_mass_mailing/tests/test_mailing.py
+++ b/addons/test_mass_mailing/tests/test_mailing.py
@@ -5,7 +5,7 @@ from odoo.addons.test_mass_mailing.data.mail_test_data import MAIL_TEMPLATE
 from odoo.addons.test_mass_mailing.tests.common import TestMassMailCommon
 from odoo.tests import tagged
 from odoo.tests.common import users
-from odoo.tools import mute_logger
+from odoo.tools import email_normalize, mute_logger
 
 
 @tagged('mass_mailing')
@@ -412,6 +412,37 @@ class TestMassMailing(TestMassMailCommon):
         )
         self.assertEqual(mailing.ignored, 3)
 
+    @users('user_marketing')
+    def test_mailing_w_seenlist(self):
+        """
+        Tests whether function `_get_seen_list` is correctly able to identify duplicate emails,
+        even through different batches.
+        Mails use different names to make sure they are recognized as duplicates even without being
+        normalized (e.g.: '"jc" <0@example.com>' and '"vd" <0@example.com>' are duplicates)
+        """
+        BATCH_SIZE = 5
+        names = ['jc', 'vd']
+        emails = [f'test.{i}@example.com' for i in range(BATCH_SIZE)]
+        records = self.env['mailing.test.partner'].create([{
+            'name': f'test_duplicates {i}', 'email_from': f'"{names[i % 2]}" <{emails[i % BATCH_SIZE]}>'
+        } for i in range(20)])
+
+        mailing = self.env['mailing.mailing'].create({
+            'mailing_domain': [('name', 'ilike', 'test_duplicates %')],
+            'mailing_model_id': self.env.ref('test_mass_mailing.model_mailing_test_partner').id,
+            'name': 'test duplicates',
+            'subject': 'test duplicates',
+        })
+
+        with self.mock_mail_gateway():
+            for i in range(0, 20, BATCH_SIZE):
+                mailing.action_send_mail(records[i:i + BATCH_SIZE].mapped('id'))
+            self.assertEqual(len(self._mails), BATCH_SIZE)
+            self.assertEqual(mailing.ignored, 15)
+            mails_sent = [email_normalize(mail['email_to'][0]) for mail in self._mails]
+            for email in emails:
+                self.assertEqual(mails_sent.count(email), 1)
+
     @users('user_marketing')
     def test_mailing_w_seenlist_unstored_partner(self):
         """ Test seen list when partners are not stored. """
-- 
GitLab