Skip to content
Snippets Groups Projects
Commit a70327da authored by Thibault Delavallée's avatar Thibault Delavallée
Browse files

[IMP] tools, base, mail: better support non-ascii / IDNA when normalizing

PURPOSE

Be defensive when dealing with email fields, notably when having multi-emails
or email field containing an already-formatted email.

SPECIFICATIONS

As per RFC 5322 section 3.4.1, the local-part is case-sensitive. However, most
major providers treat the local-part as case-insensitive. With the introduction
of SMTPUTF8 within Odoo, this assumption is certain to fall short for
international emails. We now consider that

  * if the local part is ASCII: still normalize it to lower case;
  * else: use it as is, since SMTPUTF8 is made for non-ASCII local parts;

Concerning the domain part of the address, as of v14 international domains (IDNA)
are handled fine. The domain is always lowercase, so lowering it is fine as an
uppercase domain is probably an error. With the introduction of IDNA, there is an
encoding that allows non-ASCII characters to be encoded to ASCII ones, using 'idna.encode'.

Also remove usage of 'email_re' in mailing email check. It is too restrictive
compared to real formatting we support (or try to). Valid outgoing emails
were directly canceled, notably when containing unicode.

Task-2612945 (Mail: Defensive email formatting)

Part-of: odoo/odoo#74474
parent 516ccbc9
No related branches found
No related tags found
No related merge requests found
......@@ -65,7 +65,7 @@ class MailComposeMessage(models.TransientModel):
partner_id = (mail_values.get('recipient_ids') or [(False, '')])[0][1]
mail_to = tools.email_normalize(partners_email.get(partner_id), force_single=False)
if (opt_out_list and mail_to in opt_out_list) or (seen_list and mail_to in seen_list) \
or (not mail_to or not email_re.findall(mail_to)):
or not mail_to:
# prevent sending to blocked addresses that were included by mistake
mail_values['state'] = 'cancel'
elif seen_list is not None:
......
......@@ -214,9 +214,11 @@ class TestMassMailing(TestMassMailCommon):
'partner': customer_fmt,
'state': 'sent'},
{'email': 'test.customer.😊@example.com',
# mail to avoids double encapsulation
'email_to_recipients': [[f'"{customer_unic.name}" <test.customer.😊@example.com>']],
'failure_type': False,
'partner': customer_unic,
'state': 'ignored'}, # email_re usage forbids mailing to unicode
'state': 'sent'},
{'email': 'test.customer.case@example.com',
'email_to_recipients': [[f'"{customer_case.name}" <test.customer.case@example.com>']],
'failure_type': False,
......@@ -242,7 +244,7 @@ class TestMassMailing(TestMassMailCommon):
'state': 'sent'},
{'email': 'record.😊@example.com',
'failure_type': False,
'state': 'ignored'}, # email_re usage forbids mailing to unicode
'state': 'sent'},
],
mailing,
test_records,
......
......@@ -397,6 +397,8 @@ class TestEmailTools(BaseCase):
""" Test 'email_normalize'. Note that it is built on 'email_split' so
some use cases are already managed in 'test_email_split(_and_format)'
hence having more specific test cases here about normalization itself. """
format_name = 'My Super Prénom'
format_name_ascii = '=?utf-8?b?TXkgU3VwZXIgUHLDqW5vbQ==?='
sources = [
'"Super Déboulonneur" <deboulonneur@example.com>', # formatted
'Déboulonneur deboulonneur@example.com', # wrong formatting
......@@ -409,17 +411,42 @@ class TestEmailTools(BaseCase):
]
expected_list = [
'deboulonneur@example.com',
'déboulonneur deboulonneur@example.com',
'Déboulonneur deboulonneur@example.com',
'deboulonneur@example.comdéboulonneur',
False,
'@example.com', # funny
'deboulonneur.😊@example.com',
'déboulonneur@examplé.com',
'déboulonneur@examplé.com',
'DéBoulonneur@examplé.com',
]
for source, expected in zip(sources, expected_list):
expected_fmt_utf8_list = [
f'"{format_name}" <deboulonneur@example.com>',
f'"{format_name}" <Déboulonneur deboulonneur@example.com>',
f'"{format_name}" <deboulonneur@example.comdéboulonneur>',
f'"{format_name}" <@>',
f'"{format_name}" <@example.com>',
f'"{format_name}" <deboulonneur.😊@example.com>',
f'"{format_name}" <déboulonneur@examplé.com>',
f'"{format_name}" <DéBoulonneur@examplé.com>',
]
expected_fmt_ascii_list = [
f'{format_name_ascii} <deboulonneur@example.com>',
f'{format_name_ascii} <Déboulonneur deboulonneur@example.com>',
f'{format_name_ascii} <deboulonneur@example.xn--comdboulonneur-ekb>',
f'{format_name_ascii} <@>',
f'{format_name_ascii} <@example.com>',
f'{format_name_ascii} <deboulonneur.😊@example.com>',
f'{format_name_ascii} <déboulonneur@xn--exampl-gva.com>',
f'{format_name_ascii} <DéBoulonneur@xn--exampl-gva.com>',
]
for source, expected, expected_utf8_fmt, expected_ascii_fmt in zip(sources, expected_list, expected_fmt_utf8_list, expected_fmt_ascii_list):
with self.subTest(source=source):
self.assertEqual(email_normalize(source, force_single=True), expected)
# standard usage of formataddr
self.assertEqual(formataddr((format_name, (expected or '')), charset='utf-8'), expected_utf8_fmt)
# check using IDNA at format time, using ascii charset as done when
# sending emails (see extract_rfc2822_addresses)
self.assertEqual(formataddr((format_name, (expected or '')), charset='ascii'), expected_ascii_fmt)
def test_email_split(self):
""" Test 'email_split' """
......
......@@ -459,7 +459,6 @@ mail_header_msgid_re = re.compile('<[^<>]+>')
email_addr_escapes_re = re.compile(r'[\\"]')
def generate_tracking_message_id(res_id):
"""Returns a string that can be used in the Message-ID RFC822 header field
......@@ -538,14 +537,26 @@ def email_split_and_format(text):
return [formataddr((name, email)) for (name, email) in email_split_tuples(text)]
def email_normalize(text, force_single=True):
""" Sanitize and standardize email address entries.
A normalized email is considered as :
- having a left part + @ + a right part (the domain can be without '.something')
- being lower case
- having no name before the address. Typically, having no 'Name <>'
Ex:
- Possible Input Email : 'Name <NaMe@DoMaIn.CoM>'
- Normalized Output Email : 'name@domain.com'
""" Sanitize and standardize email address entries. As of rfc5322 section
3.4.1 local-part is case-sensitive. However most main providers do consider
the local-part as case insensitive. With the introduction of smtp-utf8
within odoo, this assumption is certain to fall short for international
emails. We now consider that
* if the local part is ASCII: still normalize it to lower case;
* else: use it as is, since SMTPUTF8 is made for non-ASCII local parts;
Concerning domain part of the address, as of v14 international domain (IDNA)
are handled fine. The domain is always lowercase, lowering it is fine as it
is probably an error. With the introduction of IDNA, there is an encoding
that allow non-ascii characters to be encoded to ascii ones, using 'idna.encode'.
A normalized email is considered as :
- having a left part + @ + a right part (the domain can be without '.something')
- having no name before the address. Typically, having no 'Name <>'
Ex:
- Possible Input Email : 'Name <NaMe@DoMaIn.CoM>'
- Normalized Output Email : 'name@domain.com'
:param boolean force_single: if True, text should contain a single email
(default behavior in stable 14+). If more than one email is found no
......@@ -556,7 +567,16 @@ def email_normalize(text, force_single=True):
emails = email_split(text)
if not emails or (len(emails) != 1 and force_single):
return False
return emails[0].lower()
local_part, at, domain = emails[0].rpartition('@')
try:
local_part.encode('ascii')
except UnicodeEncodeError:
pass
else:
local_part = local_part.lower()
return local_part + at + domain.lower()
def email_normalize_all(text):
""" Tool method allowing to extract email addresses from a text input and returning
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment