Skip to content
Snippets Groups Projects
Commit 983d5eb9 authored by Thibault Delavallée
Browse files

[FIX] tools: html_email_clean: fixed the signature regex,
which was buggy when the signature contained dots.
Also fixed the insertion of the read more link.

Added test case that triggered the error.

bzr revid: tde@openerp.com-20131016103516-w44j6r5oaljpwvmx
parent 54f74096
No related branches found
No related tags found
No related merge requests found
......@@ -223,6 +223,13 @@ class TestCleaner(unittest2.TestCase):
for ext in test_mail_examples.THUNDERBIRD_1_OUT:
self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
def test_70_read_more(self):
    """Check cleaning + shortening of the BUG1 sample mail.

    Runs html_email_clean on test_mail_examples.BUG1 with remove=True,
    shorten=True and max_length=100, then verifies that every fragment
    listed in BUG_1_IN is still present in the result and that no
    fragment listed in BUG_1_OUT remains.
    """
    new_html = html_email_clean(test_mail_examples.BUG1, remove=True, shorten=True, max_length=100)
    # content that must be kept in the cleaned html
    for ext in test_mail_examples.BUG_1_IN:
        self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed valid content')
    # content that must have been stripped from the cleaned html
    for ext in test_mail_examples.BUG_1_OUT:
        self.assertNotIn(ext, new_html, 'html_email_cleaner did not remove invalid content')
def test_90_misc(self):
# False boolean for text must return empty string
new_html = html_email_clean(False)
......
......@@ -637,3 +637,59 @@ MSOFFICE_3 = """<div>
# Expected-content fixtures for the MSOFFICE_3 sample (presumably the
# fragments to keep / to remove, by analogy with the other *_IN / *_OUT
# lists -- confirm against the test using them).
MSOFFICE_3_IN = [
    'I saw your boss yesterday',
]
MSOFFICE_3_OUT = [
    'I noticed you recently downloaded OpenERP.',
    'You indicated that you wish',
    'Belgium: +32.81.81.37.00',
]
# ------------------------------------------------------------
# Test cases coming from bugs
# ------------------------------------------------------------
# Bug: with this mail the read more link was not apparent and a strange
# message showed up inside the read more span.
# The sample is a plain-text mail wrapped in a single <pre> element: three
# long paragraphs followed by a '--' delimited signature whose lines contain
# dots (phone number, URL).
BUG1 = """<pre>Hi Migration Team,
Paragraph 1, blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah.
Paragraph 2, blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah.
Paragraph 3, blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
blah blah blah blah blah blah blah blah.
Thanks.
Regards,
--
Olivier Laurent
Migration Manager
OpenERP SA
Chaussée de Namur, 40
B-1367 Gérompont
Tel: +32.81.81.37.00
Web: http://www.openerp.com</pre>"""
# Fragments of the BUG1 mail body that must still be present after cleaning.
BUG_1_IN = ['Hi Migration Team', 'Paragraph 1']
# Fragments of the BUG1 signature that must be gone after cleaning.
BUG_1_OUT = ['Olivier Laurent', 'Chaussée de Namur', '81.81.37.00', 'openerp.com']
......@@ -206,7 +206,7 @@ def html_email_clean(html, remove=False, shorten=False, max_length=300):
# form node and tag text-based quotes and signature
quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
signature = re.compile(r'([-]{2,}[\s]?[\r\n]{1,2}[^.]+)')
signature = re.compile(r'([-]{2,}[\s]?[\r\n]{1,2}[\s\S]+)')
for node in root.getiterator():
_tag_matching_regex_in_text(quote_tags, node, 'span', {'text_quote': '1'})
_tag_matching_regex_in_text(signature, node, 'span', {'text_signature': '1'})
......@@ -263,8 +263,7 @@ def html_email_clean(html, remove=False, shorten=False, max_length=300):
# create outertext node
new_node = _create_node('span', outertext[stop_idx:])
# add newly created nodes in dom
node.addnext(new_node)
node.addnext(read_more_node)
node.append(read_more_node)
# tag node
new_node.set('in_overlength', '1')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment