[FIX] website_slides: get video id from youtube url

The regexp used to get the YouTube video id from its URL is broken in multiple ways.

If the video id starts with the letter 'v' in the short (youtu.be) URL, then the URL is considered invalid, e.g. https://youtu.be/vmhB-pt7EfA

If the query option `v=<video id>` is not the first option of the query in the regular URL (youtube.com/watch), then the URL is also considered invalid, e.g. https://www.youtube.com/watch?feature=youtu.be&v=vmhB-pt7EfA

The problem has been solved by replacing the over-engineered regexp with the dedicated parsing tools of `urllib.parse`.

opw-2006330

closes odoo/odoo#34036

Signed-off-by: Julien Castiaux <Julien00859@users.noreply.github.com>

[FIX] website_slides: get video id from youtube url
cfa1e101 · Julien Castiaux · 38cc2952 · cfa1e101 · cfa1e101 · cfa1e101
Commit cfa1e101 authored 5 years ago by Julien Castiaux
--- a/addons/website_slides/models/slides.py
+++ b/addons/website_slides/models/slides.py
@@ -531,11 +531,16 @@ class Slide(models.Model):
        return result

    def _find_document_data_from_url(self, url):
-        expr = re.compile(r'^.*((youtu.be/)|(v/)|(\/u\/\w\/)|(embed\/)|(watch\?))\??v?=?([^#\&\?]*).*')
-        arg = expr.match(url)
-        document_id = arg and arg.group(7) or False
-        if document_id:
-            return ('youtube', document_id)
+        url_obj = urls.url_parse(url)
+        if url_obj.ascii_host == 'youtu.be':
+            return ('youtube', url_obj.path[1:] if url_obj.path else False)
+        elif url_obj.ascii_host in ('youtube.com', 'www.youtube.com', 'm.youtube.com'):
+            v_query_value = url_obj.decode_query().get('v')
+            if v_query_value:
+                return ('youtube', v_query_value)
+            split_path = url_obj.path.split('/')
+            if len(split_path) >= 3 and split_path[1] in ('v', 'embed'):
+                return ('youtube', split_path[2])

        expr = re.compile(r'(^https:\/\/docs.google.com|^https:\/\/drive.google.com).*\/d\/([^\/]*)')
        arg = expr.match(url)

--- a/addons/website_slides/tests/__init__.py
+++ b/addons/website_slides/tests/__init__.py
+from . import test_from_url
\ No newline at end of file
--- a/addons/website_slides/tests/test_from_url.py
+++ b/addons/website_slides/tests/test_from_url.py
+import odoo.tests
+
+
+class TestFromURL(odoo.tests.TransactionCase):
+    """Check YouTube video id extraction by ``_find_document_data_from_url``."""
+
+    def test_youtube_urls(self):
+        """Each listed URL must resolve to ``('youtube', <expected video id>)``."""
+        # Expected video id -> URLs that must all resolve to that id.
+        urls_by_video_id = {
+            'W0JQcpGLSFw': [
+                'https://youtu.be/W0JQcpGLSFw',
+                'https://www.youtube.com/watch?v=W0JQcpGLSFw',
+                'https://www.youtube.com/watch?v=W0JQcpGLSFw&list=PL1-aSABtP6ACZuppkBqXFgzpNb2nVctZx',
+            ],
+            'vmhB-pt7EfA': [  # id starts with v, it is important
+                'https://youtu.be/vmhB-pt7EfA',
+                'https://www.youtube.com/watch?feature=youtu.be&v=vmhB-pt7EfA',
+                'https://www.youtube.com/watch?v=vmhB-pt7EfA&list=PL1-aSABtP6ACZuppkBqXFgzpNb2nVctZx&index=7',
+            ],
+            'hlhLv0GN1hA': [
+                'https://www.youtube.com/v/hlhLv0GN1hA',
+                'https://www.youtube.com/embed/hlhLv0GN1hA',
+                'https://m.youtube.com/watch?v=hlhLv0GN1hA',
+            ],
+        }
+
+        model = self.env['slide.slide']
+        # Distinct loop names: do not shadow the builtin ``id`` and do not
+        # rebind the dict that is being iterated.
+        for video_id, video_urls in urls_by_video_id.items():
+            for url in video_urls:
+                # The ``id`` keyword is kept so sub-test failure labels stay the same.
+                with self.subTest(url=url, id=video_id):
+                    document = model._find_document_data_from_url(url)
+                    self.assertEqual(document[0], 'youtube')
+                    self.assertEqual(document[1], video_id)