From b1d14ba642eea5bab574090fa32661b487533ec2 Mon Sep 17 00:00:00 2001
From: "Benjamin Hanquin (beha)" <beha@odoo.com>
Date: Thu, 3 Aug 2023 16:22:09 +0200
Subject: [PATCH] [IMP] website: filter out unpublished records for non
 internal users

Issue:
Search on website has a big performance issue because the database has
too many product_template.

Analyze :
The website search mechanism has hardcoded sql query. And the generic
query don't have any filtering. Which lead to make a expensive search
(similarity) on the whole product_template even if the products are
not published and then displayed to the user.

Fix :
Filter the is_published field in the generic website method when the
model has a 'is_published' field and the request is not done by a user
(thus for customers or portal users).

Note:
The internal user is still able to search on unpublished product.Thus
have no performance improvement.

Benchmark:
| SQL Query | # Input data | Before PR | After PR |
|:---------:|:------------:|:---------:|:--------:|
|General best_similarity |700000 products (161 published) | 12.65 s | 0.12 s |
|FROM ir_translation|2,796,000 ir_translation | 6.338 s (586k hit) | 0.091 s (185 hit) |

Related task:
task-3473786

closes odoo/odoo#130734

Signed-off-by: Romain Derie (rde) <rde@odoo.com>
---
 addons/website/models/website.py | 37 ++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/addons/website/models/website.py b/addons/website/models/website.py
index 99e2900844cc..03047483dc96 100644
--- a/addons/website/models/website.py
+++ b/addons/website/models/website.py
@@ -1752,6 +1752,18 @@ class Website(models.Model):
                 similarities=sql.SQL(', ').join(similarities)
             )
 
+            where_clause = sql.SQL("")
+            # Filter unpublished records for portal and public user for
+            # performance.
+            # TODO: Same for `active` field?
+            filter_is_published = (
+                'is_published' in model._fields
+                and model._fields['is_published'].base_field.model_name == model_name
+                and not self.env.user.has_group('base.group_user')
+            )
+            if filter_is_published:
+                where_clause = sql.SQL("WHERE is_published")
+
             from_clause = sql.SQL("FROM {table}").format(table=sql.Identifier(model._table))
             # Specific handling for fields being actually part of another model
             # through the `inherits` mechanism.
@@ -1770,12 +1782,14 @@ class Website(models.Model):
             query = sql.SQL("""
                 SELECT {table}.id, {best_similarity} AS _best_similarity
                 {from_clause}
+                {where_clause}
                 ORDER BY _best_similarity desc
                 LIMIT 1000
             """).format(
                 table=sql.Identifier(model._table),
                 best_similarity=best_similarity,
                 from_clause=from_clause,
+                where_clause=where_clause,
             )
             self.env.cr.execute(query, {'search': search})
             ids = {row[0] for row in self.env.cr.fetchall() if row[1] and row[1] >= similarity_threshold}
@@ -1811,20 +1825,31 @@ class Website(models.Model):
                         search=unaccent(sql.Placeholder('search')),
                         field=unaccent(sql.SQL('value'))
                     )
-                    names = ['%s,%s' % (model._name, field) for field in fields]
-                    query = sql.SQL("""
-                        SELECT res_id, {similarity} AS _similarity
-                        FROM ir_translation
+                    where_clause = """
                         WHERE lang = {lang}
                         AND name = ANY({names})
                         AND type = 'model'
                         AND value IS NOT NULL
+                    """
+                    if filter_is_published:
+                        # TODO: This should also filter out unpublished records
+                        # if this `is_published` field is not part of the model
+                        # table directly but part of an `inherits` table.
+                        where_clause += f"AND res_id in (SELECT id FROM {model._table} WHERE is_published) "
+                    where_clause = sql.SQL(where_clause).format(
+                        lang=sql.Placeholder('lang'),
+                        names=sql.Placeholder('names'),
+                    )
+                    names = ['%s,%s' % (model._name, field) for field in fields]
+                    query = sql.SQL("""
+                        SELECT res_id, {similarity} AS _similarity
+                        FROM ir_translation
+                        {where_clause}
                         ORDER BY _similarity desc
                         LIMIT 1000
                     """).format(
                         similarity=similarity,
-                        lang=sql.Placeholder('lang'),
-                        names=sql.Placeholder('names'),
+                        where_clause=where_clause,
                     )
                 self.env.cr.execute(query, {'lang': self.env.lang, 'names': names, 'search': search})
                 ids.update(row[0] for row in self.env.cr.fetchall() if row[1] and row[1] >= similarity_threshold)
-- 
GitLab