[FIX] hr_work_entry: improve query and add indices

The query below https://github.com/odoo/odoo/blob/81497125d8100c6bdd2dc30434232a88a419a3e3/addons/hr_work_entry/models/hr_work_entry.py#L92-L115 has bad performance without the bespoken indices on `date_start` and `date_stop`. We can speed it up more with an index on `employee_id`. This is not enough for DBs with many work entries (500K+), specially during upgrades. Here we optimize the query to take into account only the work entries being modified. This issue was observed during an upgrade saas~12.3->13.0 where the payslip recomputation never ends due to the increased amount of hr work entries created. Note how the first 1K payslips are processed in 1 hour (~16 payslips per minute), while the latest 3 (before the upgrade was killed) took 1 min. ``` 2021-11-02 21:05:44,403 2229 INFO db_42897 odoo.modules.migration: module hr_payroll: Running migration [$saas~12.4.1.0] end-compute-amount 2021-11-02 21:06:44,577 2229 INFO db_42897 odoo.upgrade: [1.61%] 1120/69635 payslip processed in 0:01:00.036716 (total estimated time: 1:02:12.729213) 2021-11-02 21:07:44,602 2229 INFO db_42897 odoo.upgrade: [2.66%] 1853/69635 payslip processed in 0:02:00.062972 (total estimated time: 1:15:11.918540) ... 2021-11-05 09:59:46,565 2229 INFO db_42897 odoo.upgrade: [47.95%] 33390/69635 payslip processed in 2 days, 12:54:02.025479 (total estimated time: 5 days, 7:00:30.261882) 2021-11-05 10:01:04,990 2229 INFO db_42897 odoo.upgrade: [47.95%] 33393/69635 payslip processed in 2 days, 12:55:20.450549 (total estimated time: 5 days, 7:02:32.725840) ``` opw-2672031 closes odoo/odoo#80857 X-original-commit: 95d3145a Signed-off-by: Nicolas Seinlet (nse) <nse@odoo.com>

[FIX] hr_work_entry: improve query and add indices
7b2889fe · Alvaro Fuentes · 1e04e1bf · 7b2889fe
Commit 7b2889fe authored 3 years ago by Alvaro Fuentes
--- a/addons/hr_work_entry/models/hr_work_entry.py
+++ b/addons/hr_work_entry/models/hr_work_entry.py
@@ -3,9 +3,10 @@

 from contextlib import contextmanager
 from dateutil.relativedelta import relativedelta
+import itertools
 from psycopg2 import OperationalError

-from odoo import api, fields, models
+from odoo import api, fields, models, tools


 class HrWorkEntry(models.Model):
@@ -15,7 +16,7 @@ class HrWorkEntry(models.Model):

    name = fields.Char(required=True)
    active = fields.Boolean(default=True)
-    employee_id = fields.Many2one('hr.employee', required=True, domain="['|', ('company_id', '=', False), ('company_id', '=', company_id)]")
+    employee_id = fields.Many2one('hr.employee', required=True, domain="['|', ('company_id', '=', False), ('company_id', '=', company_id)]", index=True)
    date_start = fields.Datetime(required=True, string='From')
    date_stop = fields.Datetime(compute='_compute_date_stop', store=True, readonly=False, string='To')
    duration = fields.Float(compute='_compute_duration', store=True, string="Period")
@@ -36,6 +37,9 @@ class HrWorkEntry(models.Model):
        ('_work_entry_start_before_end', 'check (date_stop > date_start)', 'Starting time should be before end time.')
    ]

+    def init(self):
+        tools.create_index(self._cr, "hr_work_entry_date_start_date_stop_index", self._table, ["date_start", "date_stop"])
+
    @api.depends('state')
    def _compute_conflict(self):
        for rec in self:
@@ -78,11 +82,11 @@ class HrWorkEntry(models.Model):
        conflict = self._mark_conflicting_work_entries(min(self.mapped('date_start')), max(self.mapped('date_stop')))
        return undefined_type or conflict

-    @api.model
    def _mark_conflicting_work_entries(self, start, stop):
        """
        Set `state` to `conflict` for overlapping work entries
        between two dates.
+        If `self.ids` is truthy then check conflicts with the corresponding work entries.
        Return True if overlapping work entries were detected.
        """
        # Use the postgresql range type `tsrange` which is a range of timestamp
@@ -92,26 +96,21 @@ class HrWorkEntry(models.Model):
        # limit the resulting set size and fasten the query.
        self.flush(['date_start', 'date_stop', 'employee_id', 'active'])
        query = """
-            SELECT b1.id
-            FROM hr_work_entry b1
-            WHERE
-            b1.date_start <= %s
-            AND b1.date_stop >= %s
-            AND active = TRUE
-            AND EXISTS (
-                SELECT 1
-                FROM hr_work_entry b2
-                WHERE
-                    b2.date_start <= %s
-                    AND b2.date_stop >= %s
-                    AND active = TRUE
-                    AND tsrange(b1.date_start, b1.date_stop, '()') && tsrange(b2.date_start, b2.date_stop, '()')
-                    AND b1.id <> b2.id
-                    AND b1.employee_id = b2.employee_id
-            );
-        """
-        self.env.cr.execute(query, (stop, start, stop, start))
-        conflicts = [res.get('id') for res in self.env.cr.dictfetchall()]
+            SELECT b1.id,
+                   b2.id
+              FROM hr_work_entry b1
+              JOIN hr_work_entry b2
+                ON b1.employee_id = b2.employee_id
+               AND b1.id <> b2.id
+             WHERE b1.date_start <= %(stop)s
+               AND b1.date_stop >= %(start)s
+               AND b1.active = TRUE
+               AND b2.active = TRUE
+               AND tsrange(b1.date_start, b1.date_stop, '()') && tsrange(b2.date_start, b2.date_stop, '()')
+               AND {}
+        """.format("b2.id IN %(ids)s" if self.ids else "b2.date_start <= %(stop)s AND b2.date_stop >= %(start)s")
+        self.env.cr.execute(query, {"stop": stop, "start": start, "ids": tuple(self.ids)})
+        conflicts = set(itertools.chain.from_iterable(self.env.cr.fetchall()))
        self.browse(conflicts).write({
            'state': 'conflict',
        })