From 34b6259b28eb963f6c5660c0e289219b6761645b Mon Sep 17 00:00:00 2001
From: Stanislas Sobieski <sts@odoo.com>
Date: Thu, 27 Apr 2023 11:32:06 +0000
Subject: [PATCH] [FIX] cloc: avoid memory issue on big file

Before this commit:
When files that should be ignored via the manifest are not (a JS library,
for example), they can contain huge lines, and the regex used to subtract
the comments then uses an excessive amount of memory.
For example, with a 13M file containing a line of more than 8M characters,
memory consumption peaks at 1.7G.

Files containing a line longer than MAX_LINE_SIZE are no longer counted, so
the results might be different, but it's an acceptable compromise.

closes odoo/odoo#120111

X-original-commit: 0e56e4a7bd6de42d729441a53995ddd459d5e633
Signed-off-by: Thibault Francois <tfr@odoo.com>
---
 odoo/tools/cloc.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/odoo/tools/cloc.py b/odoo/tools/cloc.py
index acc1398465e4..762b17006df2 100644
--- a/odoo/tools/cloc.py
+++ b/odoo/tools/cloc.py
@@ -22,6 +22,7 @@ DEFAULT_EXCLUDE = [
 
 STANDARD_MODULES = ['web', 'web_enterprise', 'website_animate', 'base']
 MAX_FILE_SIZE = 25 * 2**20 # 25 MB
+MAX_LINE_SIZE = 100000
 
 class Cloc(object):
     def __init__(self):
@@ -61,6 +62,10 @@ class Cloc(object):
         # Based on https://stackoverflow.com/questions/241327
         s = s.strip() + "\n"
         total = s.count("\n")
+        # To avoid to use too much memory we don't try to count file
+        # with very large line, usually minified file
+        if max(len(l) for l in s.split('\n')) > MAX_LINE_SIZE:
+            return -1, "Max line size exceeded"
         def replacer(match):
             s = match.group(0)
             return " " if s.startswith('/') else s
-- 
GitLab