[IMP] [base, account_facturx]: Add PDF/A(-3B) support

Improve the factur-x export in two ways: make the exported PDF PDF/A-3B compliant, and add the factur-x XMP metadata inside the file.

The added .ICC profile comes from https://www.color.org/srgbprofiles.xalter
License terms can be found here: https://www.color.org/profiles2.xalter#license

Task id #2668919

closes odoo/odoo#78974

Signed-off-by: Laurent Smet <las@openerp.com>

[IMP] [base, account_facturx]: Add PDF/A(-3B) support
60e9632a · Nicolas (vin) · 79a95b0a · 60e9632a · 60e9632a · 60e9632a
Commit 60e9632a authored 3 years ago by Nicolas (vin)
--- a/addons/account_facturx/data/facturx_templates.xml
+++ b/addons/account_facturx/data/facturx_templates.xml
@@ -231,5 +231,88 @@
                </rsm:SupplyChainTradeTransaction>
            </rsm:CrossIndustryInvoice>
        </template>
+
+        <!-- Body of the XMP metadata packet for Factur-X invoices. It declares PDF/A part 3,
+             conformance B (pdfaid), the Dublin Core title/creator/description, the producer and
+             the creation/modification dates, the Factur-X PDF/A extension schema and the values of
+             its four properties. Rendering values used: `title` and `date`. -->
+        <template id="account_invoice_pdfa_3_facturx_metadata">
+            <x:xmpmeta xmlns:x="adobe:ns:meta/">
+                <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+                    <rdf:Description xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/" rdf:about="">
+                        <pdfaid:part>3</pdfaid:part>
+                        <pdfaid:conformance>B</pdfaid:conformance>
+                    </rdf:Description>
+                    <rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/" rdf:about="">
+                        <dc:title>
+                            <rdf:Alt>
+                                <rdf:li t-att="{'xml:lang': 'x-default'}" t-esc="title"/>
+                            </rdf:Alt>
+                        </dc:title>
+                        <dc:creator>
+                            <rdf:Seq>
+                                <rdf:li>Odoo</rdf:li>
+                            </rdf:Seq>
+                        </dc:creator>
+                        <dc:description>
+                            <rdf:Alt>
+                                <rdf:li t-att="{'xml:lang': 'x-default'}">Invoice generated by Odoo</rdf:li>
+                            </rdf:Alt>
+                        </dc:description>
+                    </rdf:Description>
+                    <rdf:Description xmlns:pdf="http://ns.adobe.com/pdf/1.3/" rdf:about="">
+                        <pdf:Producer>Odoo</pdf:Producer>
+                    </rdf:Description>
+                    <rdf:Description xmlns:xmp="http://ns.adobe.com/xap/1.0/" rdf:about="">
+                        <xmp:CreatorTool>Odoo</xmp:CreatorTool>
+                        <xmp:CreateDate t-esc="date"/>
+                        <xmp:ModifyDate t-esc="date"/>
+                    </rdf:Description>
+                    <rdf:Description xmlns:pdfaExtension="http://www.aiim.org/pdfa/ns/extension/"
+                                     xmlns:pdfaSchema="http://www.aiim.org/pdfa/ns/schema#"
+                                     xmlns:pdfaProperty="http://www.aiim.org/pdfa/ns/property#" rdf:about="">
+                        <pdfaExtension:schemas>
+                            <rdf:Bag>
+                                <rdf:li rdf:parseType="Resource">
+                                    <pdfaSchema:schema>Factur-X PDFA Extension Schema</pdfaSchema:schema>
+                                    <pdfaSchema:namespaceURI>urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#</pdfaSchema:namespaceURI>
+                                    <pdfaSchema:prefix>fx</pdfaSchema:prefix>
+                                    <pdfaSchema:property>
+                                        <rdf:Seq>
+                                            <rdf:li rdf:parseType="Resource">
+                                                <pdfaProperty:name>DocumentFileName</pdfaProperty:name>
+                                                <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                                                <pdfaProperty:category>external</pdfaProperty:category>
+                                                <pdfaProperty:description>name of the embedded XML invoice file</pdfaProperty:description>
+                                            </rdf:li>
+                                            <rdf:li rdf:parseType="Resource">
+                                                <pdfaProperty:name>DocumentType</pdfaProperty:name>
+                                                <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                                                <pdfaProperty:category>external</pdfaProperty:category>
+                                                <pdfaProperty:description>INVOICE</pdfaProperty:description>
+                                            </rdf:li>
+                                            <rdf:li rdf:parseType="Resource">
+                                                <pdfaProperty:name>Version</pdfaProperty:name>
+                                                <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                                                <pdfaProperty:category>external</pdfaProperty:category>
+                                                <pdfaProperty:description>The actual version of the Factur-X XML schema</pdfaProperty:description>
+                                            </rdf:li>
+                                            <rdf:li rdf:parseType="Resource">
+                                                <pdfaProperty:name>ConformanceLevel</pdfaProperty:name>
+                                                <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+                                                <pdfaProperty:category>external</pdfaProperty:category>
+                                                <pdfaProperty:description>The conformance level of the embedded Factur-X data</pdfaProperty:description>
+                                            </rdf:li>
+                                        </rdf:Seq>
+                                    </pdfaSchema:property>
+                                </rdf:li>
+                            </rdf:Bag>
+                        </pdfaExtension:schemas>
+                    </rdf:Description>
+                    <rdf:Description xmlns:fx="urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#" rdf:about="">
+                        <fx:ConformanceLevel>EN 16931</fx:ConformanceLevel>
+                        <fx:DocumentFileName>factur-x.xml</fx:DocumentFileName>
+                        <fx:DocumentType>INVOICE</fx:DocumentType>
+                        <fx:Version>1.0</fx:Version>
+                    </rdf:Description>
+                </rdf:RDF>
+            </x:xmpmeta>
+        </template>
    </data>
 </odoo>
--- a/addons/account_facturx/models/ir_actions_report.py
+++ b/addons/account_facturx/models/ir_actions_report.py
 # -*- coding: utf-8 -*-
+from io import BytesIO
+from logging import getLogger
+from PyPDF2 import PdfFileReader

-from odoo import models, fields, api, _
+from odoo import fields, models
+from odoo import tools
+from odoo.tools.pdf import OdooPdfFileWriter

-from PyPDF2 import PdfFileWriter, PdfFileReader
-
-import io
+_logger = getLogger(__name__)


 class IrActionsReport(models.Model):
@@ -17,16 +20,31 @@ class IrActionsReport(models.Model):
            if invoice.is_sale_document() and invoice.state != 'draft':
                xml_content = invoice._export_as_facturx_xml()

-                # Add attachment.
-                reader_buffer = io.BytesIO(pdf_content)
+                reader_buffer = BytesIO(pdf_content)
                reader = PdfFileReader(reader_buffer)
-                writer = PdfFileWriter()
+                writer = OdooPdfFileWriter()
                writer.cloneReaderDocumentRoot(reader)
-                writer.addAttachment('factur-x.xml', xml_content)
-                buffer = io.BytesIO()
+
+                if tools.str2bool(self.env['ir.config_parameter'].sudo().get_param('edi.use_pdfa', 'False')):
+                    try:
+                        writer.convert_to_pdfa()
+                    except Exception as e:
+                        _logger.exception("Error while converting to PDF/A: %s", e)
+
+                    metadata_template = self.env.ref('account_facturx.account_invoice_pdfa_3_facturx_metadata', False)
+                    if metadata_template:
+                        metadata_content = metadata_template.render({
+                            'title': invoice.name,
+                            'date': fields.Date.context_today(self),
+                        })
+                        writer.add_file_metadata(metadata_content)
+
+                writer.addAttachment('factur-x.xml', xml_content, '/application#2Fxml')
+
+                buffer = BytesIO()
                writer.write(buffer)
                pdf_content = buffer.getvalue()
-
-                reader_buffer.close()
                buffer.close()
+                reader_buffer.close()
+
        return super(IrActionsReport, self)._post_pdf(save_in_attachment, pdf_content=pdf_content, res_ids=res_ids)
--- a/odoo/tools/data/files/sRGB2014.icc
+++ b/odoo/tools/data/files/sRGB2014.icc
--- a/odoo/tools/data/files/sRGB2014.icc.LICENSE
+++ b/odoo/tools/data/files/sRGB2014.icc.LICENSE
+Copyright (c) 2015 International Color Consortium
+
+This profile is made available by the International Color Consortium, and may be copied, distributed, embedded, made,
+used, and sold without restriction. Altered versions of this profile shall have the original identification and
+copyright information removed and shall not be misrepresented as the original profile.
--- a/odoo/tools/pdf.py
+++ b/odoo/tools/pdf.py
 # -*- coding: utf-8 -*-
 # Part of Odoo. See LICENSE file for full copyright and licensing details.
 import io
+
+from datetime import datetime
+from hashlib import md5
+from logging import getLogger
 from PyPDF2 import PdfFileWriter, PdfFileReader
+from PyPDF2.generic import DictionaryObject, NameObject, ArrayObject, DecodedStreamObject, NumberObject, createStringObject, ByteStringObject
+from zlib import compress, decompress
+
+try:
+    from fontTools.ttLib import TTFont
+except ImportError:
+    TTFont = None
+
+from odoo.tools.misc import file_open
+
+_logger = getLogger(__name__)
+DEFAULT_PDF_DATETIME_FORMAT = "D:%Y%m%d%H%M%S+00'00'"


 def merge_pdf(pdf_data):
@@ -19,3 +35,228 @@ def merge_pdf(pdf_data):
    merged_pdf = _buffer.getvalue()
    _buffer.close()
    return merged_pdf
+
+
+class OdooPdfFileWriter(PdfFileWriter):
+
+    def __init__(self, *args, **kwargs):
+        """
+        Override of the init to initialise additional variables.
+
+        All positional and keyword arguments are forwarded unchanged to the parent constructor.
+        """
+        super().__init__(*args, **kwargs)
+        # Reader the document root was cloned from; stays None until cloneReaderDocumentRoot() is called.
+        self._reader = None
+
+    def addAttachment(self, name, data, subtype=""):
+        """
+        Add an attachment to the pdf. Supports adding multiple attachments, while respecting PDF/A rules.
+
+        The attachment is registered both in the /Names -> /EmbeddedFiles entry of the document catalogue and in
+        the /AF (associated files) array of the catalogue.
+
+        :param name: The name of the attachment
+        :param data: The data of the attachment
+        :param subtype: The mime-type of the attachment. This is required by PDF/A, but not essential otherwise.
+        It is written as a PDF name, so the "/" of the mime-type has to be escaped as "#2F".
+        It should take the form of "/xxx#2Fxxx". E.g. for "text/xml": "/text#2Fxml"
+        """
+        attachment = self._create_attachment_object({
+            'filename': name,
+            'content': data,
+            'subtype': subtype,
+        })
+        # Entries are appended as a (file name, file specification reference) pair.
+        names_entry = [attachment.getObject()['/F'], attachment]
+        if self._root_object.get('/Names') and self._root_object['/Names'].get('/EmbeddedFiles'):
+            names_array = self._root_object["/Names"]["/EmbeddedFiles"]["/Names"]
+            names_array.extend(names_entry)
+        else:
+            names_array = ArrayObject()
+            names_array.extend(names_entry)
+
+            embedded_files_names_dictionary = DictionaryObject()
+            embedded_files_names_dictionary.update({
+                NameObject("/Names"): names_array
+            })
+            if self._root_object.get('/Names'):
+                # The catalogue already has a /Names dictionary without /EmbeddedFiles: only add the missing
+                # entry, so the other entries of that dictionary are not discarded.
+                self._root_object['/Names'].update({
+                    NameObject("/EmbeddedFiles"): embedded_files_names_dictionary
+                })
+            else:
+                embedded_files_dictionary = DictionaryObject()
+                embedded_files_dictionary.update({
+                    NameObject("/EmbeddedFiles"): embedded_files_names_dictionary
+                })
+                self._root_object.update({
+                    NameObject("/Names"): embedded_files_dictionary
+                })
+
+        if self._root_object.get('/AF'):
+            attachment_array = self._root_object['/AF']
+            attachment_array.extend([attachment])
+        else:
+            # Create a new object containing an array referencing embedded file
+            # And reference this array in the root catalogue
+            attachment_array = self._addObject(ArrayObject([attachment]))
+            self._root_object.update({
+                NameObject("/AF"): attachment_array
+            })
+
+    def cloneReaderDocumentRoot(self, reader):
+        """
+        Clone the document root of the reader, then carry over its file header and its /ID trailer entry.
+
+        The reader is also kept on the writer (self._reader) for later use.
+
+        :param reader: the reader to clone; its stream is rewound and read from the start to get the header.
+        """
+        super().cloneReaderDocumentRoot(reader)
+        self._reader = reader
+        # Try to read the header coming in, and reuse it in our new PDF
+        # This is done in order to allow modifying PDF/A files after creating them (as PyPDF does not read it)
+        stream = reader.stream
+        stream.seek(0)
+        header = stream.readlines(9)
+        # Should always be true, the first line of a pdf should have 9 bytes (%PDF-1.x plus a newline)
+        if len(header) == 1:
+            # If we found a header, set it back to the new pdf
+            self._header = header[0]
+            # Also check the second line. If it is PDF/A, it should be a line starting by % following by four bytes + \n
+            # readlines() returns an empty list when nothing follows the header, so guard before indexing.
+            following_lines = stream.readlines(1)
+            if following_lines:
+                second_line = following_lines[0]
+                if second_line.startswith(b'%') and len(second_line) == 6:
+                    self._header += second_line
+        # Look if we have an ID in the incoming stream and use it.
+        pdf_id = reader.trailer.get('/ID', None)
+        if pdf_id:
+            self._ID = pdf_id
+
+    def convert_to_pdfa(self):
+        """
+        Transform the opened PDF file into a PDF/A compliant file.
+
+        This sets the file header and the document ID, embeds the sRGB ICC profile as output intent, rewrites the
+        glyph width arrays of the embedded fonts (only when fontTools is available), fixes the outlines count and
+        sets Odoo as creator/producer.
+
+        Must be called after cloneReaderDocumentRoot(): the document ID is computed from the content of the
+        reader stream, which therefore has to provide getvalue() (e.g. a BytesIO).
+        """
+        # Set the PDF version to 1.7 (as PDF/A-3 is based on version 1.7) and make it PDF/A compliant.
+        # See https://github.com/veraPDF/veraPDF-validation-profiles/wiki/PDFA-Parts-2-and-3-rules#rule-612-1
+
+        # " The file header shall begin at byte zero and shall consist of "%PDF-1.n" followed by a single EOL marker,
+        # where 'n' is a single digit number between 0 (30h) and 7 (37h) "
+        # " The aforementioned EOL marker shall be immediately followed by a % (25h) character followed by at least four
+        # bytes, each of whose encoded byte values shall have a decimal value greater than 127 "
+        self._header = b"%PDF-1.7\n%\xFF\xFF\xFF\xFF"
+
+        # Add a document ID to the trailer. This is only needed when using encryption with regular PDF, but is required
+        # when using PDF/A
+        pdf_id = ByteStringObject(md5(self._reader.stream.getvalue()).digest())
+        # The first string is based on the content at the time of creating the file, while the second is based on the
+        # content of the file when it was last updated. When creating a PDF, both are set to the same value.
+        self._ID = ArrayObject((pdf_id, pdf_id))
+
+        with file_open('data/files/sRGB2014.icc', subdir='tools', mode='rb') as icc_profile:
+            icc_profile_file_data = compress(icc_profile.read())
+
+        icc_profile_stream_obj = DecodedStreamObject()
+        icc_profile_stream_obj.setData(icc_profile_file_data)
+        icc_profile_stream_obj.update({
+            NameObject("/Filter"): NameObject("/FlateDecode"),
+            NameObject("/N"): NumberObject(3),
+            # The length is an integer, not a PDF name.
+            NameObject("/Length"): NumberObject(len(icc_profile_file_data)),
+        })
+
+        icc_profile_obj = self._addObject(icc_profile_stream_obj)
+
+        output_intent_dict_obj = DictionaryObject()
+        output_intent_dict_obj.update({
+            NameObject("/S"): NameObject("/GTS_PDFA1"),
+            NameObject("/OutputConditionIdentifier"): createStringObject("sRGB"),
+            NameObject("/DestOutputProfile"): icc_profile_obj,
+            NameObject("/Type"): NameObject("/OutputIntent"),
+        })
+
+        output_intent_obj = self._addObject(output_intent_dict_obj)
+        self._root_object.update({
+            NameObject("/OutputIntents"): ArrayObject([output_intent_obj]),
+        })
+
+        pages = self._root_object['/Pages']['/Kids']
+
+        # PDF/A needs the glyphs width array embedded in the pdf to be consistent with the ones from the font file.
+        # But it seems like it is not the case when exporting from wkhtmltopdf.
+        if TTFont:
+            fonts = {}
+            # First browse through all the pages of the pdf file, to get a reference to all the fonts used in the PDF.
+            for page in pages:
+                page_object = page.getObject()
+                # Pages that declare no /Font resources have nothing to collect.
+                if '/Resources' not in page_object or '/Font' not in page_object['/Resources']:
+                    continue
+                for font in page_object['/Resources']['/Font'].values():
+                    font_object = font.getObject()
+                    # Fonts without /DescendantFonts are left untouched.
+                    if '/DescendantFonts' not in font_object:
+                        continue
+                    for descendant in font_object['/DescendantFonts']:
+                        fonts[descendant.idnum] = descendant.getObject()
+
+            # Then for each font, rewrite the width array with the information taken directly from the font file.
+            # The new width are calculated such as width = round(1000 * font_glyph_width / font_units_per_em)
+            # See: http://martin.hoppenheit.info/blog/2018/pdfa-validation-and-inconsistent-glyph-width-information/
+            for font in fonts.values():
+                # Without an embedded /FontFile2 stream there is no font file to read the widths from.
+                if '/FontDescriptor' not in font or '/FontFile2' not in font['/FontDescriptor']:
+                    continue
+                font_file = font['/FontDescriptor']['/FontFile2']
+                # NOTE(review): this assumes the raw stream data is zlib-compressed - confirm for other producers.
+                with io.BytesIO(decompress(font_file._data)) as stream:
+                    ttfont = TTFont(stream)
+                    font_upm = ttfont['head'].unitsPerEm
+                    glyphs = ttfont.getGlyphSet()._hmtx.metrics
+                    glyph_widths = [
+                        NumberObject(round(1000.0 * values[0] / font_upm))
+                        for key, values in glyphs.items()
+                        if key[:5] == 'glyph'
+                    ]
+
+                font[NameObject('/W')] = ArrayObject([NumberObject(1), ArrayObject(glyph_widths)])
+        else:
+            _logger.warning('The fonttools package is not installed. Generated PDF may not be PDF/A compliant.')
+
+        # Not every document has outlines; only fix the count when they are present.
+        if '/Outlines' in self._root_object:
+            outlines = self._root_object['/Outlines'].getObject()
+            outlines[NameObject('/Count')] = NumberObject(1)
+
+        # Set odoo as producer
+        self.addMetadata({
+            '/Creator': "Odoo",
+            '/Producer': "Odoo",
+        })
+
+    def add_file_metadata(self, metadata_content):
+        """
+        Set the XMP metadata of the pdf, wrapping it with the necessary XMP header/footer.
+        These are required for a PDF/A file to be completely compliant. Omitting them would result in validation errors.
+
+        :param metadata_content: bytes of the metadata to add to the pdf.
+        """
+        # See https://wwwimages2.adobe.com/content/dam/acom/en/devnet/xmp/pdfs/XMP%20SDK%20Release%20cc-2016-08/XMPSpecificationPart1.pdf
+        # Page 10/11
+        header = b'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>'
+        footer = b'<?xpacket end="w"?>'
+        metadata = b'%s%s%s' % (header, metadata_content, footer)
+        file_entry = DecodedStreamObject()
+        file_entry.setData(metadata)
+        file_entry.update({
+            NameObject("/Type"): NameObject("/Metadata"),
+            NameObject("/Subtype"): NameObject("/XML"),
+            # The length is an integer, not a PDF name.
+            NameObject("/Length"): NumberObject(len(metadata)),
+        })
+
+        # Add the new metadata to the pdf, then redirect the reference to refer to this new object.
+        metadata_object = self._addObject(file_entry)
+        self._root_object.update({NameObject("/Metadata"): metadata_object})
+
+    def _create_attachment_object(self, attachment):
+        ''' Create a PyPdf2.generic object representing an embedded file.
+
+        Two objects are added to the pdf: the stream holding the file content and the file specification
+        dictionary pointing to it.
+
+        :param attachment: A dictionary containing:
+            * filename: The name of the file to embed (required)
+            * content:  The bytes of the file to embed (required)
+            * subtype: The mime-type of the file to embed (optional)
+            * description: A description of the file, stored as /Desc (optional)
+        :return: The reference returned by self._addObject for the file specification dictionary.
+        '''
+        file_entry = DecodedStreamObject()
+        file_entry.setData(attachment['content'])
+        file_entry.update({
+            NameObject("/Type"): NameObject("/EmbeddedFile"),
+            NameObject("/Params"):
+                DictionaryObject({
+                    NameObject('/CheckSum'): createStringObject(md5(attachment['content']).hexdigest()),
+                    # DEFAULT_PDF_DATETIME_FORMAT hard-codes a +00'00' offset, so the timestamp must be in UTC.
+                    NameObject('/ModDate'): createStringObject(datetime.utcnow().strftime(DEFAULT_PDF_DATETIME_FORMAT)),
+                    # The size is an integer, not a PDF name.
+                    NameObject('/Size'): NumberObject(len(attachment['content'])),
+                }),
+        })
+        if attachment.get('subtype'):
+            file_entry.update({
+                NameObject("/Subtype"): NameObject(attachment['subtype']),
+            })
+        file_entry_object = self._addObject(file_entry)
+        filename_object = createStringObject(attachment['filename'])
+        filespec_object = DictionaryObject({
+            NameObject("/AFRelationship"): NameObject("/Data"),
+            NameObject("/Type"): NameObject("/Filespec"),
+            NameObject("/F"): filename_object,
+            NameObject("/EF"):
+                DictionaryObject({
+                    NameObject("/F"): file_entry_object,
+                    NameObject('/UF'): file_entry_object,
+                }),
+            NameObject("/UF"): filename_object,
+        })
+        if attachment.get('description'):
+            filespec_object.update({NameObject("/Desc"): createStringObject(attachment['description'])})
+        return self._addObject(filespec_object)