Skip to content
Snippets Groups Projects
Commit 60e9632a authored by Nicolas (vin)
Browse files

[IMP] [base, account_facturx]: Add PDF/A(-3B) support

Improve the factur-x export in two ways: make the exported PDF
PDF/A-3B compliant, and add the factur-x XMP metadata inside the file.

The added .ICC profile comes from https://www.color.org/srgbprofiles.xalter
License terms can be found here: https://www.color.org/profiles2.xalter#license



Task id # 2668919

closes odoo/odoo#78974

Signed-off-by: Laurent Smet <las@openerp.com>
parent 79a95b0a
No related branches found
No related tags found
No related merge requests found
......@@ -231,5 +231,88 @@
</rsm:SupplyChainTradeTransaction>
</rsm:CrossIndustryInvoice>
</template>
<!--
XMP metadata packet body for a PDF/A-3 invoice embedding Factur-X data.
Rendering values read by this template:
* title: used as the dc:title of the document.
* date: used for both xmp:CreateDate and xmp:ModifyDate.
The packet declares PDF/A part 3, conformance B, then declares the "fx"
extension schema (DocumentFileName, DocumentType, Version, ConformanceLevel)
and finally gives the values of those four properties.
-->
<template id="account_invoice_pdfa_3_facturx_metadata">
<x:xmpmeta xmlns:x="adobe:ns:meta/">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/" rdf:about="">
<pdfaid:part>3</pdfaid:part>
<pdfaid:conformance>B</pdfaid:conformance>
</rdf:Description>
<rdf:Description xmlns:dc="http://purl.org/dc/elements/1.1/" rdf:about="">
<dc:title>
<rdf:Alt>
<rdf:li t-att="{'xml:lang': 'x-default'}" t-esc="title"/>
</rdf:Alt>
</dc:title>
<dc:creator>
<rdf:Seq>
<rdf:li>Odoo</rdf:li>
</rdf:Seq>
</dc:creator>
<dc:description>
<rdf:Alt>
<rdf:li t-att="{'xml:lang': 'x-default'}">Invoice generated by Odoo</rdf:li>
</rdf:Alt>
</dc:description>
</rdf:Description>
<rdf:Description xmlns:pdf="http://ns.adobe.com/pdf/1.3/" rdf:about="">
<pdf:Producer>Odoo</pdf:Producer>
</rdf:Description>
<rdf:Description xmlns:xmp="http://ns.adobe.com/xap/1.0/" rdf:about="">
<xmp:CreatorTool>Odoo</xmp:CreatorTool>
<xmp:CreateDate t-esc="date"/>
<xmp:ModifyDate t-esc="date"/>
</rdf:Description>
<rdf:Description xmlns:pdfaExtension="http://www.aiim.org/pdfa/ns/extension/"
xmlns:pdfaSchema="http://www.aiim.org/pdfa/ns/schema#"
xmlns:pdfaProperty="http://www.aiim.org/pdfa/ns/property#" rdf:about="">
<pdfaExtension:schemas>
<rdf:Bag>
<rdf:li rdf:parseType="Resource">
<pdfaSchema:schema>Factur-X PDFA Extension Schema</pdfaSchema:schema>
<pdfaSchema:namespaceURI>urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#</pdfaSchema:namespaceURI>
<pdfaSchema:prefix>fx</pdfaSchema:prefix>
<pdfaSchema:property>
<rdf:Seq>
<rdf:li rdf:parseType="Resource">
<pdfaProperty:name>DocumentFileName</pdfaProperty:name>
<pdfaProperty:valueType>Text</pdfaProperty:valueType>
<pdfaProperty:category>external</pdfaProperty:category>
<pdfaProperty:description>name of the embedded XML invoice file</pdfaProperty:description>
</rdf:li>
<rdf:li rdf:parseType="Resource">
<pdfaProperty:name>DocumentType</pdfaProperty:name>
<pdfaProperty:valueType>Text</pdfaProperty:valueType>
<pdfaProperty:category>external</pdfaProperty:category>
<pdfaProperty:description>INVOICE</pdfaProperty:description>
</rdf:li>
<rdf:li rdf:parseType="Resource">
<pdfaProperty:name>Version</pdfaProperty:name>
<pdfaProperty:valueType>Text</pdfaProperty:valueType>
<pdfaProperty:category>external</pdfaProperty:category>
<pdfaProperty:description>The actual version of the Factur-X XML schema</pdfaProperty:description>
</rdf:li>
<rdf:li rdf:parseType="Resource">
<pdfaProperty:name>ConformanceLevel</pdfaProperty:name>
<pdfaProperty:valueType>Text</pdfaProperty:valueType>
<pdfaProperty:category>external</pdfaProperty:category>
<pdfaProperty:description>The conformance level of the embedded Factur-X data</pdfaProperty:description>
</rdf:li>
</rdf:Seq>
</pdfaSchema:property>
</rdf:li>
</rdf:Bag>
</pdfaExtension:schemas>
</rdf:Description>
<rdf:Description xmlns:fx="urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#" rdf:about="">
<fx:ConformanceLevel>EN 16931</fx:ConformanceLevel>
<fx:DocumentFileName>factur-x.xml</fx:DocumentFileName>
<fx:DocumentType>INVOICE</fx:DocumentType>
<fx:Version>1.0</fx:Version>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>
</template>
</data>
</odoo>
# -*- coding: utf-8 -*-
from io import BytesIO
from logging import getLogger
from PyPDF2 import PdfFileReader
from odoo import models, fields, api, _
from odoo import fields, models
from odoo import tools
from odoo.tools.pdf import OdooPdfFileWriter
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
_logger = getLogger(__name__)
class IrActionsReport(models.Model):
......@@ -17,16 +20,31 @@ class IrActionsReport(models.Model):
if invoice.is_sale_document() and invoice.state != 'draft':
xml_content = invoice._export_as_facturx_xml()
# Add attachment.
reader_buffer = io.BytesIO(pdf_content)
reader_buffer = BytesIO(pdf_content)
reader = PdfFileReader(reader_buffer)
writer = PdfFileWriter()
writer = OdooPdfFileWriter()
writer.cloneReaderDocumentRoot(reader)
writer.addAttachment('factur-x.xml', xml_content)
buffer = io.BytesIO()
if tools.str2bool(self.env['ir.config_parameter'].sudo().get_param('edi.use_pdfa', 'False')):
try:
writer.convert_to_pdfa()
except Exception as e:
_logger.exception("Error while converting to PDF/A: %s", e)
metadata_template = self.env.ref('account_facturx.account_invoice_pdfa_3_facturx_metadata', False)
if metadata_template:
metadata_content = metadata_template.render({
'title': invoice.name,
'date': fields.Date.context_today(self),
})
writer.add_file_metadata(metadata_content)
writer.addAttachment('factur-x.xml', xml_content, '/application#2Fxml')
buffer = BytesIO()
writer.write(buffer)
pdf_content = buffer.getvalue()
reader_buffer.close()
buffer.close()
reader_buffer.close()
return super(IrActionsReport, self)._post_pdf(save_in_attachment, pdf_content=pdf_content, res_ids=res_ids)
File added
Copyright (c) 2015 International Color Consortium
This profile is made available by the International Color Consortium, and may be copied, distributed, embedded, made,
used, and sold without restriction. Altered versions of this profile shall have the original identification and
copyright information removed and shall not be misrepresented as the original profile.
# -*- coding: utf-8 -*-
# Part of Odoo. See LICENSE file for full copyright and licensing details.
import io
from datetime import datetime
from hashlib import md5
from logging import getLogger
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import DictionaryObject, NameObject, ArrayObject, DecodedStreamObject, NumberObject, createStringObject, ByteStringObject
from zlib import compress, decompress
try:
from fontTools.ttLib import TTFont
except ImportError:
TTFont = None
from odoo.tools.misc import file_open
# Module-level logger, named after this module.
_logger = getLogger(__name__)
# strftime pattern producing a PDF date string ("D:" followed by the timestamp).
# The timezone suffix is hard-coded to +00'00', so the pattern describes the
# formatted datetime as being at a zero offset from UTC.
DEFAULT_PDF_DATETIME_FORMAT = "D:%Y%m%d%H%M%S+00'00'"
def merge_pdf(pdf_data):
......@@ -19,3 +35,228 @@ def merge_pdf(pdf_data):
merged_pdf = _buffer.getvalue()
_buffer.close()
return merged_pdf
class OdooPdfFileWriter(PdfFileWriter):
    """PyPDF2 writer extended with what is needed to emit PDF/A-3 files.

    On top of ``PdfFileWriter`` it can embed several attachments (referenced
    from both the ``/EmbeddedFiles`` name tree and the catalog ``/AF`` array),
    reuse the header and ``/ID`` of a cloned document, add a PDF/A output
    intent and attach an XMP metadata stream.
    """

    def __init__(self, *args, **kwargs):
        """
        Override of the init to initialise additional variables.
        All arguments are forwarded untouched to ``PdfFileWriter``.
        """
        super().__init__(*args, **kwargs)
        # Set by cloneReaderDocumentRoot(); convert_to_pdfa() hashes its stream.
        self._reader = None

    def addAttachment(self, name, data, subtype=""):
        """
        Add an attachment to the pdf. Supports adding multiple attachments, while respecting PDF/A rules.
        :param name: The name of the attachment
        :param data: The data of the attachment (bytes: it is hashed and stored as a stream)
        :param subtype: The mime-type of the attachment. This is required by PDF/A, but not essential otherwise.
        It is written as a PDF name, so the "/" of the mime-type must be escaped as "#2F".
        E.g. for "text/xml": "/text#2Fxml"
        """
        attachment = self._create_attachment_object({
            'filename': name,
            'content': data,
            'subtype': subtype,
        })
        # The names array is a flat [key, value, key, value, ...] list.
        name_entry = [attachment.getObject()['/F'], attachment]

        root = self._root_object
        names_dictionary = root['/Names'] if '/Names' in root else None
        embedded_files = None
        if names_dictionary is not None and '/EmbeddedFiles' in names_dictionary:
            embedded_files = names_dictionary['/EmbeddedFiles']

        if embedded_files:
            embedded_files["/Names"].extend(name_entry)
        else:
            embedded_files = DictionaryObject({
                NameObject("/Names"): ArrayObject(name_entry),
            })
            if names_dictionary is None:
                root.update({
                    NameObject("/Names"): DictionaryObject({
                        NameObject("/EmbeddedFiles"): embedded_files,
                    }),
                })
            else:
                # Only add our entry: replacing the whole /Names dictionary would
                # drop the other name trees (e.g. /Dests) of the cloned document.
                names_dictionary.update({
                    NameObject("/EmbeddedFiles"): embedded_files,
                })

        if root.get('/AF'):
            root['/AF'].append(attachment)
        else:
            # Create a new object containing an array referencing embedded file
            # And reference this array in the root catalogue
            root.update({
                NameObject("/AF"): self._addObject(ArrayObject([attachment])),
            })

    def cloneReaderDocumentRoot(self, reader):
        """
        Clone the document root of the reader, and additionally keep its header and document ID.
        :param reader: the PdfFileReader to clone; its stream is rewound and read to recover the header.
        """
        super().cloneReaderDocumentRoot(reader)
        self._reader = reader
        # Try to read the header coming in, and reuse it in our new PDF
        # This is done in order to allow modifying PDF/A files after creating them (as PyPDF does not read it)
        stream = reader.stream
        stream.seek(0)
        header = stream.readlines(9)
        # Should always be true, the first line of a pdf should have 9 bytes (%PDF-1.x plus a newline)
        if len(header) == 1:
            # If we found a header, set it back to the new pdf
            self._header = header[0]
            # Also check the second line. If it is PDF/A, it should be a line starting by % following by four bytes + \n
            # A stream holding a single line yields no second line at all: treat it as "no marker".
            second_line = next(iter(stream.readlines(1)), b'')
            if second_line[:1] == b'%' and len(second_line) == 6:
                self._header += second_line
        # Look if we have an ID in the incoming stream and use it.
        pdf_id = reader.trailer.get('/ID', None)
        if pdf_id:
            self._ID = pdf_id

    def convert_to_pdfa(self):
        """
        Transform the opened PDF file into a PDF/A compliant file.
        cloneReaderDocumentRoot() must have been called before: the document ID is computed from the
        content of the cloned reader's stream (read with ``getvalue()``).
        """
        # Set the PDF version to 1.7 (as PDF/A-3 is based on version 1.7) and make it PDF/A compliant.
        # See https://github.com/veraPDF/veraPDF-validation-profiles/wiki/PDFA-Parts-2-and-3-rules#rule-612-1
        # " The file header shall begin at byte zero and shall consist of "%PDF-1.n" followed by a single EOL marker,
        # where 'n' is a single digit number between 0 (30h) and 7 (37h) "
        # " The aforementioned EOL marker shall be immediately followed by a % (25h) character followed by at least four
        # bytes, each of whose encoded byte values shall have a decimal value greater than 127 "
        self._header = b"%PDF-1.7\n%\xFF\xFF\xFF\xFF"

        # Add a document ID to the trailer. This is only needed when using encryption with regular PDF, but is required
        # when using PDF/A
        pdf_id = ByteStringObject(md5(self._reader.stream.getvalue()).digest())
        # The first string is based on the content at the time of creating the file, while the second is based on the
        # content of the file when it was last updated. When creating a PDF, both are set to the same value.
        self._ID = ArrayObject((pdf_id, pdf_id))

        with file_open('data/files/sRGB2014.icc', subdir='tools', mode='rb') as icc_profile:
            icc_profile_file_data = compress(icc_profile.read())

        # The profile is stored already deflated, hence the explicit /FlateDecode filter.
        icc_profile_stream_obj = DecodedStreamObject()
        icc_profile_stream_obj.setData(icc_profile_file_data)
        icc_profile_stream_obj.update({
            NameObject("/Filter"): NameObject("/FlateDecode"),
            NameObject("/N"): NumberObject(3),
            NameObject("/Length"): NumberObject(len(icc_profile_file_data)),
        })
        icc_profile_obj = self._addObject(icc_profile_stream_obj)

        output_intent_dict_obj = DictionaryObject()
        output_intent_dict_obj.update({
            NameObject("/S"): NameObject("/GTS_PDFA1"),
            NameObject("/OutputConditionIdentifier"): createStringObject("sRGB"),
            NameObject("/DestOutputProfile"): icc_profile_obj,
            NameObject("/Type"): NameObject("/OutputIntent"),
        })
        output_intent_obj = self._addObject(output_intent_dict_obj)
        self._root_object.update({
            NameObject("/OutputIntents"): ArrayObject([output_intent_obj]),
        })

        pages = self._root_object['/Pages']['/Kids']

        # PDF/A needs the glyphs width array embedded in the pdf to be consistent with the ones from the font file.
        # But it seems like it is not the case when exporting from wkhtmltopdf.
        if TTFont:
            fonts = {}
            # First browse through all the pages of the pdf file, to get a reference to all the fonts used in the PDF.
            # Pages without fonts and fonts without descendants are skipped instead of aborting the conversion.
            for page in pages:
                page_obj = page.getObject()
                resources = page_obj['/Resources'] if '/Resources' in page_obj else {}
                page_fonts = resources['/Font'] if '/Font' in resources else {}
                for font in page_fonts.values():
                    font_obj = font.getObject()
                    if '/DescendantFonts' not in font_obj:
                        continue
                    for descendant in font_obj['/DescendantFonts']:
                        fonts[descendant.idnum] = descendant.getObject()

            # Then for each font, rewrite the width array with the information taken directly from the font file.
            # The new width are calculated such as width = round(1000 * font_glyph_width / font_units_per_em)
            # See: http://martin.hoppenheit.info/blog/2018/pdfa-validation-and-inconsistent-glyph-width-information/
            for font in fonts.values():
                descriptor = font['/FontDescriptor'] if '/FontDescriptor' in font else {}
                if '/FontFile2' not in descriptor:
                    # No embedded TrueType program to read the widths from.
                    continue
                font_file = descriptor['/FontFile2']
                with io.BytesIO(decompress(font_file._data)) as stream:
                    ttfont = TTFont(stream)
                    font_upm = ttfont['head'].unitsPerEm
                    # Read the horizontal metrics from the 'hmtx' table itself rather than through a
                    # private attribute of the glyph set.
                    glyphs = ttfont['hmtx'].metrics
                    glyph_widths = [
                        NumberObject(round(1000.0 * metrics[0] / font_upm))
                        for glyph_name, metrics in glyphs.items()
                        if glyph_name[:5] == 'glyph'
                    ]
                font[NameObject('/W')] = ArrayObject([NumberObject(1), ArrayObject(glyph_widths)])
        else:
            _logger.warning('The fonttools package is not installed. Generated PDF may not be PDF/A compliant.')

        # Documents without an outline have nothing to adjust here.
        if '/Outlines' in self._root_object:
            outlines = self._root_object['/Outlines'].getObject()
            outlines[NameObject('/Count')] = NumberObject(1)

        # Set odoo as producer
        self.addMetadata({
            '/Creator': "Odoo",
            '/Producer': "Odoo",
        })

    def add_file_metadata(self, metadata_content):
        """
        Set the XMP metadata of the pdf, wrapping it with the necessary XMP header/footer.
        These are required for a PDF/A file to be completely compliant. Omitting them would result in validation errors.
        :param metadata_content: bytes of the metadata to add to the pdf (a str is encoded as UTF-8).
        """
        # See https://wwwimages2.adobe.com/content/dam/acom/en/devnet/xmp/pdfs/XMP%20SDK%20Release%20cc-2016-08/XMPSpecificationPart1.pdf
        # Page 10/11
        header = b'<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>'
        footer = b'<?xpacket end="w"?>'
        if isinstance(metadata_content, str):
            metadata_content = metadata_content.encode('utf-8')
        metadata = header + metadata_content + footer

        file_entry = DecodedStreamObject()
        file_entry.setData(metadata)
        file_entry.update({
            NameObject("/Type"): NameObject("/Metadata"),
            NameObject("/Subtype"): NameObject("/XML"),
            NameObject("/Length"): NumberObject(len(metadata)),
        })

        # Add the new metadata to the pdf, then redirect the reference to refer to this new object.
        metadata_object = self._addObject(file_entry)
        self._root_object.update({NameObject("/Metadata"): metadata_object})

    def _create_attachment_object(self, attachment):
        ''' Create a PyPdf2.generic object representing an embedded file.
        :param attachment: A dictionary containing:
        * filename: The name of the file to embed (required)
        * content: The bytes of the file to embed (required)
        * subtype: The mime-type of the file to embed, as a PDF name (optional)
        * description: A description of the file, stored as /Desc (optional)
        :return: The indirect reference to the file specification added to the pdf.
        '''
        from datetime import timezone

        content = attachment['content']
        file_entry = DecodedStreamObject()
        file_entry.setData(content)
        file_entry.update({
            NameObject("/Type"): NameObject("/EmbeddedFile"),
            NameObject("/Params"):
                DictionaryObject({
                    NameObject('/CheckSum'): createStringObject(md5(content).hexdigest()),
                    # The format hard-codes a +00'00' offset, so the timestamp has to be taken in UTC.
                    NameObject('/ModDate'): createStringObject(
                        datetime.now(timezone.utc).strftime(DEFAULT_PDF_DATETIME_FORMAT)),
                    NameObject('/Size'): NumberObject(len(content)),
                }),
        })
        if attachment.get('subtype'):
            file_entry.update({
                NameObject("/Subtype"): NameObject(attachment['subtype']),
            })
        file_entry_object = self._addObject(file_entry)

        filename_object = createStringObject(attachment['filename'])
        filespec_object = DictionaryObject({
            NameObject("/AFRelationship"): NameObject("/Data"),
            NameObject("/Type"): NameObject("/Filespec"),
            NameObject("/F"): filename_object,
            NameObject("/EF"):
                DictionaryObject({
                    NameObject("/F"): file_entry_object,
                    NameObject('/UF'): file_entry_object,
                }),
            NameObject("/UF"): filename_object,
        })
        if attachment.get('description'):
            filespec_object.update({NameObject("/Desc"): createStringObject(attachment['description'])})
        return self._addObject(filespec_object)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment