You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

72 lines
2.5 KiB

# -*- coding: utf-8 -*-
# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
# Copyright 2016 Tecnativa - Vicent Cubells
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl).
import re
import logging
from lxml import etree, html
from openerp import api, models
_logger = logging.getLogger(__name__)
class IrFieldsConverter(models.Model):
_inherit = "ir.fields.converter"
def imgs_from_html(self, html_content, limit=None, fail=False):
"""Extract all images in order from an HTML field in a generator.
:param str html_content:
HTML contents from where to extract the images.
:param int limit:
Only get up to this number of images.
:param bool fail:
If ``True``, exceptions will be raised.
# Parse HTML
doc = html.fromstring(html_content)
except (TypeError, etree.XMLSyntaxError, etree.ParserError):
if fail:
_logger.exception("Failure parsing this HTML:\n%s",
# Required tools
query = """
//img[@src] |
//*[contains(translate(@style, "BACKGROUND", "background"),
[contains(translate(@style, "URL", "url"), 'url(')]
rgx = r"""
url\(\s* # Start function
(?P<url>[^)]*) # URL string
\s*\) # End function
rgx = re.compile(rgx, re.IGNORECASE | re.VERBOSE)
# Loop through possible image URLs
for lap, element in enumerate(doc.xpath(query)):
if limit and lap >= limit:
if element.tag == "img":
yield element.attrib["src"]
for rule in element.attrib["style"].split(";"):
# Extract background image
parts = rule.split(":", 1)
if parts[0].strip().lower() in {"background",
yield ([1])
# Malformed CSS or no match for URL
except (IndexError, AttributeError):