You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
72 lines
2.5 KiB
72 lines
2.5 KiB
# -*- coding: utf-8 -*-
|
|
# Copyright 2016-2017 Jairo Llopis <jairo.llopis@tecnativa.com>
|
|
# Copyright 2016 Tecnativa - Vicent Cubells
|
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
|
|
|
import re
|
|
import logging
|
|
from lxml import etree, html
|
|
from odoo import api, models
|
|
|
|
# Module-level logger; used to report HTML content that fails to parse.
_logger = logging.getLogger(__name__)
|
|
|
|
|
|
class IrFieldsConverter(models.AbstractModel):
    """Add an HTML image URL extractor to ``ir.fields.converter``."""

    _inherit = "ir.fields.converter"

    @api.model
    def imgs_from_html(self, html_content, limit=None, fail=False):
        """Extract all images in order from an HTML field in a generator.

        Two kinds of images are detected, in the order the XPath query
        returns their elements:

        * ``<img>`` elements that have a ``src`` attribute;
        * any element whose inline ``style`` declares a ``background`` or
          ``background-image`` property containing ``url(...)``.

        As this is a generator, nothing (including HTML parsing and its
        possible exceptions) happens until it is iterated.

        :param str html_content:
            HTML contents from where to extract the images.

        :param int limit:
            Only get up to this number of images. A falsy value (``None``
            or ``0``) means no limit.

        :param bool fail:
            If ``True``, HTML parsing exceptions will be raised. Otherwise
            they are logged and nothing is yielded.

        :return:
            Generator of image URL strings, without the surrounding
            whitespace or quotes of the CSS ``url()`` notation.
        """
        # Parse HTML
        try:
            doc = html.fromstring(html_content)
        except (TypeError, etree.XMLSyntaxError, etree.ParserError):
            if fail:
                raise
            _logger.exception("Failure parsing this HTML:\n%s", html_content)
            return

        # Required tools
        query = """
            //img[@src] |
            //*[contains(translate(@style, "BACKGROUND", "background"),
                         'background')]
               [contains(translate(@style, "URL", "url"), 'url(')]
        """
        url_rgx = re.compile(
            r"""
            url\(\s*        # Start function
            (?P<url>[^)]*)  # URL string
            \s*\)           # End function
            """,
            re.IGNORECASE | re.VERBOSE
        )
        # Split CSS declarations on ";" except when the ";" sits inside
        # parentheses, so ``url(data:image/png;base64,...)`` stays whole.
        rule_sep = re.compile(r";(?![^()]*\))")
        bg_props = {"background", "background-image"}

        # Loop through possible image URLs. The limit is applied to the
        # images actually yielded, not to the elements matched by the query:
        # a styled element may provide zero or several URLs.
        found = 0
        for element in doc.xpath(query):
            if element.tag == "img":
                urls = [element.attrib["src"]]
            else:
                urls = []
                for rule in rule_sep.split(element.attrib["style"]):
                    # Extract background image
                    name, colon, value = rule.partition(":")
                    if not colon or name.strip().lower() not in bg_props:
                        # Malformed CSS or a property we do not care about
                        continue
                    match = url_rgx.search(value)
                    if match is None:
                        # Background without a URL (e.g. a plain color)
                        continue
                    # The greedy group also swallows whitespace before ")",
                    # so strip whitespace first and the quotes afterwards.
                    urls.append(match.group("url").strip().strip("\"'"))
            for url in urls:
                if limit and found >= limit:
                    return
                yield url
                found += 1
|