You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

72 lines
2.5 KiB

  1. # -*- coding: utf-8 -*-
  2. # Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
  3. # Copyright 2016 Tecnativa - Vicent Cubells
  4. # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
  5. import re
  6. import logging
  7. from lxml import etree, html
  8. from openerp import api, models
  9. _logger = logging.getLogger(__name__)
  10. class IrFieldsConverter(models.Model):
  11. _inherit = "ir.fields.converter"
  12. @api.model
  13. def imgs_from_html(self, html_content, limit=None, fail=False):
  14. """Extract all images in order from an HTML field in a generator.
  15. :param str html_content:
  16. HTML contents from where to extract the images.
  17. :param int limit:
  18. Only get up to this number of images.
  19. :param bool fail:
  20. If ``True``, exceptions will be raised.
  21. """
  22. # Parse HTML
  23. try:
  24. doc = html.fromstring(html_content)
  25. except (TypeError, etree.XMLSyntaxError, etree.ParserError):
  26. if fail:
  27. raise
  28. else:
  29. _logger.exception("Failure parsing this HTML:\n%s",
  30. html_content)
  31. return
  32. # Required tools
  33. query = """
  34. //img[@src] |
  35. //*[contains(translate(@style, "BACKGROUND", "background"),
  36. 'background')]
  37. [contains(translate(@style, "URL", "url"), 'url(')]
  38. """
  39. rgx = r"""
  40. url\(\s* # Start function
  41. (?P<url>[^)]*) # URL string
  42. \s*\) # End function
  43. """
  44. rgx = re.compile(rgx, re.IGNORECASE | re.VERBOSE)
  45. # Loop through possible image URLs
  46. for lap, element in enumerate(doc.xpath(query)):
  47. if limit and lap >= limit:
  48. break
  49. if element.tag == "img":
  50. yield element.attrib["src"]
  51. else:
  52. for rule in element.attrib["style"].split(";"):
  53. # Extract background image
  54. parts = rule.split(":", 1)
  55. try:
  56. if parts[0].strip().lower() in {"background",
  57. "background-image"}:
  58. yield (rgx.search(parts[1])
  59. .group("url").strip("\"'"))
  60. # Malformed CSS or no match for URL
  61. except (IndexError, AttributeError):
  62. pass