You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

71 lines
2.4 KiB

  1. # -*- coding: utf-8 -*-
  2. # © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
  3. # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
  4. import re
  5. import logging
  6. from lxml import etree, html
  7. from openerp import api, models
  8. _logger = logging.getLogger(__name__)
  9. class IrFieldsConverter(models.Model):
  10. _inherit = "ir.fields.converter"
  11. @api.model
  12. def imgs_from_html(self, html_content, limit=None, fail=False):
  13. """Extract all images in order from an HTML field in a generator.
  14. :param str html_content:
  15. HTML contents from where to extract the images.
  16. :param int limit:
  17. Only get up to this number of images.
  18. :param bool fail:
  19. If ``True``, exceptions will be raised.
  20. """
  21. # Parse HTML
  22. try:
  23. doc = html.fromstring(html_content)
  24. except (TypeError, etree.XMLSyntaxError, etree.ParserError):
  25. if fail:
  26. raise
  27. else:
  28. _logger.exception("Failure parsing this HTML:\n%s",
  29. html_content)
  30. return
  31. # Required tools
  32. query = """
  33. //img[@src] |
  34. //*[contains(translate(@style, "BACKGROUND", "background"),
  35. 'background')]
  36. [contains(translate(@style, "URL", "url"), 'url(')]
  37. """
  38. rgx = r"""
  39. url\(\s* # Start function
  40. (?P<url>[^)]*) # URL string
  41. \s*\) # End function
  42. """
  43. rgx = re.compile(rgx, re.IGNORECASE | re.VERBOSE)
  44. # Loop through possible image URLs
  45. for lap, element in enumerate(doc.xpath(query)):
  46. if limit and lap >= limit:
  47. break
  48. if element.tag == "img":
  49. yield element.attrib["src"]
  50. else:
  51. for rule in element.attrib["style"].split(";"):
  52. # Extract background image
  53. parts = rule.split(":", 1)
  54. try:
  55. if parts[0].strip().lower() in {"background",
  56. "background-image"}:
  57. yield (rgx.search(parts[1])
  58. .group("url").strip("\"'"))
  59. # Malformed CSS or no match for URL
  60. except (IndexError, AttributeError):
  61. pass