72 lines
2.3 KiB

  1. # Copyright 2016-2017 Jairo Llopis <jairo.llopis@tecnativa.com>
  2. # Copyright 2016 Tecnativa - Vicent Cubells
  3. # License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl).
  4. import logging
  5. from lxml import etree, html
  6. from odoo import api, models
  7. _logger = logging.getLogger(__name__)
  8. class IrFieldsConverter(models.AbstractModel):
  9. _inherit = "ir.fields.converter"
  10. @api.model
  11. def text_from_html(self, html_content, max_words=None, max_chars=None,
  12. ellipsis=u"", fail=False):
  13. """Extract text from an HTML field in a generator.
  14. :param str html_content:
  15. HTML contents from where to extract the text.
  16. :param int max_words:
  17. Maximum amount of words allowed in the resulting string.
  18. :param int max_chars:
  19. Maximum amount of characters allowed in the resulting string. If
  20. you apply this limit, beware that the last word could get cut in an
  21. unexpected place.
  22. :param str ellipsis:
  23. Character(s) to be appended to the end of the resulting string if
  24. it gets truncated after applying limits set in :param:`max_words`
  25. or :param:`max_chars`. If you want nothing applied, just set an
  26. empty string.
  27. :param bool fail:
  28. If ``True``, exceptions will be raised. Otherwise, an empty string
  29. will be returned on failure.
  30. """
  31. # Parse HTML
  32. try:
  33. doc = html.fromstring(html_content)
  34. except (TypeError, etree.XMLSyntaxError, etree.ParserError):
  35. if fail:
  36. raise
  37. else:
  38. _logger.exception("Failure parsing this HTML:\n%s",
  39. html_content)
  40. return ""
  41. # Get words
  42. words = u"".join(doc.xpath("//text()")).split()
  43. # Truncate words
  44. suffix = max_words and len(words) > max_words
  45. if max_words:
  46. words = words[:max_words]
  47. # Get text
  48. text = u" ".join(words)
  49. # Truncate text
  50. suffix = suffix or max_chars and len(text) > max_chars
  51. if max_chars:
  52. text = text[:max_chars - (len(ellipsis) if suffix else 0)].strip()
  53. # Append ellipsis if needed
  54. if suffix:
  55. text += ellipsis
  56. return text