You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

73 lines
2.3 KiB

  1. # -*- coding: utf-8 -*-
  2. # Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
  3. # Copyright 2016 Tecnativa - Vicent Cubells
  4. # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
  5. import logging
  6. from lxml import etree, html
  7. from openerp import api, models
# Module-level logger named after this module; used below to report HTML
# parsing failures.
_logger = logging.getLogger(__name__)
  9. class IrFieldsConverter(models.Model):
  10. _inherit = "ir.fields.converter"
  11. @api.model
  12. def text_from_html(self, html_content, max_words=None, max_chars=None,
  13. ellipsis=u"", fail=False):
  14. """Extract text from an HTML field in a generator.
  15. :param str html_content:
  16. HTML contents from where to extract the text.
  17. :param int max_words:
  18. Maximum amount of words allowed in the resulting string.
  19. :param int max_chars:
  20. Maximum amount of characters allowed in the resulting string. If
  21. you apply this limit, beware that the last word could get cut in an
  22. unexpected place.
  23. :param str ellipsis:
  24. Character(s) to be appended to the end of the resulting string if
  25. it gets truncated after applying limits set in :param:`max_words`
  26. or :param:`max_chars`. If you want nothing applied, just set an
  27. empty string.
  28. :param bool fail:
  29. If ``True``, exceptions will be raised. Otherwise, an empty string
  30. will be returned on failure.
  31. """
  32. # Parse HTML
  33. try:
  34. doc = html.fromstring(html_content)
  35. except (TypeError, etree.XMLSyntaxError, etree.ParserError):
  36. if fail:
  37. raise
  38. else:
  39. _logger.exception("Failure parsing this HTML:\n%s",
  40. html_content)
  41. return ""
  42. # Get words
  43. words = u"".join(doc.xpath("//text()")).split()
  44. # Truncate words
  45. suffix = max_words and len(words) > max_words
  46. if max_words:
  47. words = words[:max_words]
  48. # Get text
  49. text = u" ".join(words)
  50. # Truncate text
  51. suffix = suffix or max_chars and len(text) > max_chars
  52. if max_chars:
  53. text = text[:max_chars - (len(ellipsis) if suffix else 0)].strip()
  54. # Append ellipsis if needed
  55. if suffix:
  56. text += ellipsis
  57. return text