You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

74 lines
2.6 KiB

  1. # -*- coding: utf-8 -*-
  2. # © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis
  3. # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
  4. from lxml import etree
  5. from openerp.tools.misc import mute_logger
  6. from openerp.tests.common import TransactionCase
  7. class ExtractorCase(TransactionCase):
  8. def setUp(self):
  9. super(ExtractorCase, self).setUp()
  10. # Shortcut
  11. self.text_from_html = self.env["ir.fields.converter"].text_from_html
  12. self.logger = 'openerp.addons.html_text.models.ir_fields_converter'
  13. def test_excerpts(self):
  14. """Text gets correctly extracted."""
  15. html = u"""
  16. <html>
  17. <body>
  18. <div class="this should not appear">
  19. <h1>I'm a title</h1>
  20. <p>I'm a paragraph</p>
  21. <small>¡Pues yo soy español!</small>
  22. </div>
  23. </body>
  24. </html>
  25. """
  26. self.assertEqual(
  27. self.text_from_html(html),
  28. u"I'm a title I'm a paragraph ¡Pues yo soy español!")
  29. self.assertEqual(
  30. self.text_from_html(html, 8),
  31. u"I'm a title I'm a paragraph ¡Pues yo…")
  32. self.assertEqual(
  33. self.text_from_html(html, 8, 31),
  34. u"I'm a title I'm a paragraph ¡P…")
  35. self.assertEqual(
  36. self.text_from_html(html, 7, ellipsis=""),
  37. u"I'm a title I'm a paragraph ¡Pues")
  38. def test_empty_html(self):
  39. """Empty HTML handled correctly."""
  40. with mute_logger(self.logger):
  41. self.assertEqual(self.text_from_html(""), "")
  42. with self.assertRaises(etree.XMLSyntaxError):
  43. with mute_logger(self.logger):
  44. self.text_from_html("", fail=True)
  45. def test_false_html(self):
  46. """``False`` HTML handled correctly."""
  47. with mute_logger(self.logger):
  48. self.assertEqual(self.text_from_html(False), "")
  49. with self.assertRaises(TypeError):
  50. with mute_logger(self.logger):
  51. self.text_from_html(False, fail=True)
  52. def test_bad_html(self):
  53. """Bad HTML handled correctly.
  54. Newer versions of lxml parse this as
  55. '<html><body><p>&lt;<bad/></p></body></html>'
  56. so the exception is not guaranteed and the result may vary. """
  57. with mute_logger(self.logger):
  58. self.assertIn(self.text_from_html("<<bad>"), ("<", ""))
  59. try:
  60. with mute_logger(self.logger):
  61. res = self.text_from_html("<<bad>", fail=True)
  62. self.assertEqual(res, "<")
  63. except etree.ParserError:
  64. pass