Browse Source
Merge pull request #874 from Tecnativa/10.0-html_text
Merge pull request #874 from Tecnativa/10.0-html_text
[MIG][10.0][html_text] Migratepull/882/head
Pedro M. Baeza
7 years ago
committed by
GitHub
13 changed files with 372 additions and 0 deletions
-
79html_text/README.rst
-
4html_text/__init__.py
-
25html_text/__manifest__.py
-
24html_text/i18n/ca.po
-
24html_text/i18n/de.po
-
24html_text/i18n/es.po
-
24html_text/i18n/es_ES.po
-
24html_text/i18n/tr.po
-
4html_text/models/__init__.py
-
73html_text/models/ir_fields_converter.py
-
BINhtml_text/static/description/icon.png
-
4html_text/tests/__init__.py
-
63html_text/tests/test_extractor.py
@ -0,0 +1,79 @@ |
|||||
|
.. image:: https://img.shields.io/badge/licence-AGPL--3-blue.svg |
||||
|
:target: http://www.gnu.org/licenses/agpl-3.0-standalone.html |
||||
|
:alt: License: AGPL-3 |
||||
|
|
||||
|
==================== |
||||
|
Text from HTML field |
||||
|
==================== |
||||
|
|
||||
|
This module provides some technical features that allow extracting text from |
||||
|
any chunk of HTML, without HTML tags or attributes. You can choose either: |
||||
|
|
||||
|
* To truncate the result by amount of words or characters. |
||||
|
* To append an ellipsis (or any character(s)) at the end of the result. |
||||
|
|
||||
|
It can be used to easily generate excerpts. |
||||
|
|
||||
|
Usage |
||||
|
===== |
||||
|
|
||||
|
This module just adds a technical utility, but nothing for the end user. |
||||
|
|
||||
|
If you are a developer and need this utility for your module, see these |
||||
|
examples and read the docs inside the code. |
||||
|
|
||||
|
Python example:: |
||||
|
|
||||
|
@api.multi |
||||
|
def some_method(self): |
||||
|
# Get truncated text from an HTML field. It will have 40 words and 100 |
||||
|
# characters at most, and will have "..." appended at the end if it |
||||
|
# gets truncated. |
||||
|
truncated_text = self.env["ir.fields.converter"].text_from_html( |
||||
|
self.html_field, 40, 100, "...") |
||||
|
|
||||
|
QWeb example:: |
||||
|
|
||||
|
<t t-esc="env['ir.fields.converter'].text_from_html(doc.html_field)"/> |
||||
|
|
||||
|
.. image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas |
||||
|
:alt: Try me on Runbot |
||||
|
:target: https://runbot.odoo-community.org/runbot/149/10.0 |
||||
|
|
||||
|
Known issues / Roadmap |
||||
|
====================== |
||||
|
|
||||
|
* An option could be added to try to respect the basic HTML tags inside the |
||||
|
excerpt (``<b>``, ``<i>``, ``<p>``, etc.). |
||||
|
|
||||
|
Bug Tracker |
||||
|
=========== |
||||
|
|
||||
|
Bugs are tracked on `GitHub Issues |
||||
|
<https://github.com/OCA/server-tools/issues>`_. In case of trouble, please |
||||
|
check there if your issue has already been reported. If you spotted it first, |
||||
|
help us smash it by providing detailed and welcome feedback. |
||||
|
|
||||
|
Credits |
||||
|
======= |
||||
|
|
||||
|
Contributors |
||||
|
------------ |
||||
|
|
||||
|
* Jairo Llopis <yajo.sk8@gmail.com> |
||||
|
* Vicent Cubells <vicent.cubells@tecnativa.com> |
||||
|
|
||||
|
Maintainer |
||||
|
---------- |
||||
|
|
||||
|
.. image:: https://odoo-community.org/logo.png |
||||
|
:alt: Odoo Community Association |
||||
|
:target: https://odoo-community.org |
||||
|
|
||||
|
This module is maintained by the OCA. |
||||
|
|
||||
|
OCA, or the Odoo Community Association, is a nonprofit organization whose |
||||
|
mission is to support the collaborative development of Odoo features and |
||||
|
promote its widespread use. |
||||
|
|
||||
|
To contribute to this module, please visit https://odoo-community.org. |
@ -0,0 +1,4 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). |
||||
|
|
||||
|
from . import models |
@ -0,0 +1,25 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
# Copyright 2016-2017 Jairo Llopis <jairo.llopis@tecnativa.com> |
||||
|
# Copyright 2016 Tecnativa - Vicent Cubells |
||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). |
||||
|
{ |
||||
|
"name": "Text from HTML field", |
||||
|
"summary": "Generate excerpts from any HTML field", |
||||
|
"version": "10.0.1.0.0", |
||||
|
"category": "Tools", |
||||
|
"website": "https://tecnativa.com", |
||||
|
"author": "Grupo ESOC Ingeniería de Servicios, " |
||||
|
"Tecnativa, " |
||||
|
"Odoo Community Association (OCA)", |
||||
|
"license": "AGPL-3", |
||||
|
"application": False, |
||||
|
"installable": True, |
||||
|
"external_dependencies": { |
||||
|
"python": [ |
||||
|
"lxml.html", |
||||
|
], |
||||
|
}, |
||||
|
"depends": [ |
||||
|
"base", |
||||
|
], |
||||
|
} |
@ -0,0 +1,24 @@ |
|||||
|
# Translation of Odoo Server. |
||||
|
# This file contains the translation of the following modules: |
||||
|
# * html_text |
||||
|
# |
||||
|
# Translators: |
||||
|
# Marc Tormo i Bochaca <mtbochaca@gmail.com>, 2017 |
||||
|
msgid "" |
||||
|
msgstr "" |
||||
|
"Project-Id-Version: Odoo Server 9.0c\n" |
||||
|
"Report-Msgid-Bugs-To: \n" |
||||
|
"POT-Creation-Date: 2017-04-19 18:00+0000\n" |
||||
|
"PO-Revision-Date: 2017-04-19 18:00+0000\n" |
||||
|
"Last-Translator: Marc Tormo i Bochaca <mtbochaca@gmail.com>, 2017\n" |
||||
|
"Language-Team: Catalan (https://www.transifex.com/oca/teams/23907/ca/)\n" |
||||
|
"MIME-Version: 1.0\n" |
||||
|
"Content-Type: text/plain; charset=UTF-8\n" |
||||
|
"Content-Transfer-Encoding: \n" |
||||
|
"Language: ca\n" |
||||
|
"Plural-Forms: nplurals=2; plural=(n != 1);\n" |
||||
|
|
||||
|
#. module: html_text |
||||
|
#: model:ir.model,name:html_text.model_ir_fields_converter |
||||
|
msgid "ir.fields.converter" |
||||
|
msgstr "ir.fields.converter" |
@ -0,0 +1,24 @@ |
|||||
|
# Translation of Odoo Server. |
||||
|
# This file contains the translation of the following modules: |
||||
|
# * html_text |
||||
|
# |
||||
|
# Translators: |
||||
|
# Rudolf Schnapka <rs@techno-flex.de>, 2017 |
||||
|
msgid "" |
||||
|
msgstr "" |
||||
|
"Project-Id-Version: Odoo Server 9.0c\n" |
||||
|
"Report-Msgid-Bugs-To: \n" |
||||
|
"POT-Creation-Date: 2017-04-19 18:00+0000\n" |
||||
|
"PO-Revision-Date: 2017-04-19 18:00+0000\n" |
||||
|
"Last-Translator: Rudolf Schnapka <rs@techno-flex.de>, 2017\n" |
||||
|
"Language-Team: German (https://www.transifex.com/oca/teams/23907/de/)\n" |
||||
|
"MIME-Version: 1.0\n" |
||||
|
"Content-Type: text/plain; charset=UTF-8\n" |
||||
|
"Content-Transfer-Encoding: \n" |
||||
|
"Language: de\n" |
||||
|
"Plural-Forms: nplurals=2; plural=(n != 1);\n" |
||||
|
|
||||
|
#. module: html_text |
||||
|
#: model:ir.model,name:html_text.model_ir_fields_converter |
||||
|
msgid "ir.fields.converter" |
||||
|
msgstr "ir.fields.converter" |
@ -0,0 +1,24 @@ |
|||||
|
# Translation of Odoo Server. |
||||
|
# This file contains the translation of the following modules: |
||||
|
# * html_text |
||||
|
# |
||||
|
# Translators: |
||||
|
# Pedro M. Baeza <pedro.baeza@gmail.com>, 2016 |
||||
|
msgid "" |
||||
|
msgstr "" |
||||
|
"Project-Id-Version: Odoo Server 9.0c\n" |
||||
|
"Report-Msgid-Bugs-To: \n" |
||||
|
"POT-Creation-Date: 2016-12-17 02:07+0000\n" |
||||
|
"PO-Revision-Date: 2016-12-17 02:07+0000\n" |
||||
|
"Last-Translator: Pedro M. Baeza <pedro.baeza@gmail.com>, 2016\n" |
||||
|
"Language-Team: Spanish (https://www.transifex.com/oca/teams/23907/es/)\n" |
||||
|
"MIME-Version: 1.0\n" |
||||
|
"Content-Type: text/plain; charset=UTF-8\n" |
||||
|
"Content-Transfer-Encoding: \n" |
||||
|
"Language: es\n" |
||||
|
"Plural-Forms: nplurals=2; plural=(n != 1);\n" |
||||
|
|
||||
|
#. module: html_text |
||||
|
#: model:ir.model,name:html_text.model_ir_fields_converter |
||||
|
msgid "ir.fields.converter" |
||||
|
msgstr "ir.fields.converter" |
@ -0,0 +1,24 @@ |
|||||
|
# Translation of Odoo Server. |
||||
|
# This file contains the translation of the following modules: |
||||
|
# * html_text |
||||
|
# |
||||
|
# Translators: |
||||
|
# Fernando Lara <gennesis45@gmail.com>, 2017 |
||||
|
msgid "" |
||||
|
msgstr "" |
||||
|
"Project-Id-Version: Odoo Server 9.0c\n" |
||||
|
"Report-Msgid-Bugs-To: \n" |
||||
|
"POT-Creation-Date: 2017-02-16 10:39+0000\n" |
||||
|
"PO-Revision-Date: 2017-02-16 10:39+0000\n" |
||||
|
"Last-Translator: Fernando Lara <gennesis45@gmail.com>, 2017\n" |
||||
|
"Language-Team: Spanish (Spain) (https://www.transifex.com/oca/teams/23907/es_ES/)\n" |
||||
|
"MIME-Version: 1.0\n" |
||||
|
"Content-Type: text/plain; charset=UTF-8\n" |
||||
|
"Content-Transfer-Encoding: \n" |
||||
|
"Language: es_ES\n" |
||||
|
"Plural-Forms: nplurals=2; plural=(n != 1);\n" |
||||
|
|
||||
|
#. module: html_text |
||||
|
#: model:ir.model,name:html_text.model_ir_fields_converter |
||||
|
msgid "ir.fields.converter" |
||||
|
msgstr "ir.documentos.conversor" |
@ -0,0 +1,24 @@ |
|||||
|
# Translation of Odoo Server. |
||||
|
# This file contains the translation of the following modules: |
||||
|
# * html_text |
||||
|
# |
||||
|
# Translators: |
||||
|
# Ahmet Altinisik <aaltinisik@altinkaya.com.tr>, 2016 |
||||
|
msgid "" |
||||
|
msgstr "" |
||||
|
"Project-Id-Version: Odoo Server 9.0c\n" |
||||
|
"Report-Msgid-Bugs-To: \n" |
||||
|
"POT-Creation-Date: 2016-12-29 03:40+0000\n" |
||||
|
"PO-Revision-Date: 2016-12-29 03:40+0000\n" |
||||
|
"Last-Translator: Ahmet Altinisik <aaltinisik@altinkaya.com.tr>, 2016\n" |
||||
|
"Language-Team: Turkish (https://www.transifex.com/oca/teams/23907/tr/)\n" |
||||
|
"MIME-Version: 1.0\n" |
||||
|
"Content-Type: text/plain; charset=UTF-8\n" |
||||
|
"Content-Transfer-Encoding: \n" |
||||
|
"Language: tr\n" |
||||
|
"Plural-Forms: nplurals=2; plural=(n > 1);\n" |
||||
|
|
||||
|
#. module: html_text |
||||
|
#: model:ir.model,name:html_text.model_ir_fields_converter |
||||
|
msgid "ir.fields.converter" |
||||
|
msgstr "ir.fields.converter" |
@ -0,0 +1,4 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). |
||||
|
|
||||
|
from . import ir_fields_converter |
@ -0,0 +1,73 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
# Copyright 2016-2017 Jairo Llopis <jairo.llopis@tecnativa.com> |
||||
|
# Copyright 2016 Tecnativa - Vicent Cubells |
||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). |
||||
|
|
||||
|
import logging |
||||
|
from lxml import etree, html |
||||
|
from odoo import api, models |
||||
|
|
||||
|
_logger = logging.getLogger(__name__) |
||||
|
|
||||
|
|
||||
|
class IrFieldsConverter(models.AbstractModel):
    """Extend ``ir.fields.converter`` with an HTML-to-text extractor."""

    _inherit = "ir.fields.converter"

    @api.model
    def text_from_html(self, html_content, max_words=None, max_chars=None,
                       ellipsis=u"…", fail=False):
        """Extract the plain text found in a chunk of HTML.

        :param str html_content:
            HTML contents from where to extract the text.

        :param int max_words:
            Maximum amount of words allowed in the resulting string. A falsy
            value (``None`` or ``0``) disables this limit.

        :param int max_chars:
            Maximum amount of characters allowed in the resulting string,
            ellipsis included. A falsy value disables this limit. If you
            apply this limit, beware that the last word could get cut in an
            unexpected place.

        :param str ellipsis:
            Character(s) to be appended to the end of the resulting string if
            it gets truncated after applying limits set in :param:`max_words`
            or :param:`max_chars`. If you want nothing applied, just set an
            empty string. When it is longer than :param:`max_chars`, the
            result is the ellipsis alone.

        :param bool fail:
            If ``True``, exceptions will be raised. Otherwise, an empty string
            will be returned on failure.

        :return:
            The extracted words joined by single spaces, truncated and
            suffixed with :param:`ellipsis` when a limit was exceeded; an
            empty string if parsing failed and :param:`fail` is falsy.
        """
        # Parse HTML
        try:
            doc = html.fromstring(html_content)
        except (TypeError, etree.XMLSyntaxError, etree.ParserError):
            if fail:
                raise
            _logger.exception("Failure parsing this HTML:\n%s", html_content)
            return ""

        # Text nodes are concatenated as-is, so words are only separated
        # where the HTML itself contains whitespace
        words = u"".join(doc.xpath("//text()")).split()

        # Truncate words
        truncated = bool(max_words) and len(words) > max_words
        if truncated:
            words = words[:max_words]

        # Get text
        text = u" ".join(words)

        # Truncate text
        if max_chars:
            truncated = truncated or len(text) > max_chars
            # Leave room for the ellipsis only when it is going to be added
            room = max_chars - len(ellipsis) if truncated else max_chars
            # A negative bound would trim from the end of the string instead
            # of capping its length, so never go below zero
            text = text[:max(room, 0)].strip()

        # Append ellipsis if needed
        if truncated:
            text += ellipsis

        return text
After Width: 128 | Height: 128 | Size: 9.2 KiB |
@ -0,0 +1,4 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). |
||||
|
|
||||
|
from . import test_extractor |
@ -0,0 +1,63 @@ |
|||||
|
# -*- coding: utf-8 -*- |
||||
|
# Copyright 2016-2017 Jairo Llopis <jairo.llopis@tecnativa.com> |
||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). |
||||
|
|
||||
|
from lxml import etree |
||||
|
from odoo.tools import mute_logger |
||||
|
from odoo.tests.common import TransactionCase |
||||
|
from ..models import ir_fields_converter |
||||
|
|
||||
|
|
||||
|
class ExtractorCase(TransactionCase):
    """Exercise ``text_from_html`` from the ``ir.fields.converter`` model."""

    def setUp(self):
        super(ExtractorCase, self).setUp()
        # Shortcut
        self.text_from_html = self.env["ir.fields.converter"].text_from_html

    def test_excerpts(self):
        """Text gets correctly extracted."""
        html = u"""
            <html>
                <body>
                    <div class="this should not appear">
                        <h1>I'm a title</h1>
                        <p>I'm a paragraph</p>
                        <small>¡Pues yo soy español!</small>
                    </div>
                </body>
            </html>
        """

        # No limits: every word is kept, attributes are ignored
        self.assertEqual(
            self.text_from_html(html),
            u"I'm a title I'm a paragraph ¡Pues yo soy español!")
        # Word limit only
        self.assertEqual(
            self.text_from_html(html, 8),
            u"I'm a title I'm a paragraph ¡Pues yo…")
        # Word and character limits; the ellipsis counts as a character
        self.assertEqual(
            self.text_from_html(html, 8, 31),
            u"I'm a title I'm a paragraph ¡P…")
        # Truncation without any suffix
        self.assertEqual(
            self.text_from_html(html, 7, ellipsis=""),
            u"I'm a title I'm a paragraph ¡Pues")

    @mute_logger(ir_fields_converter.__name__)
    def test_empty_html(self):
        """Empty HTML handled correctly."""
        self.assertEqual(self.text_from_html(""), "")
        # NOTE(review): lxml appears to report an empty document as either
        # XMLSyntaxError or ParserError depending on the release; the method
        # under test handles both identically, so accept either one here
        with self.assertRaises((etree.XMLSyntaxError, etree.ParserError)):
            self.text_from_html("", fail=True)

    @mute_logger(ir_fields_converter.__name__)
    def test_false_html(self):
        """``False`` HTML handled correctly."""
        self.assertEqual(self.text_from_html(False), "")
        with self.assertRaises(TypeError):
            self.text_from_html(False, fail=True)

    @mute_logger(ir_fields_converter.__name__)
    def test_bad_html(self):
        """Bad HTML handled correctly."""
        self.assertEqual(self.text_from_html("<<bad>"), "")
        with self.assertRaises(etree.ParserError):
            self.text_from_html("<<bad>", fail=True)
Write
Preview
Loading…
Cancel
Save
Reference in new issue