From 18ece10e07cc3b83c05b6d05921f592d70e802ce Mon Sep 17 00:00:00 2001 From: Yajo Date: Mon, 4 Apr 2016 16:58:49 +0200 Subject: [PATCH 1/5] Excerpt generator. * [8.0][html_text] Excerpt generator. This module adds a technical utility to allow the developer to get an excerpt from any HTML chunk. You can choose the maximum amount of words or characters if you want. See the README and inline docstrings for further details. --- html_text/README.rst | 82 ++++++++++++++++++++++++ html_text/__init__.py | 5 ++ html_text/__openerp__.py | 23 +++++++ html_text/models/__init__.py | 5 ++ html_text/models/ir_fields_converter.py | 72 +++++++++++++++++++++ html_text/static/description/icon.png | Bin 0 -> 9455 bytes html_text/tests/__init__.py | 5 ++ html_text/tests/test_extractor.py | 53 +++++++++++++++ 8 files changed, 245 insertions(+) create mode 100644 html_text/README.rst create mode 100644 html_text/__init__.py create mode 100644 html_text/__openerp__.py create mode 100644 html_text/models/__init__.py create mode 100644 html_text/models/ir_fields_converter.py create mode 100644 html_text/static/description/icon.png create mode 100644 html_text/tests/__init__.py create mode 100644 html_text/tests/test_extractor.py diff --git a/html_text/README.rst b/html_text/README.rst new file mode 100644 index 000000000..38eac30af --- /dev/null +++ b/html_text/README.rst @@ -0,0 +1,82 @@ +.. image:: https://img.shields.io/badge/licence-AGPL--3-blue.svg + :target: http://www.gnu.org/licenses/agpl-3.0-standalone.html + :alt: License: AGPL-3 + +==================== +Text from HTML field +==================== + +This module provides some technical features that allow you to extract text from +any chunk of HTML, without HTML tags or attributes. You can choose either: + +* To truncate the result by amount of words or characters. +* To append an ellipsis (or any character(s)) at the end of the result. + +It can be used to easily generate excerpts. 
+ +Usage +===== + +This module just adds a technical utility, but nothing for the end user. + +If you are a developer and need this utility for your module, see these +examples and read the docs inside the code. + +Python example:: + + @api.multi + def some_method(self): + # Get truncated text from an HTML field. It will have 40 words and 100 + # characters at most, and will have "..." appended at the end if it + # gets truncated. + truncated_text = self.env["ir.fields.converter"].text_from_html( + self.html_field, 40, 100, "...") + +QWeb example:: + + + +.. image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas + :alt: Try me on Runbot + :target: https://runbot.odoo-community.org/runbot/149/8.0 + +Known issues / Roadmap +====================== + +* An option could be added to try to respect the basic HTML tags inside the + excerpt (````, ````, ``

``, etc.). + +Bug Tracker +=========== + +Bugs are tracked on `GitHub Issues +`_. In case of trouble, please +check there if your issue has already been reported. If you spotted it first, +help us smashing it by providing a detailed and welcomed `feedback +`_. + +Credits +======= + +Contributors +------------ + +* Jairo Llopis + +Maintainer +---------- + +.. image:: https://odoo-community.org/logo.png + :alt: Odoo Community Association + :target: https://odoo-community.org + +This module is maintained by the OCA. + +OCA, or the Odoo Community Association, is a nonprofit organization whose +mission is to support the collaborative development of Odoo features and +promote its widespread use. + +To contribute to this module, please visit https://odoo-community.org. diff --git a/html_text/__init__.py b/html_text/__init__.py new file mode 100644 index 000000000..09356048f --- /dev/null +++ b/html_text/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- +# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +from . import models diff --git a/html_text/__openerp__.py b/html_text/__openerp__.py new file mode 100644 index 000000000..07f931ee8 --- /dev/null +++ b/html_text/__openerp__.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). 
+{ + "name": "Text from HTML field", + "summary": "Generate excerpts from any HTML field", + "version": "8.0.1.0.0", + "category": "Tools", + "website": "https://grupoesoc.es", + "author": "Grupo ESOC Ingeniería de Servicios, " + "Odoo Community Association (OCA)", + "license": "AGPL-3", + "application": False, + "installable": True, + "external_dependencies": { + "python": [ + "lxml.html", + ], + }, + "depends": [ + "base", + ], +} diff --git a/html_text/models/__init__.py b/html_text/models/__init__.py new file mode 100644 index 000000000..11b8b13d5 --- /dev/null +++ b/html_text/models/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- +# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +from . import ir_fields_converter diff --git a/html_text/models/ir_fields_converter.py b/html_text/models/ir_fields_converter.py new file mode 100644 index 000000000..503b2c7c7 --- /dev/null +++ b/html_text/models/ir_fields_converter.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +import logging +from lxml import etree, html +from openerp import api, models + +_logger = logging.getLogger(__name__) + + +class IrFieldsConverter(models.Model): + _inherit = "ir.fields.converter" + + @api.model + def text_from_html(self, html_content, max_words=None, max_chars=None, + ellipsis=u"…", fail=False): + """Extract text from an HTML field in a generator. + + :param str html_content: + HTML contents from where to extract the text. + + :param int max_words: + Maximum amount of words allowed in the resulting string. + + :param int max_chars: + Maximum amount of characters allowed in the resulting string. If + you apply this limit, beware that the last word could get cut in an + unexpected place. 
+ + :param str ellipsis: + Character(s) to be appended to the end of the resulting string if + it gets truncated after applying limits set in :param:`max_words` + or :param:`max_chars`. If you want nothing applied, just set an + empty string. + + :param bool fail: + If ``True``, exceptions will be raised. Otherwise, an empty string + will be returned on failure. + """ + # Parse HTML + try: + doc = html.fromstring(html_content) + except (etree.XMLSyntaxError, etree.ParserError): + if fail: + raise + else: + _logger.exception("Failure parsing this HTML:\n%s", + html_content) + return "" + + # Get words + words = u"".join(doc.xpath("//text()")).split() + + # Truncate words + suffix = max_words and len(words) > max_words + if max_words: + words = words[:max_words] + + # Get text + text = u" ".join(words) + + # Truncate text + suffix = suffix or max_chars and len(text) > max_chars + if max_chars: + text = text[:max_chars - (len(ellipsis) if suffix else 0)].strip() + + # Append ellipsis if needed + if suffix: + text += ellipsis + + return text diff --git a/html_text/static/description/icon.png b/html_text/static/description/icon.png new file mode 100644 index 0000000000000000000000000000000000000000..3a0328b516c4980e8e44cdb63fd945757ddd132d GIT binary patch literal 9455 zcmW++2RxMjAAjx~&dlBk9S+%}OXg)AGE&Cb*&}d0jUxM@u(PQx^-s)697TX`ehR4?GS^qbkof1cslKgkU)h65qZ9Oc=ml_0temigYLJfnz{IDzUf>bGs4N!v3=Z3jMq&A#7%rM5eQ#dc?k~! 
zVpnB`o+K7|Al`Q_U;eD$B zfJtP*jH`siUq~{KE)`jP2|#TUEFGRryE2`i0**z#*^6~AI|YzIWy$Cu#CSLW3q=GA z6`?GZymC;dCPk~rBS%eCb`5OLr;RUZ;D`}um=H)BfVIq%7VhiMr)_#G0N#zrNH|__ zc+blN2UAB0=617@>_u;MPHN;P;N#YoE=)R#i$k_`UAA>WWCcEVMh~L_ zj--gtp&|K1#58Yz*AHCTMziU1Jzt_jG0I@qAOHsk$2}yTmVkBp_eHuY$A9)>P6o~I z%aQ?!(GqeQ-Y+b0I(m9pwgi(IIZZzsbMv+9w{PFtd_<_(LA~0H(xz{=FhLB@(1&qHA5EJw1>>=%q2f&^X>IQ{!GJ4e9U z&KlB)z(84HmNgm2hg2C0>WM{E(DdPr+EeU_N@57;PC2&DmGFW_9kP&%?X4}+xWi)( z;)z%wI5>D4a*5XwD)P--sPkoY(a~WBw;E~AW`Yue4kFa^LM3X`8x|}ZUeMnqr}>kH zG%WWW>3ml$Yez?i%)2pbKPI7?5o?hydokgQyZsNEr{a|mLdt;X2TX(#B1j35xPnPW z*bMSSOauW>o;*=kO8ojw91VX!qoOQb)zHJ!odWB}d+*K?#sY_jqPdg{Sm2HdYzdEx zOGVPhVRTGPtv0o}RfVP;Nd(|CB)I;*t&QO8h zFfekr30S!-LHmV_Su-W+rEwYXJ^;6&3|L$mMC8*bQptyOo9;>Qb9Q9`ySe3%V$A*9 zeKEe+b0{#KWGp$F+tga)0RtI)nhMa-K@JS}2krK~n8vJ=Ngm?R!9G<~RyuU0d?nz# z-5EK$o(!F?hmX*2Yt6+coY`6jGbb7tF#6nHA zuKk=GGJ;ZwON1iAfG$E#Y7MnZVmrY|j0eVI(DN_MNFJmyZ|;w4tf@=CCDZ#5N_0K= z$;R~bbk?}TpfDjfB&aiQ$VA}s?P}xPERJG{kxk5~R`iRS(SK5d+Xs9swCozZISbnS zk!)I0>t=A<-^z(cmSFz3=jZ23u13X><0b)P)^1T_))Kr`e!-pb#q&J*Q`p+B6la%C zuVl&0duN<;uOsB3%T9Fp8t{ED108<+W(nOZd?gDnfNBC3>M8WE61$So|P zVvqH0SNtDTcsUdzaMDpT=Ty0pDHHNL@Z0w$Y`XO z2M-_r1S+GaH%pz#Uy0*w$Vdl=X=rQXEzO}d6J^R6zjM1u&c9vYLvLp?W7w(?np9x1 zE_0JSAJCPB%i7p*Wvg)pn5T`8k3-uR?*NT|J`eS#_#54p>!p(mLDvmc-3o0mX*mp_ zN*AeS<>#^-{S%W<*mz^!X$w_2dHWpcJ6^j64qFBft-o}o_Vx80o0>}Du;>kLts;$8 zC`7q$QI(dKYG`Wa8#wl@V4jVWBRGQ@1dr-hstpQL)Tl+aqVpGpbSfN>5i&QMXfiZ> zaA?T1VGe?rpQ@;+pkrVdd{klI&jVS@I5_iz!=UMpTsa~mBga?1r}aRBm1WS;TT*s0f0lY=JBl66Upy)-k4J}lh=P^8(SXk~0xW=T9v*B|gzIhN z>qsO7dFd~mgxAy4V?&)=5ieYq?zi?ZEoj)&2o)RLy=@hbCRcfT5jigwtQGE{L*8<@Yd{zg;CsL5mvzfDY}P-wos_6PfprFVaeqNE%h zKZhLtcQld;ZD+>=nqN~>GvROfueSzJD&BE*}XfU|H&(FssBqY=hPCt`d zH?@s2>I(|;fcW&YM6#V#!kUIP8$Nkdh0A(bEVj``-AAyYgwY~jB zT|I7Bf@%;7aL7Wf4dZ%VqF$eiaC38OV6oy3Z#TER2G+fOCd9Iaoy6aLYbPTN{XRPz z;U!V|vBf%H!}52L2gH_+j;`bTcQRXB+y9onc^wLm5wi3-Be}U>k_u>2Eg$=k!(l@I zcCg+flakT2Nej3i0yn+g+}%NYb?ta;R?(g5SnwsQ49U8Wng8d|{B+lyRcEDvR3+`O{zfmrmvFrL6acVP%yG98X 
zo&+VBg@px@i)%o?dG(`T;n*$S5*rnyiR#=wW}}GsAcfyQpE|>a{=$Hjg=-*_K;UtD z#z-)AXwSRY?OPefw^iI+ z)AXz#PfEjlwTes|_{sB?4(O@fg0AJ^g8gP}ex9Ucf*@_^J(s_5jJV}c)s$`Myn|Kd z$6>}#q^n{4vN@+Os$m7KV+`}c%4)4pv@06af4-x5#wj!KKb%caK{A&Y#Rfs z-po?Dcb1({W=6FKIUirH&(yg=*6aLCekcKwyfK^JN5{wcA3nhO(o}SK#!CINhI`-I z1)6&n7O&ZmyFMuNwvEic#IiOAwNkR=u5it{B9n2sAJV5pNhar=j5`*N!Na;c7g!l$ z3aYBqUkqqTJ=Re-;)s!EOeij=7SQZ3Hq}ZRds%IM*PtM$wV z@;rlc*NRK7i3y5BETSKuumEN`Xu_8GP1Ri=OKQ$@I^ko8>H6)4rjiG5{VBM>B|%`&&s^)jS|-_95&yc=GqjNo{zFkw%%HHhS~e=s zD#sfS+-?*t|J!+ozP6KvtOl!R)@@-z24}`9{QaVLD^9VCSR2b`b!KC#o;Ki<+wXB6 zx3&O0LOWcg4&rv4QG0)4yb}7BFSEg~=IR5#ZRj8kg}dS7_V&^%#Do==#`u zpy6{ox?jWuR(;pg+f@mT>#HGWHAJRRDDDv~@(IDw&R>9643kK#HN`!1vBJHnC+RM&yIh8{gG2q zA%e*U3|N0XSRa~oX-3EAneep)@{h2vvd3Xvy$7og(sayr@95+e6~Xvi1tUqnIxoIH zVWo*OwYElb#uyW{Imam6f2rGbjR!Y3`#gPqkv57dB6K^wRGxc9B(t|aYDGS=m$&S!NmCtrMMaUg(c zc2qC=2Z`EEFMW-me5B)24AqF*bV5Dr-M5ig(l-WPS%CgaPzs6p_gnCIvTJ=Y<6!gT zVt@AfYCzjjsMEGi=rDQHo0yc;HqoRNnNFeWZgcm?f;cp(6CNylj36DoL(?TS7eU#+ z7&mfr#y))+CJOXQKUMZ7QIdS9@#-}7y2K1{8)cCt0~-X0O!O?Qx#E4Og+;A2SjalQ zs7r?qn0H044=sDN$SRG$arw~n=+T_DNdSrarmu)V6@|?1-ZB#hRn`uilTGPJ@fqEy zGt(f0B+^JDP&f=r{#Y_wi#AVDf-y!RIXU^0jXsFpf>=Ji*TeqSY!H~AMbJdCGLhC) zn7Rx+sXw6uYj;WRYrLd^5IZq@6JI1C^YkgnedZEYy<&4(z%Q$5yv#Boo{AH8n$a zhb4Y3PWdr269&?V%uI$xMcUrMzl=;w<_nm*qr=c3Rl@i5wWB;e-`t7D&c-mcQl7x! 
zZWB`UGcw=Y2=}~wzrfLx=uet<;m3~=8I~ZRuzvMQUQdr+yTV|ATf1Uuomr__nDf=X zZ3WYJtHp_ri(}SQAPjv+Y+0=fH4krOP@S&=zZ-t1jW1o@}z;xk8 z(Nz1co&El^HK^NrhVHa-_;&88vTU>_J33=%{if;BEY*J#1n59=07jrGQ#IP>@u#3A z;!q+E1Rj3ZJ+!4bq9F8PXJ@yMgZL;>&gYA0%_Kbi8?S=XGM~dnQZQ!yBSgcZhY96H zrWnU;k)qy`rX&&xlDyA%(a1Hhi5CWkmg(`Gb%m(HKi-7Z!LKGRP_B8@`7&hdDy5n= z`OIxqxiVfX@OX1p(mQu>0Ai*v_cTMiw4qRt3~NBvr9oBy0)r>w3p~V0SCm=An6@3n)>@z!|o-$HvDK z|3D2ZMJkLE5loMKl6R^ez@Zz%S$&mbeoqH5`Bb){Ei21q&VP)hWS2tjShfFtGE+$z zzCR$P#uktu+#!w)cX!lWN1XU%K-r=s{|j?)Akf@q#3b#{6cZCuJ~gCxuMXRmI$nGtnH+-h z+GEi!*X=AP<|fG`1>MBdTb?28JYc=fGvAi2I<$B(rs$;eoJCyR6_bc~p!XR@O-+sD z=eH`-ye})I5ic1eL~TDmtfJ|8`0VJ*Yr=hNCd)G1p2MMz4C3^Mj?7;!w|Ly%JqmuW zlIEW^Ft%z?*|fpXda>Jr^1noFZEwFgVV%|*XhH@acv8rdGxeEX{M$(vG{Zw+x(ei@ zmfXb22}8-?Fi`vo-YVrTH*C?a8%M=Hv9MqVH7H^J$KsD?>!SFZ;ZsvnHr_gn=7acz z#W?0eCdVhVMWN12VV^$>WlQ?f;P^{(&pYTops|btm6aj>_Uz+hqpGwB)vWp0Cf5y< zft8-je~nn?W11plq}N)4A{l8I7$!ks_x$PXW-2XaRFswX_BnF{R#6YIwMhAgd5F9X zGmwdadS6(a^fjHtXg8=l?Rc0Sm%hk6E9!5cLVloEy4eh(=FwgP`)~I^5~pBEWo+F6 zSf2ncyMurJN91#cJTy_u8Y}@%!bq1RkGC~-bV@SXRd4F{R-*V`bS+6;W5vZ(&+I<9$;-V|eNfLa5n-6% z2(}&uGRF;p92eS*sE*oR$@pexaqr*meB)VhmIg@h{uzkk$9~qh#cHhw#>O%)b@+(| z^IQgqzuj~Sk(J;swEM-3TrJAPCq9k^^^`q{IItKBRXYe}e0Tdr=Huf7da3$l4PdpwWDop%^}n;dD#K4s#DYA8SHZ z&1!riV4W4R7R#C))JH1~axJ)RYnM$$lIR%6fIVA@zV{XVyx}C+a-Dt8Y9M)^KU0+H zR4IUb2CJ{Hg>CuaXtD50jB(_Tcx=Z$^WYu2u5kubqmwp%drJ6 z?Fo40g!Qd<-l=TQxqHEOuPX0;^z7iX?Ke^a%XT<13TA^5`4Xcw6D@Ur&VT&CUe0d} z1GjOVF1^L@>O)l@?bD~$wzgf(nxX1OGD8fEV?TdJcZc2KoUe|oP1#=$$7ee|xbY)A zDZq+cuTpc(fFdj^=!;{k03C69lMQ(|>uhRfRu%+!k&YOi-3|1QKB z z?n?eq1XP>p-IM$Z^C;2L3itnbJZAip*Zo0aw2bs8@(s^~*8T9go!%dHcAz2lM;`yp zD=7&xjFV$S&5uDaiScyD?B-i1ze`+CoRtz`Wn+Zl&#s4&}MO{@N!ufrzjG$B79)Y2d3tBk&)TxUTw@QS0TEL_?njX|@vq?Uz(nBFK5Pq7*xj#u*R&i|?7+6# z+|r_n#SW&LXhtheZdah{ZVoqwyT{D>MC3nkFF#N)xLi{p7J1jXlmVeb;cP5?e(=f# zuT7fvjSbjS781v?7{)-X3*?>tq?)Yd)~|1{BDS(pqC zC}~H#WXlkUW*H5CDOo<)#x7%RY)A;ShGhI5s*#cRDA8YgqG(HeKDx+#(ZQ?386dv! 
zlXCO)w91~Vw4AmOcATuV653fa9R$fyK8ul%rG z-wfS zihugoZyr38Im?Zuh6@RcF~t1anQu7>#lPpb#}4cOA!EM11`%f*07RqOVkmX{p~KJ9 z^zP;K#|)$`^Rb{rnHGH{~>1(fawV0*Z#)}M`m8-?ZJV<+e}s9wE# z)l&az?w^5{)`S(%MRzxdNqrs1n*-=jS^_jqE*5XDrA0+VE`5^*p3CuM<&dZEeCjoz zR;uu_H9ZPZV|fQq`Cyw4nscrVwi!fE6ciMmX$!_hN7uF;jjKG)d2@aC4ropY)8etW=xJvni)8eHi`H$%#zn^WJ5NLc-rqk|u&&4Z6fD_m&JfSI1Bvb?b<*n&sfl0^t z=HnmRl`XrFvMKB%9}>PaA`m-fK6a0(8=qPkWS5bb4=v?XcWi&hRY?O5HdulRi4?fN zlsJ*N-0Qw+Yic@s0(2uy%F@ib;GjXt01Fmx5XbRo6+n|pP(&nodMoap^z{~q ziEeaUT@Mxe3vJSfI6?uLND(CNr=#^W<1b}jzW58bIfyWTDle$mmS(|x-0|2UlX+9k zQ^EX7Nw}?EzVoBfT(-LT|=9N@^hcn-_p&sqG z&*oVs2JSU+N4ZD`FhCAWaS;>|wH2G*Id|?pa#@>tyxX`+4HyIArWDvVrX)2WAOQff z0qyHu&-S@i^MS-+j--!pr4fPBj~_8({~e1bfcl0wI1kaoN>mJL6KUPQm5N7lB(ui1 zE-o%kq)&djzWJ}ob<-GfDlkB;F31j-VHKvQUGQ3sp`CwyGJk_i!y^sD0fqC@$9|jO zOqN!r!8-p==F@ZVP=U$qSpY(gQ0)59P1&t@y?5rvg<}E+GB}26NYPp4f2YFQrQtot5mn3wu_qprZ=>Ig-$ zbW26Ws~IgY>}^5w`vTB(G`PTZaDiGBo5o(tp)qli|NeV( z@H_=R8V39rt5J5YB2Ky?4eJJ#b`_iBe2ot~6%7mLt5t8Vwi^Jy7|jWXqa3amOIoRb zOr}WVFP--DsS`1WpN%~)t3R!arKF^Q$e12KEqU36AWwnCBICpH4XCsfnyrHr>$I$4 z!DpKX$OKLWarN7nv@!uIA+~RNO)l$$w}p(;b>mx8pwYvu;dD_unryX_NhT8*Tj>BTrTTL&!?O+%Rv;b?B??gSzdp?6Uug9{ zd@V08Z$BdI?fpoCS$)t4mg4rT8Q_I}h`0d-vYZ^|dOB*Q^S|xqTV*vIg?@fVFSmMpaw0qtTRbx} z({Pg?#{2`sc9)M5N$*N|4;^t$+QP?#mov zGVC@I*lBVrOU-%2y!7%)fAKjpEFsgQc4{amtiHb95KQEwvf<(3T<9-Zm$xIew#P22 zc2Ix|App^>v6(3L_MCU0d3W##AB0M~3D00EWoKZqsJYT(#@w$Y_H7G22M~ApVFTRHMI_3be)Lkn#0F*V8Pq zc}`Cjy$bE;FJ6H7p=0y#R>`}-m4(0F>%@P|?7fx{=R^uFdISRnZ2W_xQhD{YuR3t< z{6yxu=4~JkeA;|(J6_nv#>Nvs&FuLA&PW^he@t(UwFFE8)|a!R{`E`K`i^ZnyE4$k z;(749Ix|oi$c3QbEJ3b~D_kQsPz~fIUKym($a_7dJ?o+40*OLl^{=&oq$<#Q(yyrp z{J-FAniyAw9tPbe&IhQ|a`DqFTVQGQ&Gq3!C2==4x{6EJwiPZ8zub-iXoUtkJiG{} zPaR&}_fn8_z~(=;5lD-aPWD3z8PZS@AaUiomF!G8I}Mf>e~0g#BelA-5#`cj;O5>N Xviia!U7SGha1wx#SCgwmn*{w2TRX*I literal 0 HcmV?d00001 diff --git a/html_text/tests/__init__.py b/html_text/tests/__init__.py new file mode 100644 index 000000000..60346a281 --- /dev/null +++ b/html_text/tests/__init__.py @@ -0,0 +1,5 @@ +# 
-*- coding: utf-8 -*- +# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +from . import test_extractor diff --git a/html_text/tests/test_extractor.py b/html_text/tests/test_extractor.py new file mode 100644 index 000000000..f7d0843d3 --- /dev/null +++ b/html_text/tests/test_extractor.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +from lxml import etree +from openerp.tests.common import TransactionCase + + +class ExtractorCase(TransactionCase): + def setUp(self): + super(ExtractorCase, self).setUp() + + # Shortcut + self.text_from_html = self.env["ir.fields.converter"].text_from_html + + def test_excerpts(self): + """Text gets correctly extracted.""" + html = u""" + + +

+

I'm a title

+

I'm a paragraph

+ ¡Pues yo soy español! +
+ + + """ + + self.assertEqual( + self.text_from_html(html), + u"I'm a title I'm a paragraph ¡Pues yo soy español!") + self.assertEqual( + self.text_from_html(html, 8), + u"I'm a title I'm a paragraph ¡Pues yo…") + self.assertEqual( + self.text_from_html(html, 8, 31), + u"I'm a title I'm a paragraph ¡P…") + self.assertEqual( + self.text_from_html(html, 7, ellipsis=""), + u"I'm a title I'm a paragraph ¡Pues") + + def test_empty_html(self): + """Empty HTML handled correctly.""" + self.assertEqual(self.text_from_html(""), "") + with self.assertRaises(etree.XMLSyntaxError): + self.text_from_html("", fail=True) + + def test_bad_html(self): + """Bad HTML handled correctly.""" + self.assertEqual(self.text_from_html("<"), "") + with self.assertRaises(etree.ParserError): + self.text_from_html("<", fail=True) From 0e3713583c732ced288e3e7677c7094c6eaacb19 Mon Sep 17 00:00:00 2001 From: Yajo Date: Tue, 5 Apr 2016 16:58:19 +0200 Subject: [PATCH 2/5] Do not die miserably when you get `False` as the HTML input. Test it. 
--- html_text/models/ir_fields_converter.py | 2 +- html_text/tests/test_extractor.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/html_text/models/ir_fields_converter.py b/html_text/models/ir_fields_converter.py index 503b2c7c7..1ad3572ea 100644 --- a/html_text/models/ir_fields_converter.py +++ b/html_text/models/ir_fields_converter.py @@ -41,7 +41,7 @@ class IrFieldsConverter(models.Model): # Parse HTML try: doc = html.fromstring(html_content) - except (etree.XMLSyntaxError, etree.ParserError): + except (TypeError, etree.XMLSyntaxError, etree.ParserError): if fail: raise else: diff --git a/html_text/tests/test_extractor.py b/html_text/tests/test_extractor.py index f7d0843d3..22443e32a 100644 --- a/html_text/tests/test_extractor.py +++ b/html_text/tests/test_extractor.py @@ -46,6 +46,12 @@ class ExtractorCase(TransactionCase): with self.assertRaises(etree.XMLSyntaxError): self.text_from_html("", fail=True) + def test_false_html(self): + """``False`` HTML handled correctly.""" + self.assertEqual(self.text_from_html(False), "") + with self.assertRaises(TypeError): + self.text_from_html(False, fail=True) + def test_bad_html(self): """Bad HTML handled correctly.""" self.assertEqual(self.text_from_html("<"), "") From c6401dd11d70d75f255b6cd97daf59ef279f9f30 Mon Sep 17 00:00:00 2001 From: cubells Date: Mon, 28 Nov 2016 11:12:30 +0100 Subject: [PATCH 3/5] html_text * Updated README.rst --- html_text/README.rst | 9 +++------ html_text/__init__.py | 3 ++- html_text/__openerp__.py | 8 +++++--- html_text/models/__init__.py | 3 ++- html_text/models/ir_fields_converter.py | 3 ++- 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/html_text/README.rst b/html_text/README.rst index 38eac30af..52c7755be 100644 --- a/html_text/README.rst +++ b/html_text/README.rst @@ -38,7 +38,7 @@ QWeb example:: .. 
image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas :alt: Try me on Runbot - :target: https://runbot.odoo-community.org/runbot/149/8.0 + :target: https://runbot.odoo-community.org/runbot/149/9.0 Known issues / Roadmap ====================== @@ -52,11 +52,7 @@ Bug Tracker Bugs are tracked on `GitHub Issues `_. In case of trouble, please check there if your issue has already been reported. If you spotted it first, -help us smashing it by providing a detailed and welcomed `feedback -`_. +help us smashing it by providing a detailed and welcomed feedback. Credits ======= @@ -65,6 +61,7 @@ Contributors ------------ * Jairo Llopis +* Vicent Cubells Maintainer ---------- diff --git a/html_text/__init__.py b/html_text/__init__.py index 09356048f..197214cdd 100644 --- a/html_text/__init__.py +++ b/html_text/__init__.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Tecnativa - Vicent Cubells # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). from . import models diff --git a/html_text/__openerp__.py b/html_text/__openerp__.py index 07f931ee8..173d737a0 100644 --- a/html_text/__openerp__.py +++ b/html_text/__openerp__.py @@ -1,13 +1,15 @@ # -*- coding: utf-8 -*- -# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Tecnativa - Vicent Cubells # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). 
{ "name": "Text from HTML field", "summary": "Generate excerpts from any HTML field", - "version": "8.0.1.0.0", + "version": "9.0.1.0.0", "category": "Tools", - "website": "https://grupoesoc.es", + "website": "https://tecnativa.com", "author": "Grupo ESOC Ingeniería de Servicios, " + "Tecnativa, " "Odoo Community Association (OCA)", "license": "AGPL-3", "application": False, diff --git a/html_text/models/__init__.py b/html_text/models/__init__.py index 11b8b13d5..5746f8b6b 100644 --- a/html_text/models/__init__.py +++ b/html_text/models/__init__.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Tecnativa - Vicent Cubells # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). from . import ir_fields_converter diff --git a/html_text/models/ir_fields_converter.py b/html_text/models/ir_fields_converter.py index 1ad3572ea..5180665f0 100644 --- a/html_text/models/ir_fields_converter.py +++ b/html_text/models/ir_fields_converter.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Tecnativa - Vicent Cubells # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). 
import logging From 1514640e7473fcc793069f7a47a94b36944f3ac0 Mon Sep 17 00:00:00 2001 From: OCA Transbot Date: Sat, 17 Dec 2016 00:20:53 -0500 Subject: [PATCH 4/5] OCA Transbot updated translations from Transifex --- html_text/i18n/ca.po | 24 ++++++++++++++++++++++++ html_text/i18n/de.po | 24 ++++++++++++++++++++++++ html_text/i18n/es.po | 24 ++++++++++++++++++++++++ html_text/i18n/es_ES.po | 24 ++++++++++++++++++++++++ html_text/i18n/tr.po | 24 ++++++++++++++++++++++++ 5 files changed, 120 insertions(+) create mode 100644 html_text/i18n/ca.po create mode 100644 html_text/i18n/de.po create mode 100644 html_text/i18n/es.po create mode 100644 html_text/i18n/es_ES.po create mode 100644 html_text/i18n/tr.po diff --git a/html_text/i18n/ca.po b/html_text/i18n/ca.po new file mode 100644 index 000000000..218f75161 --- /dev/null +++ b/html_text/i18n/ca.po @@ -0,0 +1,24 @@ +# Translation of Odoo Server. +# This file contains the translation of the following modules: +# * html_text +# +# Translators: +# Marc Tormo i Bochaca , 2017 +msgid "" +msgstr "" +"Project-Id-Version: Odoo Server 9.0c\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2017-04-19 18:00+0000\n" +"PO-Revision-Date: 2017-04-19 18:00+0000\n" +"Last-Translator: Marc Tormo i Bochaca , 2017\n" +"Language-Team: Catalan (https://www.transifex.com/oca/teams/23907/ca/)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: \n" +"Language: ca\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" + +#. module: html_text +#: model:ir.model,name:html_text.model_ir_fields_converter +msgid "ir.fields.converter" +msgstr "ir.fields.converter" diff --git a/html_text/i18n/de.po b/html_text/i18n/de.po new file mode 100644 index 000000000..b9f4c6287 --- /dev/null +++ b/html_text/i18n/de.po @@ -0,0 +1,24 @@ +# Translation of Odoo Server. 
+# This file contains the translation of the following modules: +# * html_text +# +# Translators: +# Rudolf Schnapka , 2017 +msgid "" +msgstr "" +"Project-Id-Version: Odoo Server 9.0c\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2017-04-19 18:00+0000\n" +"PO-Revision-Date: 2017-04-19 18:00+0000\n" +"Last-Translator: Rudolf Schnapka , 2017\n" +"Language-Team: German (https://www.transifex.com/oca/teams/23907/de/)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: \n" +"Language: de\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" + +#. module: html_text +#: model:ir.model,name:html_text.model_ir_fields_converter +msgid "ir.fields.converter" +msgstr "ir.fields.converter" diff --git a/html_text/i18n/es.po b/html_text/i18n/es.po new file mode 100644 index 000000000..0a08e7b0f --- /dev/null +++ b/html_text/i18n/es.po @@ -0,0 +1,24 @@ +# Translation of Odoo Server. +# This file contains the translation of the following modules: +# * html_text +# +# Translators: +# Pedro M. Baeza , 2016 +msgid "" +msgstr "" +"Project-Id-Version: Odoo Server 9.0c\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2016-12-17 02:07+0000\n" +"PO-Revision-Date: 2016-12-17 02:07+0000\n" +"Last-Translator: Pedro M. Baeza , 2016\n" +"Language-Team: Spanish (https://www.transifex.com/oca/teams/23907/es/)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: \n" +"Language: es\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" + +#. module: html_text +#: model:ir.model,name:html_text.model_ir_fields_converter +msgid "ir.fields.converter" +msgstr "ir.fields.converter" diff --git a/html_text/i18n/es_ES.po b/html_text/i18n/es_ES.po new file mode 100644 index 000000000..0394084e0 --- /dev/null +++ b/html_text/i18n/es_ES.po @@ -0,0 +1,24 @@ +# Translation of Odoo Server. 
+# This file contains the translation of the following modules: +# * html_text +# +# Translators: +# Fernando Lara , 2017 +msgid "" +msgstr "" +"Project-Id-Version: Odoo Server 9.0c\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2017-02-16 10:39+0000\n" +"PO-Revision-Date: 2017-02-16 10:39+0000\n" +"Last-Translator: Fernando Lara , 2017\n" +"Language-Team: Spanish (Spain) (https://www.transifex.com/oca/teams/23907/es_ES/)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: \n" +"Language: es_ES\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" + +#. module: html_text +#: model:ir.model,name:html_text.model_ir_fields_converter +msgid "ir.fields.converter" +msgstr "ir.documentos.conversor" diff --git a/html_text/i18n/tr.po b/html_text/i18n/tr.po new file mode 100644 index 000000000..3531df8fb --- /dev/null +++ b/html_text/i18n/tr.po @@ -0,0 +1,24 @@ +# Translation of Odoo Server. +# This file contains the translation of the following modules: +# * html_text +# +# Translators: +# Ahmet Altinisik , 2016 +msgid "" +msgstr "" +"Project-Id-Version: Odoo Server 9.0c\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2016-12-29 03:40+0000\n" +"PO-Revision-Date: 2016-12-29 03:40+0000\n" +"Last-Translator: Ahmet Altinisik , 2016\n" +"Language-Team: Turkish (https://www.transifex.com/oca/teams/23907/tr/)\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: \n" +"Language: tr\n" +"Plural-Forms: nplurals=2; plural=(n > 1);\n" + +#. 
module: html_text +#: model:ir.model,name:html_text.model_ir_fields_converter +msgid "ir.fields.converter" +msgstr "ir.fields.converter" From 6bead1323e261e45f7378f59956b7f0b41320597 Mon Sep 17 00:00:00 2001 From: Jairo Llopis Date: Thu, 22 Jun 2017 13:08:16 +0200 Subject: [PATCH 5/5] [MIG][10.0][html_text] Migrate --- html_text/README.rst | 2 +- html_text/__init__.py | 2 -- html_text/{__openerp__.py => __manifest__.py} | 4 ++-- html_text/models/__init__.py | 2 -- html_text/models/ir_fields_converter.py | 6 +++--- html_text/tests/__init__.py | 1 - html_text/tests/test_extractor.py | 10 +++++++--- 7 files changed, 13 insertions(+), 14 deletions(-) rename html_text/{__openerp__.py => __manifest__.py} (86%) diff --git a/html_text/README.rst b/html_text/README.rst index 52c7755be..59ab65597 100644 --- a/html_text/README.rst +++ b/html_text/README.rst @@ -38,7 +38,7 @@ QWeb example:: .. image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas :alt: Try me on Runbot - :target: https://runbot.odoo-community.org/runbot/149/9.0 + :target: https://runbot.odoo-community.org/runbot/149/10.0 Known issues / Roadmap ====================== diff --git a/html_text/__init__.py b/html_text/__init__.py index 197214cdd..a77a6fcbc 100644 --- a/html_text/__init__.py +++ b/html_text/__init__.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis -# Copyright 2016 Tecnativa - Vicent Cubells # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). from . import models diff --git a/html_text/__openerp__.py b/html_text/__manifest__.py similarity index 86% rename from html_text/__openerp__.py rename to html_text/__manifest__.py index 173d737a0..af0df87b6 100644 --- a/html_text/__openerp__.py +++ b/html_text/__manifest__.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- -# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. 
- Jairo Llopis +# Copyright 2016-2017 Jairo Llopis # Copyright 2016 Tecnativa - Vicent Cubells # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). { "name": "Text from HTML field", "summary": "Generate excerpts from any HTML field", - "version": "9.0.1.0.0", + "version": "10.0.1.0.0", "category": "Tools", "website": "https://tecnativa.com", "author": "Grupo ESOC Ingeniería de Servicios, " diff --git a/html_text/models/__init__.py b/html_text/models/__init__.py index 5746f8b6b..6fcbec0b6 100644 --- a/html_text/models/__init__.py +++ b/html_text/models/__init__.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis -# Copyright 2016 Tecnativa - Vicent Cubells # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). from . import ir_fields_converter diff --git a/html_text/models/ir_fields_converter.py b/html_text/models/ir_fields_converter.py index 5180665f0..1e84b1cad 100644 --- a/html_text/models/ir_fields_converter.py +++ b/html_text/models/ir_fields_converter.py @@ -1,16 +1,16 @@ # -*- coding: utf-8 -*- -# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016-2017 Jairo Llopis # Copyright 2016 Tecnativa - Vicent Cubells # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). import logging from lxml import etree, html -from openerp import api, models +from odoo import api, models _logger = logging.getLogger(__name__) -class IrFieldsConverter(models.Model): +class IrFieldsConverter(models.AbstractModel): _inherit = "ir.fields.converter" @api.model diff --git a/html_text/tests/__init__.py b/html_text/tests/__init__.py index 60346a281..f8ca9e601 100644 --- a/html_text/tests/__init__.py +++ b/html_text/tests/__init__.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). from . 
import test_extractor diff --git a/html_text/tests/test_extractor.py b/html_text/tests/test_extractor.py index 22443e32a..938361fd7 100644 --- a/html_text/tests/test_extractor.py +++ b/html_text/tests/test_extractor.py @@ -1,15 +1,16 @@ # -*- coding: utf-8 -*- -# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016-2017 Jairo Llopis # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). from lxml import etree -from openerp.tests.common import TransactionCase +from odoo.tools import mute_logger +from odoo.tests.common import TransactionCase +from ..models import ir_fields_converter class ExtractorCase(TransactionCase): def setUp(self): super(ExtractorCase, self).setUp() - # Shortcut self.text_from_html = self.env["ir.fields.converter"].text_from_html @@ -40,18 +41,21 @@ class ExtractorCase(TransactionCase): self.text_from_html(html, 7, ellipsis=""), u"I'm a title I'm a paragraph ¡Pues") + @mute_logger(ir_fields_converter.__name__) def test_empty_html(self): """Empty HTML handled correctly.""" self.assertEqual(self.text_from_html(""), "") with self.assertRaises(etree.XMLSyntaxError): self.text_from_html("", fail=True) + @mute_logger(ir_fields_converter.__name__) def test_false_html(self): """``False`` HTML handled correctly.""" self.assertEqual(self.text_from_html(False), "") with self.assertRaises(TypeError): self.text_from_html(False, fail=True) + @mute_logger(ir_fields_converter.__name__) def test_bad_html(self): """Bad HTML handled correctly.""" self.assertEqual(self.text_from_html("<"), "")