From cec41e8c007fdc4b5050889d4c255b01c668eacd Mon Sep 17 00:00:00 2001 From: cubells Date: Tue, 29 Nov 2016 21:57:11 +0100 Subject: [PATCH] [9.0] [MIG] html_image_url_extractor module (#624) * Image extractor from HTML fields. (#354) * [8.0][html_image_url_extractor] Image extractor from HTML fields. This technical utility allows the developer to get a list of image URLs from any piece of HTML. You can use it for example, to get the cover image from a blog post (upcoming module), or to create a slider with all images from it. * [9.0] [MIG] html_image_url_extractor * Updated README.rst --- html_image_url_extractor/README.rst | 78 ++++++++++++++++++ html_image_url_extractor/__init__.py | 6 ++ html_image_url_extractor/__openerp__.py | 24 ++++++ html_image_url_extractor/models/__init__.py | 6 ++ .../models/ir_fields_converter.py | 72 ++++++++++++++++ .../static/description/icon.png | Bin 0 -> 9455 bytes html_image_url_extractor/tests/__init__.py | 5 ++ .../tests/test_extractor.py | 69 ++++++++++++++++ 8 files changed, 260 insertions(+) create mode 100644 html_image_url_extractor/README.rst create mode 100644 html_image_url_extractor/__init__.py create mode 100644 html_image_url_extractor/__openerp__.py create mode 100644 html_image_url_extractor/models/__init__.py create mode 100644 html_image_url_extractor/models/ir_fields_converter.py create mode 100644 html_image_url_extractor/static/description/icon.png create mode 100644 html_image_url_extractor/tests/__init__.py create mode 100644 html_image_url_extractor/tests/test_extractor.py diff --git a/html_image_url_extractor/README.rst b/html_image_url_extractor/README.rst new file mode 100644 index 000000000..9b64201ec --- /dev/null +++ b/html_image_url_extractor/README.rst @@ -0,0 +1,78 @@ +.. image:: https://img.shields.io/badge/licence-AGPL--3-blue.svg + :target: http://www.gnu.org/licenses/agpl-3.0-standalone.html + :alt: License: AGPL-3 + +========================== +Image URLs from HTML field +========================== + +This module includes a method that extracts image URLs from any chunk of HTML, +in appearing order. + +Usage +===== + +This module just adds a technical utility, but nothing for the end user. + +If you are a developer and need this utility for your module, see these +examples and read the docs inside the code. + +Python example:: + + @api.multi + def some_method(self): + # Get images from an HTML field + imgs = self.env["ir.fields.converter"].imgs_from_html(self.html_field) + for url in imgs: + # Do stuff with those URLs + pass + +QWeb example:: + + + + + + +.. image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas + :alt: Try me on Runbot + :target: https://runbot.odoo-community.org/runbot/149/9.0 + +Known issues / Roadmap +====================== + +* The regexp to find the URL could be better. + +Bug Tracker +=========== + +Bugs are tracked on `GitHub Issues +`_. In case of trouble, please +check there if your issue has already been reported. If you spotted it first, +help us smashing it by providing a detailed and welcomed feedback. + +Credits +======= + +Contributors +------------ + +* Jairo Llopis +* Vicent Cubells + +Maintainer +---------- + +.. image:: https://odoo-community.org/logo.png + :alt: Odoo Community Association + :target: https://odoo-community.org + +This module is maintained by the OCA. + +OCA, or the Odoo Community Association, is a nonprofit organization whose +mission is to support the collaborative development of Odoo features and +promote its widespread use. + +To contribute to this module, please visit https://odoo-community.org. diff --git a/html_image_url_extractor/__init__.py b/html_image_url_extractor/__init__.py new file mode 100644 index 000000000..197214cdd --- /dev/null +++ b/html_image_url_extractor/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Tecnativa - Vicent Cubells +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +from . import models diff --git a/html_image_url_extractor/__openerp__.py b/html_image_url_extractor/__openerp__.py new file mode 100644 index 000000000..27c5a6a92 --- /dev/null +++ b/html_image_url_extractor/__openerp__.py @@ -0,0 +1,24 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Tecnativa - Vicent Cubells +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). +{ + "name": "Image URLs from HTML field", + "summary": "Extract images found in any HTML field", + "version": "9.0.1.0.0", + "category": "Tools", + "website": "https://tecnativa.com", + "author": "Tecnativa, " + "Odoo Community Association (OCA)", + "license": "AGPL-3", + "application": False, + "installable": True, + "external_dependencies": { + "python": [ + "lxml.html", + ], + }, + "depends": [ + "base", + ], +} diff --git a/html_image_url_extractor/models/__init__.py b/html_image_url_extractor/models/__init__.py new file mode 100644 index 000000000..5746f8b6b --- /dev/null +++ b/html_image_url_extractor/models/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Tecnativa - Vicent Cubells +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +from . import ir_fields_converter diff --git a/html_image_url_extractor/models/ir_fields_converter.py b/html_image_url_extractor/models/ir_fields_converter.py new file mode 100644 index 000000000..bbb140aaa --- /dev/null +++ b/html_image_url_extractor/models/ir_fields_converter.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +# Copyright 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# Copyright 2016 Tecnativa - Vicent Cubells +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +import re +import logging +from lxml import etree, html +from openerp import api, models + +_logger = logging.getLogger(__name__) + + +class IrFieldsConverter(models.Model): + _inherit = "ir.fields.converter" + + @api.model + def imgs_from_html(self, html_content, limit=None, fail=False): + """Extract all images in order from an HTML field in a generator. + + :param str html_content: + HTML contents from where to extract the images. + + :param int limit: + Only get up to this number of images. + + :param bool fail: + If ``True``, exceptions will be raised. + """ + # Parse HTML + try: + doc = html.fromstring(html_content) + except (TypeError, etree.XMLSyntaxError, etree.ParserError): + if fail: + raise + else: + _logger.exception("Failure parsing this HTML:\n%s", + html_content) + return + + # Required tools + query = """ + //img[@src] | + //*[contains(translate(@style, "BACKGROUND", "background"), + 'background')] + [contains(translate(@style, "URL", "url"), 'url(')] + """ + rgx = r""" + url\(\s* # Start function + (?P[^)]*) # URL string + \s*\) # End function + """ + rgx = re.compile(rgx, re.IGNORECASE | re.VERBOSE) + + # Loop through possible image URLs + for lap, element in enumerate(doc.xpath(query)): + if limit and lap >= limit: + break + if element.tag == "img": + yield element.attrib["src"] + else: + for rule in element.attrib["style"].split(";"): + # Extract background image + parts = rule.split(":", 1) + try: + if parts[0].strip().lower() in {"background", + "background-image"}: + yield (rgx.search(parts[1]) + .group("url").strip("\"'")) + # Malformed CSS or no match for URL + except (IndexError, AttributeError): + pass diff --git a/html_image_url_extractor/static/description/icon.png b/html_image_url_extractor/static/description/icon.png new file mode 100644 index 0000000000000000000000000000000000000000..3a0328b516c4980e8e44cdb63fd945757ddd132d GIT binary patch literal 9455 zcmW++2RxMjAAjx~&dlBk9S+%}OXg)AGE&Cb*&}d0jUxM@u(PQx^-s)697TX`ehR4?GS^qbkof1cslKgkU)h65qZ9Oc=ml_0temigYLJfnz{IDzUf>bGs4N!v3=Z3jMq&A#7%rM5eQ#dc?k~! zVpnB`o+K7|Al`Q_U;eD$B zfJtP*jH`siUq~{KE)`jP2|#TUEFGRryE2`i0**z#*^6~AI|YzIWy$Cu#CSLW3q=GA z6`?GZymC;dCPk~rBS%eCb`5OLr;RUZ;D`}um=H)BfVIq%7VhiMr)_#G0N#zrNH|__ zc+blN2UAB0=617@>_u;MPHN;P;N#YoE=)R#i$k_`UAA>WWCcEVMh~L_ zj--gtp&|K1#58Yz*AHCTMziU1Jzt_jG0I@qAOHsk$2}yTmVkBp_eHuY$A9)>P6o~I z%aQ?!(GqeQ-Y+b0I(m9pwgi(IIZZzsbMv+9w{PFtd_<_(LA~0H(xz{=FhLB@(1&qHA5EJw1>>=%q2f&^X>IQ{!GJ4e9U z&KlB)z(84HmNgm2hg2C0>WM{E(DdPr+EeU_N@57;PC2&DmGFW_9kP&%?X4}+xWi)( z;)z%wI5>D4a*5XwD)P--sPkoY(a~WBw;E~AW`Yue4kFa^LM3X`8x|}ZUeMnqr}>kH zG%WWW>3ml$Yez?i%)2pbKPI7?5o?hydokgQyZsNEr{a|mLdt;X2TX(#B1j35xPnPW z*bMSSOauW>o;*=kO8ojw91VX!qoOQb)zHJ!odWB}d+*K?#sY_jqPdg{Sm2HdYzdEx zOGVPhVRTGPtv0o}RfVP;Nd(|CB)I;*t&QO8h zFfekr30S!-LHmV_Su-W+rEwYXJ^;6&3|L$mMC8*bQptyOo9;>Qb9Q9`ySe3%V$A*9 zeKEe+b0{#KWGp$F+tga)0RtI)nhMa-K@JS}2krK~n8vJ=Ngm?R!9G<~RyuU0d?nz# z-5EK$o(!F?hmX*2Yt6+coY`6jGbb7tF#6nHA zuKk=GGJ;ZwON1iAfG$E#Y7MnZVmrY|j0eVI(DN_MNFJmyZ|;w4tf@=CCDZ#5N_0K= z$;R~bbk?}TpfDjfB&aiQ$VA}s?P}xPERJG{kxk5~R`iRS(SK5d+Xs9swCozZISbnS zk!)I0>t=A<-^z(cmSFz3=jZ23u13X><0b)P)^1T_))Kr`e!-pb#q&J*Q`p+B6la%C zuVl&0duN<;uOsB3%T9Fp8t{ED108<+W(nOZd?gDnfNBC3>M8WE61$So|P zVvqH0SNtDTcsUdzaMDpT=Ty0pDHHNL@Z0w$Y`XO z2M-_r1S+GaH%pz#Uy0*w$Vdl=X=rQXEzO}d6J^R6zjM1u&c9vYLvLp?W7w(?np9x1 zE_0JSAJCPB%i7p*Wvg)pn5T`8k3-uR?*NT|J`eS#_#54p>!p(mLDvmc-3o0mX*mp_ zN*AeS<>#^-{S%W<*mz^!X$w_2dHWpcJ6^j64qFBft-o}o_Vx80o0>}Du;>kLts;$8 zC`7q$QI(dKYG`Wa8#wl@V4jVWBRGQ@1dr-hstpQL)Tl+aqVpGpbSfN>5i&QMXfiZ> zaA?T1VGe?rpQ@;+pkrVdd{klI&jVS@I5_iz!=UMpTsa~mBga?1r}aRBm1WS;TT*s0f0lY=JBl66Upy)-k4J}lh=P^8(SXk~0xW=T9v*B|gzIhN z>qsO7dFd~mgxAy4V?&)=5ieYq?zi?ZEoj)&2o)RLy=@hbCRcfT5jigwtQGE{L*8<@Yd{zg;CsL5mvzfDY}P-wos_6PfprFVaeqNE%h zKZhLtcQld;ZD+>=nqN~>GvROfueSzJD&BE*}XfU|H&(FssBqY=hPCt`d zH?@s2>I(|;fcW&YM6#V#!kUIP8$Nkdh0A(bEVj``-AAyYgwY~jB zT|I7Bf@%;7aL7Wf4dZ%VqF$eiaC38OV6oy3Z#TER2G+fOCd9Iaoy6aLYbPTN{XRPz z;U!V|vBf%H!}52L2gH_+j;`bTcQRXB+y9onc^wLm5wi3-Be}U>k_u>2Eg$=k!(l@I zcCg+flakT2Nej3i0yn+g+}%NYb?ta;R?(g5SnwsQ49U8Wng8d|{B+lyRcEDvR3+`O{zfmrmvFrL6acVP%yG98X zo&+VBg@px@i)%o?dG(`T;n*$S5*rnyiR#=wW}}GsAcfyQpE|>a{=$Hjg=-*_K;UtD z#z-)AXwSRY?OPefw^iI+ z)AXz#PfEjlwTes|_{sB?4(O@fg0AJ^g8gP}ex9Ucf*@_^J(s_5jJV}c)s$`Myn|Kd z$6>}#q^n{4vN@+Os$m7KV+`}c%4)4pv@06af4-x5#wj!KKb%caK{A&Y#Rfs z-po?Dcb1({W=6FKIUirH&(yg=*6aLCekcKwyfK^JN5{wcA3nhO(o}SK#!CINhI`-I z1)6&n7O&ZmyFMuNwvEic#IiOAwNkR=u5it{B9n2sAJV5pNhar=j5`*N!Na;c7g!l$ z3aYBqUkqqTJ=Re-;)s!EOeij=7SQZ3Hq}ZRds%IM*PtM$wV z@;rlc*NRK7i3y5BETSKuumEN`Xu_8GP1Ri=OKQ$@I^ko8>H6)4rjiG5{VBM>B|%`&&s^)jS|-_95&yc=GqjNo{zFkw%%HHhS~e=s zD#sfS+-?*t|J!+ozP6KvtOl!R)@@-z24}`9{QaVLD^9VCSR2b`b!KC#o;Ki<+wXB6 zx3&O0LOWcg4&rv4QG0)4yb}7BFSEg~=IR5#ZRj8kg}dS7_V&^%#Do==#`u zpy6{ox?jWuR(;pg+f@mT>#HGWHAJRRDDDv~@(IDw&R>9643kK#HN`!1vBJHnC+RM&yIh8{gG2q zA%e*U3|N0XSRa~oX-3EAneep)@{h2vvd3Xvy$7og(sayr@95+e6~Xvi1tUqnIxoIH zVWo*OwYElb#uyW{Imam6f2rGbjR!Y3`#gPqkv57dB6K^wRGxc9B(t|aYDGS=m$&S!NmCtrMMaUg(c zc2qC=2Z`EEFMW-me5B)24AqF*bV5Dr-M5ig(l-WPS%CgaPzs6p_gnCIvTJ=Y<6!gT zVt@AfYCzjjsMEGi=rDQHo0yc;HqoRNnNFeWZgcm?f;cp(6CNylj36DoL(?TS7eU#+ z7&mfr#y))+CJOXQKUMZ7QIdS9@#-}7y2K1{8)cCt0~-X0O!O?Qx#E4Og+;A2SjalQ zs7r?qn0H044=sDN$SRG$arw~n=+T_DNdSrarmu)V6@|?1-ZB#hRn`uilTGPJ@fqEy zGt(f0B+^JDP&f=r{#Y_wi#AVDf-y!RIXU^0jXsFpf>=Ji*TeqSY!H~AMbJdCGLhC) zn7Rx+sXw6uYj;WRYrLd^5IZq@6JI1C^YkgnedZEYy<&4(z%Q$5yv#Boo{AH8n$a zhb4Y3PWdr269&?V%uI$xMcUrMzl=;w<_nm*qr=c3Rl@i5wWB;e-`t7D&c-mcQl7x! zZWB`UGcw=Y2=}~wzrfLx=uet<;m3~=8I~ZRuzvMQUQdr+yTV|ATf1Uuomr__nDf=X zZ3WYJtHp_ri(}SQAPjv+Y+0=fH4krOP@S&=zZ-t1jW1o@}z;xk8 z(Nz1co&El^HK^NrhVHa-_;&88vTU>_J33=%{if;BEY*J#1n59=07jrGQ#IP>@u#3A z;!q+E1Rj3ZJ+!4bq9F8PXJ@yMgZL;>&gYA0%_Kbi8?S=XGM~dnQZQ!yBSgcZhY96H zrWnU;k)qy`rX&&xlDyA%(a1Hhi5CWkmg(`Gb%m(HKi-7Z!LKGRP_B8@`7&hdDy5n= z`OIxqxiVfX@OX1p(mQu>0Ai*v_cTMiw4qRt3~NBvr9oBy0)r>w3p~V0SCm=An6@3n)>@z!|o-$HvDK z|3D2ZMJkLE5loMKl6R^ez@Zz%S$&mbeoqH5`Bb){Ei21q&VP)hWS2tjShfFtGE+$z zzCR$P#uktu+#!w)cX!lWN1XU%K-r=s{|j?)Akf@q#3b#{6cZCuJ~gCxuMXRmI$nGtnH+-h z+GEi!*X=AP<|fG`1>MBdTb?28JYc=fGvAi2I<$B(rs$;eoJCyR6_bc~p!XR@O-+sD z=eH`-ye})I5ic1eL~TDmtfJ|8`0VJ*Yr=hNCd)G1p2MMz4C3^Mj?7;!w|Ly%JqmuW zlIEW^Ft%z?*|fpXda>Jr^1noFZEwFgVV%|*XhH@acv8rdGxeEX{M$(vG{Zw+x(ei@ zmfXb22}8-?Fi`vo-YVrTH*C?a8%M=Hv9MqVH7H^J$KsD?>!SFZ;ZsvnHr_gn=7acz z#W?0eCdVhVMWN12VV^$>WlQ?f;P^{(&pYTops|btm6aj>_Uz+hqpGwB)vWp0Cf5y< zft8-je~nn?W11plq}N)4A{l8I7$!ks_x$PXW-2XaRFswX_BnF{R#6YIwMhAgd5F9X zGmwdadS6(a^fjHtXg8=l?Rc0Sm%hk6E9!5cLVloEy4eh(=FwgP`)~I^5~pBEWo+F6 zSf2ncyMurJN91#cJTy_u8Y}@%!bq1RkGC~-bV@SXRd4F{R-*V`bS+6;W5vZ(&+I<9$;-V|eNfLa5n-6% z2(}&uGRF;p92eS*sE*oR$@pexaqr*meB)VhmIg@h{uzkk$9~qh#cHhw#>O%)b@+(| z^IQgqzuj~Sk(J;swEM-3TrJAPCq9k^^^`q{IItKBRXYe}e0Tdr=Huf7da3$l4PdpwWDop%^}n;dD#K4s#DYA8SHZ z&1!riV4W4R7R#C))JH1~axJ)RYnM$$lIR%6fIVA@zV{XVyx}C+a-Dt8Y9M)^KU0+H zR4IUb2CJ{Hg>CuaXtD50jB(_Tcx=Z$^WYu2u5kubqmwp%drJ6 z?Fo40g!Qd<-l=TQxqHEOuPX0;^z7iX?Ke^a%XT<13TA^5`4Xcw6D@Ur&VT&CUe0d} z1GjOVF1^L@>O)l@?bD~$wzgf(nxX1OGD8fEV?TdJcZc2KoUe|oP1#=$$7ee|xbY)A zDZq+cuTpc(fFdj^=!;{k03C69lMQ(|>uhRfRu%+!k&YOi-3|1QKB z z?n?eq1XP>p-IM$Z^C;2L3itnbJZAip*Zo0aw2bs8@(s^~*8T9go!%dHcAz2lM;`yp zD=7&xjFV$S&5uDaiScyD?B-i1ze`+CoRtz`Wn+Zl&#s4&}MO{@N!ufrzjG$B79)Y2d3tBk&)TxUTw@QS0TEL_?njX|@vq?Uz(nBFK5Pq7*xj#u*R&i|?7+6# z+|r_n#SW&LXhtheZdah{ZVoqwyT{D>MC3nkFF#N)xLi{p7J1jXlmVeb;cP5?e(=f# zuT7fvjSbjS781v?7{)-X3*?>tq?)Yd)~|1{BDS(pqC zC}~H#WXlkUW*H5CDOo<)#x7%RY)A;ShGhI5s*#cRDA8YgqG(HeKDx+#(ZQ?386dv! zlXCO)w91~Vw4AmOcATuV653fa9R$fyK8ul%rG z-wfS zihugoZyr38Im?Zuh6@RcF~t1anQu7>#lPpb#}4cOA!EM11`%f*07RqOVkmX{p~KJ9 z^zP;K#|)$`^Rb{rnHGH{~>1(fawV0*Z#)}M`m8-?ZJV<+e}s9wE# z)l&az?w^5{)`S(%MRzxdNqrs1n*-=jS^_jqE*5XDrA0+VE`5^*p3CuM<&dZEeCjoz zR;uu_H9ZPZV|fQq`Cyw4nscrVwi!fE6ciMmX$!_hN7uF;jjKG)d2@aC4ropY)8etW=xJvni)8eHi`H$%#zn^WJ5NLc-rqk|u&&4Z6fD_m&JfSI1Bvb?b<*n&sfl0^t z=HnmRl`XrFvMKB%9}>PaA`m-fK6a0(8=qPkWS5bb4=v?XcWi&hRY?O5HdulRi4?fN zlsJ*N-0Qw+Yic@s0(2uy%F@ib;GjXt01Fmx5XbRo6+n|pP(&nodMoap^z{~q ziEeaUT@Mxe3vJSfI6?uLND(CNr=#^W<1b}jzW58bIfyWTDle$mmS(|x-0|2UlX+9k zQ^EX7Nw}?EzVoBfT(-LT|=9N@^hcn-_p&sqG z&*oVs2JSU+N4ZD`FhCAWaS;>|wH2G*Id|?pa#@>tyxX`+4HyIArWDvVrX)2WAOQff z0qyHu&-S@i^MS-+j--!pr4fPBj~_8({~e1bfcl0wI1kaoN>mJL6KUPQm5N7lB(ui1 zE-o%kq)&djzWJ}ob<-GfDlkB;F31j-VHKvQUGQ3sp`CwyGJk_i!y^sD0fqC@$9|jO zOqN!r!8-p==F@ZVP=U$qSpY(gQ0)59P1&t@y?5rvg<}E+GB}26NYPp4f2YFQrQtot5mn3wu_qprZ=>Ig-$ zbW26Ws~IgY>}^5w`vTB(G`PTZaDiGBo5o(tp)qli|NeV( z@H_=R8V39rt5J5YB2Ky?4eJJ#b`_iBe2ot~6%7mLt5t8Vwi^Jy7|jWXqa3amOIoRb zOr}WVFP--DsS`1WpN%~)t3R!arKF^Q$e12KEqU36AWwnCBICpH4XCsfnyrHr>$I$4 z!DpKX$OKLWarN7nv@!uIA+~RNO)l$$w}p(;b>mx8pwYvu;dD_unryX_NhT8*Tj>BTrTTL&!?O+%Rv;b?B??gSzdp?6Uug9{ zd@V08Z$BdI?fpoCS$)t4mg4rT8Q_I}h`0d-vYZ^|dOB*Q^S|xqTV*vIg?@fVFSmMpaw0qtTRbx} z({Pg?#{2`sc9)M5N$*N|4;^t$+QP?#mov zGVC@I*lBVrOU-%2y!7%)fAKjpEFsgQc4{amtiHb95KQEwvf<(3T<9-Zm$xIew#P22 zc2Ix|App^>v6(3L_MCU0d3W##AB0M~3D00EWoKZqsJYT(#@w$Y_H7G22M~ApVFTRHMI_3be)Lkn#0F*V8Pq zc}`Cjy$bE;FJ6H7p=0y#R>`}-m4(0F>%@P|?7fx{=R^uFdISRnZ2W_xQhD{YuR3t< z{6yxu=4~JkeA;|(J6_nv#>Nvs&FuLA&PW^he@t(UwFFE8)|a!R{`E`K`i^ZnyE4$k z;(749Ix|oi$c3QbEJ3b~D_kQsPz~fIUKym($a_7dJ?o+40*OLl^{=&oq$<#Q(yyrp z{J-FAniyAw9tPbe&IhQ|a`DqFTVQGQ&Gq3!C2==4x{6EJwiPZ8zub-iXoUtkJiG{} zPaR&}_fn8_z~(=;5lD-aPWD3z8PZS@AaUiomF!G8I}Mf>e~0g#BelA-5#`cj;O5>N Xviia!U7SGha1wx#SCgwmn*{w2TRX*I literal 0 HcmV?d00001 diff --git a/html_image_url_extractor/tests/__init__.py b/html_image_url_extractor/tests/__init__.py new file mode 100644 index 000000000..60346a281 --- /dev/null +++ b/html_image_url_extractor/tests/__init__.py @@ -0,0 +1,5 @@ +# -*- coding: utf-8 -*- +# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +from . import test_extractor diff --git a/html_image_url_extractor/tests/test_extractor.py b/html_image_url_extractor/tests/test_extractor.py new file mode 100644 index 000000000..c511aa5f8 --- /dev/null +++ b/html_image_url_extractor/tests/test_extractor.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +# © 2016 Grupo ESOC Ingeniería de Servicios, S.L.U. - Jairo Llopis +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +from lxml import etree +from openerp.tests.common import TransactionCase + + +class ExtractorCase(TransactionCase): + def setUp(self): + super(ExtractorCase, self).setUp() + + # Shortcut + self.imgs_from_html = self.env["ir.fields.converter"].imgs_from_html + + def test_mixed_images_found(self): + """Images correctly found in elements and backgrounds.""" + content = u""" +
+ + +

+ + + + +

+
+

+ +

+
+
+
+ """ + + # Read all images + for n, url in enumerate(self.imgs_from_html(content)): + self.assertEqual("/path/%d" % n, url) + self.assertEqual(n, 7) + + # Read only first image + for n, url in enumerate(self.imgs_from_html(content, 1)): + self.assertEqual("/path/%d" % n, url) + self.assertEqual(n, 0) + + def test_empty_html(self): + """Empty HTML handled correctly.""" + for laps, text in self.imgs_from_html(""): + self.assertTrue(False) # You should never get here + + with self.assertRaises(etree.XMLSyntaxError): + list(self.imgs_from_html("", fail=True)) + + def test_false_html(self): + """``False`` HTML handled correctly.""" + for laps, text in self.imgs_from_html(False): + self.assertTrue(False) # You should never get here + + with self.assertRaises(TypeError): + list(self.imgs_from_html(False, fail=True)) + + def test_bad_html(self): + """Bad HTML handled correctly.""" + for laps, text in self.imgs_from_html("<"): + self.assertTrue(False) # You should never get here + + with self.assertRaises(etree.ParserError): + list(self.imgs_from_html("<", fail=True))