From 3ab81013cc2d3c37e6b2f38f91143268a0632941 Mon Sep 17 00:00:00 2001
From: Yannick Vaucher <yannick.vaucher@camptocamp.com>
Date: Fri, 13 Sep 2013 11:01:19 +0200
Subject: [PATCH] [IMP] account-export-csv - use generator with yield to avoid
 filling the worker memory with enormous lists of rows

---
 .../wizard/account_export_csv.py              | 64 +++++++++++--------
 1 file changed, 36 insertions(+), 28 deletions(-)

diff --git a/account_export_csv/wizard/account_export_csv.py b/account_export_csv/wizard/account_export_csv.py
index 6f952e93..0db9f679 100644
--- a/account_export_csv/wizard/account_export_csv.py
+++ b/account_export_csv/wizard/account_export_csv.py
@@ -19,6 +19,7 @@
 #
 ##############################################################################
 
+import itertools
 import time
 import tempfile
 import StringIO
@@ -46,16 +47,14 @@ class AccountUnicodeWriter(object):
         self.stream = f
         self.encoder = codecs.getincrementalencoder(encoding)()
 
-    def writerow(self, row, base64_compress=False):
+    def writerow(self, row):
         #we ensure that we do not try to encode none or bool
-        row = [x or u'' for x in row]
+        row = (x or u'' for x in row)
 
         encoded_row = []
         for c in row:
-            if type(c) == unicode:
+            if isinstance(c, unicode):
                 val = c.encode("utf-8")
-                if base64_compress:
-                    val = base64.encodestring(val)
             else:
                 val = c
 
@@ -72,9 +71,9 @@ class AccountUnicodeWriter(object):
         # empty queue
         self.queue.truncate(0)
 
-    def writerows(self, rows, base64_compress=False):
+    def writerows(self, rows):
         for row in rows:
-            self.writerow(row, base64_compress=base64_compress)
+            self.writerow(row)
 
 class AccountCSVExport(orm.TransientModel):
     _name = 'account.csv.export'
@@ -106,7 +105,7 @@ class AccountCSVExport(orm.TransientModel):
 
     def action_manual_export_account(self, cr, uid, ids, context=None):
         this = self.browse(cr, uid, ids)[0]
-        rows = self.get_data(cr, uid, ids,"account", context)
+        rows = self.get_data(cr, uid, ids, "account", context)
         file_data = StringIO.StringIO()
         try:
             writer = AccountUnicodeWriter(file_data)
@@ -230,12 +229,19 @@ class AccountCSVExport(orm.TransientModel):
         """
         Here we use TemporaryFile to avoid full filling the OpenERP worker Memory
         We also write the data to the wizard with SQL query as write seams to use
-        too much memory as well
+        too much memory as well.
 
-        Thos improvment permitted to improve the export from a 100k line to 200k lines
-        with default `limit_memory_hard = 805306368` (768MB)
+        Those improvements permitted to improve the export from a 100k line to 200k lines
+        with default `limit_memory_hard = 805306368` (768MB) with more lines,
+        you might encounter a MemoryError when trying to download the file even
+        if it has been generated.
+
+        To be able to export bigger volume of data, it is advised to set
+        limit_memory_hard to  (2 GB) to generate the file and let
+        OpenERP load it in the wizard when trying to download it.
+
+        Tested with up to a generation of 700k entry lines
         """
-        #XXX check why it still fail with more than 200k line and when
         this = self.browse(cr, uid, ids)[0]
         rows = self.get_data(cr, uid, ids, "journal_entries", context)
         with tempfile.TemporaryFile() as file_data:
@@ -297,7 +303,7 @@ class AccountCSVExport(orm.TransientModel):
             company_id,
             context=None):
         """
-        Return list to generate rows of the CSV file
+        Create a generator of rows of the CSV file
         """
         cr.execute("""
         SELECT
@@ -344,12 +350,15 @@ class AccountCSVExport(orm.TransientModel):
         """,
         {'period_ids': tuple(period_range_ids), 'journal_ids': tuple(journal_ids)}
         )
-        res = cr.fetchall()
-        rows = []
-        for line in res:
-            rows.append(list(line))
-        return rows
-
+        while 1:
+            # http://initd.org/psycopg/docs/cursor.html#cursor.fetchmany
+            # Set cursor.arraysize to minimize network round trips
+            cr.arraysize=100
+            rows = cr.fetchmany()
+            if not rows:
+                break
+            for row in rows:
+                yield row
 
     def get_data(self, cr, uid, ids,result_type,context=None):
         get_header_func = getattr(self,("_get_header_%s"%(result_type)), None)
@@ -370,13 +379,12 @@ class AccountCSVExport(orm.TransientModel):
         else:
             j_obj = self.pool.get("account.journal")
             journal_ids = j_obj.search(cr, uid, [], context=context)
-        rows = []
-        rows.append(get_header_func(cr, uid, ids, context=context))
-        rows.extend(get_rows_func(
-            cr, uid, ids,
-            fiscalyear_id,
-            period_range_ids,
-            journal_ids,
-            company_id,
-            context=context))
+        rows = itertools.chain((get_header_func(cr, uid, ids, context=context),),
+                               get_rows_func(cr, uid, ids,
+                                             fiscalyear_id,
+                                             period_range_ids,
+                                             journal_ids,
+                                             company_id,
+                                             context=context)
+                               )
         return rows