[IMP] account-export-csv - use generator with yield to avoid filling the worker memory with enormous lists of rows

11 years ago · 3ab81013cc
1 changed files with 36 additions and 28 deletions
--- a/account_export_csv/wizard/account_export_csv.py
+++ b/account_export_csv/wizard/account_export_csv.py
@ -19,6 +19,7 @@
 #
 ##############################################################################
 import itertools
 import time
 import tempfile
 import StringIO
@ -46,16 +47,14 @@ class AccountUnicodeWriter(object):
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()
    def writerow(self, row, base64_compress=False):
    def writerow(self, row):
        #we ensure that we do not try to encode none or bool
        row = [x or u'' for x in row]
        row = (x or u'' for x in row)
        encoded_row = []
        for c in row:
            if type(c) == unicode:
            if isinstance(c, unicode):
                val = c.encode("utf-8")
                if base64_compress:
                    val = base64.encodestring(val)
            else:
                val = c
@ -72,9 +71,9 @@ class AccountUnicodeWriter(object):
        # empty queue
        self.queue.truncate(0)
    def writerows(self, rows, base64_compress=False):
    def writerows(self, rows):
        for row in rows:
            self.writerow(row, base64_compress=base64_compress)
            self.writerow(row)
 class AccountCSVExport(orm.TransientModel):
    _name = 'account.csv.export'
@ -106,7 +105,7 @@ class AccountCSVExport(orm.TransientModel):
    def action_manual_export_account(self, cr, uid, ids, context=None):
        this = self.browse(cr, uid, ids)[0]
        rows = self.get_data(cr, uid, ids,"account", context)
        rows = self.get_data(cr, uid, ids, "account", context)
        file_data = StringIO.StringIO()
        try:
            writer = AccountUnicodeWriter(file_data)
@ -230,12 +229,19 @@ class AccountCSVExport(orm.TransientModel):
        """
        Here we use TemporaryFile to avoid full filling the OpenERP worker Memory
        We also write the data to the wizard with SQL query as write seams to use
        too much memory as well
        too much memory as well.
        Thos improvment permitted to improve the export from a 100k line to 200k lines
        with default `limit_memory_hard = 805306368` (768MB)
        Those improvements permitted to improve the export from a 100k line to 200k lines
        with default `limit_memory_hard = 805306368` (768MB) with more lines,
        you might encounter a MemoryError when trying to download the file even
        if it has been generated.
        To be able to export bigger volume of data, it is advised to set
        limit_memory_hard to  (2 GB) to generate the file and let
        OpenERP load it in the wizard when trying to download it.
        Tested with up to a generation of 700k entry lines
        """
        #XXX check why it still fail with more than 200k line and when
        this = self.browse(cr, uid, ids)[0]
        rows = self.get_data(cr, uid, ids, "journal_entries", context)
        with tempfile.TemporaryFile() as file_data:
@ -297,7 +303,7 @@ class AccountCSVExport(orm.TransientModel):
            company_id,
            context=None):
        """
        Return list to generate rows of the CSV file
        Create a generator of rows of the CSV file
        """
        cr.execute("""
        SELECT
@ -344,12 +350,15 @@ class AccountCSVExport(orm.TransientModel):
        """,
        {'period_ids': tuple(period_range_ids), 'journal_ids': tuple(journal_ids)}
        )
        res = cr.fetchall()
        rows = []
        for line in res:
            rows.append(list(line))
        return rows
        while 1:
            # http://initd.org/psycopg/docs/cursor.html#cursor.fetchmany
            # Set cursor.arraysize to minimize network round trips
            cr.arraysize=100
            rows = cr.fetchmany()
            if not rows:
                break
            for row in rows:
                yield row
    def get_data(self, cr, uid, ids,result_type,context=None):
        get_header_func = getattr(self,("_get_header_%s"%(result_type)), None)
@ -370,13 +379,12 @@ class AccountCSVExport(orm.TransientModel):
        else:
            j_obj = self.pool.get("account.journal")
            journal_ids = j_obj.search(cr, uid, [], context=context)
        rows = []
        rows.append(get_header_func(cr, uid, ids, context=context))
        rows.extend(get_rows_func(
            cr, uid, ids,
            fiscalyear_id,
            period_range_ids,
            journal_ids,
            company_id,
            context=context))
        rows = itertools.chain((get_header_func(cr, uid, ids, context=context),),
                               get_rows_func(cr, uid, ids,
                                             fiscalyear_id,
                                             period_range_ids,
                                             journal_ids,
                                             company_id,
                                             context=context)
                               )
        return rows