bank-statement-import/bank_statement_parse_mt940/mt940.py


								#!/usr/bin/env python2

								# -*- coding: utf-8 -*-

								"""Generic parser for MT940 files, base for customized versions per bank."""

								##############################################################################

								#

								#    OpenERP, Open Source Management Solution

								#    This module copyright (C) 2014 Therp BV (<http://therp.nl>).

								#

								#    This program is free software: you can redistribute it and/or modify

								#    it under the terms of the GNU Affero General Public License as

								#    published by the Free Software Foundation, either version 3 of the

								#    License, or (at your option) any later version.

								#

								#    This program is distributed in the hope that it will be useful,

								#    but WITHOUT ANY WARRANTY; without even the implied warranty of

								#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

								#    GNU Affero General Public License for more details.

								#

								#    You should have received a copy of the GNU Affero General Public License

								#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

								#

								##############################################################################

								import re

								import logging

								from datetime import datetime


								from openerp.addons.bank_statement_parse import parserlib


								def str2amount(sign, amount_str):

								    """Convert sign (C or D) and amount in string to signed amount (float)."""

								    factor = (1 if sign == 'C' else -1)

								    return factor * float(amount_str.replace(',', '.'))


								def get_subfields(data, codewords):

								    """Return dictionary with value array for each codeword in data.


								    For instance:

								    data =

								        /BENM//NAME/Kosten/REMI/Periode 01-10-2013 t/m 31-12-2013/ISDT/20

								    codewords = ['BENM', 'ADDR', 'NAME', 'CNTP', ISDT', 'REMI']

								    Then return subfields = {

								        'BENM': [],

								        'NAME': ['Kosten'],

								        'REMI': ['Periode 01-10-2013 t', 'm 31-12-2013'],

								        'ISDT': ['20'],

								    }

								    """

								    subfields = {}

								    current_codeword = None

								    for word in data.split('/'):

								        if not word and not current_codeword:

								            continue

								        if word in codewords:

								            current_codeword = word

								            subfields[current_codeword] = []

								            continue

								        if current_codeword in subfields:

								            subfields[current_codeword].append(word)

								    return subfields


								def get_counterpart(transaction, subfield):

								    """Get counterpart from transaction.


								    Counterpart is often stored in subfield of tag 86. The subfield

								    can be BENM, ORDP, CNTP"""

								    if not subfield:

								        return  # subfield is empty

								    if len(subfield) >= 1 and subfield[0]:

								        transaction.remote_account = subfield[0]

								    if len(subfield) >= 2 and subfield[1]:

								        transaction.remote_bank_bic = subfield[1]

								    if len(subfield) >= 3 and subfield[2]:

								        transaction.remote_owner = subfield[2]

								    if len(subfield) >= 4 and subfield[3]:

								        transaction.remote_owner_city = subfield[3]


								def handle_common_subfields(transaction, subfields):

								    """Deal with common functionality for tag 86 subfields."""

								    # Get counterpart from CNTP, BENM or ORDP subfields:

								    for counterpart_field in ['CNTP', 'BENM', 'ORDP']:

								        if counterpart_field in subfields:

								            get_counterpart(transaction, subfields[counterpart_field])

								    # REMI: Remitter information (text entered by other party on trans.):

								    if 'REMI' in subfields:

								        transaction.message = (

								            '/'.join(x for x in subfields['REMI'] if x))

								    # Get transaction reference subfield (might vary):

								    if transaction.eref in subfields:

								        transaction.eref = ''.join(

								            subfields[transaction.eref])


								class MT940(object):

								    """Inherit this class in your account_banking.parsers.models.parser,

								    define functions to handle the tags you need to handle and adjust static

								    variables as needed.


								    At least, you should override handle_tag_61 and handle_tag_86. Don't forget

								    to call super.

								    handle_tag_* functions receive the remainder of the the line (that is,

								    without ':XX:') and are supposed to write into self.current_transaction"""


								    header_lines = 3

								    """One file can contain multiple statements, each with its own poorly

								    documented header. For now, the best thing to do seems to skip that"""


								    header_regex = '^{1:[0-9A-Z]{25,25}}'

								    'The file is considered a valid MT940 file when it contains this line'


								    footer_regex = '^-XXX$'

								    'The line that denotes end of message, we need to create a new statement'


								    tag_regex = '^:[0-9]{2}[A-Z]*:'

								    'The beginning of a record, should be anchored to beginning of the line'


								    def __init__(self):

								        self.current_statement = None

								        self.current_transaction = None

								        self.statements = []


								    def create_transaction(self):

								        """Create and return BankTransaction object."""

								        transaction = parserlib.BankTransaction()

								        return transaction


								    def is_mt940(self, line):

								        """determine if a line is the header of a statement"""

								        if not bool(re.match(self.header_regex, line)):

								            raise ValueError(

								                'This does not seem to be a MT940 format bank statement.')


								    def parse(self, data):

								        """Parse mt940 bank statement file contents."""

								        self.is_mt940(data)

								        iterator = data.replace('\r\n', '\n').split('\n').__iter__()

								        line = None

								        record_line = ''

								        try:

								            while True:

								                if not self.current_statement:

								                    self.handle_header(line, iterator)

								                line = iterator.next()

								                if not self.is_tag(line) and not self.is_footer(line):

								                    record_line = self.append_continuation_line(

								                        record_line, line)

								                    continue

								                if record_line:

								                    self.handle_record(record_line)

								                if self.is_footer(line):

								                    self.handle_footer(line, iterator)

								                    record_line = ''

								                    continue

								                record_line = line

								        except StopIteration:

								            pass

								        if self.current_statement:

								            if record_line:

								                self.handle_record(record_line)

								                record_line = ''

								            self.statements.append(self.current_statement)

								            self.current_statement = None

								        return self.statements


								    def append_continuation_line(self, line, continuation_line):

								        """append a continuation line for a multiline record.

								        Override and do data cleanups as necessary."""

								        return line + continuation_line


								    def create_statement(self):

								        """create a BankStatement."""

								        return parserlib.BankStatement()


								    def is_footer(self, line):

								        """determine if a line is the footer of a statement"""

								        return line and bool(re.match(self.footer_regex, line))


								    def is_tag(self, line):

								        """determine if a line has a tag"""

								        return line and bool(re.match(self.tag_regex, line))


								    def handle_header(self, line, iterator):

								        """skip header lines, create current statement"""

								        for dummy_i in range(self.header_lines):

								            iterator.next()

								        self.current_statement = self.create_statement()


								    def handle_footer(self, line, iterator):

								        """add current statement to list, reset state"""

								        self.statements.append(self.current_statement)

								        self.current_statement = None


								    def handle_record(self, line):

								        """find a function to handle the record represented by line"""

								        tag_match = re.match(self.tag_regex, line)

								        tag = tag_match.group(0).strip(':')

								        if not hasattr(self, 'handle_tag_%s' % tag):

								            logging.error('Unknown tag %s', tag)

								            logging.error(line)

								            return

								        handler = getattr(self, 'handle_tag_%s' % tag)

								        handler(line[tag_match.end():])


								    def handle_tag_20(self, data):

								        """Contains unique ? message ID"""

								        pass


								    def handle_tag_25(self, data):

								        """Handle tag 25: local bank account information."""

								        data = data.replace('EUR', '').replace('.', '').strip()

								        self.current_statement.local_account = data


								    def handle_tag_28C(self, data):

								        """get sequence number _within_this_batch_ - this alone

								        doesn't provide a unique id!"""

								        self.current_statement.statement_id = data


								    def handle_tag_60F(self, data):

								        """get start balance and currency"""

								        self.current_statement.local_currency = data[7:10]

								        self.current_statement.date = datetime.strptime(data[1:7], '%y%m%d')

								        self.current_statement.start_balance = str2amount(data[0], data[10:])

								        self.current_statement.statement_id = '%s/%s' % (

								            self.current_statement.date.strftime('%Y-%m-%d'),

								            self.current_statement.statement_id,

								        )


								    def handle_tag_62F(self, data):

								        """get ending balance"""

								        self.current_statement.end_balance = str2amount(data[0], data[10:])


								    def handle_tag_64(self, data):

								        """get current balance in currency"""

								        pass


								    def handle_tag_65(self, data):

								        """get future balance in currency"""

								        pass


								    def handle_tag_61(self, data):

								        """get transaction values"""

								        transaction = self.create_transaction()

								        self.current_statement.transactions.append(transaction)

								        self.current_transaction = transaction

								        transaction.execution_date = datetime.strptime(data[:6], '%y%m%d')

								        transaction.value_date = datetime.strptime(data[:6], '%y%m%d')

								        #  ...and the rest already is highly bank dependent


								    def handle_tag_86(self, data):

								        """details for previous transaction, here most differences between

								        banks occur"""

								        pass


								# vim:expandtab:smartindent:tabstop=4:softtabstop=4:shiftwidth=4: