You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
268 lines
9.9 KiB
268 lines
9.9 KiB
# -*- coding: utf-8 -*-
|
|
"""Generic parser for MT940 files, base for customized versions per bank."""
|
|
# Copyright 2014-2018 Therp BV <https://therp.nl>.
|
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
|
import re
|
|
import logging
|
|
from datetime import datetime
|
|
|
|
from openerp.addons.account_bank_statement_import.parserlib import (
|
|
BankStatement)
|
|
|
|
|
|
def str2amount(sign, amount_str):
    """Return the signed float value of an amount given as text.

    :param sign: 'C' gives a positive result, any other value gives a
        negative result.
    :param amount_str: amount as text; a comma is accepted as the decimal
        separator.
    :returns: the amount as a float, negated unless sign is 'C'.
    """
    value = float(amount_str.replace(',', '.'))
    if sign == 'C':
        return value
    return -value
|
|
|
|
|
|
def get_subfields(data, codewords):
    """Split slash-separated data into a list of values per codeword.

    The data is cut on '/'. Every piece that is one of the codewords opens
    a new (empty) value list; every following piece, including empty ones,
    is appended to the list of the most recently seen codeword. Pieces that
    come before the first codeword are dropped. A codeword that occurs
    again starts over with an empty list.

    For instance:
        data =
            /BENM//NAME/Kosten/REMI/Periode 01-10-2013 t/m 31-12-2013/ISDT/20
        codewords = ['BENM', 'ADDR', 'NAME', 'CNTP', 'ISDT', 'REMI']
    Then return subfields = {
        'BENM': [''],
        'NAME': ['Kosten'],
        'REMI': ['Periode 01-10-2013 t', 'm 31-12-2013'],
        'ISDT': ['20'],
    }
    """
    found = {}
    active = None
    for piece in data.split('/'):
        if not (piece or active):
            # Empty piece while no codeword is active: nothing to store.
            continue
        if piece in codewords:
            active = piece
            found[active] = []
        elif active is not None:
            found[active].append(piece)
    return found
|
|
|
|
|
|
def get_counterpart(transaction, subfield):
    """Copy counterpart details from a subfield onto the transaction.

    Counterpart is often stored in a subfield of tag 86. The subfield
    can be BENM, ORDP, CNTP.

    The values are taken by position: account, bank BIC, owner name and
    owner city. Empty or missing positions leave the corresponding
    transaction attribute untouched.
    """
    if not subfield:
        return  # subfield is empty
    targets = (
        'remote_account',
        'remote_bank_bic',
        'remote_owner',
        'remote_owner_city',
    )
    # zip stops at the shorter sequence, so extra values are ignored and
    # missing ones are simply not assigned.
    for attribute, value in zip(targets, subfield):
        if value:
            setattr(transaction, attribute, value)
|
|
|
|
|
|
def handle_common_subfields(transaction, subfields):
    """Apply the tag 86 subfields that are handled the same for all banks.

    transaction.eref serves a dual purpose: on entry it holds the name of
    the subfield that contains the actual reference (filled from the 61
    record, 'EREF' when not set); on exit it holds the reference itself if
    that subfield is present in the 86 record.
    """
    # Get counterpart from CNTP, BENM or ORDP subfields:
    for name in ('CNTP', 'BENM', 'ORDP'):
        if name in subfields:
            get_counterpart(transaction, subfields[name])
    if not transaction.message:
        transaction.message = ''
    # REMI: Remitter information (text entered by other party on trans.):
    if 'REMI' in subfields:
        remi = subfields['REMI']
        # this might look like
        # /REMI/USTD//<remittance info>/
        # or
        # /REMI/STRD/CUR/<betalingskenmerk>/
        if len(remi) >= 3 and remi[0] in ('STRD', 'USTD'):
            remittance = remi[2]
        else:
            remittance = '/'.join(part for part in remi if part)
        transaction.message += remittance
    # EREF: End-to-end reference
    # Get transaction reference subfield (might vary):
    reference_field = transaction.eref or 'EREF'
    transaction.eref = reference_field
    if reference_field in subfields:
        transaction.eref = ''.join(subfields[reference_field])
|
|
|
|
|
|
class MT940(object):
    """Inherit this class in your account_banking.parsers.models.parser,
    define functions to handle the tags you need to handle and adjust static
    variables as needed.

    At least, you should override handle_tag_61 and handle_tag_86.
    Don't forget to call super.

    handle_tag_* functions receive the remainder of the line (that is,
    without ':XX:') and are supposed to write into self.current_transaction
    """

    def __init__(self):
        """Initialize parser - override at least header_regex.

        This in fact uses the ING syntax, override in others."""
        self.mt940_type = 'General'
        self.header_lines = 3  # Number of lines to skip
        self.header_regex = '^0000 01INGBNL2AXXXX|^{1'  # Start of header
        self.footer_regex = '^-}$|^-XXX$'  # Stop processing on seeing this
        self.tag_regex = '^:[0-9]{2}[A-Z]*:'  # Start of new tag
        self.current_statement = None
        self.current_transaction = None
        self.statements = []

    def is_mt940(self, line):
        """Check that line starts with the expected statement header.

        :raises ValueError: if line does not match self.header_regex.
        """
        if not re.match(self.header_regex, line):
            raise ValueError(
                'File starting with %s does not seem to be a'
                ' valid %s MT940 format bank statement.' %
                (line[:12], self.mt940_type)
            )

    def parse(self, data):
        """Parse mt940 bank statement file contents.

        :param data: complete file contents as one string.
        :returns: self.statements, extended with the statements found.
        :raises ValueError: if data does not start with a valid header.
        """
        self.is_mt940(data)
        # The builtins iter()/next() work on both Python 2 and Python 3,
        # unlike the Python 2 only iterator.next() method.
        iterator = iter(data.replace('\r\n', '\n').split('\n'))
        line = None
        record_line = ''
        try:
            while True:
                if not self.current_statement:
                    self.handle_header(line, iterator)
                line = next(iterator)
                if not self.is_tag(line) and not self.is_footer(line):
                    # Continuation of the record that is being collected.
                    record_line = self.add_record_line(line, record_line)
                    continue
                # A tag or footer ends the record collected so far.
                if record_line:
                    self.handle_record(record_line)
                if self.is_footer(line):
                    self.handle_footer(line, iterator)
                    record_line = ''
                    continue
                record_line = line
        except StopIteration:
            # End of input, possibly while skipping header lines.
            pass
        if self.current_statement:
            # Input ended without a footer: flush what was collected.
            if record_line:
                self.handle_record(record_line)
            self.statements.append(self.current_statement)
            self.current_statement = None
        return self.statements

    def add_record_line(self, line, record_line):
        """Return record_line with the continuation line appended."""
        record_line += line
        return record_line

    def is_footer(self, line):
        """determine if a line is the footer of a statement"""
        return bool(line and re.match(self.footer_regex, line))

    def is_tag(self, line):
        """determine if a line has a tag"""
        return bool(line and re.match(self.tag_regex, line))

    def handle_header(self, dummy_line, iterator):
        """skip header lines, create current statement

        StopIteration from an exhausted iterator is deliberately not
        caught here; parse() treats it as end of input.
        """
        for dummy_i in range(self.header_lines):
            next(iterator)
        self.current_statement = BankStatement()

    def handle_footer(self, dummy_line, dummy_iterator):
        """add current statement to list, reset state"""
        self.statements.append(self.current_statement)
        self.current_statement = None

    def handle_record(self, line):
        """find a function to handle the record represented by line

        Records that do not start with a tag, or whose tag has no
        handle_tag_<tag> method, are logged as errors and skipped.
        """
        tag_match = re.match(self.tag_regex, line)
        if not tag_match:
            # E.g. text collected before the first tag of a statement.
            logging.error('Record does not start with a tag: %s', line)
            return
        tag = tag_match.group(0).strip(':')
        handler = getattr(self, 'handle_tag_%s' % tag, None)
        if handler is None:
            logging.error('Unknown tag %s', tag)
            logging.error(line)
            return
        handler(line[tag_match.end():])

    def handle_tag_20(self, data):
        """Contains unique ? message ID"""
        pass

    def handle_tag_25(self, data):
        """Handle tag 25: local bank account information."""
        data = data.replace('EUR', '').replace('.', '').strip()
        self.current_statement.local_account = data

    def handle_tag_28C(self, data):
        """Sequence number within batch - normally only zeroes."""
        pass

    def handle_tag_60F(self, data):
        """get start balance and currency"""
        # For the moment only first 60F record
        # The alternative would be to split the file and start a new
        # statement for each 20: tag encountered.
        stmt = self.current_statement
        if not stmt.local_currency:
            stmt.local_currency = data[7:10]
            stmt.start_balance = str2amount(data[0], data[10:])

    def handle_tag_61(self, data):
        """get transaction values

        Creates a new transaction on the current statement and sets both
        of its dates from the first six characters (yymmdd) of data.
        """
        transaction = self.current_statement.create_transaction()
        self.current_transaction = transaction
        transaction.execution_date = datetime.strptime(data[:6], '%y%m%d')
        transaction.value_date = datetime.strptime(data[:6], '%y%m%d')
        # ...and the rest already is highly bank dependent

    def handle_tag_62F(self, data):
        """Get ending balance, statement date and id.

        We use the date on the last 62F tag as statement date, as the date
        on the 60F record (previous end balance) might contain a date in
        a previous period.

        We generate the statement.id from the local_account and the end-date,
        this should normally be unique, provided there is a maximum of
        one statement per day.

        Depending on the bank, there might be multiple 62F tags in the import
        file. The last one counts.
        """
        stmt = self.current_statement
        stmt.end_balance = str2amount(data[0], data[10:])
        stmt.date = datetime.strptime(data[1:7], '%y%m%d')
        # Only replace logically empty (only whitespace or zeroes) id's:
        # But do replace statement_id's added before (therefore starting
        # with local_account), because we need the date on the last 62F
        # record.
        test_empty_id = re.sub(r'[\s0]', '', stmt.statement_id)
        if ((not test_empty_id) or
                (stmt.statement_id.startswith(stmt.local_account))):
            stmt.statement_id = '%s-%s' % (
                stmt.local_account,
                stmt.date.strftime('%Y-%m-%d'),
            )

    def handle_tag_64(self, data):
        """get current balance in currency"""
        pass

    def handle_tag_65(self, data):
        """get future balance in currency"""
        pass

    def handle_tag_86(self, data):
        """details for previous transaction, here most differences between
        banks occur"""
        pass
|