You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

311 lines
12 KiB

  1. # -*- coding: utf-8 -*-
  2. # Copyright (C) 2014-2015 Therp BV <http://therp.nl>.
  3. # License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
  4. """Generic parser for MT940 files, base for customized versions per bank."""
  5. import re
  6. import logging
  7. from datetime import datetime
  8. def str2amount(sign, amount_str):
  9. """Convert sign (C or D) and amount in string to signed amount (float)."""
  10. factor = (1 if sign == 'C' else -1)
  11. return factor * float(amount_str.replace(',', '.'))
  12. def get_subfields(data, codewords):
  13. """Return dictionary with value array for each codeword in data.
  14. For instance:
  15. data =
  16. /BENM//NAME/Kosten/REMI/Periode 01-10-2013 t/m 31-12-2013/ISDT/20
  17. codewords = ['BENM', 'ADDR', 'NAME', 'CNTP', ISDT', 'REMI']
  18. Then return subfields = {
  19. 'BENM': [],
  20. 'NAME': ['Kosten'],
  21. 'REMI': ['Periode 01-10-2013 t', 'm 31-12-2013'],
  22. 'ISDT': ['20'],
  23. }
  24. """
  25. subfields = {}
  26. current_codeword = None
  27. for word in data.split('/'):
  28. if not word and not current_codeword:
  29. continue
  30. if word in codewords:
  31. current_codeword = word
  32. subfields[current_codeword] = []
  33. continue
  34. if current_codeword in subfields:
  35. subfields[current_codeword].append(word)
  36. return subfields
  37. def get_counterpart(transaction, subfield):
  38. """Get counterpart from transaction.
  39. Counterpart is often stored in subfield of tag 86. The subfield
  40. can be BENM, ORDP, CNTP"""
  41. if not subfield:
  42. return # subfield is empty
  43. if len(subfield) >= 1 and subfield[0]:
  44. transaction.update({'account_number': subfield[0]})
  45. if len(subfield) >= 2 and subfield[1]:
  46. transaction.update({'account_bic': subfield[1]})
  47. if len(subfield) >= 3 and subfield[2]:
  48. transaction.update({'partner_name': subfield[2]})
  49. def handle_common_subfields(transaction, subfields):
  50. """Deal with common functionality for tag 86 subfields."""
  51. # Get counterpart from CNTP, BENM or ORDP subfields:
  52. for counterpart_field in ['CNTP', 'BENM', 'ORDP']:
  53. if counterpart_field in subfields:
  54. get_counterpart(transaction, subfields[counterpart_field])
  55. if not transaction.get('name'):
  56. transaction['name'] = ''
  57. # REMI: Remitter information (text entered by other party on trans.):
  58. if 'REMI' in subfields:
  59. transaction['name'] += (
  60. subfields['REMI'][2]
  61. # this might look like
  62. # /REMI/USTD//<remittance info>/
  63. # or
  64. # /REMI/STRD/CUR/<betalingskenmerk>/
  65. if len(subfields['REMI']) >= 3 and subfields['REMI'][0] in [
  66. 'STRD', 'USTD'
  67. ]
  68. else
  69. '/'.join(x for x in subfields['REMI'] if x)
  70. )
  71. # EREF: End-to-end reference
  72. if 'EREF' in subfields:
  73. transaction['name'] += '/'.join(filter(bool, subfields['EREF']))
  74. # Get transaction reference subfield (might vary):
  75. if transaction.get('ref') in subfields:
  76. transaction['ref'] = ''.join(subfields[transaction['ref']])
  77. class MT940(object):
  78. """Inherit this class in your account_banking.parsers.models.parser,
  79. define functions to handle the tags you need to handle and adjust static
  80. variables as needed.
  81. At least, you should override handle_tag_61 and handle_tag_86.
  82. Don't forget to call super.
  83. handle_tag_* functions receive the remainder of the the line (that is,
  84. without ':XX:') and are supposed to write into self.current_transaction
  85. """
  86. def __init__(self):
  87. """Initialize parser - override at least header_regex.
  88. This in fact uses the ING syntax, override in others."""
  89. self.mt940_type = 'General'
  90. self.header_lines = 3 # Number of lines to skip
  91. self.header_regex = '^0000 01INGBNL2AXXXX|^{1' # Start of header
  92. self.footer_regex = '^-}$|^-XXX$' # Stop processing on seeing this
  93. self.tag_regex = '^:[0-9]{2}[A-Z]*:' # Start of new tag
  94. self.current_statement = None
  95. self.current_transaction = None
  96. self.statements = []
  97. self.currency_code = None
  98. self.account_number = None
  99. def is_mt940(self, line):
  100. """determine if a line is the header of a statement"""
  101. if not bool(re.match(self.header_regex, line)):
  102. raise ValueError(
  103. 'File starting with %s does not seem to be a'
  104. ' valid %s MT940 format bank statement.' %
  105. (line[:12], self.mt940_type)
  106. )
  107. def is_mt940_statement(self, line):
  108. """determine if line is the start of a statement"""
  109. if not bool(line.startswith('{4:')):
  110. raise ValueError(
  111. 'The pre processed match %s does not seem to be a'
  112. ' valid %s MT940 format bank statement. Every statement'
  113. ' should start be a dict starting with {4:..' % line
  114. )
  115. def pre_process_data(self, data):
  116. matches = []
  117. self.is_mt940(line=data)
  118. data = data.replace(
  119. '-}', '}').replace('}{', '}\r\n{').replace('\r\n', '\n')
  120. if data.startswith(':940:'):
  121. for statement in data.replace(':940:', '').split(':20:'):
  122. match = '{4:\n:20:' + statement + '}'
  123. matches.append(match)
  124. else:
  125. tag_re = re.compile(
  126. r'(\{4:[^{}]+\})',
  127. re.MULTILINE)
  128. matches = tag_re.findall(data)
  129. return matches
  130. def parse(self, data, header_lines=None):
  131. """Parse mt940 bank statement file contents."""
  132. data = data.decode()
  133. matches = self.pre_process_data(data)
  134. for match in matches:
  135. self.is_mt940_statement(line=match)
  136. iterator = '\n'.join(
  137. match.split('\n')[1:-1]).split('\n').__iter__()
  138. line = None
  139. record_line = ''
  140. try:
  141. while True:
  142. if not self.current_statement:
  143. self.handle_header(line, iterator,
  144. header_lines=header_lines)
  145. line = iterator.next()
  146. if not self.is_tag(line) and not self.is_footer(line):
  147. record_line = self.add_record_line(line, record_line)
  148. continue
  149. if record_line:
  150. self.handle_record(record_line)
  151. if self.is_footer(line):
  152. self.handle_footer(line, iterator)
  153. record_line = ''
  154. continue
  155. record_line = line
  156. except StopIteration:
  157. pass
  158. if self.current_statement:
  159. if record_line:
  160. self.handle_record(record_line)
  161. record_line = ''
  162. self.statements.append(self.current_statement)
  163. self.current_statement = None
  164. return self.currency_code, self.account_number, self.statements
  165. def add_record_line(self, line, record_line):
  166. record_line += line
  167. return record_line
  168. def is_footer(self, line):
  169. """determine if a line is the footer of a statement"""
  170. return line and bool(re.match(self.footer_regex, line))
  171. def is_tag(self, line):
  172. """determine if a line has a tag"""
  173. return line and bool(re.match(self.tag_regex, line))
  174. def handle_header(self, dummy_line, iterator, header_lines=None):
  175. """skip header lines, create current statement"""
  176. if not header_lines:
  177. header_lines = self.header_lines
  178. for dummy_i in range(header_lines):
  179. iterator.next()
  180. self.current_statement = {
  181. 'name': None,
  182. 'date': None,
  183. 'balance_start': 0.0,
  184. 'balance_end_real': 0.0,
  185. 'transactions': []
  186. }
  187. def handle_footer(self, dummy_line, dummy_iterator):
  188. """add current statement to list, reset state"""
  189. self.statements.append(self.current_statement)
  190. self.current_statement = None
  191. def handle_record(self, line):
  192. """find a function to handle the record represented by line"""
  193. tag_match = re.match(self.tag_regex, line)
  194. tag = tag_match.group(0).strip(':')
  195. if not hasattr(self, 'handle_tag_%s' % tag): # pragma: no cover
  196. logging.error('Unknown tag %s', tag)
  197. logging.error(line)
  198. return
  199. handler = getattr(self, 'handle_tag_%s' % tag)
  200. handler(line[tag_match.end():])
  201. def handle_tag_20(self, data):
  202. """Contains unique ? message ID"""
  203. pass
  204. def handle_tag_25(self, data):
  205. """Handle tag 25: local bank account information."""
  206. data = data.replace('EUR', '').replace('.', '').strip()
  207. self.account_number = data
  208. def handle_tag_28C(self, data):
  209. """Sequence number within batch - normally only zeroes."""
  210. pass
  211. def handle_tag_60F(self, data):
  212. """get start balance and currency"""
  213. # For the moment only first 60F record
  214. # The alternative would be to split the file and start a new
  215. # statement for each 20: tag encountered.
  216. if not self.currency_code:
  217. self.currency_code = data[7:10]
  218. self.current_statement['balance_start'] = str2amount(
  219. data[0],
  220. data[10:]
  221. )
  222. if not self.current_statement['date']:
  223. self.current_statement['date'] = datetime.strptime(data[1:7],
  224. '%y%m%d')
  225. def handle_tag_61(self, data):
  226. """get transaction values"""
  227. self.current_statement['transactions'].append({})
  228. self.current_transaction = self.current_statement['transactions'][-1]
  229. self.current_transaction['date'] = datetime.strptime(
  230. data[:6],
  231. '%y%m%d'
  232. )
  233. def handle_tag_62F(self, data):
  234. """Get ending balance, statement date and id.
  235. We use the date on the last 62F tag as statement date, as the date
  236. on the 60F record (previous end balance) might contain a date in
  237. a previous period.
  238. We generate the statement.id from the local_account and the end-date,
  239. this should normally be unique, provided there is a maximum of
  240. one statement per day.
  241. Depending on the bank, there might be multiple 62F tags in the import
  242. file. The last one counts.
  243. """
  244. self.current_statement['balance_end_real'] = str2amount(
  245. data[0],
  246. data[10:]
  247. )
  248. self.current_statement['date'] = datetime.strptime(data[1:7], '%y%m%d')
  249. # Only replace logically empty (only whitespace or zeroes) id's:
  250. # But do replace statement_id's added before (therefore starting
  251. # with local_account), because we need the date on the last 62F
  252. # record.
  253. statement_name = self.current_statement['name'] or ''
  254. test_empty_id = re.sub(r'[\s0]', '', statement_name)
  255. is_account_number = statement_name.startswith(self.account_number)
  256. if not test_empty_id or is_account_number:
  257. self.current_statement['name'] = '%s-%s' % (
  258. self.account_number,
  259. self.current_statement['date'].strftime('%Y-%m-%d'),
  260. )
  261. def handle_tag_64(self, data):
  262. """get current balance in currency"""
  263. pass
  264. def handle_tag_65(self, data):
  265. """get future balance in currency"""
  266. pass
  267. def handle_tag_86(self, data):
  268. """details for previous transaction, here most differences between
  269. banks occur"""
  270. pass