You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

118 lines
5.6 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. # -*- coding: utf-8 -*-
  2. # RFC 2822 - style email validation for Python
  3. # (c) 2012 Syrus Akbary <me@syrusakbary.com>
  4. # Extended from (c) 2011 Noel Bush <noel@aitools.org>
  5. # for support of mx and user check
  6. # This code is made available to you under the GNU LGPL v3.
  7. #
  8. # This module provides a single function, validate_email(),
  9. # which returns True or False to indicate whether a given address
  10. # is valid according to the 'addr-spec' part of the specification
  11. # given in RFC 2822. Ideally, we would like to find this
  12. # in some other library, already thoroughly tested and well-
  13. # maintained. The standard Python library email.utils
  14. # contains a parseaddr() function, but it is not sufficient
  15. # to detect many malformed addresses.
  16. #
  17. # This implementation aims to be faithful to the RFC, with the
  18. # exception of a circular definition (see comments below), and
  19. # with the omission of the pattern components marked as "obsolete".
  20. import re
  21. import smtplib
  22. try:
  23. import DNS
  24. ServerError = DNS.ServerError
  25. except:
  26. DNS = None
  27. class ServerError(Exception):
  28. pass
  29. # All we are really doing is comparing the input string to one
  30. # gigantic regular expression. But building that regexp, and
  31. # ensuring its correctness, is made much easier by assembling it
  32. # from the "tokens" defined by the RFC. Each of these tokens is
  33. # tested in the accompanying unit test file.
  34. #
  35. # The section of RFC 2822 from which each pattern component is
  36. # derived is given in an accompanying comment.
  37. #
  38. # (To make things simple, every string below is given as 'raw',
  39. # even when it's not strictly necessary. This way we don't forget
  40. # when it is necessary.)
  41. #
  42. WSP = r'[ \t]' # see 2.2.2. Structured Header Field Bodies
  43. CRLF = r'(?:\r\n)' # see 2.2.3. Long Header Fields
  44. NO_WS_CTL = r'\x01-\x08\x0b\x0c\x0f-\x1f\x7f' # see 3.2.1. Primitive Tokens
  45. QUOTED_PAIR = r'(?:\\.)' # see 3.2.2. Quoted characters
  46. FWS = r'(?:(?:{0}*{1})?{0}+)'.format(WSP, CRLF) # see 3.2.3. Folding white space and comments
  47. CTEXT = r'[{0}\x21-\x27\x2a-\x5b\x5d-\x7e]'.format(NO_WS_CTL) # see 3.2.3
  48. # see 3.2.3 (NB: The RFC includes COMMENT here as well, but that would be circular.)
  49. CCONTENT = r'(?:{0}|{1})'.format(CTEXT, QUOTED_PAIR)
  50. COMMENT = r'\((?:{0}?{1})*{0}?\)'.format(FWS, CCONTENT) # see 3.2.3
  51. CFWS = r'(?:{0}?{1})*(?:{0}?{1}|{0})'.format(FWS, COMMENT) # see 3.2.3
  52. ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]' # see 3.2.4. Atom
  53. ATOM = r'{0}?{1}+{0}?'.format(CFWS, ATEXT) # see 3.2.4
  54. DOT_ATOM_TEXT = r'{0}+(?:\.{0}+)*'.format(ATEXT) # see 3.2.4
  55. DOT_ATOM = r'{0}?{1}{0}?'.format(CFWS, DOT_ATOM_TEXT) # see 3.2.4
  56. QTEXT = r'[{0}\x21\x23-\x5b\x5d-\x7e]'.format(NO_WS_CTL) # see 3.2.5. Quoted strings
  57. QCONTENT = r'(?:{0}|{1})'.format(QTEXT, QUOTED_PAIR) # see 3.2.5
  58. QUOTED_STRING = r'{0}?"(?:{1}?{2})*{1}?"{0}?'.format(CFWS, FWS, QCONTENT)
  59. LOCAL_PART = r'(?:{0}|{1})'.format(DOT_ATOM, QUOTED_STRING) # see 3.4.1. Addr-spec specification
  60. DTEXT = r'[{0}\x21-\x5a\x5e-\x7e]'.format(NO_WS_CTL) # see 3.4.1
  61. DCONTENT = r'(?:{0}|{1})'.format(DTEXT, QUOTED_PAIR) # see 3.4.1
  62. DOMAIN_LITERAL = r'{0}?\[(?:{1}?{2})*{1}?\]{0}?'.format(CFWS, FWS, DCONTENT) # see 3.4.1
  63. DOMAIN = r'(?:{0}|{1})'.format(DOT_ATOM, DOMAIN_LITERAL) # see 3.4.1
  64. ADDR_SPEC = r'{0}@{1}'.format(LOCAL_PART, DOMAIN) # see 3.4.1
  65. # A valid address will match exactly the 3.4.1 addr-spec.
  66. VALID_ADDRESS_REGEXP = '^' + ADDR_SPEC + '$'
  67. def validate_email(email, check_mx=False, verify=False):
  68. """Indicate whether the given string is a valid email address
  69. according to the 'addr-spec' portion of RFC 2822 (see section
  70. 3.4.1). Parts of the spec that are marked obsolete are *not*
  71. included in this test, and certain arcane constructions that
  72. depend on circular definitions in the spec may not pass, but in
  73. general this should correctly identify any email address likely
  74. to be in use as of 2011."""
  75. try:
  76. assert re.match(VALID_ADDRESS_REGEXP, email) is not None
  77. check_mx |= verify
  78. if check_mx:
  79. if not DNS:
  80. raise Exception('For check the mx records or check if the '
  81. 'email exists you must have installed pyDNS '
  82. 'python package')
  83. DNS.DiscoverNameServers()
  84. hostname = email[email.find('@') + 1:]
  85. mx_hosts = DNS.mxlookup(hostname)
  86. for mx in mx_hosts:
  87. try:
  88. smtp = smtplib.SMTP()
  89. smtp.connect(mx[1])
  90. if not verify:
  91. return True
  92. status, _ = smtp.helo()
  93. if status != 250:
  94. continue
  95. smtp.mail('')
  96. status, _ = smtp.rcpt(email)
  97. if status != 250:
  98. return False
  99. break
  100. except smtplib.SMTPServerDisconnected: # Server not permits verify user
  101. break
  102. except smtplib.SMTPConnectError:
  103. continue
  104. except (AssertionError, ServerError):
  105. return False
  106. return True
  107. # import sys
  108. # sys.modules[__name__], sys.modules['validate_email_module'] = validate_email, sys.modules[__name__]
  109. # from validate_email_module import *