135 lines
5.4 KiB

11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
11 years ago
  1. # -*- coding: utf-8 -*-
  2. # RFC 2822 - style email validation for Python
  3. # (c) 2012 Syrus Akbary <me@syrusakbary.com>
  4. # Extended from (c) 2011 Noel Bush <noel@aitools.org>
  5. # for support of mx and user check
  6. # This code is made available to you under the GNU LGPL v3.
  7. #
# This module provides a single function, validate_email(),
# which returns True or False to indicate whether a given address
# is valid according to the 'addr-spec' part of the specification
# given in RFC 2822. Ideally, we would like to find this
# in some other library, already thoroughly tested and well-
# maintained. The standard Python library email.utils
# contains a parseaddr() function, but it is not sufficient
# to detect many malformed addresses.
  16. #
  17. # This implementation aims to be faithful to the RFC, with the
  18. # exception of a circular definition (see comments below), and
  19. # with the omission of the pattern components marked as "obsolete".
  20. import re
  21. import smtplib
  22. try:
  23. import DNS
  24. ServerError = DNS.ServerError
  25. except:
  26. DNS = None
  27. class ServerError(Exception):
  28. pass
  29. # All we are really doing is comparing the input string to one
  30. # gigantic regular expression. But building that regexp, and
  31. # ensuring its correctness, is made much easier by assembling it
  32. # from the "tokens" defined by the RFC. Each of these tokens is
  33. # tested in the accompanying unit test file.
  34. #
  35. # The section of RFC 2822 from which each pattern component is
  36. # derived is given in an accompanying comment.
  37. #
  38. # (To make things simple, every string below is given as 'raw',
  39. # even when it's not strictly necessary. This way we don't forget
  40. # when it is necessary.)
  41. #
  42. WSP = r'[ \t]' # see 2.2.2. Structured Header Field Bodies
  43. CRLF = r'(?:\r\n)' # see 2.2.3. Long Header Fields
  44. NO_WS_CTL = r'\x01-\x08\x0b\x0c\x0f-\x1f\x7f' # see 3.2.1. Primitive Tokens
  45. QUOTED_PAIR = r'(?:\\.)' # see 3.2.2. Quoted characters
  46. FWS = r'(?:(?:{0}*{1})?{0}+)'.format(WSP, CRLF)
  47. # see 3.2.3. Folding white space and comments
  48. CTEXT = r'[{0}\x21-\x27\x2a-\x5b\x5d-\x7e]'.format(
  49. NO_WS_CTL) # see 3.2.3
  50. # see 3.2.3 (NB: The RFC includes COMMENT here as well, but that would be
  51. # circular.)
  52. CCONTENT = r'(?:{0}|{1})'.format(CTEXT, QUOTED_PAIR)
  53. COMMENT = r'\((?:{0}?{1})*{0}?\)'.format(
  54. FWS, CCONTENT) # see 3.2.3
  55. CFWS = r'(?:{0}?{1})*(?:{0}?{1}|{0})'.format(
  56. FWS, COMMENT) # see 3.2.3
  57. ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]' # see 3.2.4. Atom
  58. ATOM = r'{0}?{1}+{0}?'.format(CFWS, ATEXT)
  59. # see 3.2.4
  60. DOT_ATOM_TEXT = r'{0}+(?:\.{0}+)*'.format(
  61. ATEXT) # see 3.2.4
  62. DOT_ATOM = r'{0}?{1}{0}?'.format(
  63. CFWS, DOT_ATOM_TEXT) # see 3.2.4
  64. QTEXT = r'[{0}\x21\x23-\x5b\x5d-\x7e]'.format(
  65. NO_WS_CTL) # see 3.2.5. Quoted strings
  66. QCONTENT = r'(?:{0}|{1})'.format(QTEXT, QUOTED_PAIR)
  67. # see 3.2.5
  68. QUOTED_STRING = r'{0}?"(?:{1}?{2})*{1}?"{0}?'.format(CFWS, FWS, QCONTENT)
  69. LOCAL_PART = r'(?:{0}|{1})'.format(DOT_ATOM, QUOTED_STRING)
  70. # see 3.4.1. Addr-spec specification
  71. DTEXT = r'[{0}\x21-\x5a\x5e-\x7e]'.format(
  72. NO_WS_CTL) # see 3.4.1
  73. DCONTENT = r'(?:{0}|{1})'.format(DTEXT, QUOTED_PAIR)
  74. # see 3.4.1
  75. DOMAIN_LITERAL = r'{0}?\[(?:{1}?{2})*{1}?\]{0}?'.format(
  76. CFWS, FWS, DCONTENT) # see 3.4.1
  77. DOMAIN = r'(?:{0}|{1})'.format(DOT_ATOM, DOMAIN_LITERAL)
  78. # see 3.4.1
  79. ADDR_SPEC = r'{0}@{1}'.format(
  80. LOCAL_PART, DOMAIN) # see 3.4.1
  81. # A valid address will match exactly the 3.4.1 addr-spec.
  82. VALID_ADDRESS_REGEXP = '^' + ADDR_SPEC + '$'
  83. def validate_email(email, check_mx=False, verify=False):
  84. """Indicate whether the given string is a valid email address
  85. according to the 'addr-spec' portion of RFC 2822 (see section
  86. 3.4.1). Parts of the spec that are marked obsolete are *not*
  87. included in this test, and certain arcane constructions that
  88. depend on circular definitions in the spec may not pass, but in
  89. general this should correctly identify any email address likely
  90. to be in use as of 2011."""
  91. try:
  92. assert re.match(VALID_ADDRESS_REGEXP, email) is not None
  93. check_mx |= verify
  94. if check_mx:
  95. if not DNS:
  96. raise Exception('For check the mx records or check if the '
  97. 'email exists you must have installed pyDNS '
  98. 'python package')
  99. DNS.DiscoverNameServers()
  100. hostname = email[email.find('@') + 1:]
  101. mx_hosts = DNS.mxlookup(hostname)
  102. for mx in mx_hosts:
  103. try:
  104. smtp = smtplib.SMTP()
  105. smtp.connect(mx[1])
  106. if not verify:
  107. return True
  108. status, _ = smtp.helo()
  109. if status != 250:
  110. continue
  111. smtp.mail('')
  112. status, _ = smtp.rcpt(email)
  113. if status != 250:
  114. return False
  115. break
  116. except smtplib.SMTPServerDisconnected:
  117. # Server not permits verify user
  118. break
  119. except smtplib.SMTPConnectError:
  120. continue
  121. except (AssertionError, ServerError):
  122. return False
  123. return True
  124. # import sys
  125. # sys.modules[__name__], sys.modules['validate_email_module'] = validate_email,
  126. # sys.modules[__name__]
  127. # from validate_email_module import *