You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123 lines
5.8 KiB

  1. # RFC 2822 - style email validation for Python
  2. # (c) 2012 Syrus Akbary <me@syrusakbary.com>
  3. # Extended from (c) 2011 Noel Bush <noel@aitools.org>
  4. # for support of mx and user check
  5. # This code is made available to you under the GNU LGPL v3.
  6. #
  7. # This module provides a single function, validate_email(),
  8. # which returns True or False to indicate whether a given address
  9. # is valid according to the 'addr-spec' part of the specification
  10. # given in RFC 2822. Ideally, we would like to find this
  11. # in some other library, already thoroughly tested and well-
  12. # maintained. The standard Python library email.utils
  13. # contains a parseaddr() function, but it is not sufficient
  14. # to detect many malformed addresses.
  15. #
  16. # This implementation aims to be faithful to the RFC, with the
  17. # exception of a circular definition (see comments below), and
  18. # with the omission of the pattern components marked as "obsolete".
  19. import re
  20. import smtplib
  21. import socket
  22. try:
  23. import DNS
  24. ServerError = DNS.ServerError
  25. except:
  26. DNS = None
  27. class ServerError(Exception): pass
  28. # All we are really doing is comparing the input string to one
  29. # gigantic regular expression. But building that regexp, and
  30. # ensuring its correctness, is made much easier by assembling it
  31. # from the "tokens" defined by the RFC. Each of these tokens is
  32. # tested in the accompanying unit test file.
  33. #
  34. # The section of RFC 2822 from which each pattern component is
  35. # derived is given in an accompanying comment.
  36. #
  37. # (To make things simple, every string below is given as 'raw',
  38. # even when it's not strictly necessary. This way we don't forget
  39. # when it is necessary.)
  40. #
  41. WSP = r'[ \t]' # see 2.2.2. Structured Header Field Bodies
  42. CRLF = r'(?:\r\n)' # see 2.2.3. Long Header Fields
  43. NO_WS_CTL = r'\x01-\x08\x0b\x0c\x0f-\x1f\x7f' # see 3.2.1. Primitive Tokens
  44. QUOTED_PAIR = r'(?:\\.)' # see 3.2.2. Quoted characters
  45. FWS = r'(?:(?:' + WSP + r'*' + CRLF + r')?' + \
  46. WSP + r'+)' # see 3.2.3. Folding white space and comments
  47. CTEXT = r'[' + NO_WS_CTL + \
  48. r'\x21-\x27\x2a-\x5b\x5d-\x7e]' # see 3.2.3
  49. CCONTENT = r'(?:' + CTEXT + r'|' + \
  50. QUOTED_PAIR + r')' # see 3.2.3 (NB: The RFC includes COMMENT here
  51. # as well, but that would be circular.)
  52. COMMENT = r'\((?:' + FWS + r'?' + CCONTENT + \
  53. r')*' + FWS + r'?\)' # see 3.2.3
  54. CFWS = r'(?:' + FWS + r'?' + COMMENT + ')*(?:' + \
  55. FWS + '?' + COMMENT + '|' + FWS + ')' # see 3.2.3
  56. ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]' # see 3.2.4. Atom
  57. ATOM = CFWS + r'?' + ATEXT + r'+' + CFWS + r'?' # see 3.2.4
  58. DOT_ATOM_TEXT = ATEXT + r'+(?:\.' + ATEXT + r'+)*' # see 3.2.4
  59. DOT_ATOM = CFWS + r'?' + DOT_ATOM_TEXT + CFWS + r'?' # see 3.2.4
  60. QTEXT = r'[' + NO_WS_CTL + \
  61. r'\x21\x23-\x5b\x5d-\x7e]' # see 3.2.5. Quoted strings
  62. QCONTENT = r'(?:' + QTEXT + r'|' + \
  63. QUOTED_PAIR + r')' # see 3.2.5
  64. QUOTED_STRING = CFWS + r'?' + r'"(?:' + FWS + \
  65. r'?' + QCONTENT + r')*' + FWS + \
  66. r'?' + r'"' + CFWS + r'?'
  67. LOCAL_PART = r'(?:' + DOT_ATOM + r'|' + \
  68. QUOTED_STRING + r')' # see 3.4.1. Addr-spec specification
  69. DTEXT = r'[' + NO_WS_CTL + r'\x21-\x5a\x5e-\x7e]' # see 3.4.1
  70. DCONTENT = r'(?:' + DTEXT + r'|' + \
  71. QUOTED_PAIR + r')' # see 3.4.1
  72. DOMAIN_LITERAL = CFWS + r'?' + r'\[' + \
  73. r'(?:' + FWS + r'?' + DCONTENT + \
  74. r')*' + FWS + r'?\]' + CFWS + r'?' # see 3.4.1
  75. DOMAIN = r'(?:' + DOT_ATOM + r'|' + \
  76. DOMAIN_LITERAL + r')' # see 3.4.1
  77. ADDR_SPEC = LOCAL_PART + r'@' + DOMAIN # see 3.4.1
  78. # A valid address will match exactly the 3.4.1 addr-spec.
  79. VALID_ADDRESS_REGEXP = '^' + ADDR_SPEC + '$'
  80. def validate_email(email, check_mx=False,verify=False):
  81. """Indicate whether the given string is a valid email address
  82. according to the 'addr-spec' portion of RFC 2822 (see section
  83. 3.4.1). Parts of the spec that are marked obsolete are *not*
  84. included in this test, and certain arcane constructions that
  85. depend on circular definitions in the spec may not pass, but in
  86. general this should correctly identify any email address likely
  87. to be in use as of 2011."""
  88. try:
  89. assert re.match(VALID_ADDRESS_REGEXP, email) is not None
  90. check_mx |= verify
  91. if check_mx:
  92. if not DNS: raise Exception('For check the mx records or check if the email exists you must have installed pyDNS python package')
  93. DNS.DiscoverNameServers()
  94. hostname = email[email.find('@')+1:]
  95. mx_hosts = DNS.mxlookup(hostname)
  96. for mx in mx_hosts:
  97. try:
  98. smtp = smtplib.SMTP()
  99. smtp.connect(mx[1])
  100. if not verify: return True
  101. status, _ = smtp.helo()
  102. if status != 250: continue
  103. smtp.mail('')
  104. status, _ = smtp.rcpt(email)
  105. if status != 250: return False
  106. break
  107. except smtplib.SMTPServerDisconnected: #Server not permits verify user
  108. break
  109. except smtplib.SMTPConnectError:
  110. continue
  111. except (AssertionError, ServerError):
  112. return False
  113. return True
  114. # import sys
  115. # sys.modules[__name__],sys.modules['validate_email_module'] = validate_email,sys.modules[__name__]
  116. # from validate_email_module import *