# Copyright (C) 2000-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Handler for mailto: links.
"""
import re
import urllib.parse
from email._parseaddr import AddressList
from . import urlbase
from .. import log, LOG_CHECK, strformat, url as urlutil
from dns import resolver
from ..network import iputil
from .const import WARN_MAIL_NO_MX_HOST
[docs]
def getaddresses(addr):
    """Extract the email addresses contained in a header field value.

    The value is parsed with ``AddressList``; entries whose address part
    is empty are discarded. If nothing could be parsed but the value is
    non-empty, the raw value is returned as the single list element.
    An empty value gives an empty list.
    """
    mails = [entry[1] for entry in AddressList(addr).addresslist if entry[1]]
    if mails:
        return mails
    # Nothing parseable: fall back to the raw string when there is one.
    return [addr] if addr else []
[docs]
def is_quoted(addr):
    """Return True iff mail address string is quoted.

    A quoted string needs an opening and a separate closing double
    quote, so it must be at least two characters long; a lone ``"`` is
    not considered quoted.
    """
    return len(addr) >= 2 and addr.startswith('"') and addr.endswith('"')
[docs]
def is_literal(domain):
    """Return True iff domain string is a literal.

    A literal is enclosed in square brackets, e.g. ``[127.0.0.1]``.
    It needs an opening and a separate closing bracket, so it must be
    at least two characters long.
    """
    return len(domain) >= 2 and domain.startswith('[') and domain.endswith(']')
# Substitution function that replaces backslash-escaped pairs
# (a backslash followed by any character).
_remove_quoted = re.compile(r'\\.').sub
# Characters that must be backslash-escaped inside a quoted string.
_quotes = re.compile(r'["\\]')


def is_missing_quote(addr):
    """Return True iff mail address is not correctly quoted.

    The first and last character of addr (the enclosing double quotes)
    are skipped. All backslash-escaped pairs are removed from the rest;
    if a double quote or backslash remains anywhere, it was not escaped
    and the address is incorrectly quoted.
    """
    inner = _remove_quoted("", addr[1:-1])
    # search() instead of match(): an unescaped character can occur at
    # any position, not only at the start of the string.
    return _quotes.search(inner) is not None
# Lowercase header names whose values in the query part of a mailto: URL
# are searched for email addresses (header names are lowercased before
# they are compared against this tuple).
EMAIL_CGI_ADDRESS = ("to", "cc", "bcc")
# Lowercase header name whose first value is stored as the mail subject.
EMAIL_CGI_SUBJECT = "subject"
[docs]
class MailtoUrl(urlbase.UrlBase):
    """
    Url link with mailto scheme.

    build_url() sets two attributes on the instance:
    addresses is the set of mail address strings parsed from the URL,
    subject is the value of the "subject" header or None.
    """

    def build_url(self):
        """Call super.build_url(), extract list of mail addresses from URL,
        and check their syntax.

        Syntax checking stops at the first address that makes the URL
        invalid. If neither an address nor a subject was found, a warning
        is added.
        """
        super().build_url()
        self.addresses = set()
        self.subject = None
        self.parse_addresses()
        if self.addresses:
            for addr in sorted(self.addresses):
                self.check_email_syntax(addr)
                if not self.valid:
                    break
        elif not self.subject:
            self.add_warning(
                _("No mail addresses or email subject found in `%(url)s'.")
                % {"url": self.url}
            )

    def parse_addresses(self):
        """Parse all mail addresses out of the URL target. Also parses
        optional CGI headers like "?to=foo@example.org".
        Stores parsed addresses in the self.addresses set and the value
        of a subject header in self.subject.
        """
        # cut off leading mailto: and unquote
        url = urllib.parse.unquote(self.base_url[7:], self.encoding)
        # Find the first "?" that is neither inside "..." or <...> nor
        # escaped by a backslash; it separates addresses from headers.
        mode = 0  # 0=default, 1=quote, 2=esc
        quote = None
        i = 0
        for i, c in enumerate(url):
            if mode == 0:
                if c == '?':
                    break
                elif c in '<"':
                    quote = c
                    mode = 1
                elif c == '\\':
                    mode = 2
            elif mode == 1:
                if c == '"' and quote == '"':
                    mode = 0
                elif c == '>' and quote == '<':
                    mode = 0
            elif mode == 2:
                # the escaped character is consumed, back to default
                mode = 0
        if i < (len(url) - 1):
            # The scan stopped before the last character, so there is a
            # header part after the "?" at index i.
            self.addresses.update(getaddresses(url[:i]))
            try:
                headers = urllib.parse.parse_qs(url[(i + 1):], strict_parsing=True)
                for key, vals in headers.items():
                    if key.lower() in EMAIL_CGI_ADDRESS:
                        # Only the first header value is added
                        self.addresses.update(
                            getaddresses(urllib.parse.unquote(vals[0], self.encoding))
                        )
                    if key.lower() == EMAIL_CGI_SUBJECT:
                        self.subject = vals[0]
            except ValueError as err:
                self.add_warning(_("Error parsing CGI values: %s") % str(err))
        else:
            # No header part found: the whole string is the address list.
            self.addresses.update(getaddresses(url))
        log.debug(LOG_CHECK, "addresses: %s", self.addresses)

    def check_email_syntax(self, mail):
        """Check email syntax. The relevant RFCs:
        - How to check names (memo):
          https://tools.ietf.org/html/rfc3696
        - Email address syntax
          https://tools.ietf.org/html/rfc2822
        - SMTP protocol
          https://tools.ietf.org/html/rfc5321#section-4.1.3
        - IPv6
          https://tools.ietf.org/html/rfc4291#section-2.2
        - Host syntax
          https://tools.ietf.org/html/rfc1123#section-2

        If a syntax error is found, it is stored with set_result()
        using valid=False and overwrite=False. Nothing is stored for a
        syntactically correct address.
        """
        error = self._email_syntax_error(mail)
        if error is not None:
            self.set_result(error, valid=False, overwrite=False)

    def _email_syntax_error(self, mail):
        """Return the message for the first syntax error found in mail,
        or None if no error was found.
        """
        # length checks
        # restrict email length to 256 characters
        # https://www.rfc-editor.org/errata_search.php?eid=1003
        if len(mail) > 256:
            return _(
                "Mail address `%(addr)s' too long. Allowed 256 chars,"
                " was %(length)d chars."
            ) % {"addr": mail, "length": len(mail)}
        if "@" not in mail:
            return _("Missing `@' in mail address `%(addr)s'.") % {"addr": mail}
        # note: be sure to use rsplit since "@" can occur in local part
        local, domain = mail.rsplit("@", 1)
        if not local:
            return _("Missing local part of mail address `%(addr)s'.") % {
                "addr": mail
            }
        if not domain:
            return _("Missing domain part of mail address `%(addr)s'.") % {
                "addr": mail
            }
        if len(local) > 64:
            return _(
                "Local part of mail address `%(addr)s' too long."
                " Allowed 64 chars, was %(length)d chars."
            ) % {"addr": mail, "length": len(local)}
        if len(domain) > 255:
            # report the length of the domain, which is the part that
            # exceeded the limit
            return _(
                "Domain part of mail address `%(addr)s' too long."
                " Allowed 255 chars, was %(length)d chars."
            ) % {"addr": mail, "length": len(domain)}
        # local part syntax check
        # Rules taken from https://tools.ietf.org/html/rfc3696#section-3
        if is_quoted(local):
            if is_missing_quote(local):
                return _(
                    "Unquoted double quote or backslash in mail address `%(addr)s'."
                ) % {"addr": mail}
        else:
            if local.startswith("."):
                return _(
                    "Local part of mail address `%(addr)s' may not start with a dot."
                ) % {"addr": mail}
            if local.endswith("."):
                return _(
                    "Local part of mail address `%(addr)s' may not end with a dot."
                ) % {"addr": mail}
            if ".." in local:
                return _(
                    "Local part of mail address `%(addr)s' may not contain two dots."
                ) % {"addr": mail}
            for char in '@ \\",[]':
                # occurrences escaped with a backslash are allowed
                if char in local.replace(f"\\{char}", ""):
                    return _(
                        "Local part of mail address `%(addr)s' contains"
                        " unquoted character `%(char)s."
                    ) % {"addr": mail, "char": char}
        # domain part syntax check
        if is_literal(domain):
            # it's an IP address
            ip = domain[1:-1]
            if ip.startswith("IPv6:"):
                ip = ip[5:]
            if not iputil.is_valid_ip(ip):
                return _(
                    "Domain part of mail address `%(addr)s' has invalid IP."
                ) % {"addr": mail}
        else:
            # it's a domain name
            if not urlutil.is_safe_domain(domain):
                return _("Invalid domain part of mail address `%(addr)s'.") % {
                    "addr": mail
                }
            if domain.endswith(".") or domain.split(".")[-1].isdigit():
                return _(
                    "Invalid top level domain part of mail address `%(addr)s'."
                ) % {"addr": mail}
        return None

    def check_connection(self):
        """
        Verify a list of email addresses. If one address fails,
        the whole list will fail.
        For each mail address the MX DNS records are found.
        If no MX records are found, print a warning and try
        to look for A DNS records. If no A records are found either
        print an error.
        """
        for mail in sorted(self.addresses):
            self.check_smtp_domain(mail)
            if not self.valid:
                break

    def check_smtp_domain(self, mail):
        """
        Check a single mail address.

        Looks up MX records for the domain of the address. If there are
        none, a warning is added and A records are looked up instead;
        if those are missing too, the result is set to invalid.
        """
        from dns.exception import DNSException

        log.debug(LOG_CHECK, "checking mail address %r", mail)
        mail = strformat.ascii_safe(mail)
        # only the domain part is needed for the DNS lookups
        _local, domain = mail.rsplit('@', 1)
        log.debug(LOG_CHECK, "looking up MX mailhost %r", domain)
        try:
            answers = resolver.resolve(domain, 'MX', search=True)
        except DNSException:
            answers = []
        if len(answers) == 0:
            self.add_warning(
                _("No MX mail host for %(domain)s found.") % {'domain': domain},
                tag=WARN_MAIL_NO_MX_HOST,
            )
            try:
                answers = resolver.resolve(domain, 'A', search=True)
            except DNSException:
                answers = []
            if len(answers) == 0:
                self.set_result(
                    _("No host for %(domain)s found.") % {'domain': domain},
                    valid=False,
                    overwrite=True,
                )
                return
            # set preference to zero
            mxdata = [(0, rdata.to_text(omit_final_dot=True)) for rdata in answers]
        else:
            from dns.rdtypes.mxbase import MXBase

            mxdata = [
                (rdata.preference, rdata.exchange.to_text(omit_final_dot=True))
                for rdata in answers
                if isinstance(rdata, MXBase)
            ]
            if not mxdata:
                self.set_result(
                    _("Got invalid DNS answer %(answer)s for %(domain)s.")
                    % {'answer': answers, 'domain': domain},
                    valid=False,
                    overwrite=True,
                )
                return
            # sort according to preference (lower preference means this
            # host should be preferred)
            mxdata.sort()
        # debug output
        log.debug(LOG_CHECK, "found %d MX mailhosts:", len(answers))
        for preference, host in mxdata:
            log.debug(LOG_CHECK, "MX host %r, preference %d", host, preference)
        self.set_result(_("Valid mail address syntax"))

    def set_cache_url(self):
        """
        The cache url is a comma separated list of emails.

        The addresses are sorted and prefixed with the URL scheme.
        """
        emails = ",".join(sorted(self.addresses))
        self.cache_url = f"{self.scheme}:{emails}"

    def can_get_content(self):
        """
        mailto: URLs do not have any content
        @return: False
        @rtype: bool
        """
        return False