# Copyright (C) 2000-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Parse configuration files"""
from configparser import RawConfigParser
from re import compile as re_compile
import os
from .. import (
LinkCheckerError,
get_link_pat,
LOG_CHECK,
log,
fileutil,
plugins,
logconf,
)
def read_multiline(value):
    """Helper function reading multiline values.

    Splits ``value`` into lines and yields each line with surrounding
    whitespace stripped. Lines that are empty after stripping, and lines
    whose first non-blank character is ``#``, are skipped.
    """
    for line in value.splitlines():
        line = line.strip()
        # Blank lines and comment lines carry no value.
        if not line or line.startswith('#'):
            continue
        yield line
class LCConfigParser(RawConfigParser):
    """
    Parse a LinkChecker configuration file.

    Values are read from the parsed sections and stored in the ``config``
    object passed to the constructor.
    """

    def __init__(self, config):
        """Initialize configuration.

        @param config: configuration object that receives the parsed values;
            it is indexed by option name
        """
        super().__init__()
        self.config = config
        # Files parsed by the most recent read() call. Initialised here so
        # check_password_readable() is safe to call before read().
        self.read_ok = []

    def read(self, files):
        """Read settings from given config files.

        After parsing, the checking, authentication, filtering, output and
        plugin settings are read, in that order.

        @param files: list of configuration file names
        @raises: LinkCheckerError on syntax errors in the config file(s);
            any other exception raised while reading is wrapped as well
        """
        assert isinstance(files, list), "Invalid file list %r" % files
        try:
            self.read_ok = super().read(files)
            if not self.sections():
                raise LinkCheckerError(
                    _("configuration files %s contain no sections.") % files)
            if len(self.read_ok) < len(files):
                failed_files = set(files) - set(self.read_ok)
                log.warn(
                    LOG_CHECK, "Could not read configuration files %s.", failed_files
                )
            # Read all the configuration parameters from the given files.
            self.read_checking_config()
            self.read_authentication_config()
            self.read_filtering_config()
            self.read_output_config()
            self.read_plugin_config()
        except Exception as msg:
            # Chain explicitly so the original traceback stays attached.
            raise LinkCheckerError(
                _("Error parsing configuration: %s") % str(msg)
            ) from msg

    def read_string_option(self, section, option, allowempty=False):
        """Read a string option.

        Stores the value under ``option`` in the configuration. Nothing
        happens when the option is not present.

        @raises: LinkCheckerError if the value is empty and ``allowempty``
            is false
        """
        if self.has_option(section, option):
            value = self.get(section, option)
            if not allowempty and not value:
                raise LinkCheckerError(
                    _("invalid empty value for %s: %s\n") % (option, value)
                )
            self.config[option] = value

    def read_boolean_option(self, section, option):
        """Read a boolean option.

        Stores the value under ``option`` in the configuration. Nothing
        happens when the option is not present.
        """
        if self.has_option(section, option):
            self.config[option] = self.getboolean(section, option)

    def read_float_option(self, section, option, key=None, min=None, max=None):
        """Read a float option.

        Stores the value under ``key`` (default: ``option``) in the
        configuration. Nothing happens when the option is not present.

        @param min: inclusive lower bound, or None for no lower bound
        @param max: inclusive upper bound, or None for no upper bound
        @raises: LinkCheckerError if the value lies outside the bounds
        """
        if self.has_option(section, option):
            num = self.getfloat(section, option)
            # %s rather than %d: %d would truncate the fractional part and
            # print e.g. "0 must not be less than 0" for 0.0005 < 0.001.
            if min is not None and num < min:
                raise LinkCheckerError(
                    _("invalid value for %s: %s must not be less than %s")
                    % (option, num, min)
                )
            # The upper bound is violated when the value is above it.
            if max is not None and num > max:
                raise LinkCheckerError(
                    _("invalid value for %s: %s must not be greater than %s")
                    % (option, num, max)
                )
            if key is None:
                key = option
            self.config[key] = num

    def read_int_option(self, section, option, key=None, min=None, max=None):
        """Read an integer option.

        Stores the value under ``key`` (default: ``option``) in the
        configuration. Nothing happens when the option is not present.

        @param min: inclusive lower bound, or None for no lower bound
        @param max: inclusive upper bound, or None for no upper bound
        @raises: LinkCheckerError if the value lies outside the bounds
        """
        if self.has_option(section, option):
            num = self.getint(section, option)
            if min is not None and num < min:
                raise LinkCheckerError(
                    _("invalid value for %s: %d must not be less than %d")
                    % (option, num, min)
                )
            # The upper bound is violated when the value is above it.
            if max is not None and num > max:
                raise LinkCheckerError(
                    _("invalid value for %s: %d must not be greater than %d")
                    % (option, num, max)
                )
            if key is None:
                key = option
            self.config[key] = num

    def read_output_config(self):
        """Read configuration options in section "output".

        Also copies the per-logger sections (one per entry of
        LoggerClasses) and the deprecated "blacklist" section into the
        configuration.
        """
        section = "output"
        from ..logger import LoggerClasses

        if self.has_section("blacklist"):
            log.warn(
                LOG_CHECK,
                _("The blacklist section in linkcheckerrc is deprecated, "
                  "please rename to failures")
            )
            # Deprecated section: its options are stored under "failures".
            for opt in self.options("blacklist"):
                self.config["failures"][opt] = self.get("blacklist", opt)
        for c in LoggerClasses:
            key = c.LoggerName
            if self.has_section(key):
                for opt in self.options(key):
                    self.config[key][opt] = self.get(key, opt)
                # "parts" is a comma-separated list; replace the raw string
                # copied above with the normalized list.
                if self.has_option(key, 'parts'):
                    val = self.get(key, 'parts')
                    parts = [f.strip().lower() for f in val.split(',')]
                    self.config[key]['parts'] = parts
        self.read_boolean_option(section, "warnings")
        if self.has_option(section, "verbose"):
            if self.getboolean(section, "verbose"):
                # verbose also switches warnings on
                self.config["verbose"] = True
                self.config["warnings"] = True
        if self.has_option(section, "quiet"):
            if self.getboolean(section, "quiet"):
                self.config['output'] = 'none'
                self.config['quiet'] = True
                # NOTE(review): the indentation of this call was lost in the
                # reviewed source; it is assumed to belong to the quiet
                # branch - confirm against the upstream file.
                logconf.reset_loglevel()  # if debug will be overwritten next
        if self.has_option(section, "debug"):
            val = self.get(section, "debug")
            parts = [f.strip().lower() for f in val.split(',')]
            logconf.set_debug(parts)
        self.read_boolean_option(section, "status")
        if self.has_option(section, "log"):
            val = self.get(section, "log").strip().lower()
            self.config['output'] = val
        if self.has_option(section, "fileoutput"):
            loggers = self.get(section, "fileoutput").split(",")
            # strip names from whitespace
            loggers = (x.strip().lower() for x in loggers)
            # no file output for the failures and none Logger
            from ..logger import LoggerNames

            loggers = (
                x
                for x in loggers
                if x in LoggerNames and x not in ("failures", "none")
            )
            for val in loggers:
                output = self.config.logger_new(val, fileoutput=1)
                self.config['fileoutput'].append(output)
        if self.has_option(section, "ignoreerrors"):
            for line in read_multiline(self.get(section, "ignoreerrors")):
                # Each line holds one or two whitespace-separated regular
                # expressions; a missing second one becomes the empty pattern.
                parts = line.split(maxsplit=1)
                if len(parts) == 1:
                    parts.append('')
                self.config["ignoreerrors"].append(tuple(
                    re_compile(part) for part in parts
                ))

    def read_checking_config(self):
        """Read configuration options in section "checking"."""
        section = "checking"
        self.read_int_option(section, "threads", min=-1)
        # A configured -1 passes the bound check above and is clamped to 0.
        self.config['threads'] = max(0, self.config['threads'])
        self.read_int_option(section, "timeout", min=1)
        self.read_int_option(section, "aborttimeout", min=1)
        self.read_int_option(section, "recursionlevel", min=-1)
        self.read_string_option(section, "useragent")
        self.read_float_option(section, "maxrequestspersecond", min=0.001)
        self.read_int_option(section, "maxnumurls", min=0)
        self.read_int_option(section, "maxfilesizeparse", min=1)
        self.read_int_option(section, "maxfilesizedownload", min=1)
        if self.has_option(section, "allowedschemes"):
            self.config['allowedschemes'] = [
                x.strip().lower()
                for x in self.get(section, 'allowedschemes').split(',')
            ]
        self.read_boolean_option(section, "debugmemory")
        self.read_string_option(section, "cookiefile")
        self.read_boolean_option(section, "robotstxt")
        self.read_string_option(section, "localwebroot")
        # sslverify is either a boolean or an arbitrary string; fall back to
        # the string form when the value is not a valid boolean.
        try:
            self.read_boolean_option(section, "sslverify")
        except ValueError:
            self.read_string_option(section, "sslverify")
        self.read_int_option(section, "maxrunseconds", min=0)
        self.read_int_option(section, "resultcachesize", min=0)

    def read_authentication_config(self):
        """Read configuration options in section "authentication".

        @raises: LinkCheckerError on an "entry" line that does not have two
            or three parts, or on a login URL that is not HTTP or HTTPS
        """
        section = "authentication"
        # Names of the entries that carry a password, for the
        # file-permission warning issued at the end.
        password_fields = []
        if self.has_option(section, "entry"):
            for val in read_multiline(self.get(section, "entry")):
                # "pattern user [password]", separated by whitespace
                auth = val.split()
                if len(auth) == 3:
                    self.config.add_auth(
                        pattern=auth[0], user=auth[1], password=auth[2]
                    )
                    password_fields.append(f"entry/{auth[0]}/{auth[1]}")
                elif len(auth) == 2:
                    self.config.add_auth(pattern=auth[0], user=auth[1])
                else:
                    raise LinkCheckerError(
                        _("missing auth part in entry %(val)r") % {"val": val}
                    )
        # read login URL and field names
        if self.has_option(section, "loginurl"):
            val = self.get(section, "loginurl").strip()
            if not val.lower().startswith(("http:", "https:")):
                raise LinkCheckerError(
                    _(
                        "invalid login URL `%s'. Only "
                        "HTTP and HTTPS URLs are supported."
                    )
                    % val
                )
            self.config["loginurl"] = val
        self.read_string_option(section, "loginuserfield")
        self.read_string_option(section, "loginpasswordfield")
        # read login extra fields
        if self.has_option(section, "loginextrafields"):
            for val in read_multiline(self.get(section, "loginextrafields")):
                # "name:value"; only the first colon separates the two
                name, value = val.split(":", 1)
                self.config["loginextrafields"][name] = value
        self.check_password_readable(section, password_fields)

    def check_password_readable(self, section, fields):
        """Check if there is a readable configuration file and print a warning.

        @param section: name of the section holding the password options
        @param fields: names of the options that contain passwords; nothing
            is checked when this is empty
        """
        if not fields:
            return
        # The information which of the configuration files
        # included which option is not available. To avoid false positives,
        # a warning is only printed if exactly one file has been read.
        if len(self.read_ok) != 1:
            return
        fn = self.read_ok[0]
        if fileutil.is_accessable_by_others(fn):
            log.warn(
                LOG_CHECK,
                _(
                    "The configuration file %s contains password information (in"
                    " section [%s] and options %s) and the file is readable by"
                    " others. Please make the file only readable by you."
                ),
                fn,
                section,
                fields,
            )
            # Add a platform-specific hint on how to fix the permissions.
            if os.name == 'posix':
                log.warn(LOG_CHECK, _("For example execute 'chmod go-rw %s'.") % fn)
            elif os.name == 'nt':
                log.warn(
                    LOG_CHECK,
                    _(
                        "See %(url)s for more info on setting file permissions."
                    ) % {"url": "https://support.microsoft.com/kb/308419"}
                )

    def read_filtering_config(self):
        """
        Read configuration options in section "filtering".
        """
        section = "filtering"
        if self.has_option(section, "ignorewarnings"):
            self.config['ignorewarnings'] = [
                f.strip().lower()
                for f in self.get(section, 'ignorewarnings').split(',')
            ]
        if self.has_option(section, "ignorewarningsforurls"):
            for line in read_multiline(self.get(section, "ignorewarningsforurls")):
                # Each line holds one or two whitespace-separated regular
                # expressions; a missing second one becomes the empty pattern.
                parts = line.split(maxsplit=1)
                if len(parts) == 1:
                    parts.append('')
                self.config["ignorewarningsforurls"].append(tuple(
                    re_compile(part) for part in parts
                ))
        # "ignore" and "nofollow" patterns both end up in "externlinks";
        # they differ only in the strict flag given to get_link_pat().
        if self.has_option(section, "ignore"):
            for line in read_multiline(self.get(section, "ignore")):
                pat = get_link_pat(line, strict=1)
                self.config["externlinks"].append(pat)
        if self.has_option(section, "nofollow"):
            for line in read_multiline(self.get(section, "nofollow")):
                pat = get_link_pat(line, strict=0)
                self.config["externlinks"].append(pat)
        if self.has_option(section, "internlinks"):
            pat = get_link_pat(self.get(section, "internlinks"))
            self.config["internlinks"].append(pat)
        self.read_boolean_option(section, "checkextern")

    def read_plugin_config(self):
        """Read plugin-specific configuration values.

        A plugin is enabled when the configuration has a section named
        after the plugin class; the value returned by the class's
        read_config() is stored under that name.
        """
        folders = self.config["pluginfolders"]
        modules = plugins.get_plugin_modules(folders)
        for pluginclass in plugins.get_plugin_classes(modules):
            section = pluginclass.__name__
            if self.has_section(section):
                self.config["enabledplugins"].append(section)
                self.config[section] = pluginclass.read_config(self)