# Source code for linkcheck.configuration.confparse

# Copyright (C) 2000-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Parse configuration files"""

from configparser import RawConfigParser
from re import compile as re_compile
import os

from .. import (
    LinkCheckerError,
    get_link_pat,
    LOG_CHECK,
    log,
    fileutil,
    plugins,
    logconf,
)


def read_multiline(value):
    """Yield the meaningful lines of a multiline option value.

    Every line of ``value`` is stripped of surrounding whitespace; lines
    that are empty after stripping, or that start with ``#``, are skipped.
    """
    for raw_line in value.splitlines():
        entry = raw_line.strip()
        if entry and not entry.startswith('#'):
            yield entry
class LCConfigParser(RawConfigParser):
    """
    Parse a LinkChecker configuration file.

    Parsed values are written into the ``config`` mapping handed to the
    constructor; the ``read_*_config`` methods each handle one section of
    the configuration file.
    """

    def __init__(self, config):
        """Initialize configuration.

        ``config`` is the mapping that receives every parsed option.
        """
        super().__init__()
        self.config = config

    def read(self, files):
        """Read settings from given config files.

        The names of the files that could be parsed are kept in
        ``self.read_ok``; files that could not be read only produce a
        warning.

        @raises: LinkCheckerError on syntax errors in the config file(s),
            when no section was found at all, or when any option value
            is rejected while reading the individual sections.
        """
        # Wrap in a 1-tuple so that a tuple argument produces the intended
        # AssertionError instead of a string formatting TypeError.
        assert isinstance(files, list), "Invalid file list %r" % (files,)
        try:
            self.read_ok = super().read(files)
            if not self.sections():
                raise LinkCheckerError(
                    _("configuration files %s contain no sections.") % files)
            if len(self.read_ok) < len(files):
                failed_files = set(files) - set(self.read_ok)
                log.warn(
                    LOG_CHECK, "Could not read configuration files %s.", failed_files
                )
            # Read all the configuration parameters from the given files.
            self.read_checking_config()
            self.read_authentication_config()
            self.read_filtering_config()
            self.read_output_config()
            self.read_plugin_config()
        except Exception as msg:
            # Every failure is reported as LinkCheckerError; chain the cause
            # so the original traceback is not lost.
            raise LinkCheckerError(
                _("Error parsing configuration: %s") % str(msg)
            ) from msg

    def read_string_option(self, section, option, allowempty=False):
        """Read a string option and store it under ``config[option]``.

        Nothing happens when the option is absent.

        @raises: LinkCheckerError if the value is empty and ``allowempty``
            is false.
        """
        if self.has_option(section, option):
            value = self.get(section, option)
            if not allowempty and not value:
                raise LinkCheckerError(
                    _("invalid empty value for %s: %s\n") % (option, value)
                )
            self.config[option] = value

    def read_boolean_option(self, section, option):
        """Read a boolean option and store it under ``config[option]``.

        Nothing happens when the option is absent.
        """
        if self.has_option(section, option):
            self.config[option] = self.getboolean(section, option)

    def read_float_option(self, section, option, key=None, min=None, max=None):
        """Read a float option.

        The value is stored under ``config[key]``; ``key`` defaults to the
        option name. Nothing happens when the option is absent.

        @raises: LinkCheckerError if the value is less than ``min`` or
            greater than ``max`` (each bound is only checked when given).
        """
        if self.has_option(section, option):
            num = self.getfloat(section, option)
            # %s instead of %d: %d would truncate the fractional part and
            # produce messages such as "0 must not be less than 0".
            if min is not None and num < min:
                raise LinkCheckerError(
                    _("invalid value for %s: %s must not be less than %s")
                    % (option, num, min)
                )
            if max is not None and num > max:
                raise LinkCheckerError(
                    _("invalid value for %s: %s must not be greater than %s")
                    % (option, num, max)
                )
            if key is None:
                key = option
            self.config[key] = num

    def read_int_option(self, section, option, key=None, min=None, max=None):
        """Read an integer option.

        The value is stored under ``config[key]``; ``key`` defaults to the
        option name. Nothing happens when the option is absent.

        @raises: LinkCheckerError if the value is less than ``min`` or
            greater than ``max`` (each bound is only checked when given).
        """
        if self.has_option(section, option):
            num = self.getint(section, option)
            if min is not None and num < min:
                raise LinkCheckerError(
                    _("invalid value for %s: %d must not be less than %d")
                    % (option, num, min)
                )
            if max is not None and num > max:
                raise LinkCheckerError(
                    _("invalid value for %s: %d must not be greater than %d")
                    % (option, num, max)
                )
            if key is None:
                key = option
            self.config[key] = num

    def read_output_config(self):
        """Read configuration options in section "output".

        Also copies the per-logger sections (one section per entry of
        ``LoggerClasses``) and the deprecated "blacklist" section into the
        configuration.
        """
        section = "output"
        from ..logger import LoggerClasses

        # Deprecated section name: its options are stored as "failures".
        if self.has_section("blacklist"):
            log.warn(
                LOG_CHECK,
                _("The blacklist section in linkcheckerrc is deprecated, "
                  "please rename to failures")
            )
            for opt in self.options("blacklist"):
                self.config["failures"][opt] = self.get("blacklist", opt)
        for c in LoggerClasses:
            key = c.LoggerName
            if self.has_section(key):
                for opt in self.options(key):
                    self.config[key][opt] = self.get(key, opt)
                # "parts" is a comma-separated list; replace the raw string
                # stored by the loop above with the normalized list.
                if self.has_option(key, 'parts'):
                    val = self.get(key, 'parts')
                    parts = [f.strip().lower() for f in val.split(',')]
                    self.config[key]['parts'] = parts
        self.read_boolean_option(section, "warnings")
        if self.has_option(section, "verbose"):
            if self.getboolean(section, "verbose"):
                # verbose implies warnings
                self.config["verbose"] = True
                self.config["warnings"] = True
        if self.has_option(section, "quiet"):
            if self.getboolean(section, "quiet"):
                self.config['output'] = 'none'
                self.config['quiet'] = True
                # NOTE(review): statement nesting was reconstructed; this call
                # is assumed to belong to the quiet branch -- confirm.
                logconf.reset_loglevel()  # if debug will be overwritten next
        if self.has_option(section, "debug"):
            val = self.get(section, "debug")
            parts = [f.strip().lower() for f in val.split(',')]
            logconf.set_debug(parts)
        self.read_boolean_option(section, "status")
        if self.has_option(section, "log"):
            val = self.get(section, "log").strip().lower()
            self.config['output'] = val
        if self.has_option(section, "fileoutput"):
            loggers = self.get(section, "fileoutput").split(",")
            # strip names from whitespace
            loggers = (x.strip().lower() for x in loggers)
            # no file output for the failures and none Logger
            from ..logger import LoggerNames

            loggers = (
                x for x in loggers if x in LoggerNames and x not in ("failures", "none")
            )
            for val in loggers:
                output = self.config.logger_new(val, fileoutput=1)
                self.config['fileoutput'].append(output)
        if self.has_option(section, "ignoreerrors"):
            for line in read_multiline(self.get(section, "ignoreerrors")):
                # Each line holds one or two whitespace-separated regular
                # expressions; a missing second one becomes the empty pattern.
                parts = line.split(maxsplit=1)
                if len(parts) == 1:
                    parts.append('')
                self.config["ignoreerrors"].append(tuple(
                    re_compile(part) for part in parts
                ))

    def read_checking_config(self):
        """Read configuration options in section "checking"."""
        section = "checking"
        self.read_int_option(section, "threads", min=-1)
        # A negative thread count is stored as 0.
        self.config['threads'] = max(0, self.config['threads'])
        self.read_int_option(section, "timeout", min=1)
        self.read_int_option(section, "aborttimeout", min=1)
        self.read_int_option(section, "recursionlevel", min=-1)
        self.read_string_option(section, "useragent")
        self.read_float_option(section, "maxrequestspersecond", min=0.001)
        self.read_int_option(section, "maxnumurls", min=0)
        self.read_int_option(section, "maxfilesizeparse", min=1)
        self.read_int_option(section, "maxfilesizedownload", min=1)
        if self.has_option(section, "allowedschemes"):
            self.config['allowedschemes'] = [
                x.strip().lower()
                for x in self.get(section, 'allowedschemes').split(',')
            ]
        self.read_boolean_option(section, "debugmemory")
        self.read_string_option(section, "cookiefile")
        self.read_boolean_option(section, "robotstxt")
        self.read_string_option(section, "localwebroot")
        # sslverify is either a boolean or, when it does not parse as one,
        # an arbitrary string that is stored unchanged.
        try:
            self.read_boolean_option(section, "sslverify")
        except ValueError:
            self.read_string_option(section, "sslverify")
        self.read_int_option(section, "maxrunseconds", min=0)
        self.read_int_option(section, "resultcachesize", min=0)

    def read_authentication_config(self):
        """Read configuration options in section "authentication".

        @raises: LinkCheckerError for an "entry" line that does not have
            two or three whitespace-separated parts, or for a login URL
            that is neither HTTP nor HTTPS.
        """
        section = "authentication"
        # Names of the entries that carry a password, used for the file
        # permission warning at the end.
        password_fields = []
        if self.has_option(section, "entry"):
            for val in read_multiline(self.get(section, "entry")):
                auth = val.split()
                if len(auth) == 3:
                    self.config.add_auth(
                        pattern=auth[0], user=auth[1], password=auth[2]
                    )
                    password_fields.append(f"entry/{auth[0]}/{auth[1]}")
                elif len(auth) == 2:
                    self.config.add_auth(pattern=auth[0], user=auth[1])
                else:
                    raise LinkCheckerError(
                        _("missing auth part in entry %(val)r") % {"val": val}
                    )
        # read login URL and field names
        if self.has_option(section, "loginurl"):
            val = self.get(section, "loginurl").strip()
            if not (
                val.lower().startswith("http:") or val.lower().startswith("https:")
            ):
                raise LinkCheckerError(
                    _(
                        "invalid login URL `%s'. Only "
                        "HTTP and HTTPS URLs are supported."
                    )
                    % val
                )
            self.config["loginurl"] = val
        self.read_string_option(section, "loginuserfield")
        self.read_string_option(section, "loginpasswordfield")
        # read login extra fields
        if self.has_option(section, "loginextrafields"):
            for val in read_multiline(self.get(section, "loginextrafields")):
                # "name:value", split at the first colon only
                name, value = val.split(":", 1)
                self.config["loginextrafields"][name] = value
        self.check_password_readable(section, password_fields)

    def check_password_readable(self, section, fields):
        """Check if there is a readable configuration file and print a warning.

        ``fields`` lists the options that contain passwords; nothing is
        checked when it is empty.
        """
        if not fields:
            return
        # The information which of the configuration files
        # included which option is not available. To avoid false positives,
        # a warning is only printed if exactly one file has been read.
        if len(self.read_ok) != 1:
            return
        fn = self.read_ok[0]
        if fileutil.is_accessable_by_others(fn):
            log.warn(
                LOG_CHECK,
                _(
                    "The configuration file %s contains password information (in"
                    " section [%s] and options %s) and the file is readable by"
                    " others. Please make the file only readable by you."
                ),
                fn,
                section,
                fields,
            )
            # Add a platform specific hint on how to fix the permissions.
            if os.name == 'posix':
                log.warn(LOG_CHECK, _("For example execute 'chmod go-rw %s'.") % fn)
            elif os.name == 'nt':
                log.warn(
                    LOG_CHECK,
                    _(
                        "See %(url)s for more info on setting file permissions."
                    )
                    % {"url": "https://support.microsoft.com/kb/308419"}
                )

    def read_filtering_config(self):
        """
        Read configuration options in section "filtering".
        """
        section = "filtering"
        if self.has_option(section, "ignorewarnings"):
            self.config['ignorewarnings'] = [
                f.strip().lower()
                for f in self.get(section, 'ignorewarnings').split(',')
            ]
        if self.has_option(section, "ignorewarningsforurls"):
            for line in read_multiline(self.get(section, "ignorewarningsforurls")):
                # Each line holds one or two whitespace-separated regular
                # expressions; a missing second one becomes the empty pattern.
                parts = line.split(maxsplit=1)
                if len(parts) == 1:
                    parts.append('')
                self.config["ignorewarningsforurls"].append(tuple(
                    re_compile(part) for part in parts
                ))
        # "ignore" and "nofollow" both extend "externlinks"; they differ
        # only in the strict flag passed to get_link_pat.
        if self.has_option(section, "ignore"):
            for line in read_multiline(self.get(section, "ignore")):
                pat = get_link_pat(line, strict=1)
                self.config["externlinks"].append(pat)
        if self.has_option(section, "nofollow"):
            for line in read_multiline(self.get(section, "nofollow")):
                pat = get_link_pat(line, strict=0)
                self.config["externlinks"].append(pat)
        if self.has_option(section, "internlinks"):
            pat = get_link_pat(self.get(section, "internlinks"))
            self.config["internlinks"].append(pat)
        self.read_boolean_option(section, "checkextern")

    def read_plugin_config(self):
        """Read plugin-specific configuration values.

        A plugin is enabled when the file has a section named after the
        plugin class; the plugin's own ``read_config`` supplies the value
        stored under that section name.
        """
        folders = self.config["pluginfolders"]
        modules = plugins.get_plugin_modules(folders)
        for pluginclass in plugins.get_plugin_classes(modules):
            section = pluginclass.__name__
            if self.has_section(section):
                self.config["enabledplugins"].append(section)
                self.config[section] = pluginclass.read_config(self)