# Source code for linkcheck.logger.html

# Copyright (C) 2000-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
A HTML logger.
"""

import html
import os
import time
import urllib.parse

from . import _Logger
from .. import strformat, configuration


# Link template for the W3C markup validator; the ss=1 flag enables
# "show source".  %(uri)s is replaced by the page to validate.
validate_html = "https://validator.w3.org/check" + "?ss=1&uri=%(uri)s"
# Link template for the W3C CSS validator; the remaining options are the
# validator's defaults.  %(uri)s is replaced by the page to validate.
validate_css = "".join(
    (
        "https://jigsaw.w3.org/css-validator/validator",
        "?uri=%(uri)s",
        "&warning=1",
        "&profile=css2",
        "&usermedium=all",
    )
)

# Page prologue of the HTML report: doctype, <head> with an inline
# stylesheet, and the opening <body> tag.  This is a %-format template with
# named placeholders (encoding, title, body, link, vlink, alink, url, valid,
# error, warning); HtmlLogger.start_output() supplies the values.
HTML_HEADER = """<!DOCTYPE HTML>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=%(encoding)s"/>
<title>%(title)s</title>
<style type="text/css">
<!--
 h2 { font-family: Verdana,sans-serif; font-size: 22pt; font-weight: bold; }
 body { font-family: Arial,sans-serif; font-size: 11pt; background-color: %(body)s; }
 td { font-family: Arial,sans-serif; font-size: 11pt; }
 code { font-family: Courier; }
 a:link {color: %(link)s;}
 a:visited {color: %(vlink)s;}
 a:active {color: %(alink)s;}
 a:hover { color: #34a4ef; }
 table { border-collapse:collapse; }
 table, th, td { border: 1px solid black; padding: 2px; }
 td.url { background-color: %(url)s }
 td.valid { background-color: %(valid)s }
 td.error { background-color: %(error)s }
 td.warning { background-color: %(warning)s }
-->
</style>
</head>
<body>
"""


class HtmlLogger(_Logger):
    """Logger with HTML output.

    The report starts with the ``HTML_HEADER`` prologue, renders every
    logged URL as its own ``<table>`` (one row per enabled output part) and
    ends with optional statistics and an outro that closes the document.
    """

    LoggerName = 'html'

    # Default logger arguments: output file name and the colors used by the
    # report.  Note that ``colorborder`` is stored on the instance but the
    # header template in this module does not reference it.
    LoggerArgs = {
        "filename": "linkchecker-out.html",
        'colorbackground': '#fff7e5',
        'colorurl': '#dcd5cf',
        'colorborder': '#000000',
        'colorlink': '#191c83',
        'colorwarning': '#e0954e',
        'colorerror': '#db4930',
        'colorok': '#3ba557',
    }

    def __init__(self, **kwargs):
        """Initialize default HTML color values.

        ``kwargs`` are passed through ``get_args()``; the resulting mapping
        is handed to the base class and to ``init_fileoutput()`` and
        provides the color settings stored on the instance.
        """
        args = self.get_args(kwargs)
        super().__init__(**args)
        self.init_fileoutput(args)
        self.colorbackground = args['colorbackground']
        self.colorurl = args['colorurl']
        self.colorborder = args['colorborder']
        self.colorlink = args['colorlink']
        self.colorwarning = args['colorwarning']
        self.colorerror = args['colorerror']
        self.colorok = args['colorok']

    def part(self, name):
        """Return non-space-breakable part name.

        The base-class part name with every space replaced by ``&nbsp;`` so
        the label does not wrap inside its table cell.
        """
        return super().part(name).replace(" ", "&nbsp;")

    def comment(self, s, **args):
        """Write HTML comment.

        ``s`` is written between ``<!-- `` and `` -->``; ``args`` are
        forwarded to ``write()``.
        """
        self.write("<!-- ")
        self.write(s, **args)
        self.write(" -->")

    def start_output(self):
        """Write start of checking info.

        Emits the filled-in ``HTML_HEADER``, a "Generated by" comment and,
        when the ``intro`` part is enabled, the application heading, the
        opening ``<blockquote>`` and the start time.
        """
        super().start_output()
        header = {
            "encoding": self.get_charset_encoding(),
            "title": configuration.App,
            "body": self.colorbackground,
            # All three link states share the single configured link color.
            "link": self.colorlink,
            "vlink": self.colorlink,
            "alink": self.colorlink,
            "url": self.colorurl,
            "error": self.colorerror,
            "valid": self.colorok,
            "warning": self.colorwarning,
        }
        self.write(HTML_HEADER % header)
        self.comment("Generated by %s" % configuration.App)
        if self.has_part('intro'):
            self.write(
                "<h2>"
                + configuration.App
                + "</h2><br/><blockquote>"
                + configuration.Freeware
                + "<br/><br/>"
                + (_("Start checking at %s") % strformat.strtime(self.starttime))
                + os.linesep
                + "<br/>"
            )
            self.check_date()
        self.flush()

    def log_url(self, url_data):
        """Write url checking info as HTML.

        Writes one table for ``url_data``.  A row is emitted only when its
        output part is enabled and, for the optional rows, when the
        corresponding ``url_data`` attribute holds a value.
        """
        self.write_table_start()
        if self.has_part("url"):
            self.write_url(url_data)
        if url_data.name and self.has_part("name"):
            self.write_name(url_data)
        if url_data.parent_url and self.has_part("parenturl"):
            self.write_parent(url_data)
        if url_data.base_ref and self.has_part("base"):
            self.write_base(url_data)
        if url_data.url and self.has_part("realurl"):
            self.write_real(url_data)
        if url_data.dltime >= 0 and self.has_part("dltime"):
            self.write_dltime(url_data)
        if url_data.size >= 0 and self.has_part("dlsize"):
            self.write_size(url_data)
        if url_data.checktime and self.has_part("checktime"):
            self.write_checktime(url_data)
        if url_data.info and self.has_part("info"):
            self.write_info(url_data)
        if url_data.modified and self.has_part("modified"):
            self.write_modified(url_data)
        if url_data.warnings and self.has_part("warning"):
            self.write_warning(url_data)
        if self.has_part("result"):
            self.write_result(url_data)
        self.write_table_end()
        self.flush()

    def write_table_start(self):
        """Start html table."""
        self.writeln('<br/><br/><table>')

    def write_table_end(self):
        """End html table."""
        self.write('</table><br/>')

    def write_id(self):
        """Write ID for current URL (the current ``self.stats.number``)."""
        self.writeln("<tr>")
        self.writeln('<td>%s</td>' % self.part("id"))
        self.write("<td>%d</td></tr>" % self.stats.number)

    def write_url(self, url_data):
        """Write url_data.base_url, HTML-escaped, in a ``url``-styled row."""
        self.writeln("<tr>")
        self.writeln('<td class="url">%s</td>' % self.part("url"))
        self.write('<td class="url">')
        self.write("`%s'" % html.escape(url_data.base_url))
        self.writeln("</td></tr>")

    def write_name(self, url_data):
        """Write url_data.name, HTML-escaped."""
        args = (self.part("name"), html.escape(url_data.name))
        self.writeln("<tr><td>%s</td><td>`%s'</td></tr>" % args)

    def write_parent(self, url_data):
        """Write url_data.parent_url.

        The parent URL is written as a link, followed by the line, column
        and page position where available.  For invalid URLs, links to the
        W3C HTML and CSS validators for the parent page are appended.
        """
        self.write(
            "<tr><td>"
            + self.part("parenturl")
            + '</td><td><a target="top" href="'
            + html.escape(url_data.parent_url)
            + '">'
            + html.escape(url_data.parent_url)
            + "</a>"
        )
        if url_data.line is not None:
            self.write(_(", line %d") % url_data.line)
        if url_data.column is not None:
            self.write(_(", col %d") % url_data.column)
        if url_data.page > 0:
            self.write(_(", page %d") % url_data.page)
        if not url_data.valid:
            # On errors show HTML and CSS validation for the parent url.
            # The parent URL becomes the value of a query parameter, so it
            # is percent-encoded first; otherwise "&" or "#" inside it would
            # cut the validator link short.  The finished link is then
            # HTML-escaped because it is placed in a double-quoted href.
            quoted_parent = urllib.parse.quote(url_data.parent_url, safe="")
            vhtml = html.escape(validate_html % {'uri': quoted_parent})
            vcss = html.escape(validate_css % {'uri': quoted_parent})
            self.writeln()
            self.writeln('(<a href="' + vhtml + '">HTML</a>)')
            self.write('(<a href="' + vcss + '">CSS</a>)')
        self.writeln("</td></tr>")

    def write_base(self, url_data):
        """Write url_data.base_ref, HTML-escaped."""
        self.writeln(
            "<tr><td>"
            + self.part("base")
            + "</td><td>"
            + html.escape(url_data.base_ref)
            + "</td></tr>"
        )

    def write_real(self, url_data):
        """Write url_data.url as a link, HTML-escaped."""
        self.writeln(
            "<tr><td>"
            + self.part("realurl")
            + "</td><td>"
            + '<a target="top" href="'
            + html.escape(url_data.url)
            + '">'
            + html.escape(url_data.url)
            + "</a></td></tr>"
        )

    def write_dltime(self, url_data):
        """Write url_data.dltime formatted as seconds with three decimals."""
        self.writeln(
            "<tr><td>"
            + self.part("dltime")
            + "</td><td>"
            + (_("%.3f seconds") % url_data.dltime)
            + "</td></tr>"
        )

    def write_size(self, url_data):
        """Write url_data.size formatted by ``strformat.strsize()``."""
        self.writeln(
            "<tr><td>"
            + self.part("dlsize")
            + "</td><td>"
            + strformat.strsize(url_data.size)
            + "</td></tr>"
        )

    def write_checktime(self, url_data):
        """Write url_data.checktime formatted as seconds with three decimals."""
        self.writeln(
            "<tr><td>"
            + self.part("checktime")
            + "</td><td>"
            + (_("%.3f seconds") % url_data.checktime)
            + "</td></tr>"
        )

    def write_info(self, url_data):
        """Write url_data.info, one HTML-escaped entry per line."""
        sep = "<br/>" + os.linesep
        text = sep.join(html.escape(x) for x in url_data.info)
        self.writeln(
            '<tr><td valign="top">'
            + self.part("info")
            + "</td><td>"
            + text
            + "</td></tr>"
        )

    def write_modified(self, url_data):
        """Write url_data.modified, formatted by ``format_modified()``."""
        text = html.escape(self.format_modified(url_data.modified))
        self.writeln(
            '<tr><td valign="top">'
            + self.part("modified")
            + "</td><td>"
            + text
            + "</td></tr>"
        )

    def write_warning(self, url_data):
        """Write url_data.warnings.

        Only the second element of each warning entry is shown,
        HTML-escaped, one entry per line.
        """
        sep = "<br/>" + os.linesep
        text = sep.join(html.escape(x[1]) for x in url_data.warnings)
        self.writeln(
            '<tr><td class="warning" valign="top">'
            + self.part("warning")
            + '</td><td class="warning">'
            + text
            + "</td></tr>"
        )

    def write_result(self, url_data):
        """Write url_data.result.

        The row is styled ``valid`` or ``error`` according to
        ``url_data.valid``; a non-empty ``url_data.result`` is appended
        after a colon.
        """
        if url_data.valid:
            self.write('<tr><td class="valid">')
            self.write(self.part("result"))
            self.write('</td><td class="valid">')
            self.write(html.escape(_("Valid")))
        else:
            self.write('<tr><td class="error">')
            self.write(self.part("result"))
            self.write('</td><td class="error">')
            self.write(html.escape(_("Error")))
        if url_data.result:
            self.write(": " + html.escape(url_data.result))
        self.writeln("</td></tr>")

    def write_stats(self):
        """Write check statistic infos.

        Content-type counts and URL length figures are written when at
        least one URL was checked; otherwise a notice is written instead.
        """
        self.writeln('<br/><i>%s</i><br/>' % _("Statistics"))
        if self.stats.number > 0:
            self.writeln(
                _(
                    "Content types: %(image)d image, %(text)d text, %(video)d video, "
                    "%(audio)d audio, %(application)d application, %(mail)d mail"
                    " and %(other)d other."
                )
                % self.stats.link_types
            )
            self.writeln("<br/>")
            self.writeln(
                _("URL lengths: min=%(min)d, max=%(max)d, avg=%(avg)d.")
                % dict(
                    min=self.stats.min_url_length,
                    max=self.stats.max_url_length,
                    avg=self.stats.avg_url_length,
                )
            )
        else:
            self.writeln(_("No statistics available since no URLs were checked."))
        self.writeln("<br/>")

    def write_outro(self):
        """Write end of check message.

        Summarizes checked links, warnings, errors and internal errors,
        records ``self.stoptime``, writes the stop time and duration, and
        closes the ``<blockquote>``, ``<body>`` and ``<html>`` elements.
        """
        self.writeln("<br/>")
        self.write(_("That's it.") + " ")
        if self.stats.number >= 0:
            self.write(
                _n("%d link checked.", "%d links checked.", self.stats.number)
                % self.stats.number
            )
            self.write(" ")
        self.write(
            _n("%d warning found", "%d warnings found", self.stats.warnings_printed)
            % self.stats.warnings_printed
        )
        if self.stats.warnings != self.stats.warnings_printed:
            self.write(
                _(" (%d ignored or duplicates not printed)")
                % (self.stats.warnings - self.stats.warnings_printed)
            )
        self.write(". ")
        self.write(
            _n("%d error found", "%d errors found", self.stats.errors_printed)
            % self.stats.errors_printed
        )
        if self.stats.errors != self.stats.errors_printed:
            self.write(
                _(" (%d duplicates not printed)")
                % (self.stats.errors - self.stats.errors_printed)
            )
        self.writeln(".")
        self.writeln("<br/>")
        num = self.stats.internal_errors
        if num:
            self.write(
                _n(
                    "There was %(num)d internal error.",
                    "There were %(num)d internal errors.",
                    num,
                )
                % {"num": num}
            )
            self.writeln("<br/>")
        self.stoptime = time.time()
        duration = self.stoptime - self.starttime
        self.writeln(
            _("Stopped checking at %(time)s (%(duration)s)")
            % {
                "time": strformat.strtime(self.stoptime),
                "duration": strformat.strduration_long(duration),
            }
        )
        self.writeln(
            '</blockquote><br/><hr><small>' + configuration.HtmlAppInfo + "<br/>"
        )
        self.writeln(
            _("Read the documentation at %s")
            % (
                '<a href="'
                + configuration.Url
                + '" target="_top">'
                + configuration.Url
                + "</a>.<br/>"
            )
        )
        self.writeln(
            _("Write comments and bugs to %s")
            % (
                '<a href="'
                + configuration.SupportUrl
                + '">'
                + configuration.SupportUrl
                + "</a>.<br/>"
            )
        )
        self.writeln("</small></body></html>")

    def end_output(self, **kwargs):
        """Write end of checking info as HTML.

        Writes the statistics and the outro when those parts are enabled,
        then closes the file output.  ``kwargs`` are accepted but not used
        here.
        """
        if self.has_part("stats"):
            self.write_stats()
        if self.has_part("outro"):
            self.write_outro()
        self.close_fileoutput()