Implement XML-based reports

python · Jun 30, 2015 · e61e781 · e61e781
1 parent 8bb09b3
commit e61e781
Show file tree

Hide file tree

Showing 16 changed files with 635 additions and 18 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,4 @@ __pycache__
 /build
 /env
 docs/build/
+/out/
diff --git a/lib-typing/3.2/typing.py b/lib-typing/3.2/typing.py
@@ -51,6 +51,8 @@
     # Structural checks, a.k.a. protocols.
     'Reversible',
     'SupportsAbs',
+    'SupportsBytes',
+    'SupportsComplex',
     'SupportsFloat',
     'SupportsInt',
     'SupportsRound',

diff --git a/lxml/__init__.pyi b/lxml/__init__.pyi
diff --git a/lxml/etree.pyi b/lxml/etree.pyi
@@ -0,0 +1,102 @@
+# Hand-written stub for lxml.etree as used by mypy.report.
+# This is *far* from complete, and the stubgen-generated ones crash mypy.
+# Any use of `Any` below means I couldn't figure out the type.
+
+import typing
+from typing import Any, Dict, List, Tuple, Union
+from typing import SupportsBytes
+
+
+# We do *not* want `typing.AnyStr` because it is a `TypeVar`, which is an
+# unnecessary constraint. It seems reasonable to constrain each
+# List/Dict argument to use one type consistently, though, and it is
+# necessary in order to keep these brief.
+# Either text or bytes; containers below use one of the two consistently.
+AnyStr = Union[str, bytes]
+ListAnyStr = Union[List[str], List[bytes]]
+DictAnyStr = Union[Dict[str, str], Dict[bytes, bytes]]
+# A dict keyed by a pair of strings, again all-str or all-bytes. The second
+# alternative must itself be a Dict; a bare Tuple/Any member would make the
+# alias accept far more than intended.
+Dict_Tuple2AnyStr_Any = Union[Dict[Tuple[str, str], Any], Dict[Tuple[bytes, bytes], Any]]
+
+
+# Element node type returned by Element(), SubElement() and
+# ProcessingInstruction(). Only `addprevious`, the one method mypy.report
+# calls on elements, is declared.
+class _Element:
+    def addprevious(self, element: '_Element') -> None:
+        pass
+
+# Document type returned by ElementTree() and parse(). Only `write` is
+# declared; `file` may be a path (str or bytes) or an open file object.
+class _ElementTree:
+    def write(self,
+              file: Union[AnyStr, typing.IO],
+              encoding: AnyStr = None,
+              method: AnyStr = "xml",
+              pretty_print: bool = False,
+              xml_declaration: Any = None,
+              with_tail: Any = True,
+              standalone: bool = None,
+              compression: int = 0,
+              exclusive: bool = False,
+              with_comments: bool = True,
+              inclusive_ns_prefixes: ListAnyStr = None) -> None:
+        pass
+
+# Result of calling an XSLT object. Declared as SupportsBytes so that
+# `bytes(result)` type-checks.
+class _XSLTResultTree(SupportsBytes):
+    pass
+
+# Opaque value returned by XSLT.strparam() and accepted as a keyword
+# argument when calling an XSLT object.
+class _XSLTQuotedStringParam:
+    pass
+
+# Opaque parser type; only referenced as the optional `parser` argument of
+# ElementTree() and parse().
+class XMLParser:
+    pass
+
+# Schema object constructed from an element/tree or from a file.
+# NOTE(review): assertValid() is typed as returning None; presumably an
+# invalid document is signalled by an exception -- confirm in the lxml docs.
+class XMLSchema:
+    def __init__(self,
+                 etree: Union[_Element, _ElementTree] = None,
+                 file: Union[AnyStr, typing.IO] = None) -> None:
+        pass
+
+    def assertValid(self,
+                    etree: Union[_Element, _ElementTree]) -> None:
+        pass
+
+# Opaque type; only referenced as the optional `access_control` argument
+# of XSLT().
+class XSLTAccessControl:
+    pass
+
+# Stylesheet object built from an element/tree. Calling an instance on an
+# element/tree returns an _XSLTResultTree; extra keyword arguments are
+# either strings or values produced by strparam().
+class XSLT:
+    def __init__(self,
+                 xslt_input: Union[_Element, _ElementTree],
+                 extensions: Dict_Tuple2AnyStr_Any = None,
+                 regexp: bool = True,
+                 access_control: XSLTAccessControl = None) -> None:
+        pass
+
+    def __call__(self,
+                 _input: Union[_Element, _ElementTree],
+                 profile_run: bool = False,
+                 **kwargs: Union[AnyStr, _XSLTQuotedStringParam]) -> _XSLTResultTree:
+        pass
+
+    @staticmethod
+    def strparam(s: AnyStr) -> _XSLTQuotedStringParam:
+        pass
+
+# Factory for a new element. `attrib` and any `extra` keyword arguments
+# carry string values; `nsmap` is a str->str (or bytes->bytes) mapping.
+def Element(_tag: AnyStr,
+            attrib: DictAnyStr = None,
+            nsmap: DictAnyStr = None,
+            **extra: AnyStr) -> _Element:
+    pass
+
+# Same arguments as Element(), plus the `_parent` element the new element
+# is created for; returns the new element.
+def SubElement(_parent: _Element, _tag: AnyStr,
+               attrib: DictAnyStr = None,
+               nsmap: DictAnyStr = None,
+               **extra: AnyStr) -> _Element:
+    pass
+
+# Factory for an _ElementTree, from an existing root element and/or a file
+# (path or file object). All arguments are optional.
+def ElementTree(element: _Element = None,
+                file: Union[AnyStr, typing.IO] = None,
+                parser: XMLParser = None) -> _ElementTree:
+    pass
+
+# Factory for a processing-instruction node; typed as returning _Element so
+# the result can be passed to _Element.addprevious().
+def ProcessingInstruction(target: AnyStr, text: AnyStr = None) -> _Element:
+    pass
+
+# Reads `source` (a path or an open file object) and returns the resulting
+# _ElementTree. `parser` and `base_url` are optional.
+def parse(source: Union[AnyStr, typing.IO],
+          parser: XMLParser = None,
+          base_url: AnyStr = None) -> _ElementTree:
+    pass
diff --git a/mypy/build.py b/mypy/build.py
@@ -145,7 +145,11 @@ def build(program_path: str,
     if alt_lib_path:
         lib_path.insert(0, alt_lib_path)
 
-    reports = Reports(data_dir, report_dirs)
+    program_path = program_path or lookup_program(module, lib_path)
+    if program_text is None:
+        program_text = read_program(program_path)
+
+    reports = Reports(program_path, data_dir, report_dirs)
 
     # Construct a build manager object that performs all the stages of the
     # build in the correct order.
@@ -157,10 +161,6 @@ def build(program_path: str,
                            custom_typing_module=custom_typing_module,
                            reports=reports)
 
-    program_path = program_path or lookup_program(module, lib_path)
-    if program_text is None:
-        program_text = read_program(program_path)
-
     # Construct information that describes the initial file. __main__ is the
     # implicit module id and the import context is empty initially ([]).
     info = StateInfo(program_path, module, [], manager)

diff --git a/mypy/report.py b/mypy/report.py
@@ -1,8 +1,11 @@
 """Classes for producing HTML reports about imprecision."""
 
 from abc import ABCMeta, abstractmethod
+import cgi
+import os
+import shutil
 
-from typing import Callable, Dict, List
+from typing import Callable, Dict, List, cast
 
 from mypy.types import Type
 from mypy.nodes import MypyFile, Node
@@ -13,17 +16,25 @@
 
 
 class Reports:
-    def __init__(self, data_dir: str, report_dirs: Dict[str, str]) -> None:
+    def __init__(self, main_file: str, data_dir: str, report_dirs: Dict[str, str]) -> None:
+        """Create one reporter for each entry of report_dirs.
+
+        main_file and data_dir are stored for the reporters to read;
+        report_dirs maps a report type to the directory passed to that
+        reporter.
+        """
+        self.main_file = main_file
         self.data_dir = data_dir
         self.reporters = [] # type: List[AbstractReporter]
+        # Reporters keyed by report type, so add_report() can reuse them.
+        self.named_reporters = {} # type: Dict[str, AbstractReporter]
 
+        # Iterate in sorted order so reporters are created deterministically.
         for report_type, report_dir in sorted(report_dirs.items()):
             self.add_report(report_type, report_dir)
 
     def add_report(self, report_type: str, report_dir: str) -> 'AbstractReporter':
+        """Return the reporter for report_type, creating it on first request.
+
+        If a reporter of this type already exists it is returned unchanged
+        and report_dir is ignored. Otherwise the class registered in
+        reporter_classes is instantiated, appended to self.reporters and
+        remembered under report_type.
+        """
+        try:
+            return self.named_reporters[report_type]
+        except KeyError:
+            pass
         reporter_cls = reporter_classes[report_type]
         reporter = reporter_cls(self, report_dir)
         self.reporters.append(reporter)
+        self.named_reporters[report_type] = reporter
+        return reporter
 
     def file(self, tree: MypyFile, type_map: Dict[Node, Type]) -> None:
         for reporter in self.reporters:
@@ -60,4 +71,206 @@ def on_finish(self) -> None:
         stats.generate_html_index(self.output_dir)
 reporter_classes['old-html'] = OldHtmlReporter
 
-reporter_classes['html'] = reporter_classes['old-html']
+class FileInfo:
+    """Line tally for one source file, one counter per precision name."""
+
+    def __init__(self, name: str, module: str) -> None:
+        self.name = name
+        self.module = module
+        # One zeroed slot for every entry of stats.precision_names.
+        self.counts = [0 for _ in stats.precision_names]
+
+    def total(self) -> int:
+        """Return the number of lines counted across all categories."""
+        line_count = 0
+        for category_count in self.counts:
+            line_count += category_count
+        return line_count
+
+    def attrib(self) -> Dict[str, str]:
+        """Return the counters as a precision-name -> decimal-string dict."""
+        return dict(zip(stats.precision_names, map(str, self.counts)))
+
+
+class MemoryXmlReporter(AbstractReporter):
+    """Internal reporter that generates XML in memory.
+
+    This is used by all other XML-based reporters to avoid duplication.
+
+    After on_file(), last_xml holds the document for that file, or None if
+    the file was skipped. After on_finish(), it holds the index document.
+    """
+
+    def __init__(self, reports: Reports, output_dir: str) -> None:
+        import lxml.etree as etree
+
+        super().__init__(reports, output_dir)
+
+        self.main_file = reports.main_file
+        # Stylesheets, CSS and the schema are all read from <data_dir>/xml.
+        self.xslt_html_path = os.path.join(reports.data_dir, 'xml', 'mypy-html.xslt')
+        self.xslt_txt_path = os.path.join(reports.data_dir, 'xml', 'mypy-txt.xslt')
+        self.css_html_path = os.path.join(reports.data_dir, 'xml', 'mypy-html.css')
+        xsd_path = os.path.join(reports.data_dir, 'xml', 'mypy.xsd')
+        self.schema = etree.XMLSchema(etree.parse(xsd_path))
+        self.last_xml = None # type: etree._ElementTree
+        self.files = [] # type: List[FileInfo]
+
+    def on_file(self, tree: MypyFile, type_map: Dict[Node, Type]) -> None:
+        """Build the per-line precision document for one source file.
+
+        The file is skipped (last_xml stays None) when
+        stats.is_special_module() accepts its path, when it lies outside the
+        current directory, or when one of its path components is 'stubs'.
+        """
+        import lxml.etree as etree
+
+        self.last_xml = None
+        path = os.path.relpath(tree.path)
+        if stats.is_special_module(path):
+            return
+        if path.startswith('..'):
+            return
+        # relpath() returns os.sep-separated components, so split on os.sep
+        # rather than on a hard-coded '/'.
+        if 'stubs' in path.split(os.sep):
+            return
+
+        visitor = stats.StatisticsVisitor(inferred=True, typemap=type_map, all_nodes=True)
+        tree.accept(visitor)
+
+        root = etree.Element('mypy-report-file', name=path, module=tree._fullname)
+        doc = etree.ElementTree(root)
+        file_info = FileInfo(path, tree._fullname)
+
+        with open(path) as input_file:
+            for lineno, line_text in enumerate(input_file, 1):
+                status = visitor.line_map.get(lineno, stats.TYPE_EMPTY)
+                file_info.counts[status] += 1
+                # Strip only the newline: the last line of a file may not
+                # have one, and slicing off a character would truncate it.
+                etree.SubElement(root, 'line',
+                                 number=str(lineno),
+                                 precision=stats.precision_names[status],
+                                 content=line_text.rstrip('\n'))
+        # Assumes a layout similar to what XmlReporter uses.
+        xslt_path = os.path.relpath('mypy-html.xslt', path)
+        xml_pi = etree.ProcessingInstruction('xml', 'version="1.0" encoding="utf-8"')
+        # NOTE(review): cgi.escape() was removed in Python 3.8;
+        # html.escape(s, quote=True) is its documented replacement.
+        transform_pi = etree.ProcessingInstruction('xml-stylesheet', 'type="text/xsl" href="%s"' % cgi.escape(xslt_path, True))
+        root.addprevious(xml_pi)
+        root.addprevious(transform_pi)
+        self.schema.assertValid(doc)
+
+        self.last_xml = doc
+        self.files.append(file_info)
+
+    def on_finish(self) -> None:
+        """Build the index document listing every reported file by module."""
+        import lxml.etree as etree
+
+        self.last_xml = None
+        output_files = sorted(self.files, key=lambda x: x.module)
+
+        root = etree.Element('mypy-report-index', name=self.main_file)
+        doc = etree.ElementTree(root)
+
+        for file_info in output_files:
+            etree.SubElement(root, 'file',
+                             file_info.attrib(),
+                             total=str(file_info.total()),
+                             name=file_info.name,
+                             module=file_info.module)
+        xslt_path = os.path.relpath('mypy-html.xslt', '.')
+        xml_pi = etree.ProcessingInstruction('xml', 'version="1.0" encoding="utf-8"')
+        transform_pi = etree.ProcessingInstruction('xml-stylesheet', 'type="text/xsl" href="%s"' % cgi.escape(xslt_path, True))
+        root.addprevious(xml_pi)
+        root.addprevious(transform_pi)
+        self.schema.assertValid(doc)
+
+        self.last_xml = doc
+
+reporter_classes['memory-xml'] = MemoryXmlReporter
+
+class AbstractXmlReporter(AbstractReporter):
+    """Shared base for reporters that consume MemoryXmlReporter's documents."""
+
+    def __init__(self, reports: Reports, output_dir: str) -> None:
+        super().__init__(reports, output_dir)
+
+        # Requesting the in-memory reporter here puts it in reports.reporters
+        # ahead of this reporter, so its callbacks run before ours.
+        self.memory_xml = cast(MemoryXmlReporter,
+                               reports.add_report('memory-xml', '<memory>'))
+
+class XmlReporter(AbstractXmlReporter):
+    """Public reporter that exports XML.
+
+    The produced XML files contain a reference to the relative path
+    of the html transform, which is copied into the output directory,
+    so they will be locally viewable in a browser.
+
+    However, there is a bug in Chrome and all other WebKit-based browsers
+    that makes it fail from file:// URLs but work on http:// URLs.
+    """
+
+    def on_file(self, tree: MypyFile, type_map: Dict[Node, Type]) -> None:
+        """Write <output_dir>/xml/<path>.xml for the file just processed."""
+        last_xml = self.memory_xml.last_xml
+        if last_xml is None:
+            return
+        # Use the same cwd-relative path MemoryXmlReporter computed its
+        # stylesheet link from. An absolute tree.path would also make
+        # os.path.join() discard output_dir entirely. Paths outside the
+        # current directory never get here: last_xml is None for them.
+        path = os.path.relpath(tree.path)
+        out_path = os.path.join(self.output_dir, 'xml', path + '.xml')
+        stats.ensure_dir_exists(os.path.dirname(out_path))
+        last_xml.write(out_path, encoding='utf-8')
+
+    def on_finish(self) -> None:
+        """Write index.xml and copy the stylesheet and CSS next to it."""
+        last_xml = self.memory_xml.last_xml
+        out_path = os.path.join(self.output_dir, 'index.xml')
+        out_xslt = os.path.join(self.output_dir, 'mypy-html.xslt')
+        out_css = os.path.join(self.output_dir, 'mypy-html.css')
+        # No per-file output may have been written, so the directory is not
+        # guaranteed to exist yet (XsltTxtReporter does the same).
+        stats.ensure_dir_exists(os.path.dirname(out_path))
+        last_xml.write(out_path, encoding='utf-8')
+        shutil.copyfile(self.memory_xml.xslt_html_path, out_xslt)
+        shutil.copyfile(self.memory_xml.css_html_path, out_css)
+        print('Generated XML report:', os.path.abspath(out_path))
+
+reporter_classes['xml'] = XmlReporter
+
+class XsltHtmlReporter(AbstractXmlReporter):
+    """Public reporter that exports HTML via XSLT.
+
+    This is slightly different than running `xsltproc` on the .xml files,
+    because it passes a parameter to rewrite the links.
+    """
+
+    def __init__(self, reports: Reports, output_dir: str) -> None:
+        import lxml.etree as etree
+
+        super().__init__(reports, output_dir)
+
+        self.xslt_html = etree.XSLT(etree.parse(self.memory_xml.xslt_html_path))
+        # Passed as the 'ext' keyword argument on every transform below.
+        self.param_html = etree.XSLT.strparam('html')
+
+    def on_file(self, tree: MypyFile, type_map: Dict[Node, Type]) -> None:
+        """Write <output_dir>/html/<path>.html for the file just processed."""
+        last_xml = self.memory_xml.last_xml
+        if last_xml is None:
+            return
+        # Use the cwd-relative path, as MemoryXmlReporter does. An absolute
+        # tree.path would make os.path.join() discard output_dir. Paths
+        # outside the current directory never get here: last_xml is None.
+        path = os.path.relpath(tree.path)
+        out_path = os.path.join(self.output_dir, 'html', path + '.html')
+        stats.ensure_dir_exists(os.path.dirname(out_path))
+        transformed_html = bytes(self.xslt_html(last_xml, ext=self.param_html))
+        with open(out_path, 'wb') as out_file:
+            out_file.write(transformed_html)
+
+    def on_finish(self) -> None:
+        """Write index.html and copy the CSS next to it."""
+        last_xml = self.memory_xml.last_xml
+        out_path = os.path.join(self.output_dir, 'index.html')
+        out_css = os.path.join(self.output_dir, 'mypy-html.css')
+        # No per-file output may have been written, so the directory is not
+        # guaranteed to exist yet (XsltTxtReporter does the same).
+        stats.ensure_dir_exists(os.path.dirname(out_path))
+        transformed_html = bytes(self.xslt_html(last_xml, ext=self.param_html))
+        with open(out_path, 'wb') as out_file:
+            out_file.write(transformed_html)
+        shutil.copyfile(self.memory_xml.css_html_path, out_css)
+        print('Generated HTML report (via XSLT):', os.path.abspath(out_path))
+
+reporter_classes['xslt-html'] = XsltHtmlReporter
+
+class XsltTxtReporter(AbstractXmlReporter):
+    """Public reporter that exports TXT via XSLT.
+
+    Only the overall summary is produced; no per-file report is written.
+    """
+
+    def __init__(self, reports: Reports, output_dir: str) -> None:
+        import lxml.etree as etree
+
+        super().__init__(reports, output_dir)
+
+        stylesheet = etree.parse(self.memory_xml.xslt_txt_path)
+        self.xslt_txt = etree.XSLT(stylesheet)
+
+    def on_file(self, tree: MypyFile, type_map: Dict[Node, Type]) -> None:
+        # Nothing to do per file: the text report is the summary only.
+        pass
+
+    def on_finish(self) -> None:
+        """Transform the index document to text and write index.txt."""
+        index_doc = self.memory_xml.last_xml
+        txt_path = os.path.join(self.output_dir, 'index.txt')
+        stats.ensure_dir_exists(os.path.dirname(txt_path))
+        summary = str(self.xslt_txt(index_doc))
+        with open(txt_path, 'wt', encoding='utf-8') as sink:
+            sink.write(summary)
+        print('Generated TXT report (via XSLT):', os.path.abspath(txt_path))
+
+reporter_classes['xslt-txt'] = XsltTxtReporter
+
+# The plain 'html' and 'txt' report types resolve to the XSLT-based reporters.
+reporter_classes['html'] = reporter_classes['xslt-html']
+reporter_classes['txt'] = reporter_classes['xslt-txt']
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,4 @@ __pycache__ @@
     /build
     /env
     docs/build/
+    /out/