"""Utilities parsing and analyzing Python code."""
import re
import tokenize
import warnings
from collections import OrderedDict
from importlib import import_module
from inspect import Signature
from io import StringIO
from os import path
from typing import IO, Any, Dict, List, Optional, Tuple
from zipfile import ZipFile
from sphinx.deprecation import RemovedInSphinx50Warning
from sphinx.errors import PycodeError
from sphinx.pycode.parser import Parser
class ModuleAnalyzer:
annotations: Dict[Tuple[str, str], str]
attr_docs: Dict[Tuple[str, str], List[str]]
finals: List[str]
overloads: Dict[str, List[Signature]]
tagorder: Dict[str, int]
tags: Dict[str, Tuple[str, int, int]]
# cache for analyzer objects -- caches both by module and file name
cache: Dict[Tuple[str, str], Any] = {}
@staticmethod
def get_module_source(modname: str) -> Tuple[Optional[str], Optional[str]]:
"""Try to find the source code for a module.
Returns ('filename', 'source'). One of it can be None if
no filename or source found
"""
try:
mod = import_module(modname)
except Exception as err:
raise PycodeError('error importing %r' % modname, err) from err
loader = getattr(mod, '__loader__', None)
filename = getattr(mod, '__file__', None)
if loader and getattr(loader, 'get_source', None):
# prefer Native loader, as it respects #coding directive
try:
source = loader.get_source(modname)
if source:
# no exception and not None - it must be module source
return filename, source
except ImportError:
pass # Try other "source-mining" methods
if filename is None and loader and getattr(loader, 'get_filename', None):
# have loader, but no filename
try:
filename = loader.get_filename(modname)
except ImportError as err:
raise PycodeError('error getting filename for %r' % modname, err) from err
if filename is None:
# all methods for getting filename failed, so raise...
raise PycodeError('no source found for module %r' % modname)
filename = path.normpath(path.abspath(filename))
if filename.lower().endswith(('.pyo', '.pyc')):
filename = filename[:-1]
if not path.isfile(filename) and path.isfile(filename + 'w'):
filename += 'w'
elif not filename.lower().endswith(('.py', '.pyw')):
raise PycodeError('source is not a .py file: %r' % filename)
elif ('.egg' + path.sep) in filename:
pat = '(?<=\\.egg)' + re.escape(path.sep)
eggpath, _ = re.split(pat, filename, 1)
if path.isfile(eggpath):
return filename, None
if not path.isfile(filename):
raise PycodeError('source file is not present: %r' % filename)
return filename, None
@classmethod
def for_string(cls, string: str, modname: str, srcname: str = '<string>'
) -> "ModuleAnalyzer":
return cls(StringIO(string), modname, srcname)
@classmethod
def for_file(cls, filename: str, modname: str) -> "ModuleAnalyzer":
if ('file', filename) in cls.cache:
return cls.cache['file', filename]
try:
with tokenize.open(filename) as f:
obj = cls(f, modname, filename)
cls.cache['file', filename] = obj
except Exception as err:
if '.egg' + path.sep in filename:
obj = cls.cache['file', filename] = cls.for_egg(filename, modname)
else:
raise PycodeError('error opening %r' % filename, err) from err
return obj
@classmethod
def for_egg(cls, filename: str, modname: str) -> "ModuleAnalyzer":
SEP = re.escape(path.sep)
eggpath, relpath = re.split('(?<=\\.egg)' + SEP, filename)
try:
with ZipFile(eggpath) as egg:
code = egg.read(relpath).decode()
return cls.for_string(code, modname, filename)
except Exception as exc:
raise PycodeError('error opening %r' % filename, exc) from exc
@classmethod
def for_module(cls, modname: str) -> "ModuleAnalyzer":
if ('module', modname) in cls.cache:
entry = cls.cache['module', modname]
if isinstance(entry, PycodeError):
raise entry
return entry
try:
filename, source = cls.get_module_source(modname)
if source is not None:
obj = cls.for_string(source, modname, filename or '<string>')
elif filename is not None:
obj = cls.for_file(filename, modname)
except PycodeError as err:
cls.cache['module', modname] = err
raise
cls.cache['module', modname] = obj
return obj
def __init__(self, source: IO, modname: str, srcname: str) -> None:
self.modname = modname # name of the module
self.srcname = srcname # name of the source file
# cache the source code as well
self.code = source.read()
self._analyzed = False
def parse(self) -> None:
"""Parse the source code."""
warnings.warn('ModuleAnalyzer.parse() is deprecated.',
RemovedInSphinx50Warning, stacklevel=2)
self.analyze()
def analyze(self) -> None:
"""Analyze the source code."""
if self._analyzed:
return None
try:
parser = Parser(self.code)
parser.parse()
self.attr_docs = OrderedDict()
for (scope, comment) in parser.comments.items():
if comment:
self.attr_docs[scope] = comment.splitlines() + ['']
else:
self.attr_docs[scope] = ['']
self.annotations = parser.annotations
self.finals = parser.finals
self.overloads = parser.overloads
self.tags = parser.definitions
self.tagorder = parser.deforders
self._analyzed = True
except Exception as exc:
raise PycodeError('parsing %r failed: %r' % (self.srcname, exc)) from exc
def find_attr_docs(self) -> Dict[Tuple[str, str], List[str]]:
"""Find class and module-level attributes and their documentation."""
self.analyze()
return self.attr_docs
def find_tags(self) -> Dict[str, Tuple[str, int, int]]:
"""Find class, function and method definitions and their location."""
self.analyze()
return self.tags