"""
pygments.cmdline
~~~~~~~~~~~~~~~~
Command line interface.
:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import os
import sys
import shutil
import argparse
from textwrap import dedent
from pygments import __version__, highlight
from pygments.util import ClassNotFound, OptionError, docstring_headline, \
guess_decode, guess_decode_from_terminal, terminal_encoding, \
UnclosingTextIOWrapper
from pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \
load_lexer_from_file, get_lexer_for_filename, find_lexer_class_for_filename
from pygments.lexers.special import TextLexer
from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter
from pygments.formatters import get_all_formatters, get_formatter_by_name, \
load_formatter_from_file, get_formatter_for_filename, find_formatter_class
from pygments.formatters.terminal import TerminalFormatter
from pygments.formatters.terminal256 import Terminal256Formatter, TerminalTrueColorFormatter
from pygments.filters import get_all_filters, find_filter_class
from pygments.styles import get_all_styles, get_style_by_name
def _parse_options(o_strs):
opts = {}
if not o_strs:
return opts
for o_str in o_strs:
if not o_str.strip():
continue
o_args = o_str.split(',')
for o_arg in o_args:
o_arg = o_arg.strip()
try:
o_key, o_val = o_arg.split('=', 1)
o_key = o_key.strip()
o_val = o_val.strip()
except ValueError:
opts[o_arg] = True
else:
opts[o_key] = o_val
return opts
def _parse_filters(f_strs):
filters = []
if not f_strs:
return filters
for f_str in f_strs:
if ':' in f_str:
fname, fopts = f_str.split(':', 1)
filters.append((fname, _parse_options([fopts])))
else:
filters.append((f_str, {}))
return filters
def _print_help(what, name):
try:
if what == 'lexer':
cls = get_lexer_by_name(name)
print("Help on the %s lexer:" % cls.name)
print(dedent(cls.__doc__))
elif what == 'formatter':
cls = find_formatter_class(name)
print("Help on the %s formatter:" % cls.name)
print(dedent(cls.__doc__))
elif what == 'filter':
cls = find_filter_class(name)
print("Help on the %s filter:" % name)
print(dedent(cls.__doc__))
return 0
except (AttributeError, ValueError):
print("%s not found!" % what, file=sys.stderr)
return 1
def _print_list(what):
if what == 'lexer':
print()
print("Lexers:")
print("~~~~~~~")
info = []
for fullname, names, exts, _ in get_all_lexers():
tup = (', '.join(names)+':', fullname,
exts and '(filenames ' + ', '.join(exts) + ')' or '')
info.append(tup)
info.sort()
for i in info:
print(('* %s\n %s %s') % i)
elif what == 'formatter':
print()
print("Formatters:")
print("~~~~~~~~~~~")
info = []
for cls in get_all_formatters():
doc = docstring_headline(cls)
tup = (', '.join(cls.aliases) + ':', doc, cls.filenames and
'(filenames ' + ', '.join(cls.filenames) + ')' or '')
info.append(tup)
info.sort()
for i in info:
print(('* %s\n %s %s') % i)
elif what == 'filter':
print()
print("Filters:")
print("~~~~~~~~")
for name in get_all_filters():
cls = find_filter_class(name)
print("* " + name + ':')
print(" %s" % docstring_headline(cls))
elif what == 'style':
print()
print("Styles:")
print("~~~~~~~")
for name in get_all_styles():
cls = get_style_by_name(name)
print("* " + name + ':')
print(" %s" % docstring_headline(cls))
def _print_list_as_json(requested_items):
import json
result = {}
if 'lexer' in requested_items:
info = {}
for fullname, names, filenames, mimetypes in get_all_lexers():
info[fullname] = {
'aliases': names,
'filenames': filenames,
'mimetypes': mimetypes
}
result['lexers'] = info
if 'formatter' in requested_items:
info = {}
for cls in get_all_formatters():
doc = docstring_headline(cls)
info[cls.name] = {
'aliases': cls.aliases,
'filenames': cls.filenames,
'doc': doc
}
result['formatters'] = info
if 'filter' in requested_items:
info = {}
for name in get_all_filters():
cls = find_filter_class(name)
info[name] = {
'doc': docstring_headline(cls)
}
result['filters'] = info
if 'style' in requested_items:
info = {}
for name in get_all_styles():
cls = get_style_by_name(name)
info[name] = {
'doc': docstring_headline(cls)
}
result['styles'] = info
json.dump(result, sys.stdout)
def main_inner(parser, argns):
if argns.help:
parser.print_help()
return 0
if argns.V:
print('Pygments version %s, (c) 2006-2022 by Georg Brandl, Matthäus '
'Chajdas and contributors.' % __version__)
return 0
def is_only_option(opt):
return not any(v for (k, v) in vars(argns).items() if k != opt)
# handle ``pygmentize -L``
if argns.L is not None:
arg_set = set()
for k, v in vars(argns).items():
if v:
arg_set.add(k)
arg_set.discard('L')
arg_set.discard('json')
if arg_set:
parser.print_help(sys.stderr)
return 2
# print version
if not argns.json:
main(['', '-V'])
allowed_types = {'lexer', 'formatter', 'filter', 'style'}
largs = [arg.rstrip('s') for arg in argns.L]
if any(arg not in allowed_types for arg in largs):
parser.print_help(sys.stderr)
return 0
if not largs:
largs = allowed_types
if not argns.json:
for arg in largs:
_print_list(arg)
else:
_print_list_as_json(largs)
return 0
# handle ``pygmentize -H``
if argns.H:
if not is_only_option('H'):
parser.print_help(sys.stderr)
return 2
what, name = argns.H
if what not in ('lexer', 'formatter', 'filter'):
parser.print_help(sys.stderr)
return 2
return _print_help(what, name)
# parse -O options
parsed_opts = _parse_options(argns.O or [])
# parse -P options
for p_opt in argns.P or []:
try:
name, value = p_opt.split('=', 1)
except ValueError:
parsed_opts[p_opt] = True
else:
parsed_opts[name] = value
# encodings
inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))
# handle ``pygmentize -N``
if argns.N:
lexer = find_lexer_class_for_filename(argns.N)
if lexer is None:
lexer = TextLexer
print(lexer.aliases[0])
return 0
# handle ``pygmentize -C``
if argns.C:
inp = sys.stdin.buffer.read()
try:
lexer = guess_lexer(inp, inencoding=inencoding)
except ClassNotFound:
lexer = TextLexer
print(lexer.aliases[0])
return 0
# handle ``pygmentize -S``
S_opt = argns.S
a_opt = argns.a
if S_opt is not None:
f_opt = argns.f
if not f_opt:
parser.print_help(sys.stderr)
return 2
if argns.l or argns.INPUTFILE:
parser.print_help(sys.stderr)
return 2
try:
parsed_opts['style'] = S_opt
fmter = get_formatter_by_name(f_opt, **parsed_opts)
except ClassNotFound as err:
print(err, file=sys.stderr)
return 1
print(fmter.get_style_defs(a_opt or ''))
return 0
# if no -S is given, -a is not allowed
if argns.a is not None:
parser.print_help(sys.stderr)
return 2
# parse -F options
F_opts = _parse_filters(argns.F or [])
# -x: allow custom (eXternal) lexers and formatters
allow_custom_lexer_formatter = bool(argns.x)
# select lexer
lexer = None
# given by name?
lexername = argns.l
if lexername:
# custom lexer, located relative to user's cwd
if allow_custom_lexer_formatter and '.py' in lexername:
try:
filename = None
name = None
if ':' in lexername:
filename, name = lexername.rsplit(':', 1)
if '.py' in name:
# This can happen on Windows: If the lexername is
# C:\lexer.py -- return to normal load path in that case
name = None
if filename and name:
lexer = load_lexer_from_file(filename, name,
**parsed_opts)
else:
lexer = load_lexer_from_file(lexername, **parsed_opts)
except ClassNotFound as err:
print('Error:', err, file=sys.stderr)
return 1
else:
try:
lexer = get_lexer_by_name(lexername, **parsed_opts)
except (OptionError, ClassNotFound) as err:
print('Error:', err, file=sys.stderr)
return 1
# read input code
code = None
if argns.INPUTFILE:
if argns.s:
print('Error: -s option not usable when input file specified',
file=sys.stderr)
return 2
infn = argns.INPUTFILE
try:
with open(infn, 'rb') as infp:
code = infp.read()
except Exception as err:
print('Error: cannot read infile:', err, file=sys.stderr)
return 1
if not inencoding:
code, inencoding = guess_decode(code)
# do we have to guess the lexer?
if not lexer:
try:
lexer = get_lexer_for_filename(infn, code, **parsed_opts)
except ClassNotFound as err:
if argns.g:
try:
lexer = guess_lexer(code, **parsed_opts)
except ClassNotFound:
lexer = TextLexer(**parsed_opts)
else:
print('Error:', err, file=sys.stderr)
return 1
except OptionError as err:
print('Error:', err, file=sys.stderr)
return 1
elif not argns.s: # treat stdin as full file (-s support is later)
# read code from terminal, always in binary mode since we want to
# decode ourselves and be tolerant with it
code = sys.stdin.buffer.read() # use .buffer to get a binary stream
if not inencoding:
code, inencoding = guess_decode_from_terminal(code, sys.stdin)
# else the lexer will do the decoding
if not lexer:
try:
lexer = guess_lexer(code, **parsed_opts)
except ClassNotFound:
lexer = TextLexer(**parsed_opts)
else: # -s option needs a lexer with -l
if not lexer:
print('Error: when using -s a lexer has to be selected with -l',
file=sys.stderr)
return 2
# process filters
for fname, fopts in F_opts:
try:
lexer.add_filter(fname, **fopts)
except ClassNotFound as err:
print('Error:', err, file=sys.stderr)
return 1
# select formatter
outfn = argns.o
fmter = argns.f
if fmter:
# custom formatter, located relative to user's cwd
if allow_custom_lexer_formatter and '.py' in fmter:
try:
filename = None
name = None
if ':' in fmter:
# Same logic as above for custom lexer
filename, name = fmter.rsplit(':', 1)
if '.py' in name:
name = None
if filename and name:
fmter = load_formatter_from_file(filename, name,
**parsed_opts)
else:
fmter = load_formatter_from_file(fmter, **parsed_opts)
except ClassNotFound as err:
print('Error:', err, file=sys.stderr)
return 1
else:
try:
fmter = get_formatter_by_name(fmter, **parsed_opts)
except (OptionError, ClassNotFound) as err:
print('Error:', err, file=sys.stderr)
return 1
if outfn:
if not fmter:
try:
fmter = get_formatter_for_filename(outfn, **parsed_opts)
except (OptionError, ClassNotFound) as err:
print('Error:', err, file=sys.stderr)
return 1
try:
outfile = open(outfn, 'wb')
except Exception as err:
print('Error: cannot open outfile:', err, file=sys.stderr)
return 1
else:
if not fmter:
if os.environ.get('COLORTERM','') in ('truecolor', '24bit'):
fmter = TerminalTrueColorFormatter(**parsed_opts)
elif '256' in os.environ.get('TERM', ''):
fmter = Terminal256Formatter(**parsed_opts)
else:
fmter = TerminalFormatter(**parsed_opts)
outfile = sys.stdout.buffer
# determine output encoding if not explicitly selected
if not outencoding:
if outfn:
# output file? use lexer encoding for now (can still be None)
fmter.encoding = inencoding
else:
# else use terminal encoding
fmter.encoding = terminal_encoding(sys.stdout)
# provide coloring under Windows, if possible
if not outfn and sys.platform in ('win32', 'cygwin') and \
fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover
# unfortunately colorama doesn't support binary streams on Py3
outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding)
fmter.encoding = None
try:
import colorama.initialise
except ImportError:
pass
else:
outfile = colorama.initialise.wrap_stream(
outfile, convert=None, strip=None, autoreset=False, wrap=True)
# When using the LaTeX formatter and the option `escapeinside` is
# specified, we need a special lexer which collects escaped text
# before running the chosen language lexer.
escapeinside = parsed_opts.get('escapeinside', '')
if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
left = escapeinside[0]
right = escapeinside[1]
lexer = LatexEmbeddedLexer(left, right, lexer)
# ... and do it!
if not argns.s:
# process whole input as per normal...
try:
highlight(code, lexer, fmter, outfile)
finally:
if outfn:
outfile.close()
return 0
else:
# line by line processing of stdin (eg: for 'tail -f')...
try:
while 1:
line = sys.stdin.buffer.readline()
if not line:
break
if not inencoding:
line = guess_decode_from_terminal(line, sys.stdin)[0]
highlight(line, lexer, fmter, outfile)
if hasattr(outfile, 'flush'):
outfile.flush()
return 0
except KeyboardInterrupt: # pragma: no cover
return 0
finally:
if outfn:
outfile.close()
class HelpFormatter(argparse.HelpFormatter):
def __init__(self, prog, indent_increment=2, max_help_position=16, width=None):
if width is None:
try:
width = shutil.get_terminal_size().columns - 2
except Exception:
pass
argparse.HelpFormatter.__init__(self, prog, indent_increment,
max_help_position, width)
def main(args=sys.argv):
"""
Main command line entry point.
"""
desc = "Highlight an input file and write the result to an output file."
parser = argparse.ArgumentParser(description=desc, add_help=False,
formatter_class=HelpFormatter)
operation = parser.add_argument_group('Main operation')
lexersel = operation.add_mutually_exclusive_group()
lexersel.add_argument(
'-l', metavar='LEXER',
help='Specify the lexer to use. (Query names with -L.) If not '
'given and -g is not present, the lexer is guessed from the filename.')
lexersel.add_argument(
'-g', action='store_true',
help='Guess the lexer from the file contents, or pass through '
'as plain text if nothing can be guessed.')
operation.add_argument(
'-F', metavar='FILTER[:options]', action='append',
help='Add a filter to the token stream. (Query names with -L.) '
'Filter options are given after a colon if necessary.')
operation.add_argument(
'-f', metavar='FORMATTER',
help='Specify the formatter to use. (Query names with -L.) '
'If not given, the formatter is guessed from the output filename, '
'and defaults to the terminal formatter if the output is to the '
'terminal or an unknown file extension.')
operation.add_argument(
'-O', metavar='OPTION=value[,OPTION=value,...]', action='append',
help='Give options to the lexer and formatter as a comma-separated '
'list of key-value pairs. '
'Example: `-O bg=light,python=cool`.')
operation.add_argument(
'-P', metavar='OPTION=value', action='append',
help='Give a single option to the lexer and formatter - with this '
'you can pass options whose value contains commas and equal signs. '
'Example: `-P "heading=Pygments, the Python highlighter"`.')
operation.add_argument(
'-o', metavar='OUTPUTFILE',
help='Where to write the output. Defaults to standard output.')
operation.add_argument(
'INPUTFILE', nargs='?',
help='Where to read the input. Defaults to standard input.')
flags = parser.add_argument_group('Operation flags')
flags.add_argument(
'-v', action='store_true',
help='Print a detailed traceback on unhandled exceptions, which '
'is useful for debugging and bug reports.')
flags.add_argument(
'-s', action='store_true',
help='Process lines one at a time until EOF, rather than waiting to '
'process the entire file. This only works for stdin, only for lexers '
'with no line-spanning constructs, and is intended for streaming '
'input such as you get from `tail -f`. '
'Example usage: `tail -f sql.log | pygmentize -s -l sql`.')
flags.add_argument(
'-x', action='store_true',
help='Allow custom lexers and formatters to be loaded from a .py file '
'relative to the current working directory. For example, '
'`-l ./customlexer.py -x`. By default, this option expects a file '
'with a class named CustomLexer or CustomFormatter; you can also '
'specify your own class name with a colon (`-l ./lexer.py:MyLexer`). '
'Users should be very careful not to use this option with untrusted '
'files, because it will import and run them.')
flags.add_argument('--json', help='Output as JSON. This can '
'be only used in conjunction with -L.',
default=False,
action='store_true')
special_modes_group = parser.add_argument_group(
'Special modes - do not do any highlighting')
special_modes = special_modes_group.add_mutually_exclusive_group()
special_modes.add_argument(
'-S', metavar='STYLE -f formatter',
help='Print style definitions for STYLE for a formatter '
'given with -f. The argument given by -a is formatter '
'dependent.')
special_modes.add_argument(
'-L', nargs='*', metavar='WHAT',
help='List lexers, formatters, styles or filters -- '
'give additional arguments for the thing(s) you want to list '
'(e.g. "styles"), or omit them to list everything.')
special_modes.add_argument(
'-N', metavar='FILENAME',
help='Guess and print out a lexer name based solely on the given '
'filename. Does not take input or highlight anything. If no specific '
'lexer can be determined, "text" is printed.')
special_modes.add_argument(
'-C', action='store_true',
help='Like -N, but print out a lexer name based solely on '
'a given content from standard input.')
special_modes.add_argument(
'-H', action='store', nargs=2, metavar=('NAME', 'TYPE'),
help='Print detailed help for the object <name> of type <type>, '
'where <type> is one of "lexer", "formatter" or "filter".')
special_modes.add_argument(
'-V', action='store_true',
help='Print the package version.')
special_modes.add_argument(
'-h', '--help', action='store_true',
help='Print this help.')
special_modes_group.add_argument(
'-a', metavar='ARG',
help='Formatter-specific additional argument for the -S (print '
'style sheet) mode.')
argns = parser.parse_args(args[1:])
try:
return main_inner(parser, argns)
except BrokenPipeError:
# someone closed our stdout, e.g. by quitting a pager.
return 0
except Exception:
if argns.v:
print(file=sys.stderr)
print('*' * 65, file=sys.stderr)
print('An unhandled exception occurred while highlighting.',
file=sys.stderr)
print('Please report the whole traceback to the issue tracker at',
file=sys.stderr)
print('<https://github.com/pygments/pygments/issues>.',
file=sys.stderr)
print('*' * 65, file=sys.stderr)
print(file=sys.stderr)
raise
import traceback
info = traceback.format_exception(*sys.exc_info())
msg = info[-1].strip()
if len(info) >= 3:
# extract relevant file and position info
msg += '\n (f%s)' % info[-2].split('\n')[0].strip()[1:]
print(file=sys.stderr)
print('*** Error while highlighting:', file=sys.stderr)
print(msg, file=sys.stderr)
print('*** If this is a bug you want to report, please rerun with -v.',
file=sys.stderr)
return 1