Mirror of https://github.com/NixOS/nixpkgs.git

this adds support for structural includes to nixos-render-docs. structural includes provide a way to denote the (sub)structure of the nixos manual in the markdown source files, very similar to how we used literal docbook blocks before, and are processed by nixos-render-docs without involving any xml tooling. this will ultimately allow us to emit the nixos manual in other formats as well, e.g. html, without going through docbook at all.

alternatives to this source layout were also considered: a parallel structure using e.g. toml files that describe the document tree and link to each part is possible, but much more complicated to implement than the solution chosen here, and it makes it harder to follow which files have what substructure. it also makes it much harder to include a substructure in the middle of a file. much the same goes for command-line arguments to the converter, except that command-line arguments are even harder to specify correctly and cannot reasonably be pulled together from many places without another layer of tooling. cli arguments would also mean that the manual structure would be fixed in default.nix, which is not ideal either.
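as a rough sketch of how the converter is meant to be driven once this lands, and of what a structural include fence looks like in a manual source file (the import path, file names, and revision below are placeholders for illustration, not part of this change):

    import json
    from pathlib import Path

    from nixos_render_docs.manual import DocBookConverter  # assumed module path

    # manual.md (placeholder name) starts with the book title and subtitle and
    # pulls its chapters in through a structural include fence:
    #
    #   # NixOS Manual
    #   ## Version 23.05
    #
    #   ```{=include=} chapters
    #   installation.chapter.md
    #   configuration.chapter.md
    #   ```

    with open('manpage-urls.json') as f:
        converter = DocBookConverter(json.load(f), revision='unstable')

    print(converter.convert(Path('manual.md')))  # prints the DocBook <book> document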
227 lines · 11 KiB · Python
import argparse
import json

from abc import abstractmethod
from collections.abc import Mapping, MutableMapping, Sequence
from pathlib import Path
from typing import Any, cast, NamedTuple, Optional, Union
from xml.sax.saxutils import escape, quoteattr

import markdown_it
from markdown_it.token import Token
from markdown_it.utils import OptionsDict

from . import options
from .docbook import DocBookRenderer, Heading
from .md import Converter

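# Renders one manual (sub)document to DocBook. The toplevel tag decides which
# element the file's h1 heading becomes (book, part, chapter, section, preface
# or appendix); the included_* rules splice in documents collected by
# DocBookConverter._parse further down.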
class ManualDocBookRenderer(DocBookRenderer):
    _toplevel_tag: str

    def __init__(self, toplevel_tag: str, manpage_urls: Mapping[str, str],
                 parser: Optional[markdown_it.MarkdownIt] = None):
        super().__init__(manpage_urls, parser)
        self._toplevel_tag = toplevel_tag
        self.rules |= {
            'included_sections': lambda *args: self._included_thing("section", *args),
            'included_chapters': lambda *args: self._included_thing("chapter", *args),
            'included_preface': lambda *args: self._included_thing("preface", *args),
            'included_parts': lambda *args: self._included_thing("part", *args),
            'included_appendix': lambda *args: self._included_thing("appendix", *args),
            'included_options': self.included_options,
        }

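    # The first heading of a file must be an h1 (rendered as the document title);
    # for books an h2 subtitle must follow it. Further headings of those levels
    # are rejected before delegating to the generic renderer.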
    def render(self, tokens: Sequence[Token], options: OptionsDict,
               env: MutableMapping[str, Any]) -> str:
        wanted = { 'h1': 'title' }
        wanted |= { 'h2': 'subtitle' } if self._toplevel_tag == 'book' else {}
        for (i, (tag, kind)) in enumerate(wanted.items()):
            if len(tokens) < 3 * (i + 1):
                raise RuntimeError(f"missing {kind} ({tag}) heading")
            token = tokens[3 * i]
            if token.type != 'heading_open' or token.tag != tag:
                assert token.map
                raise RuntimeError(f"expected {kind} ({tag}) heading in line {token.map[0] + 1}", token)
        for t in tokens[3 * len(wanted):]:
            if t.type != 'heading_open' or (info := wanted.get(t.tag)) is None:
                continue
            assert t.map
            raise RuntimeError(
                f"only one {info} heading ({t.markup} [text...]) allowed per "
                f"{self._toplevel_tag}, but found a second in lines [{t.map[0] + 1}..{t.map[1]}]. "
                "please remove all such headings except the first or demote the subsequent headings.",
                t)

        # books get special handling because they have *two* title tags. doing this with
        # generic code is more complicated than it's worth. the checks above have verified
        # that both titles actually exist.
        if self._toplevel_tag == 'book':
            assert tokens[1].children
            assert tokens[4].children
            if (maybe_id := cast(str, tokens[0].attrs.get('id', ""))):
                maybe_id = "xml:id=" + quoteattr(maybe_id)
            return (f'<book xmlns="http://docbook.org/ns/docbook"'
                    f' xmlns:xlink="http://www.w3.org/1999/xlink"'
                    f' {maybe_id} version="5.0">'
                    f' <title>{self.renderInline(tokens[1].children, options, env)}</title>'
                    f' <subtitle>{self.renderInline(tokens[4].children, options, env)}</subtitle>'
                    f' {super().render(tokens[6:], options, env)}'
                    f'</book>')

        return super().render(tokens, options, env)

    def _heading_tag(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
                     env: MutableMapping[str, Any]) -> tuple[str, dict[str, str]]:
        (tag, attrs) = super()._heading_tag(token, tokens, i, options, env)
        # render() has already verified that we don't have supernumerary headings and since the
        # book tag is handled specially we can leave the check this simple
        if token.tag != 'h1':
            return (tag, attrs)
        return (self._toplevel_tag, attrs | {
            'xmlns': "http://docbook.org/ns/docbook",
            'xmlns:xlink': "http://www.w3.org/1999/xlink",
        })

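    # token.meta['included'] is a list of (tokens, path) pairs filled in by
    # DocBookConverter._parse_included_blocks; each included file is rendered with
    # a fresh renderer so its headings nest as their own element of the given tag.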
    def _included_thing(self, tag: str, token: Token, tokens: Sequence[Token], i: int,
                        options: OptionsDict, env: MutableMapping[str, Any]) -> str:
        result = []
        # close existing partintro. the generic render doesn't really need this because
        # it doesn't have a concept of structure in the way the manual does.
        if self._headings and self._headings[-1] == Heading('part', 1):
            result.append("</partintro>")
            self._headings[-1] = self._headings[-1]._replace(partintro_closed=True)
        # must nest properly for structural includes. this requires saving at least
        # the headings stack, but creating new renderers is cheap and much easier.
        r = ManualDocBookRenderer(tag, self._manpage_urls, None)
        for (included, path) in token.meta['included']:
            try:
                result.append(r.render(included, options, env))
            except Exception as e:
                raise RuntimeError(f"rendering {path}") from e
        return "".join(result)
    def included_options(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
                         env: MutableMapping[str, Any]) -> str:
        return cast(str, token.meta['rendered-options'])

    # TODO minimize docbook diffs with existing conversions. remove soon.
    def paragraph_open(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
                       env: MutableMapping[str, Any]) -> str:
        return super().paragraph_open(token, tokens, i, options, env) + "\n "
    def paragraph_close(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
                        env: MutableMapping[str, Any]) -> str:
        return "\n" + super().paragraph_close(token, tokens, i, options, env)
    def code_block(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
                   env: MutableMapping[str, Any]) -> str:
        return f"<programlisting>\n{escape(token.content)}</programlisting>"
    def fence(self, token: Token, tokens: Sequence[Token], i: int, options: OptionsDict,
              env: MutableMapping[str, Any]) -> str:
        info = f" language={quoteattr(token.info)}" if token.info != "" else ""
        return f"<programlisting{info}>\n{escape(token.content)}</programlisting>"

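# Converts a whole manual file to DocBook, resolving ```{=include=}``` fences
# while parsing and rendering the result as a complete book.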
class DocBookConverter(Converter):
    def __renderer__(self, manpage_urls: Mapping[str, str],
                     parser: Optional[markdown_it.MarkdownIt]) -> ManualDocBookRenderer:
        return ManualDocBookRenderer('book', manpage_urls, parser)

    _base_paths: list[Path]
    _revision: str

    def __init__(self, manpage_urls: Mapping[str, str], revision: str):
        super().__init__(manpage_urls)
        self._revision = revision

    def convert(self, file: Path) -> str:
        self._base_paths = [ file ]
        try:
            with open(file, 'r') as f:
                return self._render(f.read())
        except Exception as e:
            raise RuntimeError(f"failed to render manual {file}") from e

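    # Structural includes appear in the sources as code fences whose info string
    # starts with "{=include=}", e.g. (file names are only illustrative):
    #
    #   ```{=include=} chapters
    #   installation.chapter.md
    #   configuration.chapter.md
    #   ```
    #
    # Such fence tokens are rewritten here into the included_* token types handled
    # by ManualDocBookRenderer.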
    def _parse(self, src: str, env: Optional[MutableMapping[str, Any]] = None) -> list[Token]:
        tokens = super()._parse(src, env)
        for token in tokens:
            if token.type != "fence" or not token.info.startswith("{=include=} "):
                continue
            typ = token.info[12:].strip()
            if typ == 'options':
                token.type = 'included_options'
                self._parse_options(token)
            elif typ in [ 'sections', 'chapters', 'preface', 'parts', 'appendix' ]:
                token.type = 'included_' + typ
                self._parse_included_blocks(token, env)
            else:
                raise RuntimeError(f"unsupported structural include type '{typ}'")
        return tokens

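    # The body of an include fence names one file per line, relative to the file
    # containing the fence. Included files are parsed recursively; a file that is
    # already on the include stack is reported as a circular include.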
    def _parse_included_blocks(self, token: Token, env: Optional[MutableMapping[str, Any]]) -> None:
        assert token.map
        included = token.meta['included'] = []
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
            line = line.strip()
            path = self._base_paths[-1].parent / line
            if path in self._base_paths:
                raise RuntimeError(f"circular include found in line {lnum}")
            try:
                self._base_paths.append(path)
                with open(path, 'r') as f:
                    tokens = self._parse(f.read(), env)
                included.append((tokens, path))
                self._base_paths.pop()
            except Exception as e:
                raise RuntimeError(f"processing included file {path} from line {lnum}") from e

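    # An options fence contains "key: value" directives instead of file names.
    # Exactly three are required: 'source' (the options JSON document, relative to
    # the current file), and 'id-prefix' and 'list-id', which are passed through to
    # the options renderer; any other directive is an error.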
    def _parse_options(self, token: Token) -> None:
        assert token.map

        items = {}
        for (lnum, line) in enumerate(token.content.splitlines(), token.map[0] + 2):
            if len(args := line.split(":", 1)) != 2:
                raise RuntimeError(f"options directive with no argument in line {lnum}")
            (k, v) = (args[0].strip(), args[1].strip())
            if k in items:
                raise RuntimeError(f"duplicate options directive {k} in line {lnum}")
            items[k] = v
        try:
            id_prefix = items.pop('id-prefix')
            varlist_id = items.pop('list-id')
            source = items.pop('source')
        except KeyError as e:
            raise RuntimeError(f"options directive {e} missing in block at line {token.map[0] + 1}")
        if items.keys():
            raise RuntimeError(
                f"unsupported options directives in block at line {token.map[0] + 1}",
                " ".join(items.keys()))

        try:
            conv = options.DocBookConverter(
                self._manpage_urls, self._revision, False, 'fragment', varlist_id, id_prefix)
            with open(self._base_paths[-1].parent / source, 'r') as f:
                conv.add_options(json.load(f))
            token.meta['rendered-options'] = conv.finalize(fragment=True)
        except Exception as e:
            raise RuntimeError(f"processing options block in line {token.map[0] + 1}") from e

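# CLI plumbing: build_cli registers a 'docbook' subcommand that takes
# --manpage-urls, --revision, an input markdown file and an output path, and
# run_cli dispatches to it. Assuming this module is wired up as the 'manual'
# subcommand of the nixos-render-docs entry point (not shown here), an
# invocation would look roughly like:
#
#   nixos-render-docs manual docbook \
#       --manpage-urls ./manpage-urls.json --revision unstable \
#       manual.md manual.xml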
def _build_cli_db(p: argparse.ArgumentParser) -> None:
    p.add_argument('--manpage-urls', required=True)
    p.add_argument('--revision', required=True)
    p.add_argument('infile', type=Path)
    p.add_argument('outfile', type=Path)

def _run_cli_db(args: argparse.Namespace) -> None:
    with open(args.manpage_urls, 'r') as manpage_urls:
        md = DocBookConverter(json.load(manpage_urls), args.revision)
        converted = md.convert(args.infile)
        args.outfile.write_text(converted)

def build_cli(p: argparse.ArgumentParser) -> None:
    formats = p.add_subparsers(dest='format', required=True)
    _build_cli_db(formats.add_parser('docbook'))

def run_cli(args: argparse.Namespace) -> None:
    if args.format == 'docbook':
        _run_cli_db(args)
    else:
        raise RuntimeError('format not hooked up', args)