Source code for urwidgets.text_embed

from __future__ import annotations

# Public names of this module, i.e. what a star-import of it exports: the parsing
# helper, the widget class and the markup type aliases.
__all__ = (
    "parse_text",
    "TextEmbed",
    # Type Aliases
    "Markup",
    "StringMarkup",
    "ListMarkup",
    "TupleMarkup",
    "NormalTupleMarkup",
    "DisplayAttribute",
    "WidgetTupleMarkup",
    "WidgetListMarkup",
)

import re
from functools import lru_cache
from itertools import islice
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union

import urwid

# NOTE: Any new "private" attribute of any subclass of an urwid class should be
# prepended with "_uw" to avoid clashes with names used by urwid itself.

# Type aliases describing text/widget markup.
#
# The aliases refer to one another (markup is recursive), which is why every
# reference to another alias is written as a string forward reference.

# Any markup element: a plain string, a list of markup or an attribute tuple.
Markup = Union["StringMarkup", "ListMarkup", "TupleMarkup"]
# Plain text, as either `str` or `bytes`.
StringMarkup = Union[str, bytes]
# A list of markup elements.
ListMarkup = List["Markup"]
# A two-element tuple: either `(display attribute, text markup)` or
# `(width, widget markup)`.
TupleMarkup = Union["NormalTupleMarkup", "WidgetTupleMarkup"]
# `(display attribute, text)`, where the text is a string or a list of markup.
NormalTupleMarkup = Tuple["DisplayAttribute", Union["StringMarkup", "ListMarkup"]]
# What may be used as the display attribute of non-widget text.
DisplayAttribute = Union[None, str, bytes, "urwid.AttrSpec"]
# `(width, widget or list of widgets/markup)`; the integer takes the place of the
# display attribute.
WidgetTupleMarkup = Tuple[int, Union["urwid.Widget", "WidgetListMarkup"]]
# A list mixing widgets, ordinary markup and nested widget lists.
WidgetListMarkup = List[Union["urwid.Widget", "Markup", "WidgetListMarkup"]]


class TextEmbed(urwid.Text):
    """A text widget within which other widgets may be embedded.

    This is an extension of the :py:class:`urwid.Text` widget. Every feature and
    interface of :py:class:`~urwid.Text` is supported and works essentially the same,
    **except for the "ellipsis" wrap mode** which is currently not implemented.
    Text markup format is essentially the same, except when embedding widgets.

    **Embedding Widgets**

    A widget is embedded by specifying it as a markup element with an **integer
    display attribute**, where the display attribute is the number of screen columns
    the widget should occupy.

    Examples:

    >>> # w1 spans 2 columns
    >>> TextEmbed(["This widget (", (2, w1), ") spans two columns"])

    >>> # w1 and w2 span 2 columns
    >>> TextEmbed(["These widgets (", (2, [w1, w2]), ") span two columns each"])

    >>> # w1 and w2 span 2 columns, the text in-between has no display attribute
    >>> TextEmbed([(2, [w1, (None, "and"), w2]), " span two columns each"])

    >>> # w1 and w2 span 2 columns, text in the middle is red
    >>> TextEmbed((2, [w1, ("red", " i am red "), w2]))

    >>> # w1 and w3 span 2 columns, w2 spans 5 columns
    >>> TextEmbed((2, [w1, (5, w2), w3]))

    Visible embedded widgets are always rendered (may be cached) whenever the
    ``TextEmbed`` widget is re-rendered (i.e an uncached render). Hence, this allows
    for dynamic parts of text without updating the entire widget.
    Going a step further, embedded widgets can be swapped by using
    ``urwid.WidgetPlaceholder`` but their widths will remain the same.

    NOTE:
        - Every embedded widget must be a box widget and is always rendered with
          size ``(width, 1)``. :py:class:`urwid.Filler` can be used to wrap flow
          widgets.
        - Each embedded widget is treated as a single WORD (i.e containing no
          whitespace). Therefore, consecutive embedded widgets are also treated as
          a single WORD. This affects the "space" wrap mode.
        - After updating or swapping an embedded widget, this widget's canvases
          should be invalidated to ensure it re-renders.

    Raises:
        TypeError: A widget markup element has a non-integer display attribute.
        ValueError: A widget doesn't support box sizing.
        ValueError: A widget has a non-positive width (display attribute).
    """

    # In case a placeholder gets wrapped or clipped, this pattern will only match the
    # head of a placeholder, not tails on subsequent lines
    _uw_placeholder_pattern = re.compile("(\0\1*)")

    # A tail must occur at the beginning of a line but may be preceded by padding
    # spaces when `align != "left"` and `wrap != "clip"`
    _uw_tail_pattern = re.compile("^( *)(\1+)")

    attrib = property(
        lambda self: super().attrib,
        doc="""Run-length encoding of display attributes of the widget's content.

        :type: List[Tuple[Union[DisplayAttribute, int], int]]

        See the description of the second item in the return value of
        :py:meth:`get_text`.
        """,
    )

    embedded = property(
        lambda self: [(widget, width) for widget, width, _ in self._uw_embedded],
        doc="""Embedded widgets.

        Returns:
            A list of all embedded widgets and their respective widths, in the same
            order in which they were given in the text markup.

        :type: List[Tuple[urwid.Widget, int]]
        """,
    )

    text = property(
        lambda self: super().text,
        doc="""Raw text content of the widget.

        :type: str

        See the description of the first item in the return value of
        :py:meth:`get_text`.
        """,
    )

    def get_text(
        self,
    ) -> Tuple[str, List[Tuple[Union[DisplayAttribute, int], int]]]:
        """Returns a representation of the widget's content.

        Returns:
            A tuple ``(text, attrib)``, where

            - *text* is the raw text content of the widget.

              Each embedded widget is represented by a substring starting with a
              ``"\\x00"`` character followed by zero or more ``"\\x01"`` characters,
              with length equal to the widget's width.

            - *attrib* is the run-length encoding of display attributes.

              Any entry containing a display attribute of the ``int`` type (e.g
              ``(1, 4)``) denotes an embedded widget, where the display attribute is
              the index of the widget within the :py:attr:`embedded` widgets list and
              the run length is the width of the widget.
        """
        return super().get_text()

    def render(
        self, size: Tuple[int,], focus: bool = False
    ) -> Union[urwid.TextCanvas, urwid.CompositeCanvas]:
        """Renders the text, then splices in the canvases of embedded widgets.

        The text (with placeholders) is first rendered by the parent class. If
        nothing is embedded, that canvas is returned as is. Otherwise, the result is
        rebuilt from top to bottom: runs of consecutive lines without any placeholder
        are taken over in one piece, while every line containing [a part of] a
        placeholder is rebuilt by :py:meth:`_uw_embed`.

        Args:
            size: ``(maxcol,)``, as passed on to the parent class' ``render()``.
            focus: Passed on to the parent class and to every embedded widget.

        Returns:
            The plain text canvas if there are no embedded widgets, otherwise a
            combined canvas of the text and widget parts.
        """
        text_canv = fix_text_canvas_attr(super().render(size, focus))
        embedded = self._uw_embedded
        if not embedded:
            return text_canv

        def append_text_lines():
            # Flushes the pending run of `n_lines` placeholder-free lines starting at
            # row `top`. The caller is responsible for resetting `n_lines`.
            nonlocal top

            if n_lines:
                partial_canv = urwid.CompositeCanvas(text_canv)
                partial_canv.trim(top, n_lines)
                canvases.append((partial_canv, None, focus))
                top += n_lines

        text = text_canv.text
        canvases = []
        placeholder_pattern = __class__._uw_placeholder_pattern
        tail = None  # remainder of a widget continued from the previous line, if any
        top = 0  # index of the first row not yet added to `canvases`
        n_lines = 0  # number of pending placeholder-free lines

        clipped = self.wrap == "clip"
        if clipped:
            # When clipped, a line may start or end in the middle of a widget, so the
            # widgets on a line are looked up by index (their display attribute in
            # the canvas content) instead of being consumed sequentially.
            if self.align != "left":
                translation = self.get_line_translation(size[0])
            text_canv_content = tuple(text_canv.content())
        else:
            embedded_iter = iter(embedded)

        for row_index, line in enumerate(text):
            line = line.decode()

            if clipped:
                if line.startswith("\1"):  # align != "left"
                    # The head of the first placeholder was trimmed off the left edge
                    widget_index = text_canv_content[row_index][0][0]
                    widget, width, start_pos = embedded[widget_index]
                    tail_canv = widget.render((width, 1), focus)
                    left_trim = -translation[row_index][0][0]
                    # the placeholder is clipped => left_trim > start_pos
                    tail_width = width - (left_trim - start_pos)
                    tail = (tail_width, tail_canv)
                    embedded_iter = islice(embedded, widget_index + 1, None)
                else:
                    # Lines are independent when clipped; nothing carries over
                    tail = None

            if tail:
                if clipped:
                    append_text_lines()
                line_canv = urwid.CompositeCanvas(text_canv)
                line_canv.trim(top, 1)
                partial_canv, tail = self._uw_embed(
                    line, line_canv, embedded_iter, focus, tail
                )
                canvases.append((partial_canv, None, focus))
                n_lines = 0
                top += 1
            elif placeholder_pattern.search(line):
                append_text_lines()
                if clipped:
                    # The first integer display attribute on the row is the index of
                    # the first widget on the line
                    for attr, *_ in text_canv_content[row_index]:
                        if isinstance(attr, int):
                            break
                    embedded_iter = islice(embedded, attr, None)
                line_canv = urwid.CompositeCanvas(text_canv)
                line_canv.trim(top, 1)
                partial_canv, tail = self._uw_embed(
                    line, line_canv, embedded_iter, focus
                )
                canvases.append((partial_canv, None, focus))
                n_lines = 0
                top += 1
            else:
                n_lines += 1

        append_text_lines()

        return urwid.CanvasCombine(canvases)

    def set_text(self, markup: Markup) -> None:
        """Sets the widget's content.

        Also supports widget markup elements. See the class description.
        """
        markup, self._uw_embedded = self._uw_substitute_widgets(markup)
        super().set_text(markup)
        self._uw_update_widget_start_pos()

    def set_wrap_mode(self, mode: str) -> None:
        """Sets the wrap mode, rejecting the unimplemented "ellipsis" mode.

        Raises:
            NotImplementedError: *mode* is ``"ellipsis"``.
        """
        # NOTE(review): only this setter (and the `wrap` property below) performs the
        # check; confirm whether the wrap mode can still be set to "ellipsis" by
        # other means provided by the parent class (e.g. at construction).
        if mode == "ellipsis":
            raise NotImplementedError("Wrap mode 'ellipsis' is not implemented.")
        super().set_wrap_mode(mode)

    wrap = property(lambda self: super().wrap, set_wrap_mode)

    def _uw_update_widget_start_pos(self) -> None:
        """Updates the start position of embedded widgets on their respective lines."""
        if not self._uw_embedded:
            return

        # - Text is clipped per line.
        # - Since the pad/trim amount in the translation (produced by
        #   `StandardTextLayout.align_layout()`) is relative to the start of the line
        #   wrt the layout width (maxcol), the position of an embedded widget on its
        #   respective line should be relative to the start of the line, not
        #   considering alignment.
        #
        # NOTE(review): `str.splitlines()` also splits on line boundaries other than
        # "\n"; confirm this agrees with how the text layout breaks lines.

        find_placeholders = __class__._uw_placeholder_pattern.finditer
        embedded_iter = iter(self._uw_embedded)
        # `zip()` draws from the matches first, so no widget is consumed from
        # `embedded_iter` once the placeholders on a line are exhausted.
        self._uw_embedded = [
            # Using `Text.pack()` instead of `match.start()` directly to account for
            # wide characters
            (widget, width, urwid.Text(line[: match.start()]).pack()[0])
            for line in super().get_text()[0].splitlines()
            for match, (widget, width, _) in zip(find_placeholders(line), embedded_iter)
        ]

    @staticmethod
    def _uw_substitute_widgets(
        markup: Markup,
    ) -> Tuple[Markup, List[Tuple[urwid.Widget, int, int]]]:
        """Extracts embedded widgets from *markup* and replaces widget markup
        elements with placeholders.

        Returns:
            A tuple containing:

            - The given markup flattened and with all widget elements replaced by
              placeholders.
            - A list of ``(widget, width, start_position)`` tuples describing the
              embedded widgets, where *start_position* is initialized to zero and
              later updated by :py:meth:`_uw_update_widget_start_pos`.

        Raises:
            urwid.TagMarkupException: A tuple markup element doesn't have exactly
              two items.
            TypeError: A widget markup element has a non-integer display attribute.
            ValueError: A widget doesn't support box sizing.
            ValueError: A widget has a non-positive width (display attribute).
        """

        def recurse_markup(attr: Union[DisplayAttribute, int], markup: Markup) -> None:
            # *attr* is the display attribute (or widget width) of the innermost
            # enclosing tuple; every element of a list shares it.
            if isinstance(markup, list):
                for element in markup:
                    recurse_markup(attr, element)
            elif isinstance(markup, tuple):
                if len(markup) != 2:
                    raise urwid.TagMarkupException(
                        "Tuples must be in the form `(attribute, tagmarkup)` "
                        f"(got: {markup!r})"
                    )
                recurse_markup(*markup)
            elif isinstance(markup, urwid.Widget):
                if not isinstance(attr, int):
                    raise TypeError(
                        "Invalid type for embedded widget width "
                        f"(got: {type(attr).__name__!r})"
                    )
                if "box" not in markup.sizing():
                    raise ValueError(f"Not a box widget (got: {markup!r})")
                if attr <= 0:
                    raise ValueError(f"Invalid widget width (got: {attr!r})")
                # The placeholder is exactly as wide as the widget and its display
                # attribute is the widget's index within `embedded`
                new_markup.append((len(embedded), "\0" + "\1" * (attr - 1)))
                embedded.append((markup, attr, 0))
            else:
                # Normalize text type to `str` since other parts of this class use
                # and expect `str`
                if isinstance(markup, bytes):
                    markup = markup.decode()
                # NOTE(review): text placed directly under a width (int) attribute
                # keeps that integer as its display attribute, while integer
                # attributes are otherwise taken to be widget indexes; confirm this
                # is intended (the class docstring wraps such text in `(None, ...)`).
                new_markup.append(markup if attr is None else (attr, markup))

        embedded = []
        new_markup = []
        recurse_markup(None, markup)

        return new_markup, embedded

    @staticmethod
    def _uw_embed(
        line: str,
        line_canv: urwid.CompositeCanvas,
        embedded_iter: Iterator[Tuple[urwid.Widget, int, int]],
        focus: bool = False,
        tail: Optional[Tuple[int, urwid.Canvas]] = None,
    ) -> Tuple[urwid.CompositeCanvas, Optional[Tuple[int, urwid.Canvas]]]:
        """Replaces widget placeholders in a line with the widgets' contents.

        Args:
            line: A line of the original text canvas.
            line_canv: A canvas corresponding to *line*.
            embedded_iter: An iterator of ``(widget, width, start_position)`` tuples
              in the same order as :py:attr:`embedded`, where *start_position* is as
              determined by :py:meth:`_uw_update_widget_start_pos`.
            focus: As in :py:meth:`render`.
            tail: The description of the "tail" of an embedded widget that is the
              first part of the line ``(tail_width, tail_canv)``, if it was
              wrapped/clipped, where:

              - *tail_width* is the width of the remaining (unused) portion of the
                widget's canvas content towards its right end.
              - *tail_canv* is the original rendered canvas of the widget,
                unmodified.

              OR ``None`` if a widget is not the first part of the line.

        Returns:
            A tuple containing:

            - A ``CompositeCanvas`` containing the separate parts from the original
              text canvas and the embedded widgets' canvases.
            - The description of the "tail" of an embedded widget that is the last
              part of the line ``(tail_width, tail_canv)`` (see the description of
              *tail* above), if it was wrapped/clipped OR ``None`` if it wasn't
              wrapped/clipped or a widget is not the last part of the line.
        """
        canvases = []
        line_index = 0  # screen column reached so far within the line

        if tail:
            # - Since this is the line after the head, then it must contain [a part
            #   of] the tail
            # - Only one possible occurrence of a tail per line
            # - Might be preceded by padding spaces when `align != "left"`
            _, padding, tail_string, line = __class__._uw_tail_pattern.split(line)

            if padding:
                # Can use `len(padding)` since all characters should be spaces
                canv = urwid.Text(padding).render((len(padding),), focus)
                canvases.append((canv, None, focus, len(padding)))
                line_index += len(padding)

            tail_width, tail_canv = tail
            canv = urwid.CompositeCanvas(tail_canv)
            # Trim off the left part of the widget that was already shown
            canv.pad_trim_left_right(tail_width - tail_canv.cols(), 0)
            canvases.append((canv, None, focus, len(tail_string)))
            line_index += len(tail_string)

            if not line:
                # The tail takes up the rest of the line and may continue further
                tail = (
                    (tail_width - len(tail_string), tail_canv)
                    if len(tail_string) < tail_width
                    else None
                )
                return urwid.CanvasJoin(canvases), tail
            tail = None

        placeholder_pattern = __class__._uw_placeholder_pattern
        for part in placeholder_pattern.split(line):
            if not part:
                continue

            if placeholder_pattern.fullmatch(part):
                widget, width, _ = next(embedded_iter)
                canv = widget.render((width, 1), focus)
                # `len(part)`, in case the placeholder was wrapped
                canvases.append((canv, None, focus, len(part)))
                line_index += len(part)
                if len(part) != width:
                    tail = (width - len(part), canv)
            else:
                # Shouldn't use `len(part)` because of wide characters
                maxcol = urwid.Text(part).pack()[0]
                canv = urwid.CompositeCanvas(line_canv)
                canv.pad_trim_left_right(-line_index, 0)
                canvases.append((canv, None, focus, maxcol))
                line_index += maxcol

        return urwid.CanvasJoin(canvases), tail
def parse_text(
    text: str,
    patterns: Iterable[re.Pattern],
    repl: Callable[..., Markup],
    *repl_args: Any,
    **repl_kwargs: Any,
) -> Markup:
    r"""Parses a string into a text/widget markup.

    Args:
        text: The string to parse.
        patterns: An iterable of RegEx pattern objects.
        repl: A callable to replace a substring of *text* matched by any of the given
          RegEx patterns. It is called as
          ``repl(pattern, groups, span, *repl_args, **repl_kwargs)`` (see below).
        repl_args: Additional positional arguments to be passed to *repl* whenever it's
          called.
        repl_kwargs: keyword arguments to be passed to *repl* whenever it's called.

    Returns:
        A text/widget markup (see :py:data:`Markup`) that should be compatible with
        :py:class:`TextEmbed` and/or :py:class:`urwid.Text`, depending on the values
        returned by *repl*.

        The result is a list of markup elements, except that

        - an empty *text* is returned as is and
        - if the result consists of exactly one element, that element itself is
          returned instead of a one-item list.

    Raises:
        TypeError: *text* is not a :py:class:`str` instance.
        ValueError: *patterns* is empty.
        ValueError: A given pattern object was not compiled from a :py:class:`str`
          instance.

    Whenever any of the given RegEx patterns matches a **non-empty** substring of
    *text*, *repl* is called with the following arguments (in the given order):

    - the :py:class:`re.Pattern` object that matched the substring
    - a tuple containing the match groups

      - starting with the whole match,
      - followed by all the subgroups of the match, from 1 up to however many
        groups are in the pattern, if any (``None`` for each group that didn't
        participate in the match)

    - a tuple containing the indexes of the start and end of the substring
    - *repl_args* unpacked
    - *repl_kwargs* unpacked

    and *should* return a valid text/widget markup (see :py:data:`Markup`). If the
    value returned is *false* (such as ``None`` or an empty string), it is omitted
    from the result.

    Example::

        import re
        from urwid import Filler
        from urwidgets import Hyperlink, TextEmbed, parse_text

        MARKDOWN = {
            re.compile(r"\*\*(.+?)\*\*"): lambda g: ("bold", g[1]),
            re.compile("https://[^ ]+"): (
                lambda g: (min(len(g[0]), 14), Filler(Hyperlink(g[0], "blue")))
            ),
            re.compile(r"\[(.+)\]\((.+)\)"): (
                lambda g: (len(g[1]), Filler(Hyperlink(g[2], "blue", g[1])))
            ),
        }

        link = "https://urwid.org"
        text = f"[This]({link}) is a **link** to {link}"
        print(text)
        # Output: [This](https://urwid.org) is a **link** to https://urwid.org

        markup = parse_text(
            text, MARKDOWN, lambda pattern, groups, span: MARKDOWN[pattern](groups)
        )
        print(markup)
        # Output:
        # [
        #   (4, <Filler box widget <Hyperlink flow widget>>),
        #   ' is a ',
        #   ('bold', 'link'),
        #   ' to ',
        #   (14, <Filler box widget <Hyperlink flow widget>>),
        # ]

        text_widget = TextEmbed(markup)
        canv = text_widget.render(text_widget.pack()[:1])
        print(canv.text[0].decode())
        # Output: This is a link to https://urwid…
        # The hyperlinks will be clickable if supported

    NOTE:
        In the case of overlapping matches, the substring that occurs first is matched
        and if they start at the same index, the pattern that appears first in
        *patterns* takes precedence.
    """
    if not isinstance(text, str):
        raise TypeError(f"Invalid type for 'text' (got: {type(text).__name__!r})")
    if not text:
        return text

    # A tuple, since the combined pattern is cached per set of patterns
    patterns = tuple(patterns)
    if not patterns:
        raise ValueError("No RegEx patterns")
    combined_pattern, indexed_patterns = combine_patterns(patterns)

    full_markup = []
    ptr = 0  # index of the first character of *text* not yet consumed
    for match in combined_pattern.finditer(text):
        span = match.span()
        # Unmatched text between the previous match and this one is kept verbatim
        if ptr < span[0]:
            full_markup.append(text[ptr : span[0]])
        if match.group():  # empty matches are ignored
            # Every given pattern is wrapped in a group of its own within the
            # combined pattern. That group closes last, hence `lastindex` identifies
            # the pattern that matched.
            pattern_index = match.lastindex
            pattern = indexed_patterns[pattern_index]
            markup = repl(
                pattern,
                # The wrapping group (the whole match) and the pattern's own groups
                match.groups()[pattern_index - 1 : pattern_index + pattern.groups],
                span,
                *repl_args,
                **repl_kwargs,
            )
            if markup:
                full_markup.append(markup)
        ptr = span[1]
    if ptr < len(text):
        full_markup.append(text[ptr:])

    return full_markup[0] if len(full_markup) == 1 else full_markup
# Private

# Maps every `re` flag handled here to the letter denoting it within an inline
# flag group, e.g. `(?im:...)`
RE_INLINE_FLAGS = {re.A: "a", re.I: "i", re.L: "L", re.M: "m", re.S: "s", re.X: "x"}


@lru_cache()
def combine_patterns(
    patterns: Tuple[re.Pattern, ...],
) -> Tuple[re.Pattern, Dict[int, re.Pattern]]:
    """Combines multiple RegEx patterns with their respective flags into a single
    OR-ed pattern.

    Every given pattern is wrapped in a capturing group of its own and, if it was
    compiled with any of the flags in ``RE_INLINE_FLAGS``, in a group applying those
    flags to that pattern only.

    Args:
        patterns: The patterns to combine, in order of precedence. Must be hashable
          (e.g. a tuple) since the result is cached.

    Returns:
        A tuple containing

        - the combined RegEx pattern
        - a dictionary mapping the index of the group in the combined pattern
          corresponding to each given pattern to the pattern

    Raises:
        ValueError: A given pattern object was not compiled from a ``str`` instance.
    """
    grouped_patterns = []
    indexed_patterns = {}  # <index of group in combined pattern>: <pattern>
    group_index = 1

    for pattern in patterns:
        pattern_string = pattern.pattern
        if not isinstance(pattern_string, str):
            raise ValueError(f"Pattern not compiled from `str` (got: {pattern!r})")

        if pattern.flags & re.X:
            # A verbose pattern may end with a `# comment`, which would otherwise
            # extend over (and swallow) the closing parentheses appended below.
            # The extra newline is ignored as whitespace in verbose mode.
            pattern_string += "\n"

        inline_flags = get_inline_flags(pattern.flags)
        grouped_patterns.append(
            f"(?{inline_flags}:({pattern_string}))"
            if inline_flags
            else f"({pattern_string})"
        )
        indexed_patterns[group_index] = pattern
        # Skip over the pattern's own groups, plus the one wrapping it
        group_index += pattern.groups + 1

    return re.compile("|".join(grouped_patterns)), indexed_patterns


def fix_text_canvas_attr(canv: urwid.TextCanvas) -> urwid.TextCanvas:
    """Workaround for a bug in `urwid.text_layout.StandardTextLayout`.

    When `wrap=clip, align=center` and there's a line starting with a markup that has
    a display attribute, when the render width (maxcol) is one less than the line's
    width (in screen columns, not characters), the line is rendered as an empty
    string.

    See https://github.com/urwid/urwid/issues/542.

    Removes the leading zero-length ``(None, 0)`` run, if any, from every row of the
    canvas' display attributes. The canvas is modified in place.

    Args:
        canv: The canvas to fix.

    Returns:
        *canv* itself.
    """
    for line_attr in canv._attr:
        # A row without any run has nothing to fix (and no first item to inspect)
        if line_attr and line_attr[0] == (None, 0):
            del line_attr[0]

    return canv


# Only 511 (zero is excluded) unique bit patterns (and not even all can occur)
@lru_cache(maxsize=None)
def get_inline_flags(flags: int) -> str:
    """Converts a RegEx integer flag into the corresponding set of inline flags.

    Args:
        flags: A combination of `re` flags (e.g. the ``flags`` attribute of a
          compiled pattern).

    Returns:
        The letters of all flags in ``RE_INLINE_FLAGS`` that are set in *flags*, in
        the order in which they appear in that mapping; an empty string if none is.
    """
    return "".join([inline for flag, inline in RE_INLINE_FLAGS.items() if flag & flags])