Source code for urwidgets.text_embed

from __future__ import annotations

# Public API of this module: the two callables/classes first, followed by the
# type aliases that describe the accepted markup formats.
__all__ = (
    "parse_text",
    "TextEmbed",
    # Type Aliases
    "Markup",
    "StringMarkup",
    "ListMarkup",
    "TupleMarkup",
    "NormalTupleMarkup",
    "DisplayAttribute",
    "WidgetTupleMarkup",
    "WidgetListMarkup",
)

import re
from functools import lru_cache
from itertools import islice
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union

import urwid

# NOTE: Any new "private" attribute of any subclass of an urwid class should be
# prepended with "_uw" to avoid clashes with names used by urwid itself.

# I really hope these are correct :D
#
# The aliases below reference each other by name (as strings) because they are
# mutually recursive and some are used before they are defined.

# Any markup: a plain string, a list of markups or an `(attribute, markup)` tuple.
Markup = Union["StringMarkup", "ListMarkup", "TupleMarkup"]
# Plain text, either decoded or encoded.
StringMarkup = Union[str, bytes]
# A sequence of markups.
ListMarkup = List["Markup"]
# A 2-tuple markup: either regular text markup or widget markup.
TupleMarkup = Union["NormalTupleMarkup", "WidgetTupleMarkup"]
# `(display attribute, text markup)`
NormalTupleMarkup = Tuple["DisplayAttribute", Union["StringMarkup", "ListMarkup"]]
# What may be used as the display attribute of regular text.
DisplayAttribute = Union[None, str, bytes, "urwid.AttrSpec"]
# `(width, widget(s))`, where the integer is the number of screen columns each
# embedded widget occupies (see the description of `TextEmbed`).
WidgetTupleMarkup = Tuple[int, Union["urwid.Widget", "WidgetListMarkup"]]
# A list that may mix widgets, regular markup and nested widget lists.
WidgetListMarkup = List[Union["urwid.Widget", "Markup", "WidgetListMarkup"]]


class TextEmbed(urwid.Text):
    """A text widget within which other widgets may be embedded.

    This is an extension of the :py:class:`urwid.Text` widget. Every feature and
    interface of :py:class:`~urwid.Text` is supported and works essentially the same,
    **except for the "ellipsis" wrap mode** which is currently not implemented.
    Text markup format is essentially the same, except when embedding widgets.

    **Embedding Widgets**
        A widget is embedded by specifying it as a markup element with an **integer
        display attribute**, where the display attribute is the number of screen
        columns the widget should occupy.

        Examples:

        >>> # w1 spans 2 columns
        >>> TextEmbed(["This widget (", (2, w1), ") spans two columns"])
        >>> # w1 and w2 span 2 columns
        >>> TextEmbed(["These widgets (", (2, [w1, w2]), ") span two columns each"])
        >>> # w1 and w2 span 2 columns, the text in-between has no display attribute
        >>> TextEmbed([(2, [w1, (None, "and"), w2]), " span two columns each"])
        >>> # w1 and w2 span 2 columns, text in the middle is red
        >>> TextEmbed((2, [w1, ("red", " i am red "), w2]))
        >>> # w1 and w3 span 2 columns, w2 spans 5 columns
        >>> TextEmbed((2, [w1, (5, w2), w3]))

        Visible embedded widgets are always rendered (may be cached) whenever the
        ``TextEmbed`` widget is re-rendered (i.e an uncached render). Hence, this
        allows for dynamic parts of text without updating the entire widget.
        Going a step further, embedded widgets can be swapped by using
        ``urwid.WidgetPlaceholder`` but their widths will remain the same.

        NOTE:
            - Every embedded widget must be a box widget and is always rendered with
              size ``(width, 1)``.  :py:class:`urwid.Filler` can be used to wrap flow
              widgets.
            - Each embedded widget is treated as a single WORD (i.e containing no
              whitespace). Therefore, consecutive embedded widgets are also treated as
              a single WORD. This affects the "space" wrap mode.
            - After updating or swapping an embedded widget, this widget's canvases
              should be invalidated to ensure it re-renders.

    Raises:
        TypeError: A widget markup element has a non-integer display attribute.
        ValueError: A widget doesn't support box sizing.
        ValueError: A widget has a non-positive width (display attribute).
    """

    # In case a placeholder gets wrapped or clipped, this pattern will only match the
    # head of a placeholder, not tails on subsequent lines.
    # A placeholder is a "\0" (head) followed by zero or more "\1" characters.
    _uw_placeholder_pattern = re.compile("(\0\1*)")

    # A tail must occur at the beginning of a line but may be preceded by padding
    # spaces when `align != "left"` and `wrap != "clip"`
    _uw_tail_pattern = re.compile("^( *)(\1+)")

    attrib = property(
        lambda self: super().attrib,
        doc="""Run-length encoding of display attributes of the widget's content.

        :type: List[Tuple[Union[DisplayAttribute, int], int]]

        See the description of the second item in the return value of
        :py:meth:`get_text`.
        """,
    )

    embedded = property(
        lambda self: [(widget, width) for widget, width, _ in self._uw_embedded],
        doc="""Embedded widgets.

        Returns:
            A list of all embedded widgets and their respective widths, in the same
            order in which they were given in the text markup.

        :type: List[Tuple[urwid.Widget, int]]
        """,
    )

    text = property(
        lambda self: super().text,
        doc="""Raw text content of the widget.

        :type: str

        See the description of the first item in the return value of
        :py:meth:`get_text`.
        """,
    )

    def get_text(
        self,
    ) -> Tuple[str, List[Tuple[Union[DisplayAttribute, int], int]]]:
        """Returns a representation of the widget's content.

        Returns:
            A tuple ``(text, attrib)``, where

            - *text* is the raw text content of the widget.

              Each embedded widget is represented by a substring starting with a
              ``"\\x00"`` character followed by zero or more ``"\\x01"`` characters,
              with length equal to the widget's width.

            - *attrib* is the run-length encoding of display attributes.

              Any entry containing a display attribute of the ``int`` type (e.g
              ``(1, 4)``) denotes an embedded widget, where the display attribute is
              the index of the widget within the :py:attr:`embedded` widgets list and
              the run length is the width of the widget.
        """
        return super().get_text()

    def render(
        self, size: Tuple[int,], focus: bool = False
    ) -> Union[urwid.TextCanvas, urwid.CompositeCanvas]:
        """Renders the text, then substitutes placeholders with widget canvases.

        The text (with placeholders) is first rendered by :py:class:`urwid.Text`.
        If there are no embedded widgets, that canvas is returned as is. Otherwise,
        the canvas is processed line by line: runs of consecutive lines without
        any placeholder are reused as whole slices of the text canvas, while every
        line containing a placeholder (or the tail of one) is rebuilt by
        :py:meth:`_uw_embed`. The pieces are finally stacked vertically.
        """
        text_canv = fix_text_canvas_attr(super().render(size, focus))
        embedded = self._uw_embedded
        if not embedded:
            return text_canv

        def append_text_lines():
            # Flushes the pending run of `n_lines` placeholder-free lines, starting
            # at row `top` of the text canvas, as a single canvas.
            nonlocal top

            if n_lines:
                partial_canv = urwid.CompositeCanvas(text_canv)
                partial_canv.trim(top, n_lines)
                canvases.append((partial_canv, None, focus))
                top += n_lines

        text = text_canv.text
        canvases = []
        placeholder_pattern = __class__._uw_placeholder_pattern
        tail = None
        top = 0  # Row of the text canvas up to which lines have been processed
        n_lines = 0  # Number of pending consecutive lines without placeholders
        clipped = self.wrap == "clip"

        if clipped:
            # When clipped, lines are independent of one another, so the widget(s)
            # on each line are looked up using the attributes of the line's content
            # (an `int` attribute is an index into `embedded`).
            if self.align != "left":
                translation = self.get_line_translation(size[0])
            text_canv_content = tuple(text_canv.content())
        else:
            # When wrapped, placeholders appear in the same order as the widgets
            # were given, so a single iterator is consumed across all lines.
            embedded_iter = iter(embedded)

        for row_index, line in enumerate(text):
            line = line.decode()
            if clipped:
                if line.startswith("\1"):  # align != "left"
                    # The head of the placeholder was clipped off on the left
                    widget_index = text_canv_content[row_index][0][0]
                    widget, width, start_pos = embedded[widget_index]
                    tail_canv = widget.render((width, 1), focus)
                    left_trim = -translation[row_index][0][0]
                    # the placeholder is clipped => left_trim > start_pos
                    tail_width = width - (left_trim - start_pos)
                    tail = (tail_width, tail_canv)
                    embedded_iter = islice(embedded, widget_index + 1, None)
                else:
                    tail = None
            if tail:
                if clipped:
                    append_text_lines()
                line_canv = urwid.CompositeCanvas(text_canv)
                line_canv.trim(top, 1)
                partial_canv, tail = self._uw_embed(
                    line, line_canv, embedded_iter, focus, tail
                )
                canvases.append((partial_canv, None, focus))
                n_lines = 0
                top += 1
            elif placeholder_pattern.search(line):
                append_text_lines()
                if clipped:
                    # Find the index of the first widget on this line
                    for attr, *_ in text_canv_content[row_index]:
                        if isinstance(attr, int):
                            break
                    embedded_iter = islice(embedded, attr, None)
                line_canv = urwid.CompositeCanvas(text_canv)
                line_canv.trim(top, 1)
                partial_canv, tail = self._uw_embed(
                    line, line_canv, embedded_iter, focus
                )
                canvases.append((partial_canv, None, focus))
                n_lines = 0
                top += 1
            else:
                n_lines += 1
        append_text_lines()

        return urwid.CanvasCombine(canvases)

    def set_text(self, markup: Markup) -> None:
        """Sets the widget's content.

        Also supports widget markup elements. See the class description.
        """
        markup, self._uw_embedded = self._uw_substitute_widgets(markup)
        super().set_text(markup)
        self._uw_update_widget_start_pos()

    def set_wrap_mode(self, mode: str) -> None:
        """Sets the wrap mode.

        Raises:
            NotImplementedError: *mode* is ``"ellipsis"``.
        """
        if mode == "ellipsis":
            raise NotImplementedError("Wrap mode 'ellipsis' is not implemented.")
        super().set_wrap_mode(mode)

    wrap = property(lambda self: super().wrap, set_wrap_mode)

    def _uw_update_widget_start_pos(self) -> None:
        """Updates the start position of embedded widgets on their respective lines."""
        if not self._uw_embedded:
            return

        # - Text is clipped per line.
        # - Since the pad/trim amount in the translation (produced by
        #   `StandardTextLayout.align_layout()`) is relative to the start of the line
        #   wrt the layout width (maxcol), the position of an embedded widget on its
        #   respective line should be relative to the start of the line, not considering
        #   alignment.
        find_placeholders = __class__._uw_placeholder_pattern.finditer
        embedded_iter = iter(self._uw_embedded)
        self._uw_embedded = [
            # Using `Text.pack()` instead of `match.start()` directly to account for
            # wide characters
            (widget, width, urwid.Text(line[: match.start()]).pack()[0])
            for line in super().get_text()[0].splitlines()
            for match, (widget, width, _) in zip(find_placeholders(line), embedded_iter)
        ]

    @staticmethod
    def _uw_substitute_widgets(
        markup: Markup,
    ) -> Tuple[Markup, List[Tuple[urwid.Widget, int, int]]]:
        """Extracts embedded widgets from *markup* and replaces widget markup elements
        with placeholders.

        Returns:
            A tuple containing:

            - The given markup flattened and with all widget elements replaced by
              placeholders.
            - A list of ``(widget, width, start_position)`` tuples describing the
              embedded widgets, where *start_position* is initialized to zero and
              later updated by :py:meth:`_uw_update_widget_start_pos`.

        Raises:
            urwid.TagMarkupException: A tuple markup element is not a 2-tuple.
            TypeError: A widget markup element has a non-integer display attribute.
            ValueError: A widget doesn't support box sizing.
            ValueError: A widget has a non-positive width (display attribute).
        """

        def recurse_markup(attr: Union[DisplayAttribute, int], markup: Markup) -> None:
            if isinstance(markup, list):
                # Every element of a list inherits the attribute of the list
                for element in markup:
                    recurse_markup(attr, element)
            elif isinstance(markup, tuple):
                if len(markup) != 2:
                    raise urwid.TagMarkupException(
                        "Tuples must be in the form `(attribute, tagmarkup)` "
                        f"(got: {markup!r})"
                    )
                # A tuple overrides the inherited attribute with its own
                recurse_markup(*markup)
            elif isinstance(markup, urwid.Widget):
                if not isinstance(attr, int):
                    raise TypeError(
                        "Invalid type for embedded widget width "
                        f"(got: {type(attr).__name__!r})"
                    )
                if "box" not in markup.sizing():
                    raise ValueError(f"Not a box widget (got: {markup!r})")
                if attr <= 0:
                    raise ValueError(f"Invalid widget width (got: {attr!r})")
                # The placeholder's display attribute is the index of the widget
                # and its length is the width of the widget.
                new_markup.append((len(embedded), "\0" + "\1" * (attr - 1)))
                embedded.append((markup, attr, 0))
            else:
                # Normalize text type to `str` since other parts of this class use
                # and expect `str`
                if isinstance(markup, bytes):
                    markup = markup.decode()
                new_markup.append(markup if attr is None else (attr, markup))

        embedded = []
        new_markup = []
        recurse_markup(None, markup)

        return new_markup, embedded

    @staticmethod
    def _uw_embed(
        line: str,
        line_canv: urwid.CompositeCanvas,
        embedded_iter: Iterator[Tuple[urwid.Widget, int, int]],
        focus: bool = False,
        tail: Optional[Tuple[int, urwid.Canvas]] = None,
    ) -> Tuple[urwid.CompositeCanvas, Optional[Tuple[int, urwid.Canvas]]]:
        """Replaces widget placeholders in a line with the widgets' contents.

        Args:
            line: A line of the original text canvas.
            line_canv: A canvas corresponding to *line*.
            embedded_iter: An iterator of ``(widget, width, start_position)`` tuples
              in the same order as :py:attr:`embedded`, where *start_position* is as
              determined by :py:meth:`_uw_update_widget_start_pos`.
            focus: As in :py:meth:`render`.
            tail: The description of the "tail" of an embedded widget that is the first
              part of the line ``(tail_width, tail_canv)``, if it was wrapped/clipped,
              where:

              - *tail_width* is the width of the remaining (unused) portion of the
                widget's canvas content towards its right end.
              - *tail_canv* is the original rendered canvas of the widget, unmodified.

              OR ``None`` if a widget is not the first part of the line.

        Returns:
            A tuple containing:

            - A ``CompositeCanvas`` containing the separate parts from the original
              text canvas and the embedded widgets' canvases.
            - The description of the "tail" of an embedded widget that is the last part
              of the line ``(tail_width, tail_canv)`` (see the description of *tail*
              above), if it was wrapped/clipped OR ``None`` if it wasn't wrapped/clipped
              or a widget is not the last part of the line.
        """
        canvases = []
        line_index = 0  # Current column within the line

        if tail:
            # - Since this is the line after the head, then it must contain [a part of]
            #   the tail
            # - Only one possible occurrence of a tail per line
            # - Might be preceded by padding spaces when `align != "left"`
            _, padding, tail_string, line = __class__._uw_tail_pattern.split(line)

            if padding:
                # Can use `len(padding)` since all characters should be spaces
                canv = urwid.Text(padding).render((len(padding),), focus)
                canvases.append((canv, None, focus, len(padding)))
                line_index += len(padding)

            tail_width, tail_canv = tail
            canv = urwid.CompositeCanvas(tail_canv)
            # A negative left amount trims off the part of the widget's canvas that
            # has already been used on the preceding line(s)
            canv.pad_trim_left_right(tail_width - tail_canv.cols(), 0)
            canvases.append((canv, None, focus, len(tail_string)))
            line_index += len(tail_string)

            if not line:
                # The tail spans the rest of the line and may continue on the next
                tail = (
                    (tail_width - len(tail_string), tail_canv)
                    if len(tail_string) < tail_width
                    else None
                )
                return urwid.CanvasJoin(canvases), tail
            tail = None

        placeholder_pattern = __class__._uw_placeholder_pattern

        for part in placeholder_pattern.split(line):
            if not part:
                continue

            if placeholder_pattern.fullmatch(part):
                widget, width, _ = next(embedded_iter)
                canv = widget.render((width, 1), focus)
                # `len(part)`, in case the placeholder was wrapped
                canvases.append((canv, None, focus, len(part)))
                line_index += len(part)
                if len(part) != width:
                    tail = (width - len(part), canv)
            else:
                # Shouldn't use `len(part)` because of wide characters
                maxcol = urwid.Text(part).pack()[0]
                canv = urwid.CompositeCanvas(line_canv)
                canv.pad_trim_left_right(-line_index, 0)
                canvases.append((canv, None, focus, maxcol))
                line_index += maxcol

        return urwid.CanvasJoin(canvases), tail


def parse_text(
    text: str,
    patterns: Iterable[re.Pattern],
    repl: Callable[[re.Pattern, Tuple[Optional[str]], Tuple[int, int], ...], Markup],
    *repl_args: Any,
    **repl_kwargs: Any,
) -> Markup:
    r"""Parses a string into a text/widget markup list.

    Args:
        text: The string to parse.
        patterns: An iterable of RegEx pattern objects.
        repl: A callable to replace a substring of *text* matched by any of the given
          RegEx patterns.
        repl_args: Additional positional arguments to be passed to *repl* whenever it's
          called.
        repl_kwargs: keyword arguments to be passed to *repl* whenever it's called.

    Returns:
        A text/widget markup (see :py:data:`Markup`) that should be compatible with
        :py:class:`TextEmbed` and/or :py:class:`urwid.Text`, depending on the values
        returned by *repl*.

        If the result consists of a single markup element, that element is returned
        by itself instead of a one-item list. An empty *text* is returned as is.

    Raises:
        TypeError: An argument is of an unexpected type.
        ValueError: *patterns* is empty.
        ValueError: A given pattern object was not compiled from a :py:class:`str`
          instance.

    Whenever any of the given RegEx patterns matches a **non-empty** substring of
    *text*, *repl* is called with the following arguments (in the given order):

    - the :py:class:`re.Pattern` object that matched the substring
    - a tuple containing the match groups

      - starting with the whole match,
      - followed by all the subgroups of the match, from 1 up to however many
        groups are in the pattern, if any (``None`` for each group that didn't
        participate in the match)

    - a tuple containing the indexes of the start and end of the substring
    - *repl_args* unpacked
    - *repl_kwargs* unpacked

    and *should* return a valid text/widget markup (see :py:data:`Markup`). If the
    value returned is *false* (such as ``None`` or an empty string), it is omitted
    from the result.

    Example::

        import re
        from urwid import Filler
        from urwidgets import Hyperlink, TextEmbed, parse_text

        MARKDOWN = {
            re.compile(r"\*\*(.+?)\*\*"): lambda g: ("bold", g[1]),
            re.compile("https://[^ ]+"): (
                lambda g: (min(len(g[0]), 14), Filler(Hyperlink(g[0], "blue")))
            ),
            re.compile(r"\[(.+)\]\((.+)\)"): (
                lambda g: (len(g[1]), Filler(Hyperlink(g[2], "blue", g[1])))
            ),
        }

        link = "https://urwid.org"
        text = f"[This]({link}) is a **link** to {link}"
        print(text)
        # Output: [This](https://urwid.org) is a **link** to https://urwid.org

        markup = parse_text(
            text, MARKDOWN, lambda pattern, groups, span: MARKDOWN[pattern](groups)
        )
        print(markup)
        # Output:
        # [
        #   (4, <Filler box widget <Hyperlink flow widget>>),
        #   ' is a ',
        #   ('bold', 'link'),
        #   ' to ',
        #   (14, <Filler box widget <Hyperlink flow widget>>),
        # ]

        text_widget = TextEmbed(markup)
        canv = text_widget.render(text_widget.pack()[:1])
        print(canv.text[0].decode())
        # Output: This is a link to https://urwid…
        # The hyperlinks will be clickable if supported

    NOTE:
        In the case of overlapping matches, the substring that occurs first is matched
        and if they start at the same index, the pattern that appears first in
        *patterns* takes precedence.
    """
    if not isinstance(text, str):
        raise TypeError(f"Invalid type for 'text' (got: {type(text).__name__!r})")
    if not text:
        return text

    # A tuple is required since the combined pattern is cached by its arguments
    patterns = tuple(patterns)
    if not patterns:
        raise ValueError("No RegEx patterns")

    combined_pattern, indexed_patterns = combine_patterns(patterns)
    full_markup = []
    ptr = 0  # Index of the end of the last match, i.e. start of unprocessed text
    for match in combined_pattern.finditer(text):
        span = match.span()
        # Unmatched text between the previous match and this one is kept verbatim
        if ptr < span[0]:
            full_markup.append(text[ptr : span[0]])
        if match.group():  # Empty matches are ignored
            # Every given pattern is wrapped in its own group within the combined
            # pattern; being the outermost, that group is the last one to be closed
            # and hence, is what `lastindex` refers to.
            pattern_index = match.lastindex
            pattern = indexed_patterns[pattern_index]
            markup = repl(
                pattern,
                # The wrapping group (whole match) followed by the pattern's own groups
                match.groups()[pattern_index - 1 : pattern_index + pattern.groups],
                span,
                *repl_args,
                **repl_kwargs,
            )
            if markup:
                full_markup.append(markup)
        ptr = span[1]
    # Trailing unmatched text
    if ptr < len(text):
        full_markup.append(text[ptr:])

    return full_markup[0] if len(full_markup) == 1 else full_markup


# Private

# Maps each `re` flag constant to the letter used for it in inline flag syntax
# (e.g. `(?i:...)`). Read by `get_inline_flags()`.
RE_INLINE_FLAGS = {re.A: "a", re.I: "i", re.L: "L", re.M: "m", re.S: "s", re.X: "x"}


@lru_cache()
def combine_patterns(
    patterns: Tuple[re.Pattern],
) -> Tuple[re.Pattern, Dict[int, re.Pattern]]:
    """Combines multiple RegEx patterns with their respective flags into a single OR-ed
    pattern.

    Every given pattern is wrapped in a capturing group of its own and, if it was
    compiled with flags, in a group that applies those flags to it alone.

    Args:
        patterns: The pattern objects to combine. Must be a tuple (hashable) since
          results are cached.

    Returns:
        A tuple containing

        - the combined RegEx pattern
        - a dictionary mapping the index of the group in the combined pattern
          corresponding to each given pattern to the pattern

    Raises:
        ValueError: A given pattern object was not compiled from a `str` instance.
    """
    grouped_patterns = []
    indexed_patterns = {}  # <index of group in combined pattern>: <pattern>
    group_index = 1
    for pattern in patterns:
        pattern_string = pattern.pattern
        if not isinstance(pattern_string, str):
            raise ValueError(f"Pattern not compiled from `str` (got: {pattern!r})")

        if pattern.flags & re.X:
            # In verbose mode, a `#` comment extends to the end of the line. If the
            # pattern ends with a comment, the closing parentheses appended below
            # would become a part of that comment. Whitespace is insignificant in
            # verbose mode, so terminating the line is otherwise harmless.
            pattern_string += "\n"

        inline_flags = get_inline_flags(pattern.flags)
        grouped_patterns.append(
            f"(?{inline_flags}:({pattern_string}))"
            if inline_flags
            else f"({pattern_string})"
        )
        indexed_patterns[group_index] = pattern
        # Skip over the wrapping group and all the groups within the pattern
        group_index += pattern.groups + 1

    return re.compile("|".join(grouped_patterns)), indexed_patterns


def fix_text_canvas_attr(canv: urwid.TextCanvas) -> urwid.TextCanvas:
    """Workaround for a bug in `urwid.text_layout.StandardTextLayout`.

    When `wrap=clip, align=center` and there's a line starting with a markup that has
    a display attribute, when the render width (maxcol) is one less than the line's
    width (in screen columns, not characters), the line is rendered as an empty
    string.

    See https://github.com/urwid/urwid/issues/542.

    Removes a leading zero-length run without a display attribute, ``(None, 0)``,
    from the attributes of every line of the canvas, **in place**.

    Args:
        canv: The canvas to fix.

    Returns:
        The same canvas object, *canv*.
    """
    for line_attr in canv._attr:
        # A line might have no attribute runs at all, hence the emptiness check
        if line_attr and line_attr[0] == (None, 0):
            del line_attr[0]

    return canv


# Unbounded cache: there are only 511 (zero is excluded) unique bit patterns (and not
# even all of them can occur)
@lru_cache(maxsize=None)
def get_inline_flags(flags: int) -> str:
    """Returns the inline flag letters equivalent to a RegEx integer flag.

    The letters are in the order in which their flags appear in `RE_INLINE_FLAGS`.
    Flags without an entry there are ignored.
    """
    letters = []
    for re_flag, letter in RE_INLINE_FLAGS.items():
        if re_flag & flags:
            letters.append(letter)

    return "".join(letters)