Revert back to old CELL_WIDTHS format.

Ideally we'd use the parameter `key` of `bisect_right` with `itemgetter(0)`, but that's Python 3.10+ only. So, instead, we build the tuple `(cp,)`. This means that the tuple `(2,)` will be placed to the right of the range `(1, 31, -1)`, and we can do the calculations as before. However, this will place codepoints at the start of their ranges on the wrong side of the range. E.g., `(1,)` would be placed to the left of `(1, 31, -1)` instead of to the right. To fix this, we add a second element to the tuple that is larger than any second element in the ranges, which ensures that whenever the first element of the tuples (the codepoint and the range start) match, the tuple with the codepoint is always placed on the right. Relevant review comment: https://github.com/Textualize/rich/pull/3300#issuecomment-1987269715
2024-03-11 11:56:01 +00:00 · 2024-03-11 11:56:01 +00:00 · 7a53a9f683
parent 85c041999c
commit 7a53a9f683
3 changed files with 473 additions and 1392 deletions
--- a/rich/_cell_widths.py
+++ b/rich/_cell_widths.py
--- a/rich/cells.py
+++ b/rich/cells.py
@ -2,15 +2,12 @@ from __future__ import annotations
 import bisect
 import re
 import sys
 from functools import lru_cache
 from itertools import accumulate
 from typing import Callable
-from ._cell_widths import (
+from ._cell_widths import CELL_WIDTHS
    CELL_WIDTH_RANGE_ENDS,
    CELL_WIDTH_RANGE_STARTS,
    CELL_WIDTHS,
 )
 # Regex to match sequence of the most common character ranges
 _is_single_cell_widths = re.compile("^[\u0020-\u006f\u00a0\u02ff\u0370-\u0482]*$").match
@ -73,10 +70,13 @@ def _get_codepoint_cell_size(codepoint: int) -> int:
    Returns:
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
-
+    # We create the tuple as `(cp, sys.maxunicode + 2)` instead of just (cp,)
-    idx = bisect.bisect_right(CELL_WIDTH_RANGE_STARTS, codepoint)
+    # because we want the index to always be on the right of the range that
-    if codepoint <= CELL_WIDTH_RANGE_ENDS[idx - 1]:
+    # `cp` belongs to. E.g., `(1,)` won't be placed to the right of `(1, 31, -1)`
-        width = CELL_WIDTHS[idx - 1]
+    # but `(1, sys.maxunicode + 2)` will.
    idx = bisect.bisect_right(CELL_WIDTHS, (codepoint, sys.maxunicode + 2))
    _start, end, width = CELL_WIDTHS[idx - 1]
    if codepoint <= end:
        return 0 if width == -1 else width
    else:
        return 1
--- a/tools/make_terminal_widths.py
+++ b/tools/make_terminal_widths.py
@ -13,10 +13,8 @@ from wcwidth import wcwidth
 progress = Progress()
-def make_widths_lists() -> Tuple[List[int], List[int], List[int]]:
+def make_widths_table() -> List[Tuple[int, int, int]]:
-    starts: List[int] = []
+    widths: List[Tuple[int, int, int]] = []
    ends: List[int] = []
    widths: List[int] = []
    make_table_task = progress.add_task("Calculating table...")
@ -32,16 +30,12 @@ def make_widths_lists() -> Tuple[List[int], List[int], List[int]]:
        progress.advance(make_table_task, len(cp_list))
        if width == 1:
            continue
-        starts.append(cp_list[0][0])
+        widths.append((cp_list[0][0], cp_list[-1][0], width))
-        ends.append(cp_list[-1][0])
+    return widths
        widths.append(width)
    return starts, ends, widths
 def get_cell_size(
-    starts: List[int],
+    widths: List[Tuple[int, int, int]],
    ends: List[int],
    widths: List[int],
    codepoint: int,
 ) -> int:
    """Get the cell size of a character.
@ -53,37 +47,32 @@ def get_cell_size(
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
-    idx = bisect.bisect_right(starts, codepoint)
+    idx = bisect.bisect_right(widths, (codepoint, sys.maxunicode + 2))
-    if codepoint <= ends[idx - 1]:
+    _start, end, width = widths[idx - 1]
-        return widths[idx - 1]
+    if codepoint <= end:
        return width
    else:
        return 1
-def test(starts: List[int], ends: List[int], widths: List[int]) -> None:
+def test(widths: List[Tuple[int, int, int]]) -> None:
    for codepoint in progress.track(
        range(0, sys.maxunicode + 1), description="Testing..."
    ):
        character = chr(codepoint)
-        width1 = get_cell_size(starts, ends, widths, codepoint)
+        width1 = get_cell_size(widths, codepoint)
        width2 = wcwidth(character)
        if width1 != width2:
-            print(f"{width1} != {width2}")
+            print(f"{codepoint}: {width1} != {width2}")
            break
 def run() -> None:
    with progress:
-        starts, ends, widths = make_widths_lists()
+        widths = make_widths_table()
-        test(starts, ends, widths)
+        test(widths)
    table_file = f"""# Auto generated by make_terminal_widths.py
 CELL_WIDTH_RANGE_STARTS = {starts!r}
 CELL_WIDTH_RANGE_ENDS = {ends!r}
 CELL_WIDTHS = {widths!r}
 """
    with open("../rich/_cell_widths.py", "wt") as fh: