Revert back to old CELL_WIDTHS format.

Ideally we'd use the `key` parameter of `bisect_right` with `itemgetter(0)`, but that parameter is only available in Python 3.10+.
So, instead we build the tuple `(cp,)`.
This means that the tuple `(2,)` will be placed to the right of the range `(1, 31, -1)`, and we can do the calculations as before.
However, this will place codepoints that sit at the start of their range on the wrong side of that range.
E.g., `(1,)` would be placed to the left of `(1, 31, -1)` instead of to the right.
To fix this, we add a second element to the tuple that is larger than any second element in the ranges, which ensures that whenever the first element of the tuples (the codepoint and the range start) match, the tuple with the codepoint is always placed on the right.

Relevant review comment: https://github.com/Textualize/rich/pull/3300#issuecomment-1987269715
This commit is contained in:
Rodrigo Girão Serrão 2024-03-11 11:56:01 +00:00
parent 85c041999c
commit 7a53a9f683
No known key found for this signature in database
GPG Key ID: 84116786F3295A35
3 changed files with 473 additions and 1392 deletions

File diff suppressed because it is too large Load Diff

View File

@ -2,15 +2,12 @@ from __future__ import annotations
import bisect import bisect
import re import re
import sys
from functools import lru_cache from functools import lru_cache
from itertools import accumulate from itertools import accumulate
from typing import Callable from typing import Callable
from ._cell_widths import ( from ._cell_widths import CELL_WIDTHS
CELL_WIDTH_RANGE_ENDS,
CELL_WIDTH_RANGE_STARTS,
CELL_WIDTHS,
)
# Regex to match sequence of the most common character ranges # Regex to match sequence of the most common character ranges
_is_single_cell_widths = re.compile("^[\u0020-\u006f\u00a0\u02ff\u0370-\u0482]*$").match _is_single_cell_widths = re.compile("^[\u0020-\u006f\u00a0\u02ff\u0370-\u0482]*$").match
@ -73,10 +70,13 @@ def _get_codepoint_cell_size(codepoint: int) -> int:
Returns: Returns:
int: Number of cells (0, 1 or 2) occupied by that character. int: Number of cells (0, 1 or 2) occupied by that character.
""" """
# We create the tuple as `(cp, sys.maxunicode + 2)` instead of just (cp,)
idx = bisect.bisect_right(CELL_WIDTH_RANGE_STARTS, codepoint) # because we want the index to always be on the right of the range that
if codepoint <= CELL_WIDTH_RANGE_ENDS[idx - 1]: # `cp` belongs to. E.g., `(1,)` won't be placed to the right of `(1, 31, -1)`
width = CELL_WIDTHS[idx - 1] # but `(1, sys.maxunicode + 2)` will.
idx = bisect.bisect_right(CELL_WIDTHS, (codepoint, sys.maxunicode + 2))
_start, end, width = CELL_WIDTHS[idx - 1]
if codepoint <= end:
return 0 if width == -1 else width return 0 if width == -1 else width
else: else:
return 1 return 1

View File

@ -13,10 +13,8 @@ from wcwidth import wcwidth
progress = Progress() progress = Progress()
def make_widths_lists() -> Tuple[List[int], List[int], List[int]]: def make_widths_table() -> List[Tuple[int, int, int]]:
starts: List[int] = [] widths: List[Tuple[int, int, int]] = []
ends: List[int] = []
widths: List[int] = []
make_table_task = progress.add_task("Calculating table...") make_table_task = progress.add_task("Calculating table...")
@ -32,16 +30,12 @@ def make_widths_lists() -> Tuple[List[int], List[int], List[int]]:
progress.advance(make_table_task, len(cp_list)) progress.advance(make_table_task, len(cp_list))
if width == 1: if width == 1:
continue continue
starts.append(cp_list[0][0]) widths.append((cp_list[0][0], cp_list[-1][0], width))
ends.append(cp_list[-1][0]) return widths
widths.append(width)
return starts, ends, widths
def get_cell_size( def get_cell_size(
starts: List[int], widths: List[Tuple[int, int, int]],
ends: List[int],
widths: List[int],
codepoint: int, codepoint: int,
) -> int: ) -> int:
"""Get the cell size of a character. """Get the cell size of a character.
@ -53,37 +47,32 @@ def get_cell_size(
int: Number of cells (0, 1 or 2) occupied by that character. int: Number of cells (0, 1 or 2) occupied by that character.
""" """
idx = bisect.bisect_right(starts, codepoint) idx = bisect.bisect_right(widths, (codepoint, sys.maxunicode + 2))
if codepoint <= ends[idx - 1]: _start, end, width = widths[idx - 1]
return widths[idx - 1] if codepoint <= end:
return width
else: else:
return 1 return 1
def test(starts: List[int], ends: List[int], widths: List[int]) -> None: def test(widths: List[Tuple[int, int, int]]) -> None:
for codepoint in progress.track( for codepoint in progress.track(
range(0, sys.maxunicode + 1), description="Testing..." range(0, sys.maxunicode + 1), description="Testing..."
): ):
character = chr(codepoint) character = chr(codepoint)
width1 = get_cell_size(starts, ends, widths, codepoint) width1 = get_cell_size(widths, codepoint)
width2 = wcwidth(character) width2 = wcwidth(character)
if width1 != width2: if width1 != width2:
print(f"{width1} != {width2}") print(f"{codepoint}: {width1} != {width2}")
break break
def run() -> None: def run() -> None:
with progress: with progress:
starts, ends, widths = make_widths_lists() widths = make_widths_table()
test(starts, ends, widths) test(widths)
table_file = f"""# Auto generated by make_terminal_widths.py table_file = f"""# Auto generated by make_terminal_widths.py
CELL_WIDTH_RANGE_STARTS = {starts!r}
CELL_WIDTH_RANGE_ENDS = {ends!r}
CELL_WIDTHS = {widths!r} CELL_WIDTHS = {widths!r}
""" """
with open("../rich/_cell_widths.py", "wt") as fh: with open("../rich/_cell_widths.py", "wt") as fh: