From 59b1aca63bf9ec69ada93af960d8b2a7bd920477 Mon Sep 17 00:00:00 2001 From: Darren Burns Date: Tue, 14 Nov 2023 10:03:54 +0000 Subject: [PATCH] Fix double-width characters disappearing when wrapping (#3180) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update docstring for `Text.wrap`s width parameter to indicate that it's referring to the number of *single-width* characters. Also a small addition to the gitignore file. * Working on double width wrapping fixes * Chop cells to fit to width * Fix folding when theres already text on line * Update wrapping logic to fix issues with CJK charcters disappearing when the "fold" location sat *within* a double-width character. Ensure we retain browser logic of: if there is no space on the current line, move to a new line, and if theres not enough space on the entire new line, fold the text over multiple lines at appropriate locations. * Remove old TODO comments * Add regression test note * Rename function to avoid breaking change * Update CHANGELOG * Remove old comment that is no longer relevant * Cover off some wrapping edge cases * Adding docstrings to tests explaining their purpose * Renaming a local, function scope function alias * Update rich/_wrap.py Co-authored-by: Rodrigo Girão Serrão <5621605+rodrigogiraoserrao@users.noreply.github.com> * PR feedback * Testing wrapping with trailing and leading whitespace * Improve docstring wording --------- Co-authored-by: Rodrigo Girão Serrão <5621605+rodrigogiraoserrao@users.noreply.github.com> --- .gitignore | 2 ++ CHANGELOG.md | 1 + rich/_wrap.py | 73 ++++++++++++++++++++++++++++++++++----------- rich/cells.py | 54 ++++++++++++++++++++------------- tests/test_cells.py | 19 ++++++++++++ tests/test_text.py | 66 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 177 insertions(+), 38 deletions(-) diff --git a/.gitignore b/.gitignore index b291d4c2..4d0a1d3a 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,5 @@ venv.bak/ # airspeed velocity benchmarks/env/ benchmarks/html/ + +sandbox/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 52548cc4..0817e86d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Some text goes missing during wrapping when it contains double width characters https://github.com/Textualize/rich/issues/3176 - Ensure font is correctly inherited in exported HTML https://github.com/Textualize/rich/issues/3104 - Fixed typing for `FloatPrompt`. diff --git a/rich/_wrap.py b/rich/_wrap.py index c45f193f..2e94ff6f 100644 --- a/rich/_wrap.py +++ b/rich/_wrap.py @@ -1,5 +1,7 @@ +from __future__ import annotations + import re -from typing import Iterable, List, Tuple +from typing import Iterable from ._loop import loop_last from .cells import cell_len, chop_cells @@ -7,7 +9,11 @@ from .cells import cell_len, chop_cells re_word = re.compile(r"\s*\S+\s*") -def words(text: str) -> Iterable[Tuple[int, int, str]]: +def words(text: str) -> Iterable[tuple[int, int, str]]: + """Yields each word from the text as a tuple + containing (start_index, end_index, word). A "word" in this context may + include the actual word and any whitespace to the right. + """ position = 0 word_match = re_word.match(text, position) while word_match is not None: @@ -17,35 +23,59 @@ def words(text: str) -> Iterable[Tuple[int, int, str]]: word_match = re_word.match(text, end) -def divide_line(text: str, width: int, fold: bool = True) -> List[int]: - divides: List[int] = [] - append = divides.append - line_position = 0 +def divide_line(text: str, width: int, fold: bool = True) -> list[int]: + """Given a string of text, and a width (measured in cells), return a list + of cell offsets which the string should be split at in order for it to fit + within the given width. + + Args: + text: The text to examine. + width: The available cell width. + fold: If True, words longer than `width` will be folded onto a new line. + + Returns: + A list of indices to break the line at. + """ + break_positions: list[int] = [] # offsets to insert the breaks at + append = break_positions.append + cell_offset = 0 _cell_len = cell_len + for start, _end, word in words(text): word_length = _cell_len(word.rstrip()) - if line_position + word_length > width: + remaining_space = width - cell_offset + word_fits_remaining_space = remaining_space >= word_length + + if word_fits_remaining_space: + # Simplest case - the word fits within the remaining width for this line. + cell_offset += _cell_len(word) + else: + # Not enough space remaining for this word on the current line. if word_length > width: + # The word doesn't fit on any line, so we can't simply + # place it on the next line... if fold: - chopped_words = chop_cells(word, max_size=width, position=0) - for last, line in loop_last(chopped_words): + # Fold the word across multiple lines. + folded_word = chop_cells(word, width=width) + for last, line in loop_last(folded_word): if start: append(start) - if last: - line_position = _cell_len(line) + cell_offset = _cell_len(line) else: start += len(line) else: + # Folding isn't allowed, so crop the word. if start: append(start) - line_position = _cell_len(word) - elif line_position and start: + cell_offset = _cell_len(word) + elif cell_offset and start: + # The word doesn't fit within the remaining space on the current + # line, but it *can* fit on to the next (empty) line. append(start) - line_position = _cell_len(word) - else: - line_position += _cell_len(word) - return divides + cell_offset = _cell_len(word) + + return break_positions if __name__ == "__main__": # pragma: no cover @@ -53,4 +83,11 @@ if __name__ == "__main__": # pragma: no cover console = Console(width=10) console.print("12345 abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ 12345") - print(chop_cells("abcdefghijklmnopqrstuvwxyz", 10, position=2)) + print(chop_cells("abcdefghijklmnopqrstuvwxyz", 10)) + + console = Console(width=20) + console.rule() + console.print("TextualはPythonの高速アプリケーション開発フレームワークです") + + console.rule() + console.print("アプリケーションは1670万色を使用でき") diff --git a/rich/cells.py b/rich/cells.py index 31d0c4cb..f85f928f 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -1,6 +1,8 @@ +from __future__ import annotations + import re from functools import lru_cache -from typing import Callable, List +from typing import Callable from ._cell_widths import CELL_WIDTHS @@ -119,27 +121,39 @@ def set_cell_size(text: str, total: int) -> str: start = pos -# TODO: This is inefficient -# TODO: This might not work with CWJ type characters -def chop_cells(text: str, max_size: int, position: int = 0) -> List[str]: - """Break text in to equal (cell) length strings, returning the characters in reverse - order""" - _get_character_cell_size = get_character_cell_size - characters = [ - (character, _get_character_cell_size(character)) for character in text - ] - total_size = position - lines: List[List[str]] = [[]] - append = lines[-1].append +def chop_cells( + text: str, + width: int, +) -> list[str]: + """Split text into lines such that each line fits within the available (cell) width. - for character, size in reversed(characters): - if total_size + size > max_size: - lines.append([character]) - append = lines[-1].append - total_size = size + Args: + text: The text to fold such that it fits in the given width. + width: The width available (number of cells). + + Returns: + A list of strings such that each string in the list has cell width + less than or equal to the available width. + """ + _get_character_cell_size = get_character_cell_size + lines: list[list[str]] = [[]] + + append_new_line = lines.append + append_to_last_line = lines[-1].append + + total_width = 0 + + for character in text: + cell_width = _get_character_cell_size(character) + char_doesnt_fit = total_width + cell_width > width + + if char_doesnt_fit: + append_new_line([character]) + append_to_last_line = lines[-1].append + total_width = cell_width else: - total_size += size - append(character) + append_to_last_line(character) + total_width += cell_width return ["".join(line) for line in lines] diff --git a/tests/test_cells.py b/tests/test_cells.py index 50fa5219..fe451fa5 100644 --- a/tests/test_cells.py +++ b/tests/test_cells.py @@ -1,4 +1,5 @@ from rich import cells +from rich.cells import chop_cells def test_cell_len_long_string(): @@ -40,3 +41,21 @@ def test_set_cell_size_infinite(): ) == size ) + + +def test_chop_cells(): + """Simple example of splitting cells into lines of width 3.""" + text = "abcdefghijk" + assert chop_cells(text, 3) == ["abc", "def", "ghi", "jk"] + + +def test_chop_cells_double_width_boundary(): + """The available width lies within a double-width character.""" + text = "ありがとう" + assert chop_cells(text, 3) == ["あ", "り", "が", "と", "う"] + + +def test_chop_cells_mixed_width(): + """Mixed single and double-width characters.""" + text = "あ1り234が5と6う78" + assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"] diff --git a/tests/test_text.py b/tests/test_text.py index 22c34e44..18c91cff 100644 --- a/tests/test_text.py +++ b/tests/test_text.py @@ -449,6 +449,20 @@ def test_wrap_cjk_width_mid_character(): ] +def test_wrap_cjk_mixed(): + """Regression test covering https://github.com/Textualize/rich/issues/3176 and + https://github.com/Textualize/textual/issues/3567 - double width characters could + result in text going missing when wrapping.""" + text = Text("123ありがとうございました") + console = Console(width=20) # let's ensure the width passed to wrap() wins. + + wrapped_lines = text.wrap(console, width=8) + with console.capture() as capture: + console.print(wrapped_lines) + + assert capture.get() == "123あり\nがとうご\nざいまし\nた\n" + + def test_wrap_long(): text = Text("abracadabra", justify="left") lines = text.wrap(Console(), 4) @@ -497,6 +511,47 @@ def test_wrap_long_words_2(): ] +def test_wrap_long_words_followed_by_other_words(): + """After folding a word across multiple lines, we should continue from + the next word immediately after the folded word (don't take a newline + following completion of the folded word).""" + text = Text("123 12345678 123 123") + lines = text.wrap(Console(), 6) + assert lines._lines == [ + Text("123 "), + Text("123456"), + Text("78 123"), + Text("123"), + ] + + +def test_wrap_long_word_preceeded_by_word_of_full_line_length(): + """The width of the first word is the same as the available width. + Ensures that folding works correctly when there's no space available + on the current line.""" + text = Text("123456 12345678 123 123") + lines = text.wrap(Console(), 6) + assert lines._lines == [ + Text("123456"), + Text("123456"), + Text("78 123"), + Text("123"), + ] + + +def test_wrap_multiple_consecutive_spaces(): + """Adding multiple consecutive spaces at the end of a line does not impact + the location at which a break will be added during the process of wrapping.""" + text = Text("123456 12345678 123 123") + lines = text.wrap(Console(), 6) + assert lines._lines == [ + Text("123456"), + Text("123456"), + Text("78 123"), + Text("123"), + ] + + def test_wrap_long_words_justify_left(): text = Text("X 123456789", justify="left") lines = text.wrap(Console(), 4) @@ -508,6 +563,17 @@ def test_wrap_long_words_justify_left(): assert lines[3] == Text("9 ") +def test_wrap_leading_and_trailing_whitespace(): + text = Text(" 123 456 789 ") + lines = text.wrap(Console(), 4) + assert lines._lines == [ + Text(" 1"), + Text("23 "), + Text("456 "), + Text("789 "), + ] + + def test_no_wrap_no_crop(): text = Text("Hello World!" * 3)