From 59b1aca63bf9ec69ada93af960d8b2a7bd920477 Mon Sep 17 00:00:00 2001
From: Darren Burns <darrenburns@users.noreply.github.com>
Date: Tue, 14 Nov 2023 10:03:54 +0000
Subject: [PATCH] Fix double-width characters disappearing when wrapping
 (#3180)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update docstring for `Text.wrap`s width parameter to indicate that it's referring to the number of *single-width* characters.

Also a small addition to the gitignore file.

* Working on double width wrapping fixes

* Chop cells to fit to width

* Fix folding when theres already text on line

* Update wrapping logic to fix issues with CJK charcters disappearing when the "fold" location sat *within* a double-width character. Ensure we retain browser logic of: if there is no space on the current line, move to a new line, and if theres not enough space on the entire new line, fold the text over multiple lines at appropriate locations.

* Remove old TODO comments

* Add regression test note

* Rename function to avoid breaking change

* Update CHANGELOG

* Remove old comment that is no longer relevant

* Cover off some wrapping edge cases

* Adding docstrings to tests explaining their purpose

* Renaming a local, function scope function alias

* Update rich/_wrap.py

Co-authored-by: Rodrigo Girão Serrão <5621605+rodrigogiraoserrao@users.noreply.github.com>

* PR feedback

* Testing wrapping with trailing and leading whitespace

* Improve docstring wording

---------

Co-authored-by: Rodrigo Girão Serrão <5621605+rodrigogiraoserrao@users.noreply.github.com>
---
 .gitignore          |  2 ++
 CHANGELOG.md        |  1 +
 rich/_wrap.py       | 73 ++++++++++++++++++++++++++++++++++-----------
 rich/cells.py       | 54 ++++++++++++++++++++-------------
 tests/test_cells.py | 19 ++++++++++++
 tests/test_text.py  | 66 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 177 insertions(+), 38 deletions(-)

diff --git a/.gitignore b/.gitignore
index b291d4c2..4d0a1d3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -117,3 +117,5 @@ venv.bak/
 # airspeed velocity
 benchmarks/env/
 benchmarks/html/
+
+sandbox/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 52548cc4..0817e86d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- Some text goes missing during wrapping when it contains double width characters https://github.com/Textualize/rich/issues/3176
 - Ensure font is correctly inherited in exported HTML https://github.com/Textualize/rich/issues/3104
 - Fixed typing for `FloatPrompt`.
 
diff --git a/rich/_wrap.py b/rich/_wrap.py
index c45f193f..2e94ff6f 100644
--- a/rich/_wrap.py
+++ b/rich/_wrap.py
@@ -1,5 +1,7 @@
+from __future__ import annotations
+
 import re
-from typing import Iterable, List, Tuple
+from typing import Iterable
 
 from ._loop import loop_last
 from .cells import cell_len, chop_cells
@@ -7,7 +9,11 @@ from .cells import cell_len, chop_cells
 re_word = re.compile(r"\s*\S+\s*")
 
 
-def words(text: str) -> Iterable[Tuple[int, int, str]]:
+def words(text: str) -> Iterable[tuple[int, int, str]]:
+    """Yields each word from the text as a tuple
+    containing (start_index, end_index, word). A "word" in this context may
+    include the actual word and any whitespace to the right.
+    """
     position = 0
     word_match = re_word.match(text, position)
     while word_match is not None:
@@ -17,35 +23,59 @@ def words(text: str) -> Iterable[Tuple[int, int, str]]:
         word_match = re_word.match(text, end)
 
 
-def divide_line(text: str, width: int, fold: bool = True) -> List[int]:
-    divides: List[int] = []
-    append = divides.append
-    line_position = 0
+def divide_line(text: str, width: int, fold: bool = True) -> list[int]:
+    """Given a string of text, and a width (measured in cells), return a list
+    of cell offsets which the string should be split at in order for it to fit
+    within the given width.
+
+    Args:
+        text: The text to examine.
+        width: The available cell width.
+        fold: If True, words longer than `width` will be folded onto a new line.
+
+    Returns:
+        A list of indices to break the line at.
+    """
+    break_positions: list[int] = []  # offsets to insert the breaks at
+    append = break_positions.append
+    cell_offset = 0
     _cell_len = cell_len
+
     for start, _end, word in words(text):
         word_length = _cell_len(word.rstrip())
-        if line_position + word_length > width:
+        remaining_space = width - cell_offset
+        word_fits_remaining_space = remaining_space >= word_length
+
+        if word_fits_remaining_space:
+            # Simplest case - the word fits within the remaining width for this line.
+            cell_offset += _cell_len(word)
+        else:
+            # Not enough space remaining for this word on the current line.
             if word_length > width:
+                # The word doesn't fit on any line, so we can't simply
+                # place it on the next line...
                 if fold:
-                    chopped_words = chop_cells(word, max_size=width, position=0)
-                    for last, line in loop_last(chopped_words):
+                    # Fold the word across multiple lines.
+                    folded_word = chop_cells(word, width=width)
+                    for last, line in loop_last(folded_word):
                         if start:
                             append(start)
-
                         if last:
-                            line_position = _cell_len(line)
+                            cell_offset = _cell_len(line)
                         else:
                             start += len(line)
                 else:
+                    # Folding isn't allowed, so crop the word.
                     if start:
                         append(start)
-                    line_position = _cell_len(word)
-            elif line_position and start:
+                    cell_offset = _cell_len(word)
+            elif cell_offset and start:
+                # The word doesn't fit within the remaining space on the current
+                # line, but it *can* fit on to the next (empty) line.
                 append(start)
-                line_position = _cell_len(word)
-        else:
-            line_position += _cell_len(word)
-    return divides
+                cell_offset = _cell_len(word)
+
+    return break_positions
 
 
 if __name__ == "__main__":  # pragma: no cover
@@ -53,4 +83,11 @@ if __name__ == "__main__":  # pragma: no cover
 
     console = Console(width=10)
     console.print("12345 abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ 12345")
-    print(chop_cells("abcdefghijklmnopqrstuvwxyz", 10, position=2))
+    print(chop_cells("abcdefghijklmnopqrstuvwxyz", 10))
+
+    console = Console(width=20)
+    console.rule()
+    console.print("TextualはPythonの高速アプリケーション開発フレームワークです")
+
+    console.rule()
+    console.print("アプリケーションは1670万色を使用でき")
diff --git a/rich/cells.py b/rich/cells.py
index 31d0c4cb..f85f928f 100644
--- a/rich/cells.py
+++ b/rich/cells.py
@@ -1,6 +1,8 @@
+from __future__ import annotations
+
 import re
 from functools import lru_cache
-from typing import Callable, List
+from typing import Callable
 
 from ._cell_widths import CELL_WIDTHS
 
@@ -119,27 +121,39 @@ def set_cell_size(text: str, total: int) -> str:
             start = pos
 
 
-# TODO: This is inefficient
-# TODO: This might not work with CWJ type characters
-def chop_cells(text: str, max_size: int, position: int = 0) -> List[str]:
-    """Break text in to equal (cell) length strings, returning the characters in reverse
-    order"""
-    _get_character_cell_size = get_character_cell_size
-    characters = [
-        (character, _get_character_cell_size(character)) for character in text
-    ]
-    total_size = position
-    lines: List[List[str]] = [[]]
-    append = lines[-1].append
+def chop_cells(
+    text: str,
+    width: int,
+) -> list[str]:
+    """Split text into lines such that each line fits within the available (cell) width.
 
-    for character, size in reversed(characters):
-        if total_size + size > max_size:
-            lines.append([character])
-            append = lines[-1].append
-            total_size = size
+    Args:
+        text: The text to fold such that it fits in the given width.
+        width: The width available (number of cells).
+
+    Returns:
+        A list of strings such that each string in the list has cell width
+        less than or equal to the available width.
+    """
+    _get_character_cell_size = get_character_cell_size
+    lines: list[list[str]] = [[]]
+
+    append_new_line = lines.append
+    append_to_last_line = lines[-1].append
+
+    total_width = 0
+
+    for character in text:
+        cell_width = _get_character_cell_size(character)
+        char_doesnt_fit = total_width + cell_width > width
+
+        if char_doesnt_fit:
+            append_new_line([character])
+            append_to_last_line = lines[-1].append
+            total_width = cell_width
         else:
-            total_size += size
-            append(character)
+            append_to_last_line(character)
+            total_width += cell_width
 
     return ["".join(line) for line in lines]
 
diff --git a/tests/test_cells.py b/tests/test_cells.py
index 50fa5219..fe451fa5 100644
--- a/tests/test_cells.py
+++ b/tests/test_cells.py
@@ -1,4 +1,5 @@
 from rich import cells
+from rich.cells import chop_cells
 
 
 def test_cell_len_long_string():
@@ -40,3 +41,21 @@ def test_set_cell_size_infinite():
             )
             == size
         )
+
+
+def test_chop_cells():
+    """Simple example of splitting cells into lines of width 3."""
+    text = "abcdefghijk"
+    assert chop_cells(text, 3) == ["abc", "def", "ghi", "jk"]
+
+
+def test_chop_cells_double_width_boundary():
+    """The available width lies within a double-width character."""
+    text = "ありがとう"
+    assert chop_cells(text, 3) == ["あ", "り", "が", "と", "う"]
+
+
+def test_chop_cells_mixed_width():
+    """Mixed single and double-width characters."""
+    text = "あ1り234が5と6う78"
+    assert chop_cells(text, 3) == ["あ1", "り2", "34", "が5", "と6", "う7", "8"]
diff --git a/tests/test_text.py b/tests/test_text.py
index 22c34e44..18c91cff 100644
--- a/tests/test_text.py
+++ b/tests/test_text.py
@@ -449,6 +449,20 @@ def test_wrap_cjk_width_mid_character():
     ]
 
 
+def test_wrap_cjk_mixed():
+    """Regression test covering https://github.com/Textualize/rich/issues/3176 and
+    https://github.com/Textualize/textual/issues/3567 - double width characters could
+    result in text going missing when wrapping."""
+    text = Text("123ありがとうございました")
+    console = Console(width=20)  # let's ensure the width passed to wrap() wins.
+
+    wrapped_lines = text.wrap(console, width=8)
+    with console.capture() as capture:
+        console.print(wrapped_lines)
+
+    assert capture.get() == "123あり\nがとうご\nざいまし\nた\n"
+
+
 def test_wrap_long():
     text = Text("abracadabra", justify="left")
     lines = text.wrap(Console(), 4)
@@ -497,6 +511,47 @@ def test_wrap_long_words_2():
     ]
 
 
+def test_wrap_long_words_followed_by_other_words():
+    """After folding a word across multiple lines, we should continue from
+    the next word immediately after the folded word (don't take a newline
+    following completion of the folded word)."""
+    text = Text("123 12345678 123 123")
+    lines = text.wrap(Console(), 6)
+    assert lines._lines == [
+        Text("123 "),
+        Text("123456"),
+        Text("78 123"),
+        Text("123"),
+    ]
+
+
+def test_wrap_long_word_preceeded_by_word_of_full_line_length():
+    """The width of the first word is the same as the available width.
+    Ensures that folding works correctly when there's no space available
+    on the current line."""
+    text = Text("123456 12345678 123 123")
+    lines = text.wrap(Console(), 6)
+    assert lines._lines == [
+        Text("123456"),
+        Text("123456"),
+        Text("78 123"),
+        Text("123"),
+    ]
+
+
+def test_wrap_multiple_consecutive_spaces():
+    """Adding multiple consecutive spaces at the end of a line does not impact
+    the location at which a break will be added during the process of wrapping."""
+    text = Text("123456    12345678 123 123")
+    lines = text.wrap(Console(), 6)
+    assert lines._lines == [
+        Text("123456"),
+        Text("123456"),
+        Text("78 123"),
+        Text("123"),
+    ]
+
+
 def test_wrap_long_words_justify_left():
     text = Text("X 123456789", justify="left")
     lines = text.wrap(Console(), 4)
@@ -508,6 +563,17 @@ def test_wrap_long_words_justify_left():
     assert lines[3] == Text("9   ")
 
 
+def test_wrap_leading_and_trailing_whitespace():
+    text = Text("   123  456 789   ")
+    lines = text.wrap(Console(), 4)
+    assert lines._lines == [
+        Text("   1"),
+        Text("23  "),
+        Text("456 "),
+        Text("789 "),
+    ]
+
+
 def test_no_wrap_no_crop():
     text = Text("Hello World!" * 3)