Skip to content

Commit 953cfe5

Browse files
committed
Add grapheme clustering support for cursor movement
**Problem**: Test sequence (copy and paste into any REPL/edit area): 👨‍👩‍👧 👩‍❤‍👨 👩‍💻👋🏿 ❤️⭐ 🇯🇵🇩🇪 café niño Åộ 中文!. Moving the cursor over and around emojis gets strange, and insertions become chaotic. The cursor position becomes indeterminate (even negative!), and the input result becomes more corrupted, compounded by user confusion as redraws become corrupted as well. This is briefly described in #274 by @jonathanslenders: > Notice that it still requires multiple cursor movements (left/right arrow) to move across these characters. **Solution**: Close #274 "Handle decomposed unicode characters" (2018) through careful integration of two new functions, [wcwidth.iter_graphemes](https://wcwidth.readthedocs.io/en/latest/intro.html#iter-graphemes) and [wcwidth.grapheme_boundary_before](https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.grapheme_boundary_before). Status: getting there; working on a PTY test suite. I don't feel comfortable changing so much code for a large library without also including more detailed tests -- I keep fixing all errors with TDD/automatic tests, but when using it interactively, the cursor position is out of control.
1 parent c7c629c commit 953cfe5

File tree

15 files changed

+742
-82
lines changed

15 files changed

+742
-82
lines changed

.github/workflows/test.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ jobs:
3333
- name: Unit test
3434
run: |
3535
uvx --with . --with pytest coverage run -m pytest tests/
36+
uvx coverage combine
37+
uvx coverage report
3638
- name: Type Checking
3739
if: ${{ matrix.python-version != '3.8' }}
3840
run: |

pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ classifiers = [
2222
]
2323
requires-python = ">=3.8"
2424
dependencies = [
25-
"wcwidth>=0.1.4",
25+
"wcwidth>=0.5.1",
2626
]
2727

2828
[project.urls]
@@ -118,6 +118,10 @@ warn_return_any = true
118118
warn_unused_configs = true
119119
warn_unused_ignores = true
120120

121+
[tool.coverage.run]
122+
source = ["src/prompt_toolkit"]
123+
parallel = true
124+
121125
[build-system]
122126
requires = ["setuptools>=68"]
123127
build-backend = "setuptools.build_meta"

src/prompt_toolkit/buffer.py

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
from functools import wraps
1919
from typing import Any, Callable, Coroutine, Iterable, TypeVar, cast
2020

21+
import wcwidth
22+
2123
from .application.current import get_app
2224
from .application.run_in_terminal import run_in_terminal
2325
from .auto_suggest import AutoSuggest, Suggestion
@@ -764,20 +766,24 @@ def auto_down(
764766

765767
def delete_before_cursor(self, count: int = 1) -> str:
766768
"""
767-
Delete specified number of characters before cursor and return the
768-
deleted text.
769+
Delete specified number of grapheme clusters before cursor and return
770+
the deleted text.
769771
"""
770772
assert count >= 0
771773
deleted = ""
772774

773775
if self.cursor_position > 0:
774-
deleted = self.text[self.cursor_position - count : self.cursor_position]
775-
776-
new_text = (
777-
self.text[: self.cursor_position - count]
778-
+ self.text[self.cursor_position :]
779-
)
780-
new_cursor_position = self.cursor_position - len(deleted)
776+
# Find position after deleting `count` grapheme clusters.
777+
# Loop is required since grapheme clusters have variable length.
778+
pos = self.cursor_position
779+
for _ in range(count):
780+
if pos <= 0:
781+
break
782+
pos = wcwidth.grapheme_boundary_before(self.text, pos)
783+
784+
deleted = self.text[pos:self.cursor_position]
785+
new_text = self.text[:pos] + self.text[self.cursor_position :]
786+
new_cursor_position = pos
781787

782788
# Set new Document atomically.
783789
self.document = Document(new_text, new_cursor_position)
@@ -786,14 +792,19 @@ def delete_before_cursor(self, count: int = 1) -> str:
786792

787793
def delete(self, count: int = 1) -> str:
788794
"""
789-
Delete specified number of characters and Return the deleted text.
795+
Delete specified number of grapheme clusters and return the deleted text.
790796
"""
791797
if self.cursor_position < len(self.text):
792-
deleted = self.document.text_after_cursor[:count]
793-
self.text = (
794-
self.text[: self.cursor_position]
795-
+ self.text[self.cursor_position + len(deleted) :]
796-
)
798+
# Find position after `count` grapheme clusters.
799+
text_after = self.text[self.cursor_position :]
800+
pos = 0
801+
for i, grapheme in enumerate(wcwidth.iter_graphemes(text_after)):
802+
if i >= count:
803+
break
804+
pos += len(grapheme)
805+
806+
deleted = text_after[:pos]
807+
self.text = self.text[: self.cursor_position] + text_after[pos:]
797808
return deleted
798809
else:
799810
return ""

src/prompt_toolkit/document.py

Lines changed: 69 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import weakref
1111
from typing import Callable, Dict, Iterable, List, NoReturn, Pattern, cast
1212

13+
import wcwidth
14+
1315
from .clipboard import ClipboardData
1416
from .filters import vi_mode
1517
from .selection import PasteMode, SelectionState, SelectionType
@@ -158,13 +160,49 @@ def selection(self) -> SelectionState | None:
158160

159161
@property
160162
def current_char(self) -> str:
161-
"""Return character under cursor or an empty string."""
162-
return self._get_char_relative_to_cursor(0) or ""
163+
"""
164+
Return grapheme cluster at cursor position, or empty string at end.
165+
166+
Note: Returns a grapheme cluster which may contain multiple code points.
167+
If cursor is inside a grapheme cluster (e.g., on a combining character),
168+
returns the complete grapheme containing the cursor.
169+
"""
170+
if self.cursor_position >= len(self.text):
171+
return ""
172+
grapheme_start = wcwidth.grapheme_boundary_before(
173+
self.text, self.cursor_position + 1
174+
)
175+
for g in wcwidth.iter_graphemes(self.text[grapheme_start:]):
176+
return g
177+
return ""
163178

164179
@property
165180
def char_before_cursor(self) -> str:
166-
"""Return character before the cursor or an empty string."""
167-
return self._get_char_relative_to_cursor(-1) or ""
181+
"""
182+
Return grapheme cluster before the cursor, or empty string at start.
183+
184+
Note: Returns a grapheme cluster which may contain multiple code points.
185+
If cursor is inside a grapheme cluster (e.g., on a combining character),
186+
returns the grapheme before the one containing the cursor.
187+
"""
188+
if self.cursor_position == 0:
189+
return ""
190+
191+
text = self.text
192+
cursor = self.cursor_position
193+
194+
# Find reference point: cursor position or start of containing grapheme.
195+
if cursor >= len(text):
196+
reference = len(text)
197+
else:
198+
grapheme_start = wcwidth.grapheme_boundary_before(text, cursor + 1)
199+
reference = grapheme_start if grapheme_start < cursor else cursor
200+
201+
if reference == 0:
202+
return ""
203+
204+
prev_start = wcwidth.grapheme_boundary_before(text, reference)
205+
return text[prev_start:reference]
168206

169207
@property
170208
def text_before_cursor(self) -> str:
@@ -251,15 +289,6 @@ def leading_whitespace_in_current_line(self) -> str:
251289
length = len(current_line) - len(current_line.lstrip())
252290
return current_line[:length]
253291

254-
def _get_char_relative_to_cursor(self, offset: int = 0) -> str:
255-
"""
256-
Return character relative to cursor position, or empty string
257-
"""
258-
try:
259-
return self.text[self.cursor_position + offset]
260-
except IndexError:
261-
return ""
262-
263292
@property
264293
def on_first_line(self) -> bool:
265294
"""
@@ -692,21 +721,44 @@ def find_previous_matching_line(
692721

693722
def get_cursor_left_position(self, count: int = 1) -> int:
694723
"""
695-
Relative position for cursor left.
724+
Relative position for cursor left (grapheme cluster aware).
696725
"""
697726
if count < 0:
698727
return self.get_cursor_right_position(-count)
699728

700-
return -min(self.cursor_position_col, count)
729+
line_before = self.current_line_before_cursor
730+
if not line_before:
731+
return 0
732+
733+
pos = len(line_before)
734+
for _ in range(count):
735+
if pos <= 0:
736+
break
737+
new_pos = wcwidth.grapheme_boundary_before(line_before, pos)
738+
if new_pos == pos:
739+
break
740+
pos = new_pos
741+
742+
return pos - len(line_before)
701743

702744
def get_cursor_right_position(self, count: int = 1) -> int:
703745
"""
704-
Relative position for cursor_right.
746+
Relative position for cursor right (grapheme cluster aware).
705747
"""
706748
if count < 0:
707749
return self.get_cursor_left_position(-count)
708750

709-
return min(count, len(self.current_line_after_cursor))
751+
line_after = self.current_line_after_cursor
752+
if not line_after:
753+
return 0
754+
755+
pos = 0
756+
for i, grapheme in enumerate(wcwidth.iter_graphemes(line_after)):
757+
if i >= count:
758+
break
759+
pos += len(grapheme)
760+
761+
return pos
710762

711763
def get_cursor_up_position(
712764
self, count: int = 1, preferred_column: int | None = None

src/prompt_toolkit/formatted_text/utils.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from typing import Iterable, cast
1111

12-
from prompt_toolkit.utils import get_cwidth
12+
import wcwidth
1313

1414
from .base import (
1515
AnyFormattedText,
@@ -48,17 +48,15 @@ def fragment_list_len(fragments: StyleAndTextTuples) -> int:
4848
def fragment_list_width(fragments: StyleAndTextTuples) -> int:
4949
"""
5050
Return the character width of this text fragment list.
51-
(Take double width characters into account.)
51+
(Take double width characters and grapheme clusters into account.)
5252
5353
:param fragments: List of ``(style_str, text)`` or
5454
``(style_str, text, mouse_handler)`` tuples.
5555
"""
56-
ZeroWidthEscape = "[ZeroWidthEscape]"
5756
return sum(
58-
get_cwidth(c)
57+
wcwidth.width(item[1], control_codes="ignore")
5958
for item in fragments
60-
for c in item[1]
61-
if ZeroWidthEscape not in item[0]
59+
if "[ZeroWidthEscape]" not in item[0]
6260
)
6361

6462

src/prompt_toolkit/layout/containers.py

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from functools import partial
1111
from typing import TYPE_CHECKING, Callable, Sequence, Union, cast
1212

13+
import wcwidth
14+
1315
from prompt_toolkit.application.current import get_app
1416
from prompt_toolkit.cache import SimpleCache
1517
from prompt_toolkit.data_structures import Point
@@ -2014,7 +2016,7 @@ def copy_line(
20142016
new_screen.zero_width_escapes[y + ypos][x + xpos] += text
20152017
continue
20162018

2017-
for c in text:
2019+
for c in wcwidth.iter_graphemes(text):
20182020
char = _CHAR_CACHE[c, style]
20192021
char_width = char.width
20202022

@@ -2052,26 +2054,7 @@ def copy_line(
20522054
for i in range(1, char_width):
20532055
new_buffer_row[x + xpos + i] = empty_char
20542056

2055-
# If this is a zero width characters, then it's
2056-
# probably part of a decomposed unicode character.
2057-
# See: https://en.wikipedia.org/wiki/Unicode_equivalence
2058-
# Merge it in the previous cell.
2059-
elif char_width == 0:
2060-
# Handle all character widths. If the previous
2061-
# character is a multiwidth character, then
2062-
# merge it two positions back.
2063-
for pw in [2, 1]: # Previous character width.
2064-
if (
2065-
x - pw >= 0
2066-
and new_buffer_row[x + xpos - pw].width == pw
2067-
):
2068-
prev_char = new_buffer_row[x + xpos - pw]
2069-
char2 = _CHAR_CACHE[
2070-
prev_char.char + c, prev_char.style
2071-
]
2072-
new_buffer_row[x + xpos - pw] = char2
2073-
2074-
# Keep track of write position for each character.
2057+
# Keep track of write position for each grapheme.
20752058
current_rowcol_to_yx[lineno, col + skipped] = (
20762059
y + ypos,
20772060
x + xpos,

src/prompt_toolkit/layout/controls.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
from abc import ABCMeta, abstractmethod
99
from typing import TYPE_CHECKING, Callable, Hashable, Iterable, NamedTuple
1010

11+
import wcwidth
12+
1113
from prompt_toolkit.application.current import get_app
1214
from prompt_toolkit.buffer import Buffer
1315
from prompt_toolkit.cache import SimpleCache
@@ -674,29 +676,45 @@ def transform(
674676
) -> _ProcessedLine:
675677
"Transform the fragments for a given line number."
676678

677-
# Get cursor position at this line.
678-
def source_to_display(i: int) -> int:
679-
"""X position from the buffer to the x position in the
680-
processed fragment list. By default, we start from the 'identity'
681-
operation."""
682-
return i
679+
# Build code point to grapheme index mapping for cursor positioning.
680+
line_text = fragment_list_to_text(fragments)
681+
codepoint_to_grapheme: dict[int, int] = {}
682+
grapheme_idx = 0
683+
codepoint_idx = 0
684+
for grapheme in wcwidth.iter_graphemes(line_text):
685+
for _ in grapheme:
686+
codepoint_to_grapheme[codepoint_idx] = grapheme_idx
687+
codepoint_idx += 1
688+
grapheme_idx += 1
689+
690+
def grapheme_source_to_display(i: int) -> int:
691+
"""Map code point index to grapheme index."""
692+
if i >= codepoint_idx:
693+
return grapheme_idx + (i - codepoint_idx)
694+
return codepoint_to_grapheme.get(i, grapheme_idx)
683695

684696
transformation = merged_processor.apply_transformation(
685697
TransformationInput(
686698
self,
687699
document,
688700
lineno,
689-
source_to_display,
701+
grapheme_source_to_display,
690702
fragments,
691703
width,
692704
height,
693705
get_line,
694706
)
695707
)
696708

709+
# Compose grapheme mapping with processor transformations.
710+
proc_s2d = transformation.source_to_display
711+
712+
def final_source_to_display(i: int) -> int:
713+
return proc_s2d(grapheme_source_to_display(i))
714+
697715
return _ProcessedLine(
698716
transformation.fragments,
699-
transformation.source_to_display,
717+
final_source_to_display,
700718
transformation.display_to_source,
701719
)
702720

src/prompt_toolkit/layout/utils.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from typing import TYPE_CHECKING, Iterable, List, TypeVar, cast, overload
44

5+
import wcwidth
6+
57
from prompt_toolkit.formatted_text.base import OneStyleAndTextTuple
68

79
if TYPE_CHECKING:
@@ -60,7 +62,7 @@ def __setitem__(
6062
def explode_text_fragments(fragments: Iterable[_T]) -> _ExplodedList[_T]:
6163
"""
6264
Turn a list of (style_str, text) tuples into another list where each string is
63-
exactly one character.
65+
exactly one grapheme cluster.
6466
6567
It should be fine to call this function several times. Calling this on a
6668
list that is already exploded, is a null operation.
@@ -74,7 +76,7 @@ def explode_text_fragments(fragments: Iterable[_T]) -> _ExplodedList[_T]:
7476
result: list[_T] = []
7577

7678
for style, string, *rest in fragments:
77-
for c in string:
78-
result.append((style, c, *rest)) # type: ignore
79+
for grapheme in wcwidth.iter_graphemes(string):
80+
result.append((style, grapheme, *rest)) # type: ignore
7981

8082
return _ExplodedList(result)

0 commit comments

Comments
(0)