Skip to content

Commit

Permalink
Initial work on fixing some text tool issues with Unicode characters.
Browse files Browse the repository at this point in the history
The code currently assumes that each char is a single visible character, which is not the case for surrogate pairs, combining sequences, etc.

This change updates the left / right key handlers to use the StringInfo class to advance through text elements rather than chars. The other key handlers will need similar changes.

Also adjusted the naming of IndexToPosition() and related methods to make it clear that they are dealing with UTF8 byte indices, not (UTF16) char indices or text element indices.

Bug: 1422445
  • Loading branch information
cameronwhite committed Dec 28, 2020
1 parent f5be9d4 commit 1c57a84
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 31 deletions.
67 changes: 46 additions & 21 deletions Pinta.Core/Classes/Re-editable/Text/TextEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
using Pinta.Core;
using System.Reflection;
using System.Linq;
using System.Globalization;

namespace Pinta.Core
{
Expand Down Expand Up @@ -241,13 +242,14 @@ public void PerformLeft (bool control, bool shift)
}

// Move caret to the left, or to the previous line
if (currentPos.Offset > 0)
currentPos.Offset--;
else if (currentPos.Offset == 0 && currentPos.Line > 0) {
if (currentPos.Offset > 0) {
var currentLine = lines[currentPos.Line];
FindTextElementIndex (currentLine, currentPos.Offset, out var elements, out var elementIndex);
currentPos.Offset = elements[elementIndex - 1];
} else if (currentPos.Offset == 0 && currentPos.Line > 0) {
currentPos.Line--;
currentPos.Offset = lines[currentPos.Line].Length;
} else
return;
}

if (!shift)
ClearSelection ();
Expand Down Expand Up @@ -296,14 +298,19 @@ public void PerformRight (bool control, bool shift)
return;
}

// Move caret to the right, or to the next line
if (currentPos.Offset < lines[currentPos.Line].Length) {
currentPos.Offset++;
} else if (currentPos.Offset == lines[currentPos.Line].Length && currentPos.Line < lines.Count - 1) {
var currentLine = lines[currentPos.Line];
if (currentPos.Offset < currentLine.Length) {
FindTextElementIndex (currentLine, currentPos.Offset, out var elements, out var elementIndex);
if (elementIndex < elements.Length - 1)
currentPos.Offset = elements[elementIndex + 1];
else
currentPos.Offset = currentLine.Length;

} else if (currentPos.Offset == currentLine.Length && currentPos.Line < lines.Count - 1) {
currentPos.Line++;
currentPos.Offset = 0;
} else
return;

}

if (!shift)
ClearSelection ();
Expand Down Expand Up @@ -481,52 +488,52 @@ private void ForeachLine (TextPosition start, TextPosition end, Action action)
action (start.Line, start.Offset, end.Offset);
}

public TextPosition IndexToPosition (int index)
public TextPosition UTF8IndexToPosition (int index)
{
int current = 0;
int line = 0;
int offset = 0;

foreach (string s in lines) {
// It's past this line, move along
if (current + StringToByteSize (s) < index) {
current += StringToByteSize (s) + 1;
if (current + StringToUTF8Size (s) < index) {
current += StringToUTF8Size (s) + 1;
line++;
continue;
}

// It's in this line
offset = index - current;
offset = ByteOffsetToCharacterOffset (lines[line], offset);
offset = UTF8OffsetToCharacterOffset (lines[line], offset);
return new TextPosition (line, offset);
}

// It's below all of our lines, return the end of the last line
return new TextPosition (lines.Count - 1, lines[lines.Count - 1].Length);
}

public int PositionToIndex (TextPosition p)
public int PositionToUTF8Index (TextPosition p)
{
int index = 0;

for (int i = 0; i < p.Line; i++)
index += StringToByteSize (lines[i]) + 1;
index += StringToUTF8Size (lines[i]) + 1;

index += StringToByteSize (lines[p.Line].Substring (0, p.Offset));
index += StringToUTF8Size (lines[p.Line].Substring (0, p.Offset));
return index;
}

private int StringToByteSize (string s)
private int StringToUTF8Size (string s)
{
System.Text.UTF8Encoding enc = new System.Text.UTF8Encoding ();
return (enc.GetBytes (s)).Length;
}

private int ByteOffsetToCharacterOffset (string s, int offset)
private int UTF8OffsetToCharacterOffset (string s, int offset)
{
int i = 0;
for (i = 0; i < offset; i++) {
if (StringToByteSize (s.Substring (0, i)) >= offset) break;
if (StringToUTF8Size (s.Substring (0, i)) >= offset) break;
}
return i;
}
Expand Down Expand Up @@ -590,6 +597,24 @@ private void ClearSelection ()
selectionStart = currentPos;
}

/// <summary>
/// Splits a string into text elements and locates the element that starts at a given char index.
/// </summary>
/// <param name="s">The text to split into text elements.</param>
/// <param name="charIndex">
/// A char index into <paramref name="s"/>. It may equal the length of <paramref name="s"/>,
/// which denotes the position just past the last character.
/// </param>
/// <param name="elements">Receives the char index at which each text element of <paramref name="s"/> begins.</param>
/// <param name="elementIndex">
/// Receives the index within <paramref name="elements"/> of the element starting at
/// <paramref name="charIndex"/>, or the length of <paramref name="elements"/> when
/// <paramref name="charIndex"/> equals the length of <paramref name="s"/>.
/// </param>
/// <exception cref="InvalidOperationException">
/// <paramref name="charIndex"/> is neither the start of a text element nor the end of the string.
/// </exception>
private static void FindTextElementIndex (string s, int charIndex, out int[] elements, out int elementIndex)
{
	elements = StringInfo.ParseCombiningCharacters (s);

	// A caret sitting just past the final character is legal; report it as one past the last element.
	if (charIndex == s.Length) {
		elementIndex = elements.Length;
		return;
	}

	// Any other position must coincide exactly with the first char of some text element.
	elementIndex = Array.IndexOf (elements, charIndex);
	if (elementIndex < 0)
		throw new InvalidOperationException ("Text position is not at the beginning of a text element");
}

#endregion
}
}
6 changes: 3 additions & 3 deletions Pinta.Core/Classes/Re-editable/Text/TextLayout.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public Rectangle GetCursorLocation ()
{
Pango.Rectangle weak, strong;

int index = engine.PositionToIndex (engine.CurrentPosition);
int index = engine.PositionToUTF8Index (engine.CurrentPosition);

Layout.GetCursorPos (index, out strong, out weak);

Expand Down Expand Up @@ -108,12 +108,12 @@ public TextPosition PointToTextPosition (Point point)

Layout.XyToIndex (x, y, out index, out trailing);

return engine.IndexToPosition (index + trailing);
return engine.UTF8IndexToPosition (index + trailing);
}

public Point TextPositionToPoint (TextPosition p)
{
int index = engine.PositionToIndex (p);
int index = engine.PositionToUTF8Index (p);

var rect = Layout.IndexToPos (index);

Expand Down
27 changes: 20 additions & 7 deletions tests/Pinta.Core.Tests/TextEngineTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,20 +61,33 @@ public void DeleteSelection ()
}

[Test]
public void PerformLeft ()
public void PerformLeftRight ()
{
var engine = new TextEngine (new List<string> () { "foo", "bar" });
// The string below contains combining characters, so there are fewer text elements than chars.
var engine = new TextEngine (new List<string> () { "a\u0304\u0308bc\u0327", "c\u0327ba\u0304\u0308" });

engine.SetCursorPosition (new TextPosition (1, 0), true);
engine.PerformLeft (false, false);
Assert.AreEqual (new TextPosition (0, 3), engine.CurrentPosition);
engine.SetCursorPosition (new TextPosition (0, 3), true);
engine.PerformRight (false, false);
Assert.AreEqual (new TextPosition (0, 4), engine.CurrentPosition);
engine.PerformRight (false, false);
Assert.AreEqual (new TextPosition (0, 6), engine.CurrentPosition);
engine.PerformRight (false, false);
engine.PerformRight (false, false);
Assert.AreEqual (new TextPosition (1, 2), engine.CurrentPosition);

engine.SetCursorPosition (new TextPosition (0, 1), true);
engine.PerformLeft (false, false);
Assert.AreEqual (new TextPosition (0, 0), engine.CurrentPosition);
Assert.AreEqual (new TextPosition (1, 0), engine.CurrentPosition);
engine.PerformLeft (false, false);
Assert.AreEqual (new TextPosition (0, 6), engine.CurrentPosition);

// Should stay at the beginning / end when attempting to advance further.
engine.SetCursorPosition (new TextPosition (0, 0), true);
engine.PerformLeft (false, false);
Assert.AreEqual (new TextPosition (0, 0), engine.CurrentPosition);

engine.SetCursorPosition (new TextPosition (1, 6), true);
engine.PerformRight (false, false);
Assert.AreEqual (new TextPosition (1, 6), engine.CurrentPosition);
}
}
}

0 comments on commit 1c57a84

Please sign in to comment.