From b735e58920c9da01ffab1d1508d26519ff394c4f Mon Sep 17 00:00:00 2001
From: udaken
Date: Fri, 3 Jul 2020 20:14:23 +0900
Subject: [PATCH] Add Utf16ValueStringBuilder.Replace()

---
 .../Benchmarks/ReplaceBenchmark.cs            | 196 ++++++++++++++++++
 .../ZString/Utf16ValueStringBuilder.cs        | 149 +++++++++++++
 src/ZString/Utf16ValueStringBuilder.cs        | 149 +++++++++++++
 tests/ZString.Tests/ReplaceTest.cs            |  76 +++++++
 4 files changed, 570 insertions(+)
 create mode 100644 sandbox/PerfBenchmark/Benchmarks/ReplaceBenchmark.cs
 create mode 100644 tests/ZString.Tests/ReplaceTest.cs

diff --git a/sandbox/PerfBenchmark/Benchmarks/ReplaceBenchmark.cs b/sandbox/PerfBenchmark/Benchmarks/ReplaceBenchmark.cs
new file mode 100644
index 0000000..ceed26b
--- /dev/null
+++ b/sandbox/PerfBenchmark/Benchmarks/ReplaceBenchmark.cs
@@ -0,0 +1,196 @@
+using BenchmarkDotNet.Attributes;
+using Cysharp.Text;
+using System;
+using System.Collections.Generic;
+using System.Text;
+using System.Text.Formatting;
+
+namespace PerfBenchmark.Benchmarks
+{
+    /// <summary>
+    /// Compares <c>Replace</c> on <see cref="StringBuilder"/> with <c>Replace</c> on the ZString builder.
+    /// Each Z-prefixed benchmark is the ZString counterpart of the benchmark with the same name without the prefix.
+    /// </summary>
+    [Config(typeof(BenchmarkConfig))]
+    public class ReplaceBenchmark
+    {
+        StringBuilder bcl;
+
+        string text = "The quick brown fox jumped over the lazy dogs.";
+        string largeText;
+
+        string guid = Guid.NewGuid().ToString();
+
+        readonly string[] csharpKeywords =
+        {
+            "abstract",
+            "as",
+            "async",
+            "await",
+            "base",
+            "bool",
+            "break",
+            "byte",
+            "case",
+            "catch",
+            "char",
+            "checked",
+            "class",
+            "const",
+            "continue",
+            "decimal",
+            "default",
+            "delegate",
+            "do",
+            "double",
+            "else",
+            "enum",
+            "event",
+            "explicit",
+            "extern",
+            "false",
+            "finally",
+            "fixed",
+            "float",
+            "for",
+            "foreach",
+            "goto",
+            "if",
+            "implicit",
+            "in",
+            "int",
+            "interface",
+            "internal",
+            "is",
+            "lock",
+            "long",
+            "namespace",
+            "new",
+            "null",
+            "object",
+            "operator",
+            "out",
+            "override",
+            "params",
+            "private",
+            "protected",
+            "public",
+            "readonly",
+            "ref",
+            "return",
+            "sbyte",
+            "sealed",
+            "short",
+            "sizeof",
+            "stackalloc",
+            "static",
+            "string",
+            "struct",
+            "switch",
+            "this",
+            "throw",
+            "true",
+            "try",
+            "typeof",
+            "uint",
+            "ulong",
+            "unchecked",
+            "unsafe",
+            "ushort",
+            "using",
+            "virtual",
+            "volatile",
+            "void",
+            "while",
+        };
+
+        // CallerFilePath makes the compiler supply the path of the calling source file, i.e. this file.
+        private static string GetThisFilePath([System.Runtime.CompilerServices.CallerFilePath] string path = null) => path;
+
+        public ReplaceBenchmark()
+        {
+            bcl = new StringBuilder();
+            largeText = System.IO.File.ReadAllText(GetThisFilePath()); //read this file
+            if (largeText.Length < 2048)
+                throw new InvalidOperationException("The benchmark source file is too small to serve as the large input.");
+        }
+
+        [Benchmark]
+        public int ReplaceChar()
+        {
+            bcl.Clear();
+            return bcl.Append(text).Replace(' ', '\n').Length;
+        }
+
+        [Benchmark]
+        public int ZReplaceChar()
+        {
+            using var zsb = ZString.CreateStringBuilder(true);
+            zsb.Append(text);
+            zsb.Replace(' ', '\n');
+            return zsb.Length; // Use Length to avoid omitting it
+        }
+
+        [Benchmark]
+        public int ReplaceString()
+        {
+            bcl.Clear();
+            return bcl.Append(text).Replace(" ", "\r\n").Length; // Use Length to avoid omitting it
+        }
+
+        [Benchmark]
+        public int ZReplaceString()
+        {
+            using var zsb = ZString.CreateStringBuilder(true);
+            zsb.Append(text);
+            zsb.Replace(" ", "\r\n");
+            return zsb.Length; // Use Length to avoid omitting it
+        }
+
+        [Benchmark]
+        public int NotReplaced()
+        {
+            bcl.Clear();
+            bcl.Append(largeText);
+            bcl.Replace(guid, "XXXXXX"); // GUID value should not be included in this file.
+            return bcl.Length; // Use Length to avoid omitting it
+        }
+
+        [Benchmark]
+        public int ZNotReplaced()
+        {
+            using var zsb = ZString.CreateStringBuilder(true);
+            // Use the same large input as NotReplaced so both benchmarks measure the same work.
+            zsb.Append(largeText);
+            zsb.Replace(guid, "XXXXXX"); // GUID value should not be included in this file.
+            return zsb.Length; // Use Length to avoid omitting it
+        }
+
+        [Benchmark]
+        public int ManyTimesReplace()
+        {
+            bcl.Clear();
+            bcl.Append(largeText);
+            // remove all keywords
+            foreach (var keyword in csharpKeywords)
+            {
+                bcl.Replace(keyword, "");
+            }
+            return bcl.Length; // Use Length to avoid omitting it
+        }
+
+        [Benchmark]
+        public int ZManyTimesReplace()
+        {
+            using var zsb = ZString.CreateStringBuilder(true);
+            // Use the same large input as ManyTimesReplace so both benchmarks measure the same work.
+            zsb.Append(largeText);
+            // remove all keywords
+            foreach (var keyword in csharpKeywords)
+            {
+                zsb.Replace(keyword, "");
+            }
+            return zsb.Length; // Use Length to avoid omitting it
+        }
+    }
+}
diff --git a/src/ZString.Unity/Assets/Scripts/ZString/Utf16ValueStringBuilder.cs b/src/ZString.Unity/Assets/Scripts/ZString/Utf16ValueStringBuilder.cs
index 3b3ff8f..9dfe664 100644
--- a/src/ZString.Unity/Assets/Scripts/ZString/Utf16ValueStringBuilder.cs
+++ b/src/ZString.Unity/Assets/Scripts/ZString/Utf16ValueStringBuilder.cs
@@ -227,6 +227,155 @@ public void AppendLine<T>(T value)
             AppendLine();
         }
 
+        /// <summary>
+        /// Replaces all instances of one character with another in this builder.
+        /// </summary>
+        /// <param name="oldChar">The character to replace.</param>
+        /// <param name="newChar">The character to replace <paramref name="oldChar"/> with.</param>
+        public void Replace(char oldChar, char newChar) => Replace(oldChar, newChar, 0, Length);
+
+        /// <summary>
+        /// Replaces all instances of one character with another in part of this builder.
+        /// </summary>
+        /// <param name="oldChar">The character to replace.</param>
+        /// <param name="newChar">The character to replace <paramref name="oldChar"/> with.</param>
+        /// <param name="startIndex">The index to start in this builder.</param>
+        /// <param name="count">The number of characters to read in this builder.</param>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="startIndex"/> or <paramref name="count"/> does not describe a range inside this builder.
+        /// </exception>
+        public void Replace(char oldChar, char newChar, int startIndex, int count)
+        {
+            int currentLength = Length;
+            if ((uint)startIndex > (uint)currentLength)
+            {
+                throw new ArgumentOutOfRangeException(nameof(startIndex));
+            }
+
+            if (count < 0 || startIndex > currentLength - count)
+            {
+                throw new ArgumentOutOfRangeException(nameof(count));
+            }
+
+            int endIndex = startIndex + count;
+
+            for (int i = startIndex; i < endIndex; i++)
+            {
+                if (buffer[i] == oldChar)
+                {
+                    buffer[i] = newChar;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Replaces all instances of one string with another in this builder.
+        /// </summary>
+        /// <param name="oldValue">The string to replace.</param>
+        /// <param name="newValue">The string to replace <paramref name="oldValue"/> with.</param>
+        /// <remarks>
+        /// If <paramref name="newValue"/> is <c>null</c>, instances of <paramref name="oldValue"/>
+        /// are removed from this builder.
+        /// </remarks>
+        public void Replace(string oldValue, string newValue) => Replace(oldValue, newValue, 0, Length);
+
+        /// <summary>
+        /// Replaces all instances of one string with another in part of this builder.
+        /// </summary>
+        /// <param name="oldValue">The string to replace.</param>
+        /// <param name="newValue">The string to replace <paramref name="oldValue"/> with.</param>
+        /// <param name="startIndex">The index to start in this builder.</param>
+        /// <param name="count">The number of characters to read in this builder.</param>
+        /// <remarks>
+        /// If <paramref name="newValue"/> is <c>null</c>, instances of <paramref name="oldValue"/>
+        /// are removed from this builder. Matching is ordinal; characters outside the range are kept unchanged.
+        /// </remarks>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="startIndex"/> or <paramref name="count"/> does not describe a range inside this builder.
+        /// </exception>
+        /// <exception cref="ArgumentNullException"><paramref name="oldValue"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException"><paramref name="oldValue"/> is empty.</exception>
+        public void Replace(string oldValue, string newValue, int startIndex, int count)
+        {
+            int currentLength = Length;
+
+            if ((uint)startIndex > (uint)currentLength)
+            {
+                throw new ArgumentOutOfRangeException(nameof(startIndex));
+            }
+
+            if (count < 0 || startIndex > currentLength - count)
+            {
+                throw new ArgumentOutOfRangeException(nameof(count));
+            }
+
+            if (oldValue == null)
+            {
+                throw new ArgumentNullException(nameof(oldValue));
+            }
+
+            if (oldValue.Length == 0)
+            {
+                throw new ArgumentException("oldValue.Length is 0", nameof(oldValue));
+            }
+
+            newValue = newValue ?? string.Empty;
+
+            var readOnlySpan = AsSpan();
+            int endIndex = startIndex + count;
+            int matchCount = 0;
+
+            // First pass: count the matches so the replacement buffer can be rented at its final size.
+            for (int i = startIndex; i < endIndex; i += oldValue.Length)
+            {
+                var span = readOnlySpan.Slice(i, endIndex - i);
+                var pos = span.IndexOf(oldValue.AsSpan(), StringComparison.Ordinal);
+                if (pos == -1)
+                {
+                    break;
+                }
+                i += pos;
+                matchCount++;
+            }
+
+            if (matchCount == 0)
+                return;
+
+            var newBuffer = ArrayPool<char>.Shared.Rent(Math.Max(DefaultBufferSize, Length + (newValue.Length - oldValue.Length) * matchCount));
+
+            buffer.AsSpan(0, startIndex).CopyTo(newBuffer);
+            int newBufferIndex = startIndex;
+            int readIndex = startIndex;
+
+            // Second pass: copy each unmatched run, then newValue in place of the match that follows it.
+            while (readIndex < endIndex)
+            {
+                var span = readOnlySpan.Slice(readIndex, endIndex - readIndex);
+                var pos = span.IndexOf(oldValue.AsSpan(), StringComparison.Ordinal);
+                if (pos == -1)
+                {
+                    break;
+                }
+                readOnlySpan.Slice(readIndex, pos).CopyTo(newBuffer.AsSpan(newBufferIndex));
+                newValue.AsSpan().CopyTo(newBuffer.AsSpan(newBufferIndex + pos));
+                newBufferIndex += pos + newValue.Length;
+                readIndex += pos + oldValue.Length;
+            }
+
+            // Copy everything after the last match. This must also run when the last match ends exactly
+            // at endIndex; otherwise the characters that follow the replaced range would be dropped.
+            var remain = readOnlySpan.Slice(readIndex);
+            remain.CopyTo(newBuffer.AsSpan(newBufferIndex));
+            newBufferIndex += remain.Length;
+
+            if (buffer.Length != ThreadStaticBufferSize) // NOTE(review): presumably a buffer of this size is the thread-static one and not pool-owned - confirm
+            {
+                ArrayPool<char>.Shared.Return(buffer);
+            }
+            buffer = newBuffer;
+            index = newBufferIndex;
+        }
+
         // Output
 
         /// <summary>Copy inner buffer to the destination span.</summary>
diff --git a/src/ZString/Utf16ValueStringBuilder.cs b/src/ZString/Utf16ValueStringBuilder.cs
index 3b3ff8f..9dfe664 100644
--- a/src/ZString/Utf16ValueStringBuilder.cs
+++ b/src/ZString/Utf16ValueStringBuilder.cs
@@ -227,6 +227,155 @@ public void AppendLine<T>(T value)
             AppendLine();
         }
 
+        /// <summary>
+        /// Replaces all instances of one character with another in this builder.
+        /// </summary>
+        /// <param name="oldChar">The character to replace.</param>
+        /// <param name="newChar">The character to replace <paramref name="oldChar"/> with.</param>
+        public void Replace(char oldChar, char newChar) => Replace(oldChar, newChar, 0, Length);
+
+        /// <summary>
+        /// Replaces all instances of one character with another in part of this builder.
+        /// </summary>
+        /// <param name="oldChar">The character to replace.</param>
+        /// <param name="newChar">The character to replace <paramref name="oldChar"/> with.</param>
+        /// <param name="startIndex">The index to start in this builder.</param>
+        /// <param name="count">The number of characters to read in this builder.</param>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="startIndex"/> or <paramref name="count"/> does not describe a range inside this builder.
+        /// </exception>
+        public void Replace(char oldChar, char newChar, int startIndex, int count)
+        {
+            int currentLength = Length;
+            if ((uint)startIndex > (uint)currentLength)
+            {
+                throw new ArgumentOutOfRangeException(nameof(startIndex));
+            }
+
+            if (count < 0 || startIndex > currentLength - count)
+            {
+                throw new ArgumentOutOfRangeException(nameof(count));
+            }
+
+            int endIndex = startIndex + count;
+
+            for (int i = startIndex; i < endIndex; i++)
+            {
+                if (buffer[i] == oldChar)
+                {
+                    buffer[i] = newChar;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Replaces all instances of one string with another in this builder.
+        /// </summary>
+        /// <param name="oldValue">The string to replace.</param>
+        /// <param name="newValue">The string to replace <paramref name="oldValue"/> with.</param>
+        /// <remarks>
+        /// If <paramref name="newValue"/> is <c>null</c>, instances of <paramref name="oldValue"/>
+        /// are removed from this builder.
+        /// </remarks>
+        public void Replace(string oldValue, string newValue) => Replace(oldValue, newValue, 0, Length);
+
+        /// <summary>
+        /// Replaces all instances of one string with another in part of this builder.
+        /// </summary>
+        /// <param name="oldValue">The string to replace.</param>
+        /// <param name="newValue">The string to replace <paramref name="oldValue"/> with.</param>
+        /// <param name="startIndex">The index to start in this builder.</param>
+        /// <param name="count">The number of characters to read in this builder.</param>
+        /// <remarks>
+        /// If <paramref name="newValue"/> is <c>null</c>, instances of <paramref name="oldValue"/>
+        /// are removed from this builder. Matching is ordinal; characters outside the range are kept unchanged.
+        /// </remarks>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="startIndex"/> or <paramref name="count"/> does not describe a range inside this builder.
+        /// </exception>
+        /// <exception cref="ArgumentNullException"><paramref name="oldValue"/> is <c>null</c>.</exception>
+        /// <exception cref="ArgumentException"><paramref name="oldValue"/> is empty.</exception>
+        public void Replace(string oldValue, string newValue, int startIndex, int count)
+        {
+            int currentLength = Length;
+
+            if ((uint)startIndex > (uint)currentLength)
+            {
+                throw new ArgumentOutOfRangeException(nameof(startIndex));
+            }
+
+            if (count < 0 || startIndex > currentLength - count)
+            {
+                throw new ArgumentOutOfRangeException(nameof(count));
+            }
+
+            if (oldValue == null)
+            {
+                throw new ArgumentNullException(nameof(oldValue));
+            }
+
+            if (oldValue.Length == 0)
+            {
+                throw new ArgumentException("oldValue.Length is 0", nameof(oldValue));
+            }
+
+            newValue = newValue ?? string.Empty;
+
+            var readOnlySpan = AsSpan();
+            int endIndex = startIndex + count;
+            int matchCount = 0;
+
+            // First pass: count the matches so the replacement buffer can be rented at its final size.
+            for (int i = startIndex; i < endIndex; i += oldValue.Length)
+            {
+                var span = readOnlySpan.Slice(i, endIndex - i);
+                var pos = span.IndexOf(oldValue.AsSpan(), StringComparison.Ordinal);
+                if (pos == -1)
+                {
+                    break;
+                }
+                i += pos;
+                matchCount++;
+            }
+
+            if (matchCount == 0)
+                return;
+
+            var newBuffer = ArrayPool<char>.Shared.Rent(Math.Max(DefaultBufferSize, Length + (newValue.Length - oldValue.Length) * matchCount));
+
+            buffer.AsSpan(0, startIndex).CopyTo(newBuffer);
+            int newBufferIndex = startIndex;
+            int readIndex = startIndex;
+
+            // Second pass: copy each unmatched run, then newValue in place of the match that follows it.
+            while (readIndex < endIndex)
+            {
+                var span = readOnlySpan.Slice(readIndex, endIndex - readIndex);
+                var pos = span.IndexOf(oldValue.AsSpan(), StringComparison.Ordinal);
+                if (pos == -1)
+                {
+                    break;
+                }
+                readOnlySpan.Slice(readIndex, pos).CopyTo(newBuffer.AsSpan(newBufferIndex));
+                newValue.AsSpan().CopyTo(newBuffer.AsSpan(newBufferIndex + pos));
+                newBufferIndex += pos + newValue.Length;
+                readIndex += pos + oldValue.Length;
+            }
+
+            // Copy everything after the last match. This must also run when the last match ends exactly
+            // at endIndex; otherwise the characters that follow the replaced range would be dropped.
+            var remain = readOnlySpan.Slice(readIndex);
+            remain.CopyTo(newBuffer.AsSpan(newBufferIndex));
+            newBufferIndex += remain.Length;
+
+            if (buffer.Length != ThreadStaticBufferSize) // NOTE(review): presumably a buffer of this size is the thread-static one and not pool-owned - confirm
+            {
+                ArrayPool<char>.Shared.Return(buffer);
+            }
+            buffer = newBuffer;
+            index = newBufferIndex;
+        }
+
         // Output
 
         /// <summary>Copy inner buffer to the destination span.</summary>
diff --git a/tests/ZString.Tests/ReplaceTest.cs b/tests/ZString.Tests/ReplaceTest.cs
new file mode 100644
index 0000000..8c1ead1
--- /dev/null
+++ b/tests/ZString.Tests/ReplaceTest.cs
@@ -0,0 +1,76 @@
+using Cysharp.Text;
+using FluentAssertions;
+using System.Text;
+using Xunit;
+
+namespace ZStringTests
+{
+    // Each test runs the same Replace call on a ZString builder and on StringBuilder and compares the text.
+    public class ReplaceTest
+    {
+        [Fact]
+        public void ReplaceCharTest()
+        {
+            // Ranged overload over a run of blanks.
+            string blanks = new string(' ', 10);
+            using (var builder = ZString.CreateStringBuilder())
+            {
+                builder.Append(blanks);
+                builder.Replace(' ', '-', 2, 5);
+                builder.ToString().Should().Be(new StringBuilder(blanks).Replace(' ', '-', 2, 5).ToString());
+            }
+            // Whole-builder overload on a one-character input.
+            string digit = "0";
+            using (var builder = ZString.CreateStringBuilder())
+            {
+                builder.Append(digit);
+                builder.Replace('0', '1');
+                builder.ToString().Should().Be(new StringBuilder(digit).Replace('0', '1').ToString());
+            }
+        }
+
+        [Fact]
+        public void ReplaceStringTest()
+        {
+            using (var builder = ZString.CreateStringBuilder(notNested: true))
+            {
+                const string source = "bra bra BRA bra bra";
+                var expected = new StringBuilder(source);
+                builder.Append(source);
+                // Null replacement removes the matches that lie fully inside [1, Length - 1).
+                expected.Replace("bra", null, 1, source.Length - 2);
+                builder.Replace("bra", null, 1, source.Length - 2);
+                builder.ToString().Should().Be(expected.ToString());
+            }
+
+            using (var builder = ZString.CreateStringBuilder())
+            {
+                const string source = "The quick brown dog jumps over the lazy cat.";
+                var expected = new StringBuilder(source);
+                builder.Append(source);
+
+                // Whole builder: "cat" becomes "dog".
+                expected.Replace("cat", "dog");
+                builder.Replace("cat", "dog");
+                builder.ToString().Should().Be(expected.ToString());
+
+                // Ranged: only a "dog" inside the 20 characters starting at index 15 becomes "fox".
+                expected.Replace("dog", "fox", 15, 20);
+                builder.Replace("dog", "fox", 15, 20);
+                builder.ToString().Should().Be(expected.ToString());
+            }
+        }
+
+        [Fact]
+        public void NotMatchTest()
+        {
+            using (var builder = ZString.CreateStringBuilder(notNested: true))
+            {
+                const string source = "The quick brown dog jumps over the lazy cat.";
+                builder.Append(source);
+                builder.Replace("pig", "dog");
+                builder.ToString().Should().Be(source);
+            }
+        }
+    }
+}