Skip to content

Commit 2c80b22

Browse files
committed
Optimize StemmerUtil for ReadOnlySpan<char>, apache#1140
1 parent a96e768 commit 2c80b22

File tree

2 files changed

+127
-41
lines changed

2 files changed

+127
-41
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
// Lucene version compatibility level 4.8.1
22
using Lucene.Net.Diagnostics;
3-
using Lucene.Net.Support;
43
using System;
5-
using System.Diagnostics;
64

75
namespace Lucene.Net.Analysis.Util
86
{
@@ -25,7 +23,7 @@ namespace Lucene.Net.Analysis.Util
2523

2624
/// <summary>
2725
/// Some commonly-used stemming functions
28-
///
26+
///
2927
/// @lucene.internal
3028
/// </summary>
3129
public static class StemmerUtil // LUCENENET specific: CA1052 Static holder types should be Static or NotInheritable
@@ -37,21 +35,34 @@ public static class StemmerUtil // LUCENENET specific: CA1052 Static holder type
3735
/// <param name="len"> length of input buffer </param>
3836
/// <param name="prefix"> Prefix string to test </param>
3937
/// <returns> <c>true</c> if <paramref name="s"/> starts with <paramref name="prefix"/> </returns>
40-
public static bool StartsWith(char[] s, int len, string prefix)
38+
/// <remarks>
39+
/// LUCENENET NOTE: This method has been converted to use <see cref="ReadOnlySpan{T}"/>.
40+
/// </remarks>
41+
public static bool StartsWith(ReadOnlySpan<char> s, int len, string prefix)
42+
{
// Convenience overload: view the string prefix as a span (AsSpan) and delegate to the
// ReadOnlySpan<char> overload, which performs the length check and the comparison.
43+
return StartsWith(s, len, prefix.AsSpan());
44+
}
45+
46+
/// <summary>
47+
/// Returns true if the character span starts with the prefix; always false when the prefix is longer than <paramref name="len"/>.
48+
/// </summary>
49+
/// <param name="s"> Input Buffer </param>
50+
/// <param name="len"> length of input buffer </param>
51+
/// <param name="prefix"> Prefix span to test </param>
52+
/// <returns> <c>true</c> if <paramref name="s"/> starts with <paramref name="prefix"/> </returns>
53+
/// <remarks>
54+
/// LUCENENET NOTE: This method has been converted to use <see cref="ReadOnlySpan{T}"/>.
55+
/// </remarks>
56+
public static bool StartsWith(ReadOnlySpan<char> s, int len, ReadOnlySpan<char> prefix)
4157
{
4258
int prefixLen = prefix.Length;
4359
if (prefixLen > len)
4460
{
4561
return false;
4662
}
47-
for (int i = 0; i < prefixLen; i++)
48-
{
49-
if (s[i] != prefix[i])
50-
{
51-
return false;
52-
}
53-
}
54-
return true;
63+
64+
// LUCENENET: use more efficient implementation in MemoryExtensions; a prefix longer than len was already rejected above
65+
return s.StartsWith(prefix);
5566
}
5667

5768
/// <summary>
@@ -61,22 +72,12 @@ public static bool StartsWith(char[] s, int len, string prefix)
6172
/// <param name="len"> length of input buffer </param>
6273
/// <param name="suffix"> Suffix string to test </param>
6374
/// <returns> <c>true</c> if <paramref name="s"/> ends with <paramref name="suffix"/> </returns>
64-
public static bool EndsWith(char[] s, int len, string suffix)
75+
/// <remarks>
76+
/// LUCENENET NOTE: This method has been converted to use <see cref="ReadOnlySpan{T}"/>.
77+
/// </remarks>
78+
public static bool EndsWith(ReadOnlySpan<char> s, int len, string suffix)
6579
{
// Convenience overload: view the string suffix as a span (AsSpan) and delegate to the
// ReadOnlySpan<char> overload, which performs the length check and the comparison.
66-
int suffixLen = suffix.Length;
67-
if (suffixLen > len)
68-
{
69-
return false;
70-
}
71-
for (int i = suffixLen - 1; i >= 0; i--)
72-
{
73-
if (s[len - (suffixLen - i)] != suffix[i])
74-
{
75-
return false;
76-
}
77-
}
78-
79-
return true;
80+
return EndsWith(s, len, suffix.AsSpan());
8081
}
8182

8283
/// <summary>
@@ -86,37 +87,40 @@ public static bool EndsWith(char[] s, int len, string suffix)
8687
/// <param name="len"> length of input buffer </param>
8788
/// <param name="suffix"> Suffix string to test </param>
8889
/// <returns> <c>true</c> if <paramref name="s"/> ends with <paramref name="suffix"/> </returns>
89-
public static bool EndsWith(char[] s, int len, char[] suffix)
90+
/// <remarks>
91+
/// LUCENENET NOTE: This method has been converted to use <see cref="ReadOnlySpan{T}"/>.
92+
/// </remarks>
93+
public static bool EndsWith(ReadOnlySpan<char> s, int len, ReadOnlySpan<char> suffix)
9094
{
// A suffix longer than the logical length len can never match, so reject it up front.
9195
int suffixLen = suffix.Length;
9296
if (suffixLen > len)
9397
{
9498
return false;
9599
}
96-
for (int i = suffixLen - 1; i >= 0; i--)
97-
{
98-
if (s[len - (suffixLen - i)] != suffix[i])
99-
{
100-
return false;
101-
}
102-
}
103100

104-
return true;
101+
// LUCENENET: use more efficient implementation in MemoryExtensions.
// The buffer is first sliced to its first len characters so the suffix is matched
// against the logical end (position len), not the physical end of the span.
102+
return s.Slice(0, len).EndsWith(suffix);
105103
}
106104

105+
// LUCENENET NOTE: char[] overload of EndsWith removed because the ReadOnlySpan<char> overload can be used instead
106+
107107
/// <summary>
108108
/// Delete a character in-place
109109
/// </summary>
110110
/// <param name="s"> Input Buffer </param>
111111
/// <param name="pos"> Position of character to delete </param>
112112
/// <param name="len"> length of input buffer </param>
113113
/// <returns> length of input buffer after deletion </returns>
114-
public static int Delete(char[] s, int pos, int len)
114+
/// <remarks>
115+
/// LUCENENET NOTE: This method has been converted to use <see cref="Span{T}"/>.
116+
/// </remarks>
117+
public static int Delete(Span<char> s, int pos, int len)
115118
{
// Precondition, checked only when Debugging.AssertsEnabled: pos must lie inside the logical buffer.
116119
if (Debugging.AssertsEnabled) Debugging.Assert(pos < len);
117120
if (pos < len - 1) // don't copy if asked to delete the last character (nothing to shift)
118121
{
119-
Arrays.Copy(s, pos + 1, s, pos, len - pos - 1);
122+
// Span equivalent of Arrays.Copy(s, pos + 1, s, pos, len - pos - 1): shift the tail one position left over the deleted character (source and destination slices overlap).
123+
s.Slice(pos + 1, len - pos - 1).CopyTo(s.Slice(pos, len - pos - 1));
120124
}
// The span itself is not shortened; only the returned logical length changes.
121125
return len - 1;
122126
}
@@ -129,14 +133,18 @@ public static int Delete(char[] s, int pos, int len)
129133
/// <param name="len"> Length of input buffer </param>
130134
/// <param name="nChars"> number of characters to delete </param>
131135
/// <returns> length of input buffer after deletion </returns>
132-
public static int DeleteN(char[] s, int pos, int len, int nChars)
136+
/// <remarks>
137+
/// LUCENENET NOTE: This method has been converted to use <see cref="Span{T}"/>.
138+
/// </remarks>
139+
public static int DeleteN(Span<char> s, int pos, int len, int nChars)
133140
{
// Precondition, checked only when Debugging.AssertsEnabled: the deleted range must end within the logical buffer.
134141
if (Debugging.AssertsEnabled) Debugging.Assert(pos + nChars <= len);
135142
if (pos + nChars < len) // don't copy if asked to delete the last characters (nothing to shift)
136143
{
137-
Arrays.Copy(s, pos + nChars, s, pos, len - pos - nChars);
144+
// Span equivalent of Arrays.Copy(s, pos + nChars, s, pos, len - pos - nChars): shift the tail nChars positions left over the deleted range (source and destination slices may overlap).
145+
s.Slice(pos + nChars, len - pos - nChars).CopyTo(s.Slice(pos, len - pos - nChars));
138146
}
// The span itself is not shortened; only the returned logical length changes.
139147
return len - nChars;
140148
}
141149
}
142-
}
150+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
using Lucene.Net.Attributes;
2+
using Lucene.Net.Util;
3+
using NUnit.Framework;
4+
using System;
5+
using Assert = Lucene.Net.TestFramework.Assert;
6+
7+
namespace Lucene.Net.Analysis.Util
8+
{
9+
/*
10+
* Licensed to the Apache Software Foundation (ASF) under one or more
11+
* contributor license agreements. See the NOTICE file distributed with
12+
* this work for additional information regarding copyright ownership.
13+
* The ASF licenses this file to You under the Apache License, Version 2.0
14+
* (the "License"); you may not use this file except in compliance with
15+
* the License. You may obtain a copy of the License at
16+
*
17+
* http://www.apache.org/licenses/LICENSE-2.0
18+
*
19+
* Unless required by applicable law or agreed to in writing, software
20+
* distributed under the License is distributed on an "AS IS" BASIS,
21+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22+
* See the License for the specific language governing permissions and
23+
* limitations under the License.
24+
*/
25+
26+
/// <summary>
27+
/// Tests for <see cref="StemmerUtil"/>: prefix/suffix matching against a length-limited buffer and in-place character deletion.
28+
/// </summary>
29+
[TestFixture]
30+
[LuceneNetSpecific]
31+
public class TestStemmerUtil : LuceneTestCase
32+
{
33+
[Test]
34+
[TestCase("foobar", 6, "foo", true)]
35+
[TestCase("foobar", 3, "foo", true)]
36+
[TestCase("foobar", 6, "bar", false)]
37+
[TestCase("foobar", 2, "foo", false)]
38+
public void TestStartsWith(string input, int len, string prefix, bool expected)
39+
{
// Exercises the string-prefix overload; the last case covers a prefix longer than len.
40+
Assert.AreEqual(expected, StemmerUtil.StartsWith(input.AsSpan(), len, prefix));
41+
}
42+
43+
[Test]
44+
[TestCase("foobar", 6, "bar", true)]
45+
[TestCase("foobar", 3, "bar", false)]
46+
[TestCase("foobar", 6, "foo", false)]
47+
[TestCase("foobar", 2, "bar", false)]
48+
[TestCase("foobar", 3, "foo", true)]
49+
public void TestEndsWith(string input, int len, string prefix, bool expected)
50+
{
// NOTE(review): the parameter named 'prefix' is passed to EndsWith as the suffix; consider renaming it to 'suffix'.
// The len = 3 cases check that matching is done against the first len characters ("foo"), not the whole input.
51+
Assert.AreEqual(expected, StemmerUtil.EndsWith(input.AsSpan(), len, prefix));
52+
}
53+
54+
[Test]
55+
[TestCase("foobar", 3, 6, "fooar", 5)]
56+
[TestCase("foobar", 0, 6, "oobar", 5)]
57+
[TestCase("foobar", 0, 3, "oo", 2)]
58+
[TestCase("foobar", 5, 6, "fooba", 5)]
59+
public void TestDelete(string input, int pos, int len, string expected, int expectedLen)
60+
{
// Work on a mutable copy; the char[] is passed where Delete takes Span<char>.
// Only the first expectedLen characters are compared, since Delete returns the new logical length.
61+
char[] buffer = input.ToCharArray();
62+
Assert.AreEqual(expectedLen, StemmerUtil.Delete(buffer, pos, len));
63+
Assert.AreEqual(expected, new string(buffer, 0, expectedLen));
64+
}
65+
66+
[Test]
67+
[TestCase("foobar", 3, 6, 2, "foor", 4)]
68+
[TestCase("foobar", 0, 6, 2, "obar", 4)]
69+
[TestCase("foobar", 0, 3, 2, "o", 1)]
70+
[TestCase("foobar", 4, 6, 2, "foob", 4)]
71+
public void TestDeleteN(string input, int pos, int len, int nChars, string expected, int expectedLen)
72+
{
// Same shape as TestDelete, removing nChars characters starting at pos; the last case deletes the trailing characters.
73+
char[] buffer = input.ToCharArray();
74+
Assert.AreEqual(expectedLen, StemmerUtil.DeleteN(buffer, pos, len, nChars));
75+
Assert.AreEqual(expected, new string(buffer, 0, expectedLen));
76+
}
77+
}
78+
}

0 commit comments

Comments
 (0)