TestSegmenterPerf.cpp (7603B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include <fstream> 8 9 #include "gtest/gtest.h" 10 #include "gtest/MozGTestBench.h" // For MOZ_GTEST_BENCH 11 #include "mozilla/intl/LineBreaker.h" 12 #include "mozilla/intl/Segmenter.h" 13 #include "mozilla/Preferences.h" 14 #include "nsAtom.h" 15 #include "nsLineBreaker.h" 16 #include "nsString.h" 17 #include "nsTArray.h" 18 19 namespace mozilla::intl { 20 21 using mozilla::intl::LineBreakRule; 22 using mozilla::intl::WordBreakRule; 23 24 constexpr size_t kIterations = 100; 25 26 static std::string ReadFileIntoString(const char* aPath) { 27 std::ifstream file(aPath); 28 std::stringstream sstr; 29 sstr << file.rdbuf(); 30 return sstr.str(); 31 } 32 33 class SegmenterPerf : public ::testing::Test { 34 protected: 35 void SetUp() override { 36 // Test files are into xpcom/tests/gtest/wikipedia 37 mArUtf8 = ReadFileIntoString("ar.txt"); 38 mDeUtf8 = ReadFileIntoString("de.txt"); 39 mJaUtf8 = ReadFileIntoString("ja.txt"); 40 mRuUtf8 = ReadFileIntoString("ru.txt"); 41 mThUtf8 = ReadFileIntoString("th.txt"); 42 mTrUtf8 = ReadFileIntoString("tr.txt"); 43 mViUtf8 = ReadFileIntoString("vi.txt"); 44 45 CopyUTF8toUTF16(mArUtf8, mArUtf16); 46 CopyUTF8toUTF16(mDeUtf8, mDeUtf16); 47 CopyUTF8toUTF16(mJaUtf8, mJaUtf16); 48 CopyUTF8toUTF16(mRuUtf8, mRuUtf16); 49 CopyUTF8toUTF16(mThUtf8, mThUtf16); 50 CopyUTF8toUTF16(mTrUtf8, mTrUtf16); 51 CopyUTF8toUTF16(mViUtf8, mViUtf16); 52 53 mAr = NS_Atomize(u"ar"); 54 mDe = NS_Atomize(u"de"); 55 mJa = NS_Atomize(u"ja"); 56 mRu = NS_Atomize(u"ru"); 57 mTh = NS_Atomize(u"th"); 58 mTr = NS_Atomize(u"tr"); 59 mVi = NS_Atomize(u"vi"); 60 } 61 62 public: 63 std::string mArUtf8; 64 std::string mDeUtf8; 65 std::string mJaUtf8; 66 std::string mRuUtf8; 67 std::string mThUtf8; 68 std::string mTrUtf8; 69 std::string mViUtf8; 70 71 nsString mArUtf16; 72 nsString mDeUtf16; 73 nsString mJaUtf16; 74 nsString mRuUtf16; 75 nsString mThUtf16; 76 nsString mTrUtf16; 77 nsString mViUtf16; 78 79 RefPtr<nsAtom> mAr; 80 RefPtr<nsAtom> mDe; 81 RefPtr<nsAtom> mJa; 82 RefPtr<nsAtom> mRu; 83 RefPtr<nsAtom> mTh; 84 RefPtr<nsAtom> mTr; 85 RefPtr<nsAtom> mVi; 86 }; 87 88 class AutoSetSegmenter final { 89 public: 90 explicit AutoSetSegmenter(bool aValue) { 91 nsresult rv = 92 mozilla::Preferences::SetBool("intl.icu4x.segmenter.enabled", aValue); 93 EXPECT_TRUE(rv == NS_OK); 94 } 95 96 ~AutoSetSegmenter() { 97 mozilla::Preferences::ClearUser("intl.icu4x.segmenter.enabled"); 98 } 99 }; 100 101 static void TestSegmenterBench(const nsString& aStr, bool aIsJaOrZh, 102 size_t aCount = kIterations) { 103 nsTArray<uint8_t> breakState; 104 breakState.SetLength(aStr.Length()); 105 106 for (size_t i = 0; i < aCount; i++) { 107 LineBreaker::ComputeBreakPositions( 108 aStr.get(), aStr.Length(), WordBreakRule::Normal, LineBreakRule::Strict, 109 aIsJaOrZh, breakState.Elements()); 110 } 111 } 112 113 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakAROld, [this] { 114 AutoSetSegmenter set(false); 115 TestSegmenterBench(mArUtf16, false); 116 }); 117 118 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakDEOld, [this] { 119 AutoSetSegmenter set(false); 120 TestSegmenterBench(mDeUtf16, false); 121 }); 122 123 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakJAOld, [this] { 124 AutoSetSegmenter set(false); 125 TestSegmenterBench(mJaUtf16, true); 126 }); 127 128 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakRUOld, [this] { 129 AutoSetSegmenter set(false); 130 TestSegmenterBench(mRuUtf16, false); 131 }); 132 133 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTHOld, [this] { 134 AutoSetSegmenter set(false); 135 TestSegmenterBench(mThUtf16, false); 136 }); 137 138 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTROld, [this] { 139 AutoSetSegmenter set(false); 140 TestSegmenterBench(mTrUtf16, false); 141 }); 142 143 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakVIOld, [this] { 144 AutoSetSegmenter set(false); 145 TestSegmenterBench(mViUtf16, false); 146 }); 147 148 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakAR, [this] { 149 AutoSetSegmenter set(false); 150 TestSegmenterBench(mArUtf16, false); 151 }); 152 153 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakDE, [this] { 154 AutoSetSegmenter set(true); 155 TestSegmenterBench(mDeUtf16, false); 156 }); 157 158 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakJA, [this] { 159 AutoSetSegmenter set(true); 160 TestSegmenterBench(mJaUtf16, true); 161 }); 162 163 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakRU, [this] { 164 AutoSetSegmenter set(true); 165 TestSegmenterBench(mRuUtf16, false); 166 }); 167 168 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTH, [this] { 169 AutoSetSegmenter set(true); 170 // LSTM segmenter is too slow 171 TestSegmenterBench(mThUtf16, false, 3); 172 }); 173 174 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakTR, [this] { 175 AutoSetSegmenter set(true); 176 TestSegmenterBench(mTrUtf16, false); 177 }); 178 179 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfLineBreakVI, [this] { 180 AutoSetSegmenter set(true); 181 TestSegmenterBench(mViUtf16, false); 182 }); 183 184 class LBSink final : public nsILineBreakSink { 185 public: 186 LBSink() = default; 187 ~LBSink() = default; 188 189 virtual void SetBreaks(uint32_t, uint32_t, uint8_t*) override {} 190 virtual void SetCapitalization(uint32_t, uint32_t, bool*) override {} 191 }; 192 193 static void TestDOMSegmenterBench(const nsString& aStr, nsAtom* aLang, 194 size_t aCount = kIterations) { 195 LBSink sink; 196 bool trailingBreak; 197 198 for (size_t i = 0; i < aCount; i++) { 199 nsLineBreaker breaker; 200 breaker.AppendText(aLang, aStr.get(), aStr.Length(), 0, &sink); 201 breaker.Reset(&trailingBreak); 202 } 203 } 204 205 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakAROld, [this] { 206 AutoSetSegmenter set(false); 207 TestDOMSegmenterBench(mArUtf16, mAr); 208 }); 209 210 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakDEOld, [this] { 211 AutoSetSegmenter set(false); 212 TestDOMSegmenterBench(mDeUtf16, mDe); 213 }); 214 215 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakJAOld, [this] { 216 AutoSetSegmenter set(false); 217 TestDOMSegmenterBench(mJaUtf16, mJa); 218 }); 219 220 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakRUOld, [this] { 221 AutoSetSegmenter set(false); 222 TestDOMSegmenterBench(mRuUtf16, mRu); 223 }); 224 225 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTHOld, [this] { 226 AutoSetSegmenter set(false); 227 TestDOMSegmenterBench(mThUtf16, mTh); 228 }); 229 230 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTROld, [this] { 231 AutoSetSegmenter set(false); 232 TestDOMSegmenterBench(mTrUtf16, mTr); 233 }); 234 235 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakVIOld, [this] { 236 AutoSetSegmenter set(false); 237 TestDOMSegmenterBench(mViUtf16, mVi); 238 }); 239 240 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakAR, [this] { 241 AutoSetSegmenter set(true); 242 TestDOMSegmenterBench(mArUtf16, mAr); 243 }); 244 245 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakDE, [this] { 246 AutoSetSegmenter set(true); 247 TestDOMSegmenterBench(mDeUtf16, mDe); 248 }); 249 250 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakJA, [this] { 251 AutoSetSegmenter set(true); 252 TestDOMSegmenterBench(mJaUtf16, mJa); 253 }); 254 255 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakRU, [this] { 256 AutoSetSegmenter set(true); 257 TestDOMSegmenterBench(mRuUtf16, mRu); 258 }); 259 260 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTH, [this] { 261 AutoSetSegmenter set(true); 262 // LSTM segmenter is too slow 263 TestDOMSegmenterBench(mThUtf16, mTh, 3); 264 }); 265 266 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakTR, [this] { 267 AutoSetSegmenter set(true); 268 TestDOMSegmenterBench(mTrUtf16, mTr); 269 }); 270 271 MOZ_GTEST_BENCH_F(SegmenterPerf, PerfDOMLineBreakVI, [this] { 272 AutoSetSegmenter set(true); 273 TestDOMSegmenterBench(mViUtf16, mVi); 274 }); 275 276 } // namespace mozilla::intl