tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SapiService.cpp (13327B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "SapiService.h"
      8 
      9 #include "mozilla/ClearOnShutdown.h"
     10 #include "mozilla/Preferences.h"
     11 #include "mozilla/ProfilerLabels.h"
     12 #include "mozilla/StaticPrefs_media.h"
     13 #include "mozilla/dom/nsSpeechTask.h"
     14 #include "mozilla/dom/nsSynthVoiceRegistry.h"
     15 #include "nsEscape.h"
     16 #include "nsISupports.h"
     17 #include "nsServiceManagerUtils.h"
     18 #include "nsXULAppAPI.h"
     19 
     20 namespace mozilla::dom {
     21 
     22 constexpr static WCHAR kSpCategoryOneCoreVoices[] =
     23    L"HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Speech_OneCore\\Voices";
     24 
     25 StaticRefPtr<SapiService> SapiService::sSingleton;
     26 
     27 class SapiCallback final : public nsISpeechTaskCallback {
     28 public:
     29  SapiCallback(nsISpeechTask* aTask, ISpVoice* aSapiClient,
     30               uint32_t aTextOffset, uint32_t aSpeakTextLen)
     31      : mTask(aTask),
     32        mSapiClient(aSapiClient),
     33        mTextOffset(aTextOffset),
     34        mSpeakTextLen(aSpeakTextLen),
     35        mCurrentIndex(0),
     36        mStreamNum(0) {
     37    mStartingTime = TimeStamp::Now();
     38  }
     39 
     40  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
     41  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SapiCallback, nsISpeechTaskCallback)
     42 
     43  NS_DECL_NSISPEECHTASKCALLBACK
     44 
     45  ULONG GetStreamNum() const { return mStreamNum; }
     46  void SetStreamNum(ULONG aValue) { mStreamNum = aValue; }
     47 
     48  void OnSpeechEvent(const SPEVENT& speechEvent);
     49 
     50 private:
     51  ~SapiCallback() {}
     52 
     53  float GetTimeDurationFromStart() const {
     54    TimeDuration duration = TimeStamp::Now() - mStartingTime;
     55    return duration.ToSeconds();
     56  }
     57 
     58  // This pointer is used to dispatch events
     59  nsCOMPtr<nsISpeechTask> mTask;
     60  RefPtr<ISpVoice> mSapiClient;
     61 
     62  uint32_t mTextOffset;
     63  uint32_t mSpeakTextLen;
     64 
     65  // Used for calculating the time taken to speak the utterance
     66  TimeStamp mStartingTime;
     67  uint32_t mCurrentIndex;
     68 
     69  ULONG mStreamNum;
     70 };
     71 
     72 NS_IMPL_CYCLE_COLLECTION(SapiCallback, mTask);
     73 
     74 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SapiCallback)
     75  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
     76  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
     77 NS_INTERFACE_MAP_END
     78 
     79 NS_IMPL_CYCLE_COLLECTING_ADDREF(SapiCallback)
     80 NS_IMPL_CYCLE_COLLECTING_RELEASE(SapiCallback)
     81 
     82 NS_IMETHODIMP
     83 SapiCallback::OnPause() {
     84  if (FAILED(mSapiClient->Pause())) {
     85    return NS_ERROR_FAILURE;
     86  }
     87  if (!mTask) {
     88    // When calling pause() on child porcess, it may not receive end event
     89    // from chrome process yet.
     90    return NS_ERROR_FAILURE;
     91  }
     92  mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
     93  return NS_OK;
     94 }
     95 
     96 NS_IMETHODIMP
     97 SapiCallback::OnResume() {
     98  if (FAILED(mSapiClient->Resume())) {
     99    return NS_ERROR_FAILURE;
    100  }
    101  if (!mTask) {
    102    // When calling resume() on child porcess, it may not receive end event
    103    // from chrome process yet.
    104    return NS_ERROR_FAILURE;
    105  }
    106  mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
    107  return NS_OK;
    108 }
    109 
    110 NS_IMETHODIMP
    111 SapiCallback::OnCancel() {
    112  // After cancel, mCurrentIndex may be updated.
    113  // At cancel case, use mCurrentIndex for DispatchEnd.
    114  mSpeakTextLen = 0;
    115  // Purge all the previous utterances and speak an empty string
    116  if (FAILED(mSapiClient->Speak(nullptr, SPF_PURGEBEFORESPEAK, nullptr))) {
    117    return NS_ERROR_FAILURE;
    118  }
    119  return NS_OK;
    120 }
    121 
    122 NS_IMETHODIMP
    123 SapiCallback::OnVolumeChanged(float aVolume) {
    124  mSapiClient->SetVolume(static_cast<USHORT>(aVolume * 100));
    125  return NS_OK;
    126 }
    127 
    128 void SapiCallback::OnSpeechEvent(const SPEVENT& speechEvent) {
    129  switch (speechEvent.eEventId) {
    130    case SPEI_START_INPUT_STREAM:
    131      mTask->DispatchStart();
    132      break;
    133    case SPEI_END_INPUT_STREAM:
    134      if (mSpeakTextLen) {
    135        mCurrentIndex = mSpeakTextLen;
    136      }
    137      mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
    138      mTask = nullptr;
    139      break;
    140    case SPEI_TTS_BOOKMARK:
    141      mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset;
    142      mTask->DispatchBoundary(u"mark"_ns, GetTimeDurationFromStart(),
    143                              mCurrentIndex, 0, 0);
    144      break;
    145    case SPEI_WORD_BOUNDARY:
    146      mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset;
    147      mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(),
    148                              mCurrentIndex,
    149                              static_cast<ULONG>(speechEvent.wParam), 1);
    150      break;
    151    case SPEI_SENTENCE_BOUNDARY:
    152      mCurrentIndex = static_cast<ULONG>(speechEvent.lParam) - mTextOffset;
    153      mTask->DispatchBoundary(u"sentence"_ns, GetTimeDurationFromStart(),
    154                              mCurrentIndex,
    155                              static_cast<ULONG>(speechEvent.wParam), 1);
    156      break;
    157    default:
    158      break;
    159  }
    160 }
    161 
    162 // static
    163 void __stdcall SapiService::SpeechEventCallback(WPARAM aWParam,
    164                                                LPARAM aLParam) {
    165  RefPtr<ISpVoice> spVoice = (ISpVoice*)aWParam;
    166  RefPtr<SapiService> service = (SapiService*)aLParam;
    167 
    168  SPEVENT speechEvent;
    169  while (spVoice->GetEvents(1, &speechEvent, nullptr) == S_OK) {
    170    for (size_t i = 0; i < service->mCallbacks.Length(); i++) {
    171      RefPtr<SapiCallback> callback = service->mCallbacks[i];
    172      if (callback->GetStreamNum() == speechEvent.ulStreamNum) {
    173        callback->OnSpeechEvent(speechEvent);
    174        if (speechEvent.eEventId == SPEI_END_INPUT_STREAM) {
    175          service->mCallbacks.RemoveElementAt(i);
    176        }
    177        break;
    178      }
    179    }
    180  }
    181 }
    182 
    183 NS_INTERFACE_MAP_BEGIN(SapiService)
    184  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
    185  NS_INTERFACE_MAP_ENTRY(nsIObserver)
    186  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
    187 NS_INTERFACE_MAP_END
    188 
    189 NS_IMPL_ADDREF(SapiService)
    190 NS_IMPL_RELEASE(SapiService)
    191 
    192 SapiService::SapiService() : mInitialized(false) {}
    193 
    194 SapiService::~SapiService() {}
    195 
    196 bool SapiService::Init() {
    197  AUTO_PROFILER_LABEL("SapiService::Init", OTHER);
    198 
    199  MOZ_ASSERT(!mInitialized);
    200 
    201  if (Preferences::GetBool("media.webspeech.synth.test") ||
    202      !StaticPrefs::media_webspeech_synth_enabled()) {
    203    // When enabled, we shouldn't add OS backend (Bug 1160844)
    204    return false;
    205  }
    206 
    207  // Get all the voices from sapi and register in the SynthVoiceRegistry
    208  if (!RegisterVoices()) {
    209    return false;
    210  }
    211 
    212  mInitialized = true;
    213  return true;
    214 }
    215 
    216 already_AddRefed<ISpVoice> SapiService::InitSapiInstance() {
    217  RefPtr<ISpVoice> spVoice;
    218  if (FAILED(CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL, IID_ISpVoice,
    219                              getter_AddRefs(spVoice)))) {
    220    return nullptr;
    221  }
    222 
    223  // Set interest for all the events we are interested in
    224  ULONGLONG eventMask = SPFEI(SPEI_START_INPUT_STREAM) |
    225                        SPFEI(SPEI_TTS_BOOKMARK) | SPFEI(SPEI_WORD_BOUNDARY) |
    226                        SPFEI(SPEI_SENTENCE_BOUNDARY) |
    227                        SPFEI(SPEI_END_INPUT_STREAM);
    228 
    229  if (FAILED(spVoice->SetInterest(eventMask, eventMask))) {
    230    return nullptr;
    231  }
    232 
    233  // Set the callback function for receiving the events
    234  spVoice->SetNotifyCallbackFunction(
    235      (SPNOTIFYCALLBACK*)SapiService::SpeechEventCallback,
    236      (WPARAM)spVoice.get(), (LPARAM)this);
    237 
    238  return spVoice.forget();
    239 }
    240 
    241 bool SapiService::RegisterVoices() {
    242  nsCOMPtr<nsISynthVoiceRegistry> registry =
    243      do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID);
    244  if (!registry) {
    245    return false;
    246  }
    247  bool result = RegisterVoices(registry, kSpCategoryOneCoreVoices);
    248  result |= RegisterVoices(registry, SPCAT_VOICES);
    249  if (result) {
    250    registry->NotifyVoicesChanged();
    251  }
    252  return result;
    253 }
    254 
    255 bool SapiService::RegisterVoices(nsCOMPtr<nsISynthVoiceRegistry>& registry,
    256                                 const WCHAR* categoryId) {
    257  nsresult rv;
    258 
    259  RefPtr<ISpObjectTokenCategory> category;
    260  if (FAILED(CoCreateInstance(CLSID_SpObjectTokenCategory, nullptr, CLSCTX_ALL,
    261                              IID_ISpObjectTokenCategory,
    262                              getter_AddRefs(category)))) {
    263    return false;
    264  }
    265  if (FAILED(category->SetId(categoryId, FALSE))) {
    266    return false;
    267  }
    268 
    269  RefPtr<IEnumSpObjectTokens> voiceTokens;
    270  if (FAILED(category->EnumTokens(nullptr, nullptr,
    271                                  getter_AddRefs(voiceTokens)))) {
    272    return false;
    273  }
    274 
    275  WCHAR locale[LOCALE_NAME_MAX_LENGTH];
    276  while (true) {
    277    RefPtr<ISpObjectToken> voiceToken;
    278    if (voiceTokens->Next(1, getter_AddRefs(voiceToken), nullptr) != S_OK) {
    279      break;
    280    }
    281 
    282    RefPtr<ISpDataKey> attributes;
    283    if (FAILED(
    284            voiceToken->OpenKey(L"Attributes", getter_AddRefs(attributes)))) {
    285      continue;
    286    }
    287 
    288    WCHAR* language = nullptr;
    289    if (FAILED(attributes->GetStringValue(L"Language", &language))) {
    290      continue;
    291    }
    292 
    293    // Language attribute is LCID by hex.  So we need convert to locale
    294    // name.
    295    nsAutoString hexLcid;
    296    LCID lcid = wcstol(language, nullptr, 16);
    297    CoTaskMemFree(language);
    298    if (NS_WARN_IF(
    299            !LCIDToLocaleName(lcid, locale, LOCALE_NAME_MAX_LENGTH, 0))) {
    300      continue;
    301    }
    302 
    303    WCHAR* description = nullptr;
    304    if (FAILED(voiceToken->GetStringValue(nullptr, &description))) {
    305      continue;
    306    }
    307 
    308    nsAutoString uri;
    309    uri.AssignLiteral("urn:moz-tts:sapi:");
    310    uri.Append(description);
    311    uri.AppendLiteral("?");
    312    uri.Append(locale);
    313 
    314    // This service can only speak one utterance at a time, se we set
    315    // aQueuesUtterances to true in order to track global state and schedule
    316    // access to this service.
    317    rv = registry->AddVoice(this, uri, nsDependentString(description),
    318                            nsDependentString(locale), true, true);
    319    CoTaskMemFree(description);
    320    if (NS_FAILED(rv)) {
    321      continue;
    322    }
    323 
    324    mVoices.InsertOrUpdate(uri, std::move(voiceToken));
    325  }
    326 
    327  return true;
    328 }
    329 
    330 NS_IMETHODIMP
    331 SapiService::Speak(const nsAString& aText, const nsAString& aUri, float aVolume,
    332                   float aRate, float aPitch, nsISpeechTask* aTask) {
    333  NS_ENSURE_TRUE(mInitialized, NS_ERROR_NOT_AVAILABLE);
    334 
    335  RefPtr<ISpObjectToken> voiceToken;
    336  if (!mVoices.Get(aUri, getter_AddRefs(voiceToken))) {
    337    return NS_ERROR_NOT_AVAILABLE;
    338  }
    339 
    340  RefPtr<ISpVoice> spVoice = InitSapiInstance();
    341  if (!spVoice) {
    342    return NS_ERROR_FAILURE;
    343  }
    344 
    345  if (FAILED(spVoice->SetVoice(voiceToken))) {
    346    return NS_ERROR_FAILURE;
    347  }
    348 
    349  if (FAILED(spVoice->SetVolume(static_cast<USHORT>(aVolume * 100)))) {
    350    return NS_ERROR_FAILURE;
    351  }
    352 
    353  // The max supported rate in SAPI engines is 3x, and the min is 1/3x. It is
    354  // expressed by an integer. 0 being normal rate, -10 is 1/3 and 10 is 3x.
    355  // Values below and above that are allowed, but the engine may clip the rate
    356  // to its maximum capable value.
    357  // "Each increment between -10 and +10 is logarithmically distributed such
    358  //  that incrementing or decrementing by 1 is multiplying or dividing the
    359  //  rate by the 10th root of 3"
    360  // https://msdn.microsoft.com/en-us/library/ee431826(v=vs.85).aspx
    361  long rate = aRate != 0 ? static_cast<long>(10 * log10(aRate) / log10(3)) : 0;
    362  if (FAILED(spVoice->SetRate(rate))) {
    363    return NS_ERROR_FAILURE;
    364  }
    365 
    366  // Set the pitch using xml
    367  nsAutoString xml;
    368  xml.AssignLiteral("<pitch absmiddle=\"");
    369  // absmiddle doesn't allow float type
    370  xml.AppendInt(static_cast<int32_t>(aPitch * 10.0f - 10.0f));
    371  xml.AppendLiteral("\">");
    372  uint32_t textOffset = xml.Length();
    373 
    374  for (size_t i = 0; i < aText.Length(); i++) {
    375    switch (aText[i]) {
    376      case '&':
    377        xml.AppendLiteral("&amp;");
    378        break;
    379      case '<':
    380        xml.AppendLiteral("&lt;");
    381        break;
    382      case '>':
    383        xml.AppendLiteral("&gt;");
    384        break;
    385      default:
    386        xml.Append(aText[i]);
    387        break;
    388    }
    389  }
    390 
    391  xml.AppendLiteral("</pitch>");
    392 
    393  RefPtr<SapiCallback> callback =
    394      new SapiCallback(aTask, spVoice, textOffset, aText.Length());
    395 
    396  // The last three parameters doesn't matter for an indirect service
    397  nsresult rv = aTask->Setup(callback);
    398  if (NS_FAILED(rv)) {
    399    return rv;
    400  }
    401 
    402  ULONG streamNum;
    403  if (FAILED(spVoice->Speak(xml.get(), SPF_ASYNC, &streamNum))) {
    404    aTask->Setup(nullptr);
    405    return NS_ERROR_FAILURE;
    406  }
    407 
    408  callback->SetStreamNum(streamNum);
    409  // streamNum reassigns same value when last stream is finished even if
    410  // callback for stream end isn't called
    411  // So we cannot use data hashtable and has to add it to vector at last.
    412  mCallbacks.AppendElement(callback);
    413 
    414  return NS_OK;
    415 }
    416 
    417 NS_IMETHODIMP
    418 SapiService::Observe(nsISupports* aSubject, const char* aTopic,
    419                     const char16_t* aData) {
    420  return NS_OK;
    421 }
    422 
    423 SapiService* SapiService::GetInstance() {
    424  MOZ_ASSERT(NS_IsMainThread());
    425  if (XRE_GetProcessType() != GeckoProcessType_Default) {
    426    MOZ_ASSERT(false, "SapiService can only be started on main gecko process");
    427    return nullptr;
    428  }
    429 
    430  if (!sSingleton) {
    431    RefPtr<SapiService> service = new SapiService();
    432    if (service->Init()) {
    433      sSingleton = service;
    434      ClearOnShutdown(&sSingleton);
    435    }
    436  }
    437  return sSingleton;
    438 }
    439 
    440 already_AddRefed<SapiService> SapiService::GetInstanceForService() {
    441  RefPtr<SapiService> sapiService = GetInstance();
    442  return sapiService.forget();
    443 }
    444 
    445 }  // namespace mozilla::dom