tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SpeechDispatcherService.cpp (17139B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "SpeechDispatcherService.h"
      8 
      9 #include <math.h>
     10 
     11 #include "mozilla/ClearOnShutdown.h"
     12 #include "mozilla/Preferences.h"
     13 #include "mozilla/StaticPrefs_media.h"
     14 #include "mozilla/dom/nsSpeechTask.h"
     15 #include "mozilla/dom/nsSynthVoiceRegistry.h"
     16 #include "nsEscape.h"
     17 #include "nsISupports.h"
     18 #include "nsPrintfCString.h"
     19 #include "nsReadableUtils.h"
     20 #include "nsServiceManagerUtils.h"
     21 #include "nsThreadUtils.h"
     22 #include "nsXULAppAPI.h"
     23 #include "prlink.h"
     24 
     25 #define URI_PREFIX "urn:moz-tts:speechd:"
     26 
     27 #define MAX_RATE static_cast<float>(2.5)
     28 #define MIN_RATE static_cast<float>(0.5)
     29 
     30 // Some structures for libspeechd
     31 typedef enum {
     32  SPD_EVENT_BEGIN,
     33  SPD_EVENT_END,
     34  SPD_EVENT_INDEX_MARK,
     35  SPD_EVENT_CANCEL,
     36  SPD_EVENT_PAUSE,
     37  SPD_EVENT_RESUME
     38 } SPDNotificationType;
     39 
     40 typedef enum {
     41  SPD_BEGIN = 1,
     42  SPD_END = 2,
     43  SPD_INDEX_MARKS = 4,
     44  SPD_CANCEL = 8,
     45  SPD_PAUSE = 16,
     46  SPD_RESUME = 32,
     47 
     48  SPD_ALL = 0x3f
     49 } SPDNotification;
     50 
     51 typedef enum { SPD_MODE_SINGLE = 0, SPD_MODE_THREADED = 1 } SPDConnectionMode;
     52 
     53 typedef void (*SPDCallback)(size_t msg_id, size_t client_id,
     54                            SPDNotificationType state);
     55 
     56 typedef void (*SPDCallbackIM)(size_t msg_id, size_t client_id,
     57                              SPDNotificationType state, char* index_mark);
     58 
     59 struct SPDConnection {
     60  SPDCallback callback_begin;
     61  SPDCallback callback_end;
     62  SPDCallback callback_cancel;
     63  SPDCallback callback_pause;
     64  SPDCallback callback_resume;
     65  SPDCallbackIM callback_im;
     66 
     67  /* partial, more private fields in structure */
     68 };
     69 
// One element of the null-terminated array returned by
// spd_list_synthesis_voices(). Setup() reads `name` and `language`; `variant`
// is not used in this file.
struct SPDVoice {
 char* name;      // voice name; also passed to spd_set_synthesis_voice()
 char* language;  // language string appended to the voice URI
 char* variant;   // unused here
};
     75 
     76 typedef enum {
     77  SPD_IMPORTANT = 1,
     78  SPD_MESSAGE = 2,
     79  SPD_TEXT = 3,
     80  SPD_NOTIFICATION = 4,
     81  SPD_PROGRESS = 5
     82 } SPDPriority;
     83 
// X-macro table of the libspeechd entry points this file resolves at runtime.
// Each row is FUNC(symbol, return type, parameter list). A user defines FUNC,
// expands SPEECHD_FUNCTIONS and then #undefs FUNC; this file does so twice:
// once to declare the function-pointer slots and once, inside Setup(), to
// build the name -> slot lookup table.
#define SPEECHD_FUNCTIONS                                           \
 FUNC(spd_open, SPDConnection*,                                    \
      (const char*, const char*, const char*, SPDConnectionMode))  \
 FUNC(spd_close, void, (SPDConnection*))                           \
 FUNC(spd_list_synthesis_voices, SPDVoice**, (SPDConnection*))     \
 FUNC(spd_say, int, (SPDConnection*, SPDPriority, const char*))    \
 FUNC(spd_cancel, int, (SPDConnection*))                           \
 FUNC(spd_set_volume, int, (SPDConnection*, int))                  \
 FUNC(spd_set_voice_rate, int, (SPDConnection*, int))              \
 FUNC(spd_set_voice_pitch, int, (SPDConnection*, int))             \
 FUNC(spd_set_synthesis_voice, int, (SPDConnection*, const char*)) \
 FUNC(spd_set_notification_on, int, (SPDConnection*, SPDNotification))
     96 
     97 #define FUNC(name, type, params)      \
     98  typedef type(*_##name##_fn) params; \
     99  static _##name##_fn _##name;
    100 
    101 SPEECHD_FUNCTIONS
    102 
    103 #undef FUNC
    104 
// Map the public libspeechd names onto the dynamically resolved `_<name>`
// pointers declared above, so the rest of the file can call e.g. spd_say()
// as if it were linked directly.
#define spd_open _spd_open
#define spd_close _spd_close
#define spd_list_synthesis_voices _spd_list_synthesis_voices
#define spd_say _spd_say
#define spd_cancel _spd_cancel
#define spd_set_volume _spd_set_volume
#define spd_set_voice_rate _spd_set_voice_rate
#define spd_set_voice_pitch _spd_set_voice_pitch
#define spd_set_synthesis_voice _spd_set_synthesis_voice
#define spd_set_notification_on _spd_set_notification_on
    115 
// Handle of the libspeechd shared object; assigned by Setup() via
// PR_LoadLibrary() and null until then. It is never unloaded in this file.
static PRLibrary* speechdLib = nullptr;
    117 
    118 typedef void (*nsSpeechDispatcherFunc)();
    119 struct nsSpeechDispatcherDynamicFunction {
    120  const char* functionName;
    121  nsSpeechDispatcherFunc* function;
    122 };
    123 
    124 namespace mozilla::dom {
    125 
// Process-wide service instance. GetInstance() creates it on first request
// and registers it with ClearOnShutdown().
StaticRefPtr<SpeechDispatcherService> SpeechDispatcherService::sSingleton;
    127 
    128 class SpeechDispatcherVoice {
    129 public:
    130  SpeechDispatcherVoice(const nsAString& aName, const nsAString& aLanguage)
    131      : mName(aName), mLanguage(aLanguage) {}
    132 
    133  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SpeechDispatcherVoice)
    134 
    135  // Voice name
    136  nsString mName;
    137 
    138  // Voice language, in BCP-47 syntax
    139  nsString mLanguage;
    140 
    141 private:
    142  ~SpeechDispatcherVoice() = default;
    143 };
    144 
    145 class SpeechDispatcherCallback final : public nsISpeechTaskCallback {
    146 public:
    147  SpeechDispatcherCallback(nsISpeechTask* aTask,
    148                           SpeechDispatcherService* aService)
    149      : mTask(aTask), mService(aService) {}
    150 
    151  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
    152  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechDispatcherCallback,
    153                                           nsISpeechTaskCallback)
    154 
    155  NS_DECL_NSISPEECHTASKCALLBACK
    156 
    157  bool OnSpeechEvent(SPDNotificationType state);
    158 
    159 private:
    160  ~SpeechDispatcherCallback() = default;
    161 
    162  // This pointer is used to dispatch events
    163  nsCOMPtr<nsISpeechTask> mTask;
    164 
    165  // By holding a strong reference to the service we guarantee that it won't be
    166  // destroyed before this runnable.
    167  RefPtr<SpeechDispatcherService> mService;
    168 
    169  TimeStamp mStartTime;
    170 };
    171 
// Cycle-collection boilerplate for SpeechDispatcherCallback. Only mTask is
// listed as a collected member; mService is not.
NS_IMPL_CYCLE_COLLECTION(SpeechDispatcherCallback, mTask);

// QueryInterface answers for nsISpeechTaskCallback and nsISupports.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechDispatcherCallback)
 NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
NS_INTERFACE_MAP_END

NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechDispatcherCallback)
NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechDispatcherCallback)
    181 
    182 NS_IMETHODIMP
    183 SpeechDispatcherCallback::OnPause() {
    184  // XXX: Speech dispatcher does not pause immediately, but waits for the speech
    185  // to reach an index mark so that it could resume from that offset.
    186  // There is no support for word or sentence boundaries, so index marks would
    187  // only occur in explicit SSML marks, and we don't support that yet.
    188  // What in actuality happens, is that if you call spd_pause(), it will speak
    189  // the utterance in its entirety, dispatch an end event, and then put speechd
    190  // in a 'paused' state. Since it is after the utterance ended, we don't get
    191  // that state change, and our speech api is in an unrecoverable state.
    192  // So, since it is useless anyway, I am not implementing pause.
    193  return NS_OK;
    194 }
    195 
    196 NS_IMETHODIMP
    197 SpeechDispatcherCallback::OnResume() {
    198  // XXX: Unsupported, see OnPause().
    199  return NS_OK;
    200 }
    201 
    202 NS_IMETHODIMP
    203 SpeechDispatcherCallback::OnCancel() {
    204  if (spd_cancel(mService->mSpeechdClient) < 0) {
    205    return NS_ERROR_FAILURE;
    206  }
    207 
    208  return NS_OK;
    209 }
    210 
    211 NS_IMETHODIMP
    212 SpeechDispatcherCallback::OnVolumeChanged(float aVolume) {
    213  // XXX: This currently does not change the volume mid-utterance, but it
    214  // doesn't do anything bad either. So we could put this here with the hopes
    215  // that speechd supports this in the future.
    216  if (spd_set_volume(mService->mSpeechdClient,
    217                     static_cast<int>(aVolume * 100)) < 0) {
    218    return NS_ERROR_FAILURE;
    219  }
    220 
    221  return NS_OK;
    222 }
    223 
    224 bool SpeechDispatcherCallback::OnSpeechEvent(SPDNotificationType state) {
    225  bool remove = false;
    226 
    227  switch (state) {
    228    case SPD_EVENT_BEGIN:
    229      mStartTime = TimeStamp::Now();
    230      mTask->DispatchStart();
    231      break;
    232 
    233    case SPD_EVENT_PAUSE:
    234      mTask->DispatchPause((TimeStamp::Now() - mStartTime).ToSeconds(), 0);
    235      break;
    236 
    237    case SPD_EVENT_RESUME:
    238      mTask->DispatchResume((TimeStamp::Now() - mStartTime).ToSeconds(), 0);
    239      break;
    240 
    241    case SPD_EVENT_CANCEL:
    242    case SPD_EVENT_END:
    243      mTask->DispatchEnd((TimeStamp::Now() - mStartTime).ToSeconds(), 0);
    244      remove = true;
    245      break;
    246 
    247    case SPD_EVENT_INDEX_MARK:
    248      // Not yet supported
    249      break;
    250 
    251    default:
    252      break;
    253  }
    254 
    255  return remove;
    256 }
    257 
    258 static void speechd_cb(size_t msg_id, size_t client_id,
    259                       SPDNotificationType state) {
    260  SpeechDispatcherService* service =
    261      SpeechDispatcherService::GetInstance(false);
    262 
    263  if (service) {
    264    NS_DispatchToMainThread(NewRunnableMethod<uint32_t, SPDNotificationType>(
    265        "dom::SpeechDispatcherService::EventNotify", service,
    266        &SpeechDispatcherService::EventNotify, static_cast<uint32_t>(msg_id),
    267        state));
    268  }
    269 }
    270 
// QueryInterface answers for SpeechDispatcherService: nsISpeechService,
// nsIObserver and nsISupports.
NS_INTERFACE_MAP_BEGIN(SpeechDispatcherService)
 NS_INTERFACE_MAP_ENTRY(nsISpeechService)
 NS_INTERFACE_MAP_ENTRY(nsIObserver)
 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver)
NS_INTERFACE_MAP_END

// AddRef/Release implementations for the service.
NS_IMPL_ADDREF(SpeechDispatcherService)
NS_IMPL_RELEASE(SpeechDispatcherService)
    279 
    280 SpeechDispatcherService::SpeechDispatcherService()
    281    : mInitialized(false), mSpeechdClient(nullptr) {}
    282 
    283 void SpeechDispatcherService::Init() {
    284  if (!StaticPrefs::media_webspeech_synth_enabled() ||
    285      Preferences::GetBool("media.webspeech.synth.test")) {
    286    return;
    287  }
    288 
    289  // While speech dispatcher has a "threaded" mode, only spd_say() is async.
    290  // Since synchronous socket i/o could impact startup time, we do
    291  // initialization in a separate thread.
    292  DebugOnly<nsresult> rv =
    293      NS_NewNamedThread("speechd init", getter_AddRefs(mInitThread));
    294  MOZ_ASSERT(NS_SUCCEEDED(rv));
    295  rv = mInitThread->Dispatch(
    296      NewRunnableMethod("dom::SpeechDispatcherService::Setup", this,
    297                        &SpeechDispatcherService::Setup),
    298      NS_DISPATCH_NORMAL);
    299  MOZ_ASSERT(NS_SUCCEEDED(rv));
    300 }
    301 
    302 SpeechDispatcherService::~SpeechDispatcherService() {
    303  if (mInitThread) {
    304    mInitThread->Shutdown();
    305  }
    306 
    307  if (mSpeechdClient) {
    308    spd_close(mSpeechdClient);
    309  }
    310 }
    311 
    312 void SpeechDispatcherService::Setup() {
    313 #define FUNC(name, type, params) {#name, (nsSpeechDispatcherFunc*)&_##name},
    314  static const nsSpeechDispatcherDynamicFunction kSpeechDispatcherSymbols[] = {
    315      SPEECHD_FUNCTIONS};
    316 #undef FUNC
    317 
    318  MOZ_ASSERT(!mInitialized);
    319 
    320  speechdLib = PR_LoadLibrary("libspeechd.so.2");
    321 
    322  if (!speechdLib) {
    323    NS_WARNING("Failed to load speechd library");
    324    NotifyError(u"lib-missing"_ns);
    325    return;
    326  }
    327 
    328  if (!PR_FindFunctionSymbol(speechdLib, "spd_get_volume")) {
    329    // There is no version getter function, so we rely on a symbol that was
    330    // introduced in release 0.8.2 in order to check for ABI compatibility.
    331    NS_WARNING("Unsupported version of speechd detected");
    332    NotifyError(u"lib-too-old"_ns);
    333    return;
    334  }
    335 
    336  for (uint32_t i = 0; i < std::size(kSpeechDispatcherSymbols); i++) {
    337    *kSpeechDispatcherSymbols[i].function = PR_FindFunctionSymbol(
    338        speechdLib, kSpeechDispatcherSymbols[i].functionName);
    339 
    340    if (!*kSpeechDispatcherSymbols[i].function) {
    341      NS_WARNING(nsPrintfCString("Failed to find speechd symbol for'%s'",
    342                                 kSpeechDispatcherSymbols[i].functionName)
    343                     .get());
    344      NotifyError(u"missing-symbol"_ns);
    345      return;
    346    }
    347  }
    348 
    349  mSpeechdClient =
    350      spd_open("firefox", "web speech api", "who", SPD_MODE_THREADED);
    351  if (!mSpeechdClient) {
    352    NS_WARNING("Failed to call spd_open");
    353    NotifyError(u"open-fail"_ns);
    354    return;
    355  }
    356 
    357  // Get all the voices from sapi and register in the SynthVoiceRegistry
    358  SPDVoice** list = spd_list_synthesis_voices(mSpeechdClient);
    359 
    360  mSpeechdClient->callback_begin = speechd_cb;
    361  mSpeechdClient->callback_end = speechd_cb;
    362  mSpeechdClient->callback_cancel = speechd_cb;
    363  mSpeechdClient->callback_pause = speechd_cb;
    364  mSpeechdClient->callback_resume = speechd_cb;
    365 
    366  spd_set_notification_on(mSpeechdClient, SPD_BEGIN);
    367  spd_set_notification_on(mSpeechdClient, SPD_END);
    368  spd_set_notification_on(mSpeechdClient, SPD_CANCEL);
    369 
    370  if (list != NULL) {
    371    for (int i = 0; list[i]; i++) {
    372      nsAutoString uri;
    373 
    374      uri.AssignLiteral(URI_PREFIX);
    375      nsAutoCString name;
    376      NS_EscapeURL(list[i]->name, -1,
    377                   esc_OnlyNonASCII | esc_Spaces | esc_AlwaysCopy, name);
    378      uri.Append(NS_ConvertUTF8toUTF16(name));
    379 
    380      uri.AppendLiteral("?");
    381 
    382      nsAutoCString lang(list[i]->language);
    383 
    384      uri.Append(NS_ConvertUTF8toUTF16(lang));
    385 
    386      mVoices.InsertOrUpdate(uri, MakeRefPtr<SpeechDispatcherVoice>(
    387                                      NS_ConvertUTF8toUTF16(list[i]->name),
    388                                      NS_ConvertUTF8toUTF16(lang)));
    389    }
    390  }
    391 
    392  if (mVoices.Count() == 0) {
    393    NotifyError(u"no-voices"_ns);
    394  }
    395 
    396  NS_DispatchToMainThread(
    397      NewRunnableMethod("dom::SpeechDispatcherService::RegisterVoices", this,
    398                        &SpeechDispatcherService::RegisterVoices));
    399 
    400  // mInitialized = true;
    401 }
    402 
    403 // private methods
    404 
    405 void SpeechDispatcherService::NotifyError(const nsString& aError) {
    406  if (!NS_IsMainThread()) {
    407    NS_DispatchToMainThread(NewRunnableMethod<const nsString>(
    408        "dom::SpeechDispatcherService::NotifyError", this,
    409        &SpeechDispatcherService::NotifyError, aError));
    410    return;
    411  }
    412 
    413  RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance();
    414  DebugOnly<nsresult> rv = registry->NotifyVoicesError(aError);
    415 }
    416 
    417 void SpeechDispatcherService::RegisterVoices() {
    418  RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance();
    419  for (const auto& entry : mVoices) {
    420    const RefPtr<SpeechDispatcherVoice>& voice = entry.GetData();
    421 
    422    // This service can only speak one utterance at a time, so we set
    423    // aQueuesUtterances to true in order to track global state and schedule
    424    // access to this service.
    425    DebugOnly<nsresult> rv =
    426        registry->AddVoice(this, entry.GetKey(), voice->mName, voice->mLanguage,
    427                           voice->mName.EqualsLiteral("default"), true);
    428 
    429    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice");
    430  }
    431 
    432  mInitThread->Shutdown();
    433  mInitThread = nullptr;
    434 
    435  mInitialized = true;
    436 
    437  registry->NotifyVoicesChanged();
    438 }
    439 
    440 // nsIObserver
    441 
    442 NS_IMETHODIMP
    443 SpeechDispatcherService::Observe(nsISupports* aSubject, const char* aTopic,
    444                                 const char16_t* aData) {
    445  return NS_OK;
    446 }
    447 
    448 // nsISpeechService
    449 
    450 // TODO: Support SSML
    451 NS_IMETHODIMP
    452 SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri,
    453                               float aVolume, float aRate, float aPitch,
    454                               nsISpeechTask* aTask) {
    455  if (NS_WARN_IF(!mInitialized)) {
    456    return NS_ERROR_NOT_AVAILABLE;
    457  }
    458 
    459  RefPtr<SpeechDispatcherCallback> callback =
    460      new SpeechDispatcherCallback(aTask, this);
    461 
    462  bool found = false;
    463  SpeechDispatcherVoice* voice = mVoices.GetWeak(aUri, &found);
    464 
    465  if (NS_WARN_IF(!(found))) {
    466    return NS_ERROR_NOT_AVAILABLE;
    467  }
    468 
    469  spd_set_synthesis_voice(mSpeechdClient,
    470                          NS_ConvertUTF16toUTF8(voice->mName).get());
    471 
    472  // We provide a volume of 0.0 to 1.0, speech-dispatcher expects 0 - 100.
    473  spd_set_volume(mSpeechdClient, static_cast<int>(aVolume * 100));
    474 
    475  // aRate is a value of 0.1 (0.1x) to 10 (10x) with 1 (1x) being normal rate.
    476  // speechd expects -100 to 100 with 0 being normal rate.
    477  float rate = 0;
    478  if (aRate > 1) {
    479    // Each step to 100 is logarithmically distributed up to 2.5x.
    480    rate = log10(std::min(aRate, MAX_RATE)) / log10(MAX_RATE) * 100;
    481  } else if (aRate < 1) {
    482    // Each step to -100 is logarithmically distributed down to 0.5x.
    483    rate = log10(std::max(aRate, MIN_RATE)) / log10(MIN_RATE) * -100;
    484  }
    485 
    486  spd_set_voice_rate(mSpeechdClient, static_cast<int>(rate));
    487 
    488  // We provide a pitch of 0 to 2 with 1 being the default.
    489  // speech-dispatcher expects -100 to 100 with 0 being default.
    490  spd_set_voice_pitch(mSpeechdClient, static_cast<int>((aPitch - 1) * 100));
    491 
    492  nsresult rv = aTask->Setup(callback);
    493 
    494  if (NS_FAILED(rv)) {
    495    return rv;
    496  }
    497 
    498  if (aText.Length()) {
    499    int msg_id = spd_say(mSpeechdClient, SPD_MESSAGE,
    500                         NS_ConvertUTF16toUTF8(aText).get());
    501 
    502    if (msg_id < 0) {
    503      return NS_ERROR_FAILURE;
    504    }
    505 
    506    mCallbacks.InsertOrUpdate(msg_id, std::move(callback));
    507  } else {
    508    // Speech dispatcher does not work well with empty strings.
    509    // In that case, don't send empty string to speechd,
    510    // and just emulate a speechd start and end event.
    511    NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>(
    512        "dom::SpeechDispatcherCallback::OnSpeechEvent", callback,
    513        &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_BEGIN));
    514 
    515    NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>(
    516        "dom::SpeechDispatcherCallback::OnSpeechEvent", callback,
    517        &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_END));
    518  }
    519 
    520  return NS_OK;
    521 }
    522 
    523 SpeechDispatcherService* SpeechDispatcherService::GetInstance(bool create) {
    524  if (XRE_GetProcessType() != GeckoProcessType_Default) {
    525    MOZ_ASSERT(
    526        false,
    527        "SpeechDispatcherService can only be started on main gecko process");
    528    return nullptr;
    529  }
    530 
    531  if (!sSingleton && create) {
    532    sSingleton = new SpeechDispatcherService();
    533    sSingleton->Init();
    534    ClearOnShutdown(&sSingleton);
    535  }
    536 
    537  return sSingleton;
    538 }
    539 
    540 already_AddRefed<SpeechDispatcherService>
    541 SpeechDispatcherService::GetInstanceForService() {
    542  MOZ_ASSERT(NS_IsMainThread());
    543  RefPtr<SpeechDispatcherService> sapiService = GetInstance();
    544  return sapiService.forget();
    545 }
    546 
    547 void SpeechDispatcherService::EventNotify(uint32_t aMsgId, uint32_t aState) {
    548  SpeechDispatcherCallback* callback = mCallbacks.GetWeak(aMsgId);
    549 
    550  if (callback) {
    551    if (callback->OnSpeechEvent((SPDNotificationType)aState)) {
    552      mCallbacks.Remove(aMsgId);
    553    }
    554  }
    555 }
    556 
    557 }  // namespace mozilla::dom