SpeechDispatcherService.cpp (17139B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "SpeechDispatcherService.h" 8 9 #include <math.h> 10 11 #include "mozilla/ClearOnShutdown.h" 12 #include "mozilla/Preferences.h" 13 #include "mozilla/StaticPrefs_media.h" 14 #include "mozilla/dom/nsSpeechTask.h" 15 #include "mozilla/dom/nsSynthVoiceRegistry.h" 16 #include "nsEscape.h" 17 #include "nsISupports.h" 18 #include "nsPrintfCString.h" 19 #include "nsReadableUtils.h" 20 #include "nsServiceManagerUtils.h" 21 #include "nsThreadUtils.h" 22 #include "nsXULAppAPI.h" 23 #include "prlink.h" 24 25 #define URI_PREFIX "urn:moz-tts:speechd:" 26 27 #define MAX_RATE static_cast<float>(2.5) 28 #define MIN_RATE static_cast<float>(0.5) 29 30 // Some structures for libspeechd 31 typedef enum { 32 SPD_EVENT_BEGIN, 33 SPD_EVENT_END, 34 SPD_EVENT_INDEX_MARK, 35 SPD_EVENT_CANCEL, 36 SPD_EVENT_PAUSE, 37 SPD_EVENT_RESUME 38 } SPDNotificationType; 39 40 typedef enum { 41 SPD_BEGIN = 1, 42 SPD_END = 2, 43 SPD_INDEX_MARKS = 4, 44 SPD_CANCEL = 8, 45 SPD_PAUSE = 16, 46 SPD_RESUME = 32, 47 48 SPD_ALL = 0x3f 49 } SPDNotification; 50 51 typedef enum { SPD_MODE_SINGLE = 0, SPD_MODE_THREADED = 1 } SPDConnectionMode; 52 53 typedef void (*SPDCallback)(size_t msg_id, size_t client_id, 54 SPDNotificationType state); 55 56 typedef void (*SPDCallbackIM)(size_t msg_id, size_t client_id, 57 SPDNotificationType state, char* index_mark); 58 59 struct SPDConnection { 60 SPDCallback callback_begin; 61 SPDCallback callback_end; 62 SPDCallback callback_cancel; 63 SPDCallback callback_pause; 64 SPDCallback callback_resume; 65 SPDCallbackIM callback_im; 66 67 /* partial, more private fields in structure */ 68 }; 69 70 struct SPDVoice { 71 char* name; 72 char* language; 73 char* variant; 74 }; 75 76 typedef enum { 77 SPD_IMPORTANT = 1, 78 SPD_MESSAGE = 2, 79 SPD_TEXT = 3, 80 SPD_NOTIFICATION = 4, 81 SPD_PROGRESS = 5 82 } SPDPriority; 83 84 #define SPEECHD_FUNCTIONS \ 85 FUNC(spd_open, SPDConnection*, \ 86 (const char*, const char*, const char*, SPDConnectionMode)) \ 87 FUNC(spd_close, void, (SPDConnection*)) \ 88 FUNC(spd_list_synthesis_voices, SPDVoice**, (SPDConnection*)) \ 89 FUNC(spd_say, int, (SPDConnection*, SPDPriority, const char*)) \ 90 FUNC(spd_cancel, int, (SPDConnection*)) \ 91 FUNC(spd_set_volume, int, (SPDConnection*, int)) \ 92 FUNC(spd_set_voice_rate, int, (SPDConnection*, int)) \ 93 FUNC(spd_set_voice_pitch, int, (SPDConnection*, int)) \ 94 FUNC(spd_set_synthesis_voice, int, (SPDConnection*, const char*)) \ 95 FUNC(spd_set_notification_on, int, (SPDConnection*, SPDNotification)) 96 97 #define FUNC(name, type, params) \ 98 typedef type(*_##name##_fn) params; \ 99 static _##name##_fn _##name; 100 101 SPEECHD_FUNCTIONS 102 103 #undef FUNC 104 105 #define spd_open _spd_open 106 #define spd_close _spd_close 107 #define spd_list_synthesis_voices _spd_list_synthesis_voices 108 #define spd_say _spd_say 109 #define spd_cancel _spd_cancel 110 #define spd_set_volume _spd_set_volume 111 #define spd_set_voice_rate _spd_set_voice_rate 112 #define spd_set_voice_pitch _spd_set_voice_pitch 113 #define spd_set_synthesis_voice _spd_set_synthesis_voice 114 #define spd_set_notification_on _spd_set_notification_on 115 116 static PRLibrary* speechdLib = nullptr; 117 118 typedef void (*nsSpeechDispatcherFunc)(); 119 struct nsSpeechDispatcherDynamicFunction { 120 const char* functionName; 121 nsSpeechDispatcherFunc* function; 122 }; 123 124 namespace mozilla::dom { 125 126 StaticRefPtr<SpeechDispatcherService> SpeechDispatcherService::sSingleton; 127 128 class SpeechDispatcherVoice { 129 public: 130 SpeechDispatcherVoice(const nsAString& aName, const nsAString& aLanguage) 131 : mName(aName), mLanguage(aLanguage) {} 132 133 NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SpeechDispatcherVoice) 134 135 // Voice name 136 nsString mName; 137 138 // Voice language, in BCP-47 syntax 139 nsString mLanguage; 140 141 private: 142 ~SpeechDispatcherVoice() = default; 143 }; 144 145 class SpeechDispatcherCallback final : public nsISpeechTaskCallback { 146 public: 147 SpeechDispatcherCallback(nsISpeechTask* aTask, 148 SpeechDispatcherService* aService) 149 : mTask(aTask), mService(aService) {} 150 151 NS_DECL_CYCLE_COLLECTING_ISUPPORTS 152 NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechDispatcherCallback, 153 nsISpeechTaskCallback) 154 155 NS_DECL_NSISPEECHTASKCALLBACK 156 157 bool OnSpeechEvent(SPDNotificationType state); 158 159 private: 160 ~SpeechDispatcherCallback() = default; 161 162 // This pointer is used to dispatch events 163 nsCOMPtr<nsISpeechTask> mTask; 164 165 // By holding a strong reference to the service we guarantee that it won't be 166 // destroyed before this runnable. 167 RefPtr<SpeechDispatcherService> mService; 168 169 TimeStamp mStartTime; 170 }; 171 172 NS_IMPL_CYCLE_COLLECTION(SpeechDispatcherCallback, mTask); 173 174 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechDispatcherCallback) 175 NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback) 176 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback) 177 NS_INTERFACE_MAP_END 178 179 NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechDispatcherCallback) 180 NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechDispatcherCallback) 181 182 NS_IMETHODIMP 183 SpeechDispatcherCallback::OnPause() { 184 // XXX: Speech dispatcher does not pause immediately, but waits for the speech 185 // to reach an index mark so that it could resume from that offset. 186 // There is no support for word or sentence boundaries, so index marks would 187 // only occur in explicit SSML marks, and we don't support that yet. 188 // What in actuality happens, is that if you call spd_pause(), it will speak 189 // the utterance in its entirety, dispatch an end event, and then put speechd 190 // in a 'paused' state. Since it is after the utterance ended, we don't get 191 // that state change, and our speech api is in an unrecoverable state. 192 // So, since it is useless anyway, I am not implementing pause. 193 return NS_OK; 194 } 195 196 NS_IMETHODIMP 197 SpeechDispatcherCallback::OnResume() { 198 // XXX: Unsupported, see OnPause(). 199 return NS_OK; 200 } 201 202 NS_IMETHODIMP 203 SpeechDispatcherCallback::OnCancel() { 204 if (spd_cancel(mService->mSpeechdClient) < 0) { 205 return NS_ERROR_FAILURE; 206 } 207 208 return NS_OK; 209 } 210 211 NS_IMETHODIMP 212 SpeechDispatcherCallback::OnVolumeChanged(float aVolume) { 213 // XXX: This currently does not change the volume mid-utterance, but it 214 // doesn't do anything bad either. So we could put this here with the hopes 215 // that speechd supports this in the future. 216 if (spd_set_volume(mService->mSpeechdClient, 217 static_cast<int>(aVolume * 100)) < 0) { 218 return NS_ERROR_FAILURE; 219 } 220 221 return NS_OK; 222 } 223 224 bool SpeechDispatcherCallback::OnSpeechEvent(SPDNotificationType state) { 225 bool remove = false; 226 227 switch (state) { 228 case SPD_EVENT_BEGIN: 229 mStartTime = TimeStamp::Now(); 230 mTask->DispatchStart(); 231 break; 232 233 case SPD_EVENT_PAUSE: 234 mTask->DispatchPause((TimeStamp::Now() - mStartTime).ToSeconds(), 0); 235 break; 236 237 case SPD_EVENT_RESUME: 238 mTask->DispatchResume((TimeStamp::Now() - mStartTime).ToSeconds(), 0); 239 break; 240 241 case SPD_EVENT_CANCEL: 242 case SPD_EVENT_END: 243 mTask->DispatchEnd((TimeStamp::Now() - mStartTime).ToSeconds(), 0); 244 remove = true; 245 break; 246 247 case SPD_EVENT_INDEX_MARK: 248 // Not yet supported 249 break; 250 251 default: 252 break; 253 } 254 255 return remove; 256 } 257 258 static void speechd_cb(size_t msg_id, size_t client_id, 259 SPDNotificationType state) { 260 SpeechDispatcherService* service = 261 SpeechDispatcherService::GetInstance(false); 262 263 if (service) { 264 NS_DispatchToMainThread(NewRunnableMethod<uint32_t, SPDNotificationType>( 265 "dom::SpeechDispatcherService::EventNotify", service, 266 &SpeechDispatcherService::EventNotify, static_cast<uint32_t>(msg_id), 267 state)); 268 } 269 } 270 271 NS_INTERFACE_MAP_BEGIN(SpeechDispatcherService) 272 NS_INTERFACE_MAP_ENTRY(nsISpeechService) 273 NS_INTERFACE_MAP_ENTRY(nsIObserver) 274 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver) 275 NS_INTERFACE_MAP_END 276 277 NS_IMPL_ADDREF(SpeechDispatcherService) 278 NS_IMPL_RELEASE(SpeechDispatcherService) 279 280 SpeechDispatcherService::SpeechDispatcherService() 281 : mInitialized(false), mSpeechdClient(nullptr) {} 282 283 void SpeechDispatcherService::Init() { 284 if (!StaticPrefs::media_webspeech_synth_enabled() || 285 Preferences::GetBool("media.webspeech.synth.test")) { 286 return; 287 } 288 289 // While speech dispatcher has a "threaded" mode, only spd_say() is async. 290 // Since synchronous socket i/o could impact startup time, we do 291 // initialization in a separate thread. 292 DebugOnly<nsresult> rv = 293 NS_NewNamedThread("speechd init", getter_AddRefs(mInitThread)); 294 MOZ_ASSERT(NS_SUCCEEDED(rv)); 295 rv = mInitThread->Dispatch( 296 NewRunnableMethod("dom::SpeechDispatcherService::Setup", this, 297 &SpeechDispatcherService::Setup), 298 NS_DISPATCH_NORMAL); 299 MOZ_ASSERT(NS_SUCCEEDED(rv)); 300 } 301 302 SpeechDispatcherService::~SpeechDispatcherService() { 303 if (mInitThread) { 304 mInitThread->Shutdown(); 305 } 306 307 if (mSpeechdClient) { 308 spd_close(mSpeechdClient); 309 } 310 } 311 312 void SpeechDispatcherService::Setup() { 313 #define FUNC(name, type, params) {#name, (nsSpeechDispatcherFunc*)&_##name}, 314 static const nsSpeechDispatcherDynamicFunction kSpeechDispatcherSymbols[] = { 315 SPEECHD_FUNCTIONS}; 316 #undef FUNC 317 318 MOZ_ASSERT(!mInitialized); 319 320 speechdLib = PR_LoadLibrary("libspeechd.so.2"); 321 322 if (!speechdLib) { 323 NS_WARNING("Failed to load speechd library"); 324 NotifyError(u"lib-missing"_ns); 325 return; 326 } 327 328 if (!PR_FindFunctionSymbol(speechdLib, "spd_get_volume")) { 329 // There is no version getter function, so we rely on a symbol that was 330 // introduced in release 0.8.2 in order to check for ABI compatibility. 331 NS_WARNING("Unsupported version of speechd detected"); 332 NotifyError(u"lib-too-old"_ns); 333 return; 334 } 335 336 for (uint32_t i = 0; i < std::size(kSpeechDispatcherSymbols); i++) { 337 *kSpeechDispatcherSymbols[i].function = PR_FindFunctionSymbol( 338 speechdLib, kSpeechDispatcherSymbols[i].functionName); 339 340 if (!*kSpeechDispatcherSymbols[i].function) { 341 NS_WARNING(nsPrintfCString("Failed to find speechd symbol for'%s'", 342 kSpeechDispatcherSymbols[i].functionName) 343 .get()); 344 NotifyError(u"missing-symbol"_ns); 345 return; 346 } 347 } 348 349 mSpeechdClient = 350 spd_open("firefox", "web speech api", "who", SPD_MODE_THREADED); 351 if (!mSpeechdClient) { 352 NS_WARNING("Failed to call spd_open"); 353 NotifyError(u"open-fail"_ns); 354 return; 355 } 356 357 // Get all the voices from sapi and register in the SynthVoiceRegistry 358 SPDVoice** list = spd_list_synthesis_voices(mSpeechdClient); 359 360 mSpeechdClient->callback_begin = speechd_cb; 361 mSpeechdClient->callback_end = speechd_cb; 362 mSpeechdClient->callback_cancel = speechd_cb; 363 mSpeechdClient->callback_pause = speechd_cb; 364 mSpeechdClient->callback_resume = speechd_cb; 365 366 spd_set_notification_on(mSpeechdClient, SPD_BEGIN); 367 spd_set_notification_on(mSpeechdClient, SPD_END); 368 spd_set_notification_on(mSpeechdClient, SPD_CANCEL); 369 370 if (list != NULL) { 371 for (int i = 0; list[i]; i++) { 372 nsAutoString uri; 373 374 uri.AssignLiteral(URI_PREFIX); 375 nsAutoCString name; 376 NS_EscapeURL(list[i]->name, -1, 377 esc_OnlyNonASCII | esc_Spaces | esc_AlwaysCopy, name); 378 uri.Append(NS_ConvertUTF8toUTF16(name)); 379 380 uri.AppendLiteral("?"); 381 382 nsAutoCString lang(list[i]->language); 383 384 uri.Append(NS_ConvertUTF8toUTF16(lang)); 385 386 mVoices.InsertOrUpdate(uri, MakeRefPtr<SpeechDispatcherVoice>( 387 NS_ConvertUTF8toUTF16(list[i]->name), 388 NS_ConvertUTF8toUTF16(lang))); 389 } 390 } 391 392 if (mVoices.Count() == 0) { 393 NotifyError(u"no-voices"_ns); 394 } 395 396 NS_DispatchToMainThread( 397 NewRunnableMethod("dom::SpeechDispatcherService::RegisterVoices", this, 398 &SpeechDispatcherService::RegisterVoices)); 399 400 // mInitialized = true; 401 } 402 403 // private methods 404 405 void SpeechDispatcherService::NotifyError(const nsString& aError) { 406 if (!NS_IsMainThread()) { 407 NS_DispatchToMainThread(NewRunnableMethod<const nsString>( 408 "dom::SpeechDispatcherService::NotifyError", this, 409 &SpeechDispatcherService::NotifyError, aError)); 410 return; 411 } 412 413 RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance(); 414 DebugOnly<nsresult> rv = registry->NotifyVoicesError(aError); 415 } 416 417 void SpeechDispatcherService::RegisterVoices() { 418 RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance(); 419 for (const auto& entry : mVoices) { 420 const RefPtr<SpeechDispatcherVoice>& voice = entry.GetData(); 421 422 // This service can only speak one utterance at a time, so we set 423 // aQueuesUtterances to true in order to track global state and schedule 424 // access to this service. 425 DebugOnly<nsresult> rv = 426 registry->AddVoice(this, entry.GetKey(), voice->mName, voice->mLanguage, 427 voice->mName.EqualsLiteral("default"), true); 428 429 NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice"); 430 } 431 432 mInitThread->Shutdown(); 433 mInitThread = nullptr; 434 435 mInitialized = true; 436 437 registry->NotifyVoicesChanged(); 438 } 439 440 // nsIObserver 441 442 NS_IMETHODIMP 443 SpeechDispatcherService::Observe(nsISupports* aSubject, const char* aTopic, 444 const char16_t* aData) { 445 return NS_OK; 446 } 447 448 // nsISpeechService 449 450 // TODO: Support SSML 451 NS_IMETHODIMP 452 SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri, 453 float aVolume, float aRate, float aPitch, 454 nsISpeechTask* aTask) { 455 if (NS_WARN_IF(!mInitialized)) { 456 return NS_ERROR_NOT_AVAILABLE; 457 } 458 459 RefPtr<SpeechDispatcherCallback> callback = 460 new SpeechDispatcherCallback(aTask, this); 461 462 bool found = false; 463 SpeechDispatcherVoice* voice = mVoices.GetWeak(aUri, &found); 464 465 if (NS_WARN_IF(!(found))) { 466 return NS_ERROR_NOT_AVAILABLE; 467 } 468 469 spd_set_synthesis_voice(mSpeechdClient, 470 NS_ConvertUTF16toUTF8(voice->mName).get()); 471 472 // We provide a volume of 0.0 to 1.0, speech-dispatcher expects 0 - 100. 473 spd_set_volume(mSpeechdClient, static_cast<int>(aVolume * 100)); 474 475 // aRate is a value of 0.1 (0.1x) to 10 (10x) with 1 (1x) being normal rate. 476 // speechd expects -100 to 100 with 0 being normal rate. 477 float rate = 0; 478 if (aRate > 1) { 479 // Each step to 100 is logarithmically distributed up to 2.5x. 480 rate = log10(std::min(aRate, MAX_RATE)) / log10(MAX_RATE) * 100; 481 } else if (aRate < 1) { 482 // Each step to -100 is logarithmically distributed down to 0.5x. 483 rate = log10(std::max(aRate, MIN_RATE)) / log10(MIN_RATE) * -100; 484 } 485 486 spd_set_voice_rate(mSpeechdClient, static_cast<int>(rate)); 487 488 // We provide a pitch of 0 to 2 with 1 being the default. 489 // speech-dispatcher expects -100 to 100 with 0 being default. 490 spd_set_voice_pitch(mSpeechdClient, static_cast<int>((aPitch - 1) * 100)); 491 492 nsresult rv = aTask->Setup(callback); 493 494 if (NS_FAILED(rv)) { 495 return rv; 496 } 497 498 if (aText.Length()) { 499 int msg_id = spd_say(mSpeechdClient, SPD_MESSAGE, 500 NS_ConvertUTF16toUTF8(aText).get()); 501 502 if (msg_id < 0) { 503 return NS_ERROR_FAILURE; 504 } 505 506 mCallbacks.InsertOrUpdate(msg_id, std::move(callback)); 507 } else { 508 // Speech dispatcher does not work well with empty strings. 509 // In that case, don't send empty string to speechd, 510 // and just emulate a speechd start and end event. 511 NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>( 512 "dom::SpeechDispatcherCallback::OnSpeechEvent", callback, 513 &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_BEGIN)); 514 515 NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>( 516 "dom::SpeechDispatcherCallback::OnSpeechEvent", callback, 517 &SpeechDispatcherCallback::OnSpeechEvent, SPD_EVENT_END)); 518 } 519 520 return NS_OK; 521 } 522 523 SpeechDispatcherService* SpeechDispatcherService::GetInstance(bool create) { 524 if (XRE_GetProcessType() != GeckoProcessType_Default) { 525 MOZ_ASSERT( 526 false, 527 "SpeechDispatcherService can only be started on main gecko process"); 528 return nullptr; 529 } 530 531 if (!sSingleton && create) { 532 sSingleton = new SpeechDispatcherService(); 533 sSingleton->Init(); 534 ClearOnShutdown(&sSingleton); 535 } 536 537 return sSingleton; 538 } 539 540 already_AddRefed<SpeechDispatcherService> 541 SpeechDispatcherService::GetInstanceForService() { 542 MOZ_ASSERT(NS_IsMainThread()); 543 RefPtr<SpeechDispatcherService> sapiService = GetInstance(); 544 return sapiService.forget(); 545 } 546 547 void SpeechDispatcherService::EventNotify(uint32_t aMsgId, uint32_t aState) { 548 SpeechDispatcherCallback* callback = mCallbacks.GetWeak(aMsgId); 549 550 if (callback) { 551 if (callback->OnSpeechEvent((SPDNotificationType)aState)) { 552 mCallbacks.Remove(aMsgId); 553 } 554 } 555 } 556 557 } // namespace mozilla::dom