OSXSpeechSynthesizerService.mm (13865B)
1 /* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 * 2 -*- */ 3 /* vim: set ts=2 sw=2 et tw=80: */ 4 /* This Source Code Form is subject to the terms of the Mozilla Public 5 * License, v. 2.0. If a copy of the MPL was not distributed with this 6 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 7 8 #include "OSXSpeechSynthesizerService.h" 9 #include "mozilla/Assertions.h" 10 #include "mozilla/ClearOnShutdown.h" 11 #include "mozilla/Preferences.h" 12 #include "mozilla/StaticPrefs_media.h" 13 #include "mozilla/dom/nsSpeechTask.h" 14 #include "mozilla/dom/nsSynthVoiceRegistry.h" 15 #include "nsCocoaUtils.h" 16 #include "nsISupports.h" 17 #include "nsIThread.h" 18 #include "nsObjCExceptions.h" 19 #include "nsServiceManagerUtils.h" 20 #include "nsThreadUtils.h" 21 #include "nsXULAppAPI.h" 22 23 #import <Cocoa/Cocoa.h> 24 25 @class SpeechDelegate; 26 27 // We can escape the default delimiters ("[[" and "]]") by temporarily 28 // changing the delimiters just before they appear, and changing them back 29 // just after. 
// Embedded-command sequences that switch the synthesizer's command delimiters
// away from "[[" / "]]" (START) and back again (END), so that a literal "[["
// or "]]" in user text can be spoken instead of being parsed as a command.
#define DLIM_ESCAPE_START "[[dlim (( ))]]"
#define DLIM_ESCAPE_END "((dlim [[ ]]))"

using namespace mozilla;

/**
 * Connects one nsISpeechTask with the NSSpeechSynthesizer that speaks it.
 *
 * Requests coming from the task (cancel / pause / resume / volume) are
 * forwarded to the synthesizer, and synthesizer delegate notifications
 * (word boundary / error / finished) are forwarded to the task.
 *
 * Ownership: the destructor releases both the synthesizer passed to the
 * constructor and the SpeechDelegate created by the constructor.
 */
class SpeechTaskCallback final : public nsISpeechTaskCallback {
 public:
  SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
                     const nsTArray<size_t>& aOffsets);

  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback,
                                           nsISpeechTaskCallback)

  NS_DECL_NSISPEECHTASKCALLBACK

  // Entry points used by SpeechDelegate.
  void OnWillSpeakWord(uint32_t aIndex, uint32_t aLength);
  void OnError(uint32_t aIndex);
  void OnDidFinishSpeaking();

 private:
  virtual ~SpeechTaskCallback();

  // Seconds elapsed since this callback was constructed.
  float GetTimeDurationFromStart();

  // Cleared once speaking has finished; every user must null-check it.
  nsCOMPtr<nsISpeechTask> mTask;
  // Released in the destructor.
  NSSpeechSynthesizer* mSpeechSynthesizer;
  // Created in the constructor, released in the destructor.
  SpeechDelegate* mDelegate;
  TimeStamp mStartingTime;
  // Last word position reported, expressed in user-supplied text offsets.
  uint32_t mCurrentIndex;
  // Index: position in the escaped text. Value: position in the user text.
  nsTArray<size_t> mOffsets;
};

// Objective-C delegate that relays NSSpeechSynthesizer notifications to a
// SpeechTaskCallback. mCallback is a raw, non-retained pointer; the owning
// SpeechTaskCallback detaches this delegate from the synthesizer in its
// destructor before releasing it.
@interface SpeechDelegate : NSObject <NSSpeechSynthesizerDelegate> {
 @private
  SpeechTaskCallback* mCallback;
}

- (id)initWithCallback:(SpeechTaskCallback*)aCallback;
@end

@implementation SpeechDelegate
- (id)initWithCallback:(SpeechTaskCallback*)aCallback {
  // Assign the result of [super init] to self and only touch ivars when it
  // succeeded; the superclass initializer may return nil or another object.
  if ((self = [super init])) {
    mCallback = aCallback;
  }
  return self;
}

- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
            willSpeakWord:(NSRange)aRange
                 ofString:(NSString*)aString {
  mCallback->OnWillSpeakWord(aRange.location, aRange.length);
}

- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
        didFinishSpeaking:(BOOL)aFinishedSpeaking {
  mCallback->OnDidFinishSpeaking();
}

- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
    didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex
                    ofString:(NSString*)aString
                     message:(NSString*)aMessage {
  mCallback->OnError(aCharacterIndex);
}
@end

NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask);

NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback)
  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
NS_INTERFACE_MAP_END

NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback)
NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback)

// Stores the task, synthesizer and a private copy of the offset map, installs
// a new SpeechDelegate on the synthesizer and records the starting time used
// for all elapsed-time reports.
SpeechTaskCallback::SpeechTaskCallback(nsISpeechTask* aTask,
                                       NSSpeechSynthesizer* aSynth,
                                       const nsTArray<size_t>& aOffsets)
    : mTask(aTask),
      mSpeechSynthesizer(aSynth),
      mCurrentIndex(0),
      mOffsets(aOffsets.Clone()) {
  mDelegate = [[SpeechDelegate alloc] initWithCallback:this];
  [mSpeechSynthesizer setDelegate:mDelegate];
  mStartingTime = TimeStamp::Now();
}

SpeechTaskCallback::~SpeechTaskCallback() {
  // Detach first: the delegate holds a raw pointer back to this object.
  [mSpeechSynthesizer setDelegate:nil];
  [mDelegate release];
  [mSpeechSynthesizer release];
}

// Stops the synthesizer. Returns NS_ERROR_FAILURE if an Objective-C exception
// is raised.
NS_IMETHODIMP
SpeechTaskCallback::OnCancel() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer stopSpeaking];
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

// Pauses the synthesizer immediately and reports the pause to the task.
// Returns NS_ERROR_FAILURE when the task has already been dropped.
NS_IMETHODIMP
SpeechTaskCallback::OnPause() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
  if (!mTask) {
    // When calling pause() on child process, it may not receive end event
    // from chrome process yet.
    return NS_ERROR_FAILURE;
  }
  mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

// Resumes the synthesizer and reports the resume to the task.
// Returns NS_ERROR_FAILURE when the task has already been dropped.
NS_IMETHODIMP
SpeechTaskCallback::OnResume() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer continueSpeaking];
  if (!mTask) {
    // When calling resume() on child process, it may not receive end event
    // from chrome process yet.
    return NS_ERROR_FAILURE;
  }
  mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

// Applies a new volume to the synthesizer. Any error reported by the
// synthesizer for the property change is ignored (error:nil).
NS_IMETHODIMP
SpeechTaskCallback::OnVolumeChanged(float aVolume) {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  [mSpeechSynthesizer setObject:[NSNumber numberWithFloat:aVolume]
                    forProperty:NSSpeechVolumeProperty
                          error:nil];
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

float SpeechTaskCallback::GetTimeDurationFromStart() {
  TimeDuration duration = TimeStamp::Now() - mStartingTime;
  return duration.ToSeconds();
}

// aIndex is a position in the escaped text handed to the synthesizer; it is
// translated back to a user-text position through mOffsets. An out-of-range
// index leaves the previously reported position unchanged.
void SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex, uint32_t aLength) {
  if (aIndex < mOffsets.Length()) {
    mCurrentIndex = mOffsets[aIndex];
  }
  if (!mTask) {
    return;
  }
  mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(), mCurrentIndex,
                          aLength, 1);
}

// Forwards a synthesizer error to the task. aIndex is passed through as
// reported by the synthesizer, without translation through mOffsets.
void SpeechTaskCallback::OnError(uint32_t aIndex) {
  if (!mTask) {
    return;
  }
  mTask->DispatchError(GetTimeDurationFromStart(), aIndex);
}

// Reports the end of speech to the task (if it is still attached), then
// detaches the delegate and drops the task.
void SpeechTaskCallback::OnDidFinishSpeaking() {
  // mTask may already have been cleared; guard like the other callbacks do
  // instead of dereferencing a null pointer.
  if (mTask) {
    mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
  }
  // no longer needed
  [mSpeechSynthesizer setDelegate:nil];
  mTask = nullptr;
}

namespace mozilla {
namespace dom {

// Plain description of one system voice, collected on the worker thread and
// registered later by RegisterVoicesRunnable.
struct OSXVoice {
  OSXVoice() : mIsDefault(false) {}

  nsString mUri;
  nsString mName;
  nsString mLocale;
  bool mIsDefault;
};

// Registers a list of voices with the synth voice registry.
class RegisterVoicesRunnable final : public Runnable {
 public:
  RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService,
                         nsTArray<OSXVoice>& aList)
      : Runnable("RegisterVoicesRunnable"),
        mSpeechService(aSpeechService),
        mVoices(aList) {}

  NS_IMETHOD Run() override;

 private:
  ~RegisterVoicesRunnable() override = default;

  // This runnable is always dispatched synchronously (the dispatcher waits
  // for it to complete), so it is unnecessary to hold owning references: the
  // service and the voice list outlive the call.
  OSXSpeechSynthesizerService* mSpeechService;
  nsTArray<OSXVoice>& mVoices;
};

// Adds every voice in mVoices to the registry, marks the default one, and
// notifies the registry that the voice list changed. A voice that fails to
// register is skipped (with a warning) rather than aborting the whole batch.
NS_IMETHODIMP
RegisterVoicesRunnable::Run() {
  nsresult rv;
  nsCOMPtr<nsISynthVoiceRegistry> registry =
      do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv);
  if (!registry) {
    return rv;
  }

  // Iterate by const reference; copying an OSXVoice copies three strings.
  for (const OSXVoice& voice : mVoices) {
    rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName,
                            voice.mLocale, true, false);
    if (NS_WARN_IF(NS_FAILED(rv))) {
      continue;
    }

    if (voice.mIsDefault) {
      registry->SetDefaultVoice(voice.mUri, true);
    }
  }

  registry->NotifyVoicesChanged();

  return NS_OK;
}

// Enumerates the system voices and hands them to a RegisterVoicesRunnable on
// the main thread.
class EnumVoicesRunnable final : public Runnable {
 public:
  explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService)
      : Runnable("EnumVoicesRunnable"), mSpeechService(aSpeechService) {}

  NS_IMETHOD Run() override;

 private:
  ~EnumVoicesRunnable() override = default;

  RefPtr<OSXSpeechSynthesizerService> mSpeechService;
};

// Builds an OSXVoice for each voice reported by NSSpeechSynthesizer, then
// dispatches a RegisterVoicesRunnable to the main thread and spins until it
// has completed, which keeps the stack-allocated list alive for its use.
NS_IMETHODIMP
EnumVoicesRunnable::Run() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  AutoTArray<OSXVoice, 64> list;

  NSArray* voices = [NSSpeechSynthesizer availableVoices];
  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];

  for (NSString* voice in voices) {
    OSXVoice item;

    NSDictionary* attr = [NSSpeechSynthesizer attributesForVoice:voice];

    nsAutoString identifier;
    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceIdentifier],
                                       identifier);

    nsCocoaUtils::GetStringForNSString([attr objectForKey:NSVoiceName],
                                       item.mName);

    nsCocoaUtils::GetStringForNSString(
        [attr objectForKey:NSVoiceLocaleIdentifier], item.mLocale);
    // Convert the locale separator, e.g. "en_US" becomes "en-US".
    item.mLocale.ReplaceChar('_', '-');

    item.mUri.AssignLiteral("urn:moz-tts:osx:");
    item.mUri.Append(identifier);

    if ([voice isEqualToString:defaultVoice]) {
      item.mIsDefault = true;
    }

    list.AppendElement(item);
  }

  RefPtr<RegisterVoicesRunnable> runnable =
      new RegisterVoicesRunnable(mSpeechService, list);
  NS_DispatchAndSpinEventLoopUntilComplete("EnumVoicesRunnable"_ns,
                                           GetMainThreadSerialEventTarget(),
                                           runnable.forget());

  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

StaticRefPtr<OSXSpeechSynthesizerService>
    OSXSpeechSynthesizerService::sSingleton;

NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService)
  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
  NS_INTERFACE_MAP_ENTRY(nsIObserver)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
NS_INTERFACE_MAP_END

NS_IMPL_ADDREF(OSXSpeechSynthesizerService)
NS_IMPL_RELEASE(OSXSpeechSynthesizerService)

OSXSpeechSynthesizerService::OSXSpeechSynthesizerService()
    : mInitialized(false) {}

// Starts voice enumeration on a new "SpeechWorker" thread.
// Returns false (and leaves the service uninitialized) when the test pref is
// set, when speech synthesis is disabled, or when the thread cannot be
// created.
bool OSXSpeechSynthesizerService::Init() {
  if (Preferences::GetBool("media.webspeech.synth.test") ||
      !StaticPrefs::media_webspeech_synth_enabled()) {
    // When test is enabled, we shouldn't add OS backend (Bug 1160844)
    return false;
  }

  nsCOMPtr<nsIThread> thread;
  if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) {
    return false;
  }

  // Get all the voices and register in the SynthVoiceRegistry
  nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this);
  thread->Dispatch(runnable, NS_DISPATCH_NORMAL);

  mInitialized = true;
  return true;
}

// Speaks aText with the voice named by aUri ("urn:moz-tts:osx:" followed by
// the voice identifier), applying the requested volume, rate and pitch.
//
// Literal "[[" and "]]" in aText are wrapped in delimiter-changing commands
// so the synthesizer speaks them instead of parsing them; an offset map is
// built alongside so boundary events can be reported in aText coordinates.
//
// Returns the failure code of aTask->Setup(), NS_ERROR_FAILURE if the
// synthesizer refuses to start or an Objective-C exception is raised, and
// NS_OK otherwise.
NS_IMETHODIMP
OSXSpeechSynthesizerService::Speak(const nsAString& aText,
                                   const nsAString& aUri, float aVolume,
                                   float aRate, float aPitch,
                                   nsISpeechTask* aTask) {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  MOZ_ASSERT(StringBeginsWith(aUri, u"urn:moz-tts:osx:"_ns),
             "OSXSpeechSynthesizerService doesn't allow this voice URI");

  // This reference is released by the SpeechTaskCallback created below.
  NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init];
  // strlen("urn:moz-tts:osx:") == 16
  NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16));
  [synth setVoice:identifier];

  // default rate is 180-220
  [synth setObject:[NSNumber numberWithInt:aRate * 200]
       forProperty:NSSpeechRateProperty
             error:nil];
  // volume allows 0.0-1.0
  [synth setObject:[NSNumber numberWithFloat:aVolume]
       forProperty:NSSpeechVolumeProperty
             error:nil];
  // Use default pitch value to calculate this
  NSNumber* defaultPitch = [synth objectForProperty:NSSpeechPitchBaseProperty
                                              error:nil];
  if (defaultPitch) {
    int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5);
    [synth setObject:[NSNumber numberWithInt:newPitch]
         forProperty:NSSpeechPitchBaseProperty
               error:nil];
  }

  nsAutoString escapedText;
  // We need to map the offsets from the given text to the escaped text.
  // The index of the offsets array is the position in the escaped text,
  // the element value is the position in the user-supplied text.
  nsTArray<size_t> offsets;
  offsets.SetCapacity(aText.Length());

  const size_t escapeStartLength = strlen(DLIM_ESCAPE_START);
  const size_t escapeEndLength = strlen(DLIM_ESCAPE_END);

  // This loop looks for occurrences of "[[" or "]]", escapes them, and
  // populates the offsets array to supply a map to the original offsets.
  for (size_t i = 0; i < aText.Length(); i++) {
    if (aText.Length() > i + 1 && ((aText[i] == ']' && aText[i + 1] == ']') ||
                                   (aText[i] == '[' && aText[i + 1] == '['))) {
      escapedText.AppendLiteral(DLIM_ESCAPE_START);
      // Give every position of the inserted command a defined mapping (the
      // delimiter it guards) instead of appending unassigned slots.
      // NOTE(review): AppendElements(count) is believed to leave trivial
      // element types unassigned -- confirm against nsTArray.h.
      for (size_t k = 0; k < escapeStartLength; k++) {
        offsets.AppendElement(i);
      }
      escapedText.Append(aText[i]);
      offsets.AppendElement(i);
      escapedText.Append(aText[++i]);
      offsets.AppendElement(i);
      escapedText.AppendLiteral(DLIM_ESCAPE_END);
      for (size_t k = 0; k < escapeEndLength; k++) {
        offsets.AppendElement(i);
      }
    } else {
      escapedText.Append(aText[i]);
      offsets.AppendElement(i);
    }
  }

  RefPtr<SpeechTaskCallback> callback =
      new SpeechTaskCallback(aTask, synth, offsets);
  nsresult rv = aTask->Setup(callback);
  NS_ENSURE_SUCCESS(rv, rv);

  NSString* text = nsCocoaUtils::ToNSString(escapedText);
  BOOL success = [synth startSpeakingString:text];
  NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);

  aTask->DispatchStart();
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}

// nsIObserver implementation; no topic is acted upon.
NS_IMETHODIMP
OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
                                     const char16_t* aData) {
  return NS_OK;
}

// Returns the singleton, creating and initializing it on first use.
// Main-thread only. Returns nullptr outside the default (parent) process or
// when Init() fails; a failed Init() is retried on the next call.
OSXSpeechSynthesizerService* OSXSpeechSynthesizerService::GetInstance() {
  MOZ_ASSERT(NS_IsMainThread());
  if (XRE_GetProcessType() != GeckoProcessType_Default) {
    return nullptr;
  }

  if (!sSingleton) {
    RefPtr<OSXSpeechSynthesizerService> speechService =
        new OSXSpeechSynthesizerService();
    if (speechService->Init()) {
      sSingleton = speechService;
      ClearOnShutdown(&sSingleton);
    }
  }
  return sSingleton;
}

// Same as GetInstance(), but hands the caller an owning reference.
already_AddRefed<OSXSpeechSynthesizerService>
OSXSpeechSynthesizerService::GetInstanceForService() {
  RefPtr<OSXSpeechSynthesizerService> speechService = GetInstance();
  return speechService.forget();
}

}  // namespace dom
}  // namespace mozilla