nsSpeechTask.cpp (10770B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "nsSpeechTask.h" 8 9 #include "AudioChannelService.h" 10 #include "AudioSegment.h" 11 #include "SharedBuffer.h" 12 #include "SpeechSynthesis.h" 13 #include "nsGlobalWindowInner.h" 14 #include "nsSynthVoiceRegistry.h" 15 #include "nsXULAppAPI.h" 16 17 #undef LOG 18 extern mozilla::LogModule* GetSpeechSynthLog(); 19 #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) 20 21 #define AUDIO_TRACK 1 22 23 namespace mozilla::dom { 24 25 // nsSpeechTask 26 27 NS_IMPL_CYCLE_COLLECTION_WEAK(nsSpeechTask, mSpeechSynthesis, mUtterance, 28 mCallback) 29 30 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask) 31 NS_INTERFACE_MAP_ENTRY(nsISpeechTask) 32 NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback) 33 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) 34 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask) 35 NS_INTERFACE_MAP_END 36 37 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask) 38 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask) 39 40 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, 41 bool aShouldResistFingerprinting) 42 : mUtterance(aUtterance), 43 mInited(false), 44 mPrePaused(false), 45 mPreCanceled(false), 46 mCallback(nullptr), 47 mShouldResistFingerprinting(aShouldResistFingerprinting), 48 mState(STATE_PENDING) { 49 mText = aUtterance->mText; 50 mVolume = aUtterance->Volume(); 51 } 52 53 nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, 54 bool aShouldResistFingerprinting) 55 : mUtterance(nullptr), 56 mVolume(aVolume), 57 mText(aText), 58 mInited(false), 59 mPrePaused(false), 60 mPreCanceled(false), 61 mCallback(nullptr), 62 mShouldResistFingerprinting(aShouldResistFingerprinting), 63 mState(STATE_PENDING) {} 64 65 nsSpeechTask::~nsSpeechTask() { LOG(LogLevel::Debug, ("~nsSpeechTask")); } 66 67 void nsSpeechTask::Init() { mInited = true; } 68 69 void nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri) { 70 mChosenVoiceURI = aUri; 71 } 72 73 NS_IMETHODIMP 74 nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback) { 75 MOZ_ASSERT(XRE_IsParentProcess()); 76 77 LOG(LogLevel::Debug, ("nsSpeechTask::Setup")); 78 79 mCallback = aCallback; 80 81 return NS_OK; 82 } 83 84 NS_IMETHODIMP 85 nsSpeechTask::DispatchStart() { 86 nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true); 87 return DispatchStartImpl(); 88 } 89 90 nsresult nsSpeechTask::DispatchStartImpl() { 91 return DispatchStartImpl(mChosenVoiceURI); 92 } 93 94 nsresult nsSpeechTask::DispatchStartImpl(const nsAString& aUri) { 95 LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl")); 96 97 MOZ_ASSERT(mUtterance); 98 if (NS_WARN_IF(mState != STATE_PENDING)) { 99 return NS_ERROR_NOT_AVAILABLE; 100 } 101 102 CreateAudioChannelAgent(); 103 104 mState = STATE_SPEAKING; 105 mUtterance->mChosenVoiceURI = aUri; 106 mUtterance->DispatchSpeechSynthesisEvent(u"start"_ns, 0, nullptr, 0, u""_ns); 107 108 return NS_OK; 109 } 110 111 NS_IMETHODIMP 112 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) { 113 // After we end, no callback functions should go through. 114 mCallback = nullptr; 115 116 if (!mPreCanceled) { 117 nsSynthVoiceRegistry::GetInstance()->SpeakNext(); 118 } 119 120 return DispatchEndImpl(aElapsedTime, aCharIndex); 121 } 122 123 nsresult nsSpeechTask::DispatchEndImpl(float aElapsedTime, 124 uint32_t aCharIndex) { 125 LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl")); 126 127 DestroyAudioChannelAgent(); 128 129 MOZ_ASSERT(mUtterance); 130 if (NS_WARN_IF(mState == STATE_ENDED)) { 131 return NS_ERROR_NOT_AVAILABLE; 132 } 133 134 RefPtr<SpeechSynthesisUtterance> utterance = mUtterance; 135 136 if (mSpeechSynthesis) { 137 mSpeechSynthesis->OnEnd(this); 138 } 139 140 mState = STATE_ENDED; 141 utterance->DispatchSpeechSynthesisEvent(u"end"_ns, aCharIndex, nullptr, 142 aElapsedTime, u""_ns); 143 144 return NS_OK; 145 } 146 147 NS_IMETHODIMP 148 nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) { 149 return DispatchPauseImpl(aElapsedTime, aCharIndex); 150 } 151 152 nsresult nsSpeechTask::DispatchPauseImpl(float aElapsedTime, 153 uint32_t aCharIndex) { 154 LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl")); 155 MOZ_ASSERT(mUtterance); 156 if (NS_WARN_IF(mUtterance->mPaused)) { 157 return NS_ERROR_NOT_AVAILABLE; 158 } 159 if (NS_WARN_IF(mState == STATE_ENDED)) { 160 return NS_ERROR_NOT_AVAILABLE; 161 } 162 163 mUtterance->mPaused = true; 164 if (mState == STATE_SPEAKING) { 165 mUtterance->DispatchSpeechSynthesisEvent(u"pause"_ns, aCharIndex, nullptr, 166 aElapsedTime, u""_ns); 167 } 168 169 return NS_OK; 170 } 171 172 NS_IMETHODIMP 173 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) { 174 return DispatchResumeImpl(aElapsedTime, aCharIndex); 175 } 176 177 nsresult nsSpeechTask::DispatchResumeImpl(float aElapsedTime, 178 uint32_t aCharIndex) { 179 LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl")); 180 MOZ_ASSERT(mUtterance); 181 if (NS_WARN_IF(!(mUtterance->mPaused))) { 182 return NS_ERROR_NOT_AVAILABLE; 183 } 184 if (NS_WARN_IF(mState == STATE_ENDED)) { 185 return NS_ERROR_NOT_AVAILABLE; 186 } 187 188 mUtterance->mPaused = false; 189 if (mState == STATE_SPEAKING) { 190 mUtterance->DispatchSpeechSynthesisEvent(u"resume"_ns, aCharIndex, nullptr, 191 aElapsedTime, u""_ns); 192 } 193 194 return NS_OK; 195 } 196 197 void nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex) { 198 DispatchError(aElapsedTime, aCharIndex); 199 } 200 201 NS_IMETHODIMP 202 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) { 203 if (!mPreCanceled) { 204 nsSynthVoiceRegistry::GetInstance()->SpeakNext(); 205 } 206 207 return DispatchErrorImpl(aElapsedTime, aCharIndex); 208 } 209 210 nsresult nsSpeechTask::DispatchErrorImpl(float aElapsedTime, 211 uint32_t aCharIndex) { 212 LOG(LogLevel::Debug, ("nsSpeechTask::DispatchErrorImpl")); 213 214 DestroyAudioChannelAgent(); 215 216 MOZ_ASSERT(mUtterance); 217 if (NS_WARN_IF(mState == STATE_ENDED)) { 218 return NS_ERROR_NOT_AVAILABLE; 219 } 220 221 if (mSpeechSynthesis) { 222 mSpeechSynthesis->OnEnd(this); 223 } 224 225 mState = STATE_ENDED; 226 mUtterance->DispatchSpeechSynthesisEvent(u"error"_ns, aCharIndex, nullptr, 227 aElapsedTime, u""_ns); 228 return NS_OK; 229 } 230 231 NS_IMETHODIMP 232 nsSpeechTask::DispatchBoundary(const nsAString& aName, float aElapsedTime, 233 uint32_t aCharIndex, uint32_t aCharLength, 234 uint8_t argc) { 235 return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength, 236 argc); 237 } 238 239 nsresult nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName, 240 float aElapsedTime, 241 uint32_t aCharIndex, 242 uint32_t aCharLength, 243 uint8_t argc) { 244 MOZ_ASSERT(mUtterance); 245 if (NS_WARN_IF(mState != STATE_SPEAKING)) { 246 return NS_ERROR_NOT_AVAILABLE; 247 } 248 mUtterance->DispatchSpeechSynthesisEvent( 249 u"boundary"_ns, aCharIndex, 250 argc ? static_cast<Nullable<uint32_t> >(aCharLength) : nullptr, 251 aElapsedTime, aName); 252 253 return NS_OK; 254 } 255 256 NS_IMETHODIMP 257 nsSpeechTask::DispatchMark(const nsAString& aName, float aElapsedTime, 258 uint32_t aCharIndex) { 259 return DispatchMarkImpl(aName, aElapsedTime, aCharIndex); 260 } 261 262 nsresult nsSpeechTask::DispatchMarkImpl(const nsAString& aName, 263 float aElapsedTime, 264 uint32_t aCharIndex) { 265 MOZ_ASSERT(mUtterance); 266 if (NS_WARN_IF(mState != STATE_SPEAKING)) { 267 return NS_ERROR_NOT_AVAILABLE; 268 } 269 mUtterance->DispatchSpeechSynthesisEvent(u"mark"_ns, aCharIndex, nullptr, 270 aElapsedTime, aName); 271 return NS_OK; 272 } 273 274 void nsSpeechTask::Pause() { 275 MOZ_ASSERT(XRE_IsParentProcess()); 276 277 if (mCallback) { 278 DebugOnly<nsresult> rv = mCallback->OnPause(); 279 NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback"); 280 } 281 282 if (!mInited) { 283 mPrePaused = true; 284 } 285 } 286 287 void nsSpeechTask::Resume() { 288 MOZ_ASSERT(XRE_IsParentProcess()); 289 290 if (mCallback) { 291 DebugOnly<nsresult> rv = mCallback->OnResume(); 292 NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), 293 "Unable to call onResume() callback"); 294 } 295 296 if (mPrePaused) { 297 mPrePaused = false; 298 nsSynthVoiceRegistry::GetInstance()->ResumeQueue(); 299 } 300 } 301 302 void nsSpeechTask::Cancel() { 303 MOZ_ASSERT(XRE_IsParentProcess()); 304 305 LOG(LogLevel::Debug, ("nsSpeechTask::Cancel")); 306 307 if (mCallback) { 308 DebugOnly<nsresult> rv = mCallback->OnCancel(); 309 NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), 310 "Unable to call onCancel() callback"); 311 } 312 313 if (!mInited) { 314 mPreCanceled = true; 315 } 316 } 317 318 void nsSpeechTask::ForceEnd() { 319 if (!mInited) { 320 mPreCanceled = true; 321 } 322 323 DispatchEnd(0, 0); 324 } 325 326 void nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) { 327 mSpeechSynthesis = aSpeechSynthesis; 328 } 329 330 void nsSpeechTask::CreateAudioChannelAgent() { 331 if (!mUtterance) { 332 return; 333 } 334 335 if (mAudioChannelAgent) { 336 mAudioChannelAgent->NotifyStoppedPlaying(); 337 } 338 339 mAudioChannelAgent = new AudioChannelAgent(); 340 mAudioChannelAgent->InitWithWeakCallback(mUtterance->GetOwnerWindow(), this); 341 342 nsresult rv = mAudioChannelAgent->NotifyStartedPlaying( 343 AudioChannelService::AudibleState::eAudible); 344 if (NS_WARN_IF(NS_FAILED(rv))) { 345 return; 346 } 347 348 mAudioChannelAgent->PullInitialUpdate(); 349 } 350 351 void nsSpeechTask::DestroyAudioChannelAgent() { 352 if (mAudioChannelAgent) { 353 mAudioChannelAgent->NotifyStoppedPlaying(); 354 mAudioChannelAgent = nullptr; 355 } 356 } 357 358 NS_IMETHODIMP 359 nsSpeechTask::WindowVolumeChanged(float aVolume, bool aMuted) { 360 SetAudioOutputVolume(aMuted ? 0.0 : mVolume * aVolume); 361 return NS_OK; 362 } 363 364 NS_IMETHODIMP 365 nsSpeechTask::WindowSuspendChanged(nsSuspendedTypes aSuspend) { 366 if (!mUtterance) { 367 return NS_OK; 368 } 369 370 if (aSuspend == nsISuspendedTypes::NONE_SUSPENDED && mUtterance->mPaused) { 371 Resume(); 372 } else if (aSuspend != nsISuspendedTypes::NONE_SUSPENDED && 373 !mUtterance->mPaused) { 374 Pause(); 375 } 376 return NS_OK; 377 } 378 379 NS_IMETHODIMP 380 nsSpeechTask::WindowAudioCaptureChanged(bool aCapture) { 381 // This is not supported yet. 382 return NS_OK; 383 } 384 385 void nsSpeechTask::SetAudioOutputVolume(float aVolume) { 386 if (mCallback) { 387 mCallback->OnVolumeChanged(aVolume); 388 } 389 } 390 391 } // namespace mozilla::dom