SpeechSynthesis.cpp (9854B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "SpeechSynthesis.h" 8 9 #include "mozilla/Logging.h" 10 #include "mozilla/dom/Document.h" 11 #include "mozilla/dom/Element.h" 12 #include "mozilla/dom/SpeechSynthesisBinding.h" 13 #include "mozilla/dom/WindowGlobalChild.h" 14 #include "nsContentUtils.h" 15 #include "nsGlobalWindowInner.h" 16 #include "nsIDocShell.h" 17 #include "nsISupportsPrimitives.h" 18 #include "nsSpeechTask.h" 19 #include "nsSynthVoiceRegistry.h" 20 21 #undef LOG 22 mozilla::LogModule* GetSpeechSynthLog() { 23 static mozilla::LazyLogModule sLog("SpeechSynthesis"); 24 25 return sLog; 26 } 27 #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) 28 29 namespace mozilla::dom { 30 31 NS_IMPL_CYCLE_COLLECTION_CLASS(SpeechSynthesis) 32 33 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(SpeechSynthesis, 34 DOMEventTargetHelper) 35 NS_IMPL_CYCLE_COLLECTION_UNLINK(mCurrentTask) 36 NS_IMPL_CYCLE_COLLECTION_UNLINK(mSpeechQueue) 37 tmp->mVoiceCache.Clear(); 38 NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE 39 NS_IMPL_CYCLE_COLLECTION_UNLINK_END 40 41 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(SpeechSynthesis, 42 DOMEventTargetHelper) 43 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mCurrentTask) 44 NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSpeechQueue) 45 for (SpeechSynthesisVoice* voice : tmp->mVoiceCache.Values()) { 46 cb.NoteXPCOMChild(voice); 47 } 48 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END 49 50 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesis) 51 NS_INTERFACE_MAP_ENTRY(nsIObserver) 52 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference) 53 NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper) 54 55 NS_IMPL_ADDREF_INHERITED(SpeechSynthesis, DOMEventTargetHelper) 56 NS_IMPL_RELEASE_INHERITED(SpeechSynthesis, DOMEventTargetHelper) 57 58 SpeechSynthesis::SpeechSynthesis(nsPIDOMWindowInner* aParent) 59 : DOMEventTargetHelper(aParent), 60 mHoldQueue(false), 61 mInnerID(aParent->WindowID()) { 62 MOZ_ASSERT(NS_IsMainThread()); 63 64 nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService(); 65 if (obs) { 66 obs->AddObserver(this, "inner-window-destroyed", true); 67 obs->AddObserver(this, "synth-voices-changed", true); 68 obs->AddObserver(this, "synth-voices-error", true); 69 } 70 } 71 72 SpeechSynthesis::~SpeechSynthesis() = default; 73 74 JSObject* SpeechSynthesis::WrapObject(JSContext* aCx, 75 JS::Handle<JSObject*> aGivenProto) { 76 return SpeechSynthesis_Binding::Wrap(aCx, this, aGivenProto); 77 } 78 79 bool SpeechSynthesis::Pending() const { 80 // If we don't have any task, nothing is pending. If we have only one task, 81 // check if that task is currently pending. If we have more than one task, 82 // then the tasks after the first one are definitely pending. 83 return mSpeechQueue.Length() > 1 || 84 (mSpeechQueue.Length() == 1 && 85 (!mCurrentTask || mCurrentTask->IsPending())); 86 } 87 88 bool SpeechSynthesis::Speaking() const { 89 // Check global speaking state if there is no active speaking task. 90 return (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) || 91 nsSynthVoiceRegistry::GetInstance()->IsSpeaking(); 92 } 93 94 bool SpeechSynthesis::Paused() const { 95 return mHoldQueue || (mCurrentTask && mCurrentTask->IsPrePaused()) || 96 (!mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused()); 97 } 98 99 bool SpeechSynthesis::HasEmptyQueue() const { 100 return mSpeechQueue.Length() == 0; 101 } 102 103 bool SpeechSynthesis::HasVoices() const { 104 uint32_t voiceCount = mVoiceCache.Count(); 105 if (voiceCount == 0) { 106 nsresult rv = 107 nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount); 108 if (NS_WARN_IF(NS_FAILED(rv))) { 109 return false; 110 } 111 } 112 113 return voiceCount != 0; 114 } 115 116 void SpeechSynthesis::Speak(SpeechSynthesisUtterance& aUtterance) { 117 if (!mInnerID) { 118 return; 119 } 120 121 mSpeechQueue.AppendElement(&aUtterance); 122 123 if (mSpeechQueue.Length() == 1) { 124 RefPtr<WindowGlobalChild> wgc = 125 WindowGlobalChild::GetByInnerWindowId(mInnerID); 126 if (wgc) { 127 wgc->BlockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS); 128 } 129 130 // If we only have one item in the queue, we aren't pre-paused, and 131 // we have voices available, speak it. 132 if (!mCurrentTask && !mHoldQueue && HasVoices()) { 133 AdvanceQueue(); 134 } 135 } 136 } 137 138 void SpeechSynthesis::AdvanceQueue() { 139 LOG(LogLevel::Debug, 140 ("SpeechSynthesis::AdvanceQueue length=%zu", mSpeechQueue.Length())); 141 142 if (mSpeechQueue.IsEmpty()) { 143 return; 144 } 145 146 RefPtr<SpeechSynthesisUtterance> utterance = mSpeechQueue.ElementAt(0); 147 148 nsAutoString docLang; 149 nsCOMPtr<nsPIDOMWindowInner> window = GetOwnerWindow(); 150 if (Document* doc = window ? window->GetExtantDoc() : nullptr) { 151 if (Element* elm = doc->GetHtmlElement()) { 152 elm->GetLang(docLang); 153 } 154 } 155 156 mCurrentTask = 157 nsSynthVoiceRegistry::GetInstance()->SpeakUtterance(*utterance, docLang); 158 159 if (mCurrentTask) { 160 mCurrentTask->SetSpeechSynthesis(this); 161 } 162 } 163 164 void SpeechSynthesis::Cancel() { 165 if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) { 166 // Remove all queued utterances except for current one, we will remove it 167 // in OnEnd 168 mSpeechQueue.RemoveLastElements(mSpeechQueue.Length() - 1); 169 } else { 170 mSpeechQueue.Clear(); 171 } 172 173 if (mCurrentTask) { 174 mCurrentTask->Cancel(); 175 } 176 } 177 178 void SpeechSynthesis::Pause() { 179 if (Paused()) { 180 return; 181 } 182 183 if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) { 184 mCurrentTask->Pause(); 185 } else { 186 mHoldQueue = true; 187 } 188 } 189 190 void SpeechSynthesis::Resume() { 191 if (!Paused()) { 192 return; 193 } 194 195 mHoldQueue = false; 196 197 if (mCurrentTask) { 198 mCurrentTask->Resume(); 199 } else { 200 AdvanceQueue(); 201 } 202 } 203 204 void SpeechSynthesis::OnEnd(const nsSpeechTask* aTask) { 205 MOZ_ASSERT(mCurrentTask == aTask); 206 207 if (!mSpeechQueue.IsEmpty()) { 208 mSpeechQueue.RemoveElementAt(0); 209 if (mSpeechQueue.IsEmpty()) { 210 RefPtr<WindowGlobalChild> wgc = 211 WindowGlobalChild::GetByInnerWindowId(mInnerID); 212 if (wgc) { 213 wgc->UnblockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS); 214 } 215 } 216 } 217 218 mCurrentTask = nullptr; 219 AdvanceQueue(); 220 } 221 222 void SpeechSynthesis::GetVoices( 223 nsTArray<RefPtr<SpeechSynthesisVoice> >& aResult) { 224 aResult.Clear(); 225 uint32_t voiceCount = 0; 226 nsCOMPtr<nsPIDOMWindowInner> window = GetOwnerWindow(); 227 nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr; 228 229 if (nsContentUtils::ShouldResistFingerprinting(docShell, 230 RFPTarget::SpeechSynthesis)) { 231 return; 232 } 233 234 nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount); 235 if (NS_WARN_IF(NS_FAILED(rv))) { 236 return; 237 } 238 239 nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this); 240 241 for (uint32_t i = 0; i < voiceCount; i++) { 242 nsAutoString uri; 243 rv = nsSynthVoiceRegistry::GetInstance()->GetVoice(i, uri); 244 245 if (NS_FAILED(rv)) { 246 NS_WARNING("Failed to retrieve voice from registry"); 247 continue; 248 } 249 250 SpeechSynthesisVoice* voice = mVoiceCache.GetWeak(uri); 251 252 if (!voice) { 253 voice = new SpeechSynthesisVoice(voiceParent, uri); 254 } 255 256 aResult.AppendElement(voice); 257 } 258 259 mVoiceCache.Clear(); 260 261 for (uint32_t i = 0; i < aResult.Length(); i++) { 262 SpeechSynthesisVoice* voice = aResult[i]; 263 mVoiceCache.InsertOrUpdate(voice->mUri, RefPtr{voice}); 264 } 265 } 266 267 // For testing purposes, allows us to cancel the current task that is 268 // misbehaving, and flush the queue. 269 void SpeechSynthesis::ForceEnd() { 270 if (mCurrentTask) { 271 mCurrentTask->ForceEnd(); 272 } 273 } 274 275 NS_IMETHODIMP 276 SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic, 277 const char16_t* aData) { 278 MOZ_ASSERT(NS_IsMainThread()); 279 280 if (strcmp(aTopic, "inner-window-destroyed") == 0) { 281 nsCOMPtr<nsISupportsPRUint64> wrapper = do_QueryInterface(aSubject); 282 NS_ENSURE_TRUE(wrapper, NS_ERROR_FAILURE); 283 284 uint64_t innerID; 285 nsresult rv = wrapper->GetData(&innerID); 286 NS_ENSURE_SUCCESS(rv, rv); 287 288 if (innerID == mInnerID) { 289 mInnerID = 0; 290 Cancel(); 291 292 nsCOMPtr<nsIObserverService> obs = 293 mozilla::services::GetObserverService(); 294 if (obs) { 295 obs->RemoveObserver(this, "inner-window-destroyed"); 296 } 297 } 298 } else if (strcmp(aTopic, "synth-voices-changed") == 0) { 299 LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged")); 300 nsCOMPtr<nsPIDOMWindowInner> window = GetOwnerWindow(); 301 nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr; 302 303 if (!nsContentUtils::ShouldResistFingerprinting( 304 docShell, RFPTarget::SpeechSynthesis)) { 305 DispatchTrustedEvent(u"voiceschanged"_ns); 306 // If we have a pending item, and voices become available, speak it. 307 if (!mCurrentTask && !mHoldQueue && HasVoices()) { 308 AdvanceQueue(); 309 } 310 } 311 } else if (strcmp(aTopic, "synth-voices-error") == 0) { 312 NS_WARNING("SpeechSynthesis::Observe: synth-voices-error"); 313 LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceserror")); 314 nsCOMPtr<nsPIDOMWindowInner> window = GetOwnerWindow(); 315 316 nsCOMPtr<nsIObserverService> obs = services::GetObserverService(); 317 if (obs) { 318 obs->NotifyObservers(window, "chrome-synth-voices-error", aData); 319 } 320 321 if (!mSpeechQueue.IsEmpty()) { 322 for (RefPtr<SpeechSynthesisUtterance>& utterance : mSpeechQueue) { 323 utterance->DispatchSpeechSynthesisEvent(u"error"_ns, 0, nullptr, 0, 324 u""_ns); 325 } 326 mSpeechQueue.Clear(); 327 } 328 } 329 330 return NS_OK; 331 } 332 333 } // namespace mozilla::dom