tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsSpeechTask.cpp (10770B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "nsSpeechTask.h"
      8 
      9 #include "AudioChannelService.h"
     10 #include "AudioSegment.h"
     11 #include "SharedBuffer.h"
     12 #include "SpeechSynthesis.h"
     13 #include "nsGlobalWindowInner.h"
     14 #include "nsSynthVoiceRegistry.h"
     15 #include "nsXULAppAPI.h"
     16 
     17 #undef LOG
     18 extern mozilla::LogModule* GetSpeechSynthLog();
     19 #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
     20 
     21 #define AUDIO_TRACK 1
     22 
     23 namespace mozilla::dom {
     24 
     25 // nsSpeechTask
     26 
         // XPCOM boilerplate for nsSpeechTask.
         //
         // Cycle collection is declared over the three members listed in the
         // macro: mSpeechSynthesis, mUtterance and mCallback.
      27 NS_IMPL_CYCLE_COLLECTION_WEAK(nsSpeechTask, mSpeechSynthesis, mUtterance,
      28                              mCallback)
      29 
         // Interface map: the object answers for nsISpeechTask,
         // nsIAudioChannelAgentCallback and nsISupportsWeakReference. A request
         // for nsISupports is disambiguated through nsISpeechTask.
      30 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask)
      31  NS_INTERFACE_MAP_ENTRY(nsISpeechTask)
      32  NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback)
      33  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
      34  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask)
      35 NS_INTERFACE_MAP_END
      36 
         // Cycle-collecting AddRef/Release implementations.
      37 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask)
      38 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
     39 
     40 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance,
     41                           bool aShouldResistFingerprinting)
     42    : mUtterance(aUtterance),
     43      mInited(false),
     44      mPrePaused(false),
     45      mPreCanceled(false),
     46      mCallback(nullptr),
     47      mShouldResistFingerprinting(aShouldResistFingerprinting),
     48      mState(STATE_PENDING) {
     49  mText = aUtterance->mText;
     50  mVolume = aUtterance->Volume();
     51 }
     52 
     53 nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText,
     54                           bool aShouldResistFingerprinting)
     55    : mUtterance(nullptr),
     56      mVolume(aVolume),
     57      mText(aText),
     58      mInited(false),
     59      mPrePaused(false),
     60      mPreCanceled(false),
     61      mCallback(nullptr),
     62      mShouldResistFingerprinting(aShouldResistFingerprinting),
     63      mState(STATE_PENDING) {}
     64 
     65 nsSpeechTask::~nsSpeechTask() { LOG(LogLevel::Debug, ("~nsSpeechTask")); }
     66 
     67 void nsSpeechTask::Init() { mInited = true; }
     68 
     69 void nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri) {
     70  mChosenVoiceURI = aUri;
     71 }
     72 
     73 NS_IMETHODIMP
     74 nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback) {
     75  MOZ_ASSERT(XRE_IsParentProcess());
     76 
     77  LOG(LogLevel::Debug, ("nsSpeechTask::Setup"));
     78 
     79  mCallback = aCallback;
     80 
     81  return NS_OK;
     82 }
     83 
     84 NS_IMETHODIMP
     85 nsSpeechTask::DispatchStart() {
     86  nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true);
     87  return DispatchStartImpl();
     88 }
     89 
     90 nsresult nsSpeechTask::DispatchStartImpl() {
     91  return DispatchStartImpl(mChosenVoiceURI);
     92 }
     93 
     94 nsresult nsSpeechTask::DispatchStartImpl(const nsAString& aUri) {
     95  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl"));
     96 
     97  MOZ_ASSERT(mUtterance);
     98  if (NS_WARN_IF(mState != STATE_PENDING)) {
     99    return NS_ERROR_NOT_AVAILABLE;
    100  }
    101 
    102  CreateAudioChannelAgent();
    103 
    104  mState = STATE_SPEAKING;
    105  mUtterance->mChosenVoiceURI = aUri;
    106  mUtterance->DispatchSpeechSynthesisEvent(u"start"_ns, 0, nullptr, 0, u""_ns);
    107 
    108  return NS_OK;
    109 }
    110 
    111 NS_IMETHODIMP
    112 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) {
    113  // After we end, no callback functions should go through.
    114  mCallback = nullptr;
    115 
    116  if (!mPreCanceled) {
    117    nsSynthVoiceRegistry::GetInstance()->SpeakNext();
    118  }
    119 
    120  return DispatchEndImpl(aElapsedTime, aCharIndex);
    121 }
    122 
    123 nsresult nsSpeechTask::DispatchEndImpl(float aElapsedTime,
    124                                       uint32_t aCharIndex) {
    125  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl"));
    126 
    127  DestroyAudioChannelAgent();
    128 
    129  MOZ_ASSERT(mUtterance);
    130  if (NS_WARN_IF(mState == STATE_ENDED)) {
    131    return NS_ERROR_NOT_AVAILABLE;
    132  }
    133 
    134  RefPtr<SpeechSynthesisUtterance> utterance = mUtterance;
    135 
    136  if (mSpeechSynthesis) {
    137    mSpeechSynthesis->OnEnd(this);
    138  }
    139 
    140  mState = STATE_ENDED;
    141  utterance->DispatchSpeechSynthesisEvent(u"end"_ns, aCharIndex, nullptr,
    142                                          aElapsedTime, u""_ns);
    143 
    144  return NS_OK;
    145 }
    146 
    147 NS_IMETHODIMP
    148 nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) {
    149  return DispatchPauseImpl(aElapsedTime, aCharIndex);
    150 }
    151 
    152 nsresult nsSpeechTask::DispatchPauseImpl(float aElapsedTime,
    153                                         uint32_t aCharIndex) {
    154  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl"));
    155  MOZ_ASSERT(mUtterance);
    156  if (NS_WARN_IF(mUtterance->mPaused)) {
    157    return NS_ERROR_NOT_AVAILABLE;
    158  }
    159  if (NS_WARN_IF(mState == STATE_ENDED)) {
    160    return NS_ERROR_NOT_AVAILABLE;
    161  }
    162 
    163  mUtterance->mPaused = true;
    164  if (mState == STATE_SPEAKING) {
    165    mUtterance->DispatchSpeechSynthesisEvent(u"pause"_ns, aCharIndex, nullptr,
    166                                             aElapsedTime, u""_ns);
    167  }
    168 
    169  return NS_OK;
    170 }
    171 
    172 NS_IMETHODIMP
    173 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) {
    174  return DispatchResumeImpl(aElapsedTime, aCharIndex);
    175 }
    176 
    177 nsresult nsSpeechTask::DispatchResumeImpl(float aElapsedTime,
    178                                          uint32_t aCharIndex) {
    179  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl"));
    180  MOZ_ASSERT(mUtterance);
    181  if (NS_WARN_IF(!(mUtterance->mPaused))) {
    182    return NS_ERROR_NOT_AVAILABLE;
    183  }
    184  if (NS_WARN_IF(mState == STATE_ENDED)) {
    185    return NS_ERROR_NOT_AVAILABLE;
    186  }
    187 
    188  mUtterance->mPaused = false;
    189  if (mState == STATE_SPEAKING) {
    190    mUtterance->DispatchSpeechSynthesisEvent(u"resume"_ns, aCharIndex, nullptr,
    191                                             aElapsedTime, u""_ns);
    192  }
    193 
    194  return NS_OK;
    195 }
    196 
    197 void nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex) {
    198  DispatchError(aElapsedTime, aCharIndex);
    199 }
    200 
    201 NS_IMETHODIMP
    202 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) {
    203  if (!mPreCanceled) {
    204    nsSynthVoiceRegistry::GetInstance()->SpeakNext();
    205  }
    206 
    207  return DispatchErrorImpl(aElapsedTime, aCharIndex);
    208 }
    209 
    210 nsresult nsSpeechTask::DispatchErrorImpl(float aElapsedTime,
    211                                         uint32_t aCharIndex) {
    212  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchErrorImpl"));
    213 
    214  DestroyAudioChannelAgent();
    215 
    216  MOZ_ASSERT(mUtterance);
    217  if (NS_WARN_IF(mState == STATE_ENDED)) {
    218    return NS_ERROR_NOT_AVAILABLE;
    219  }
    220 
    221  if (mSpeechSynthesis) {
    222    mSpeechSynthesis->OnEnd(this);
    223  }
    224 
    225  mState = STATE_ENDED;
    226  mUtterance->DispatchSpeechSynthesisEvent(u"error"_ns, aCharIndex, nullptr,
    227                                           aElapsedTime, u""_ns);
    228  return NS_OK;
    229 }
    230 
    231 NS_IMETHODIMP
    232 nsSpeechTask::DispatchBoundary(const nsAString& aName, float aElapsedTime,
    233                               uint32_t aCharIndex, uint32_t aCharLength,
    234                               uint8_t argc) {
    235  return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength,
    236                              argc);
    237 }
    238 
    239 nsresult nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName,
    240                                            float aElapsedTime,
    241                                            uint32_t aCharIndex,
    242                                            uint32_t aCharLength,
    243                                            uint8_t argc) {
    244  MOZ_ASSERT(mUtterance);
    245  if (NS_WARN_IF(mState != STATE_SPEAKING)) {
    246    return NS_ERROR_NOT_AVAILABLE;
    247  }
    248  mUtterance->DispatchSpeechSynthesisEvent(
    249      u"boundary"_ns, aCharIndex,
    250      argc ? static_cast<Nullable<uint32_t> >(aCharLength) : nullptr,
    251      aElapsedTime, aName);
    252 
    253  return NS_OK;
    254 }
    255 
    256 NS_IMETHODIMP
    257 nsSpeechTask::DispatchMark(const nsAString& aName, float aElapsedTime,
    258                           uint32_t aCharIndex) {
    259  return DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
    260 }
    261 
    262 nsresult nsSpeechTask::DispatchMarkImpl(const nsAString& aName,
    263                                        float aElapsedTime,
    264                                        uint32_t aCharIndex) {
    265  MOZ_ASSERT(mUtterance);
    266  if (NS_WARN_IF(mState != STATE_SPEAKING)) {
    267    return NS_ERROR_NOT_AVAILABLE;
    268  }
    269  mUtterance->DispatchSpeechSynthesisEvent(u"mark"_ns, aCharIndex, nullptr,
    270                                           aElapsedTime, aName);
    271  return NS_OK;
    272 }
    273 
    274 void nsSpeechTask::Pause() {
    275  MOZ_ASSERT(XRE_IsParentProcess());
    276 
    277  if (mCallback) {
    278    DebugOnly<nsresult> rv = mCallback->OnPause();
    279    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback");
    280  }
    281 
    282  if (!mInited) {
    283    mPrePaused = true;
    284  }
    285 }
    286 
    287 void nsSpeechTask::Resume() {
    288  MOZ_ASSERT(XRE_IsParentProcess());
    289 
    290  if (mCallback) {
    291    DebugOnly<nsresult> rv = mCallback->OnResume();
    292    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
    293                         "Unable to call onResume() callback");
    294  }
    295 
    296  if (mPrePaused) {
    297    mPrePaused = false;
    298    nsSynthVoiceRegistry::GetInstance()->ResumeQueue();
    299  }
    300 }
    301 
    302 void nsSpeechTask::Cancel() {
    303  MOZ_ASSERT(XRE_IsParentProcess());
    304 
    305  LOG(LogLevel::Debug, ("nsSpeechTask::Cancel"));
    306 
    307  if (mCallback) {
    308    DebugOnly<nsresult> rv = mCallback->OnCancel();
    309    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
    310                         "Unable to call onCancel() callback");
    311  }
    312 
    313  if (!mInited) {
    314    mPreCanceled = true;
    315  }
    316 }
    317 
    318 void nsSpeechTask::ForceEnd() {
    319  if (!mInited) {
    320    mPreCanceled = true;
    321  }
    322 
    323  DispatchEnd(0, 0);
    324 }
    325 
    326 void nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) {
    327  mSpeechSynthesis = aSpeechSynthesis;
    328 }
    329 
    330 void nsSpeechTask::CreateAudioChannelAgent() {
    331  if (!mUtterance) {
    332    return;
    333  }
    334 
    335  if (mAudioChannelAgent) {
    336    mAudioChannelAgent->NotifyStoppedPlaying();
    337  }
    338 
    339  mAudioChannelAgent = new AudioChannelAgent();
    340  mAudioChannelAgent->InitWithWeakCallback(mUtterance->GetOwnerWindow(), this);
    341 
    342  nsresult rv = mAudioChannelAgent->NotifyStartedPlaying(
    343      AudioChannelService::AudibleState::eAudible);
    344  if (NS_WARN_IF(NS_FAILED(rv))) {
    345    return;
    346  }
    347 
    348  mAudioChannelAgent->PullInitialUpdate();
    349 }
    350 
    351 void nsSpeechTask::DestroyAudioChannelAgent() {
    352  if (mAudioChannelAgent) {
    353    mAudioChannelAgent->NotifyStoppedPlaying();
    354    mAudioChannelAgent = nullptr;
    355  }
    356 }
    357 
    358 NS_IMETHODIMP
    359 nsSpeechTask::WindowVolumeChanged(float aVolume, bool aMuted) {
    360  SetAudioOutputVolume(aMuted ? 0.0 : mVolume * aVolume);
    361  return NS_OK;
    362 }
    363 
    364 NS_IMETHODIMP
    365 nsSpeechTask::WindowSuspendChanged(nsSuspendedTypes aSuspend) {
    366  if (!mUtterance) {
    367    return NS_OK;
    368  }
    369 
    370  if (aSuspend == nsISuspendedTypes::NONE_SUSPENDED && mUtterance->mPaused) {
    371    Resume();
    372  } else if (aSuspend != nsISuspendedTypes::NONE_SUSPENDED &&
    373             !mUtterance->mPaused) {
    374    Pause();
    375  }
    376  return NS_OK;
    377 }
    378 
    379 NS_IMETHODIMP
    380 nsSpeechTask::WindowAudioCaptureChanged(bool aCapture) {
    381  // This is not supported yet.
    382  return NS_OK;
    383 }
    384 
    385 void nsSpeechTask::SetAudioOutputVolume(float aVolume) {
    386  if (mCallback) {
    387    mCallback->OnVolumeChanged(aVolume);
    388  }
    389 }
    390 
    391 }  // namespace mozilla::dom