tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

SpeechRecognition.cpp (37973B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "SpeechRecognition.h"
      8 
      9 #include <algorithm>
     10 
     11 #include "AudioSegment.h"
     12 #include "MediaEnginePrefs.h"
     13 #include "SpeechTrackListener.h"
     14 #include "VideoUtils.h"
     15 #include "endpointer.h"
     16 #include "mozilla/AbstractThread.h"
     17 #include "mozilla/MediaManager.h"
     18 #include "mozilla/Preferences.h"
     19 #include "mozilla/ResultVariant.h"
     20 #include "mozilla/Services.h"
     21 #include "mozilla/StaticPrefs_media.h"
     22 #include "mozilla/dom/AudioStreamTrack.h"
     23 #include "mozilla/dom/BindingUtils.h"
     24 #include "mozilla/dom/Document.h"
     25 #include "mozilla/dom/Element.h"
     26 #include "mozilla/dom/MediaStreamError.h"
     27 #include "mozilla/dom/MediaStreamTrackBinding.h"
     28 #include "mozilla/dom/RootedDictionary.h"
     29 #include "mozilla/dom/SpeechGrammar.h"
     30 #include "mozilla/dom/SpeechRecognitionBinding.h"
     31 #include "mozilla/dom/SpeechRecognitionEvent.h"
     32 #include "nsCOMPtr.h"
     33 #include "nsComponentManagerUtils.h"
     34 #include "nsContentUtils.h"
     35 #include "nsCycleCollectionParticipant.h"
     36 #include "nsGlobalWindowInner.h"
     37 #include "nsIObserverService.h"
     38 #include "nsIPermissionManager.h"
     39 #include "nsIPrincipal.h"
     40 #include "nsPIDOMWindow.h"
     41 #include "nsQueryObject.h"
     42 #include "nsServiceManagerUtils.h"
     43 
     44 // Undo the windows.h damage
     45 #if defined(XP_WIN) && defined(GetMessage)
     46 #  undef GetMessage
     47 #endif
     48 
     49 namespace mozilla::dom {
     50 
     51 #define PREFERENCE_DEFAULT_RECOGNITION_SERVICE "media.webspeech.service.default"
     52 #define DEFAULT_RECOGNITION_SERVICE "online"
     53 
     54 #define PREFERENCE_ENDPOINTER_SILENCE_LENGTH "media.webspeech.silence_length"
     55 #define PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH \
     56  "media.webspeech.long_silence_length"
     57 #define PREFERENCE_ENDPOINTER_LONG_SPEECH_LENGTH \
     58  "media.webspeech.long_speech_length"
     59 #define PREFERENCE_SPEECH_DETECTION_TIMEOUT_MS \
     60  "media.webspeech.recognition.timeout"
     61 
     62 static const uint32_t kSAMPLE_RATE = 16000;
     63 
     64 // number of frames corresponding to 300ms of audio to send to endpointer while
     65 // it's in environment estimation mode
     66 // kSAMPLE_RATE frames = 1s, kESTIMATION_FRAMES frames = 300ms
     67 static const uint32_t kESTIMATION_SAMPLES = 300 * kSAMPLE_RATE / 1000;
     68 
// Lazily-created log module for this component. Enable with
// MOZ_LOG="SpeechRecognition:4" (Debug level).
LogModule* GetSpeechRecognitionLog() {
  static LazyLogModule sLog("SpeechRecognition");
  return sLog;
}
// Debug-level logging shorthand used throughout this file.
#define SR_LOG(...) \
  MOZ_LOG(GetSpeechRecognitionLog(), mozilla::LogLevel::Debug, (__VA_ARGS__))
     76 namespace {
// Keeps application shutdown from completing while a SpeechRecognition
// session is still recording. When shutdown is requested, the session is
// aborted; the abort path is responsible for removing this blocker
// (see the comment in BlockShutdown).
class SpeechRecognitionShutdownBlocker : public media::ShutdownBlocker {
 public:
  SpeechRecognitionShutdownBlocker(SpeechRecognition* aRecognition,
                                   const nsString& aName)
      : media::ShutdownBlocker(aName), mRecognition(aRecognition) {}

  NS_IMETHOD BlockShutdown(nsIAsyncShutdownClient*) override {
    MOZ_ASSERT(NS_IsMainThread());
    // AbortSilently will eventually clear the blocker.
    mRecognition->Abort();
    return NS_OK;
  }

 private:
  // Strong reference: the recognition object must stay alive until the
  // abort completes and this blocker is removed.
  const RefPtr<SpeechRecognition> mRecognition;
};
     93 
// Failure modes of CreateSpeechRecognitionService below. Currently the only
// way creation fails is when no component matches the computed contract ID.
enum class ServiceCreationError {
  ServiceNotFound,
};
     97 
     98 Result<nsCOMPtr<nsISpeechRecognitionService>, ServiceCreationError>
     99 CreateSpeechRecognitionService(nsPIDOMWindowInner* aWindow,
    100                               SpeechRecognition* aRecognition,
    101                               const nsAString& aLang) {
    102  nsAutoCString speechRecognitionServiceCID;
    103 
    104  nsAutoCString prefValue;
    105  Preferences::GetCString(PREFERENCE_DEFAULT_RECOGNITION_SERVICE, prefValue);
    106  nsAutoCString speechRecognitionService;
    107 
    108  if (!prefValue.IsEmpty()) {
    109    speechRecognitionService = prefValue;
    110  } else {
    111    speechRecognitionService = DEFAULT_RECOGNITION_SERVICE;
    112  }
    113 
    114  if (StaticPrefs::media_webspeech_test_fake_recognition_service()) {
    115    speechRecognitionServiceCID =
    116        NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "fake";
    117  } else {
    118    speechRecognitionServiceCID =
    119        nsLiteralCString(NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX) +
    120        speechRecognitionService;
    121  }
    122 
    123  nsresult rv;
    124  nsCOMPtr<nsISpeechRecognitionService> recognitionService;
    125  recognitionService =
    126      do_CreateInstance(speechRecognitionServiceCID.get(), &rv);
    127  if (!recognitionService) {
    128    return Err(ServiceCreationError::ServiceNotFound);
    129  }
    130 
    131  return recognitionService;
    132 }
    133 }  // namespace
    134 
// Cycle-collection and refcounting boilerplate. SpeechRecognition traverses
// its stream, track, recognition service, grammar list and track listener so
// the cycle collector can break reference cycles through them.
NS_IMPL_CYCLE_COLLECTION_WEAK_PTR_INHERITED(SpeechRecognition,
                                            DOMEventTargetHelper, mStream,
                                            mTrack, mRecognitionService,
                                            mSpeechGrammarList, mListener)

// QueryInterface: adds nsIObserver (for timer + test topics) on top of
// DOMEventTargetHelper's interfaces.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognition)
  NS_INTERFACE_MAP_ENTRY(nsIObserver)
NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper)

NS_IMPL_ADDREF_INHERITED(SpeechRecognition, DOMEventTargetHelper)
NS_IMPL_RELEASE_INHERITED(SpeechRecognition, DOMEventTargetHelper)

// Same treatment for the inner TrackListener helper, which holds a cycle-
// collected back-reference to its owning SpeechRecognition.
NS_IMPL_CYCLE_COLLECTION_INHERITED(SpeechRecognition::TrackListener,
                                   DOMMediaStream::TrackListener,
                                   mSpeechRecognition)
NS_IMPL_ADDREF_INHERITED(SpeechRecognition::TrackListener,
                         DOMMediaStream::TrackListener)
NS_IMPL_RELEASE_INHERITED(SpeechRecognition::TrackListener,
                          DOMMediaStream::TrackListener)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognition::TrackListener)
NS_INTERFACE_MAP_END_INHERITING(DOMMediaStream::TrackListener)
    156 
    157 SpeechRecognition::SpeechRecognition(nsPIDOMWindowInner* aOwnerWindow)
    158    : DOMEventTargetHelper(aOwnerWindow),
    159      mEndpointer(kSAMPLE_RATE),
    160      mAudioSamplesPerChunk(mEndpointer.FrameSize()),
    161      mSpeechDetectionTimer(NS_NewTimer()),
    162      mSpeechGrammarList(new SpeechGrammarList(GetOwnerGlobal())),
    163      mContinuous(false),
    164      mInterimResults(false),
    165      mMaxAlternatives(1) {
    166  SR_LOG("created SpeechRecognition");
    167 
    168  if (StaticPrefs::media_webspeech_test_enable()) {
    169    nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
    170    obs->AddObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC, false);
    171    obs->AddObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC, false);
    172  }
    173 
    174  mEndpointer.set_speech_input_complete_silence_length(
    175      Preferences::GetInt(PREFERENCE_ENDPOINTER_SILENCE_LENGTH, 1250000));
    176  mEndpointer.set_long_speech_input_complete_silence_length(
    177      Preferences::GetInt(PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH, 2500000));
    178  mEndpointer.set_long_speech_length(
    179      Preferences::GetInt(PREFERENCE_ENDPOINTER_SILENCE_LENGTH, 3 * 1000000));
    180 
    181  mSpeechDetectionTimeoutMs =
    182      Preferences::GetInt(PREFERENCE_SPEECH_DETECTION_TIMEOUT_MS, 10000);
    183 
    184  Reset();
    185 }
    186 
    187 SpeechRecognition::~SpeechRecognition() = default;
    188 
    189 bool SpeechRecognition::StateBetween(FSMState begin, FSMState end) {
    190  return mCurrentState >= begin && mCurrentState <= end;
    191 }
    192 
// Record the FSM state transition and log it for debugging.
void SpeechRecognition::SetState(FSMState state) {
  mCurrentState = state;
  SR_LOG("Transitioned to state %s", GetName(mCurrentState));
}
    197 
// Standard WebIDL wrapper hook: reflect this object into JS.
JSObject* SpeechRecognition::WrapObject(JSContext* aCx,
                                        JS::Handle<JSObject*> aGivenProto) {
  return SpeechRecognition_Binding::Wrap(aCx, this, aGivenProto);
}
    202 
    203 already_AddRefed<SpeechRecognition> SpeechRecognition::Constructor(
    204    const GlobalObject& aGlobal, ErrorResult& aRv) {
    205  nsCOMPtr<nsPIDOMWindowInner> win = do_QueryInterface(aGlobal.GetAsSupports());
    206  if (!win) {
    207    aRv.Throw(NS_ERROR_FAILURE);
    208    return nullptr;
    209  }
    210 
    211  RefPtr<SpeechRecognition> object = new SpeechRecognition(win);
    212  return object.forget();
    213 }
    214 
    215 void SpeechRecognition::ProcessEvent(SpeechEvent* aEvent) {
    216  SR_LOG("Processing %s, current state is %s", GetName(aEvent),
    217         GetName(mCurrentState));
    218 
    219  if (mAborted && aEvent->mType != EVENT_ABORT) {
    220    // ignore all events while aborting
    221    return;
    222  }
    223 
    224  Transition(aEvent);
    225 }
    226 
    227 void SpeechRecognition::Transition(SpeechEvent* aEvent) {
    228  switch (mCurrentState) {
    229    case STATE_IDLE:
    230      switch (aEvent->mType) {
    231        case EVENT_START:
    232          // TODO: may want to time out if we wait too long
    233          // for user to approve
    234          WaitForAudioData(aEvent);
    235          break;
    236        case EVENT_STOP:
    237        case EVENT_ABORT:
    238        case EVENT_AUDIO_DATA:
    239        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
    240        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
    241          DoNothing(aEvent);
    242          break;
    243        case EVENT_AUDIO_ERROR:
    244        case EVENT_RECOGNITIONSERVICE_ERROR:
    245          AbortError(aEvent);
    246          break;
    247        default:
    248          MOZ_CRASH("Invalid event");
    249      }
    250      break;
    251    case STATE_STARTING:
    252      switch (aEvent->mType) {
    253        case EVENT_AUDIO_DATA:
    254          StartedAudioCapture(aEvent);
    255          break;
    256        case EVENT_AUDIO_ERROR:
    257        case EVENT_RECOGNITIONSERVICE_ERROR:
    258          AbortError(aEvent);
    259          break;
    260        case EVENT_ABORT:
    261          AbortSilently(aEvent);
    262          break;
    263        case EVENT_STOP:
    264          ResetAndEnd();
    265          break;
    266        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
    267        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
    268          DoNothing(aEvent);
    269          break;
    270        case EVENT_START:
    271          SR_LOG("STATE_STARTING: Unhandled event %s", GetName(aEvent));
    272          MOZ_CRASH();
    273        default:
    274          MOZ_CRASH("Invalid event");
    275      }
    276      break;
    277    case STATE_ESTIMATING:
    278      switch (aEvent->mType) {
    279        case EVENT_AUDIO_DATA:
    280          WaitForEstimation(aEvent);
    281          break;
    282        case EVENT_STOP:
    283          StopRecordingAndRecognize(aEvent);
    284          break;
    285        case EVENT_ABORT:
    286          AbortSilently(aEvent);
    287          break;
    288        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
    289        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
    290        case EVENT_RECOGNITIONSERVICE_ERROR:
    291          DoNothing(aEvent);
    292          break;
    293        case EVENT_AUDIO_ERROR:
    294          AbortError(aEvent);
    295          break;
    296        case EVENT_START:
    297          SR_LOG("STATE_ESTIMATING: Unhandled event %d", aEvent->mType);
    298          MOZ_CRASH();
    299        default:
    300          MOZ_CRASH("Invalid event");
    301      }
    302      break;
    303    case STATE_WAITING_FOR_SPEECH:
    304      switch (aEvent->mType) {
    305        case EVENT_AUDIO_DATA:
    306          DetectSpeech(aEvent);
    307          break;
    308        case EVENT_STOP:
    309          StopRecordingAndRecognize(aEvent);
    310          break;
    311        case EVENT_ABORT:
    312          AbortSilently(aEvent);
    313          break;
    314        case EVENT_AUDIO_ERROR:
    315          AbortError(aEvent);
    316          break;
    317        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
    318        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
    319        case EVENT_RECOGNITIONSERVICE_ERROR:
    320          DoNothing(aEvent);
    321          break;
    322        case EVENT_START:
    323          SR_LOG("STATE_STARTING: Unhandled event %s", GetName(aEvent));
    324          MOZ_CRASH();
    325        default:
    326          MOZ_CRASH("Invalid event");
    327      }
    328      break;
    329    case STATE_RECOGNIZING:
    330      switch (aEvent->mType) {
    331        case EVENT_AUDIO_DATA:
    332          WaitForSpeechEnd(aEvent);
    333          break;
    334        case EVENT_STOP:
    335          StopRecordingAndRecognize(aEvent);
    336          break;
    337        case EVENT_AUDIO_ERROR:
    338        case EVENT_RECOGNITIONSERVICE_ERROR:
    339          AbortError(aEvent);
    340          break;
    341        case EVENT_ABORT:
    342          AbortSilently(aEvent);
    343          break;
    344        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
    345        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
    346          DoNothing(aEvent);
    347          break;
    348        case EVENT_START:
    349          SR_LOG("STATE_RECOGNIZING: Unhandled aEvent %s", GetName(aEvent));
    350          MOZ_CRASH();
    351        default:
    352          MOZ_CRASH("Invalid event");
    353      }
    354      break;
    355    case STATE_WAITING_FOR_RESULT:
    356      switch (aEvent->mType) {
    357        case EVENT_STOP:
    358          DoNothing(aEvent);
    359          break;
    360        case EVENT_AUDIO_ERROR:
    361        case EVENT_RECOGNITIONSERVICE_ERROR:
    362          AbortError(aEvent);
    363          break;
    364        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
    365          NotifyFinalResult(aEvent);
    366          break;
    367        case EVENT_AUDIO_DATA:
    368          DoNothing(aEvent);
    369          break;
    370        case EVENT_ABORT:
    371          AbortSilently(aEvent);
    372          break;
    373        case EVENT_START:
    374        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
    375          SR_LOG("STATE_WAITING_FOR_RESULT: Unhandled aEvent %s",
    376                 GetName(aEvent));
    377          MOZ_CRASH();
    378        default:
    379          MOZ_CRASH("Invalid event");
    380      }
    381      break;
    382    case STATE_ABORTING:
    383      switch (aEvent->mType) {
    384        case EVENT_STOP:
    385        case EVENT_ABORT:
    386        case EVENT_AUDIO_DATA:
    387        case EVENT_AUDIO_ERROR:
    388        case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
    389        case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
    390        case EVENT_RECOGNITIONSERVICE_ERROR:
    391          DoNothing(aEvent);
    392          break;
    393        case EVENT_START:
    394          SR_LOG("STATE_ABORTING: Unhandled aEvent %s", GetName(aEvent));
    395          MOZ_CRASH();
    396        default:
    397          MOZ_CRASH("Invalid event");
    398      }
    399      break;
    400    default:
    401      MOZ_CRASH("Invalid state");
    402  }
    403 }
    404 
    405 /*
    406 * Handle a segment of recorded audio data.
    407 * Returns the number of samples that were processed.
    408 */
/*
 * Handle a segment of recorded audio data.
 * Returns the number of samples that were processed.
 */
uint32_t SpeechRecognition::ProcessAudioSegment(AudioSegment* aSegment,
                                                TrackRate aTrackRate) {
  // Feed each chunk to the endpointer (speech/silence detector) and count
  // the samples seen. The rms/energy output value is discarded.
  AudioSegment::ChunkIterator iterator(*aSegment);
  uint32_t samples = 0;
  while (!iterator.IsEnded()) {
    float out;
    mEndpointer.ProcessAudio(*iterator, &out);
    samples += iterator->GetDuration();
    iterator.Next();
  }

  // we need to call the nsISpeechRecognitionService::ProcessAudioSegment
  // in a separate thread so that any eventual encoding or pre-processing
  // of the audio does not block the main thread
  // NOTE: the segment is MOVED into the runnable, so *aSegment is left in a
  // moved-from state for the caller. The service ref is captured strongly so
  // it outlives this object's Reset().
  nsresult rv = mEncodeTaskQueue->Dispatch(NS_NewRunnableFunction(
      "nsISpeechRecognitionService::ProcessAudioSegment",
      [=, service = mRecognitionService,
       segment = std::move(*aSegment)]() mutable {
        service->ProcessAudioSegment(&segment, aTrackRate);
      }));

  MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
  (void)rv;
  return samples;
}
    434 
    435 /****************************************************************************
    436 * FSM Transition functions
    437 *
    438 * If a transition function may cause a DOM event to be fired,
    439 * it may also be re-entered, since the event handler may cause the
    440 * event loop to spin and new SpeechEvents to be processed.
    441 *
    442 * Rules:
    443 * 1) These methods should call SetState as soon as possible.
    444 * 2) If these methods dispatch DOM events, or call methods that dispatch
    445 * DOM events, that should be done as late as possible.
    446 * 3) If anything must happen after dispatching a DOM event, make sure
    447 * the state is still what the method expected it to be.
    448 ****************************************************************************/
    449 
// Return the session to STATE_IDLE and drop everything tied to the current
// recording: service, stream/track listeners, pending promises, counters and
// the speech-detection timer. Safe to call repeatedly.
void SpeechRecognition::Reset() {
  SetState(STATE_IDLE);

  // This breaks potential ref-cycles.
  mRecognitionService = nullptr;

  // Bumping the generation invalidates in-flight GetUserMedia callbacks
  // from an earlier Start() (they compare against this counter).
  ++mStreamGeneration;
  if (mStream) {
    mStream->UnregisterTrackListener(mListener);
    mStream = nullptr;
    mListener = nullptr;
  }
  mTrack = nullptr;
  mTrackIsOwned = false;
  mStopRecordingPromise = nullptr;
  mEncodeTaskQueue = nullptr;
  mEstimationSamples = 0;
  mBufferedSamples = 0;
  mSpeechDetectionTimer->Cancel();
  mAborted = false;
}
    471 
// Reset the session and notify content via the "end" DOM event. Note the
// event dispatch may re-enter the FSM (see the rules block above).
void SpeechRecognition::ResetAndEnd() {
  Reset();
  DispatchTrustedEvent(u"end"_ns);
}
    476 
// EVENT_START in STATE_IDLE: move to STATE_STARTING while we wait for the
// first audio data (and, for the getUserMedia path, user approval).
void SpeechRecognition::WaitForAudioData(SpeechEvent* aEvent) {
  SetState(STATE_STARTING);
}
    480 
// First audio arrived: begin the endpointer's environment-estimation phase
// and fire "audiostart"/"start" at content.
void SpeechRecognition::StartedAudioCapture(SpeechEvent* aEvent) {
  SetState(STATE_ESTIMATING);

  mEndpointer.SetEnvironmentEstimationMode();
  mEstimationSamples +=
      ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);

  DispatchTrustedEvent(u"audiostart"_ns);
  // The "audiostart" handler may have spun the event loop and changed state
  // (rule 3 above) — only fire "start" if we are still estimating.
  if (mCurrentState == STATE_ESTIMATING) {
    DispatchTrustedEvent(u"start"_ns);
  }
}
    493 
// Stop capturing audio and wait for the recognition service's final result.
void SpeechRecognition::StopRecordingAndRecognize(SpeechEvent* aEvent) {
  SetState(STATE_WAITING_FOR_RESULT);

  MOZ_ASSERT(mRecognitionService, "Service deleted before recording done");

  // This will run SoundEnd on the service just before StopRecording begins
  // shutting the encode thread down.
  mSpeechListener->mRemovedPromise->Then(
      GetCurrentSerialEventTarget(), __func__,
      [service = mRecognitionService] { service->SoundEnd(); });

  StopRecording();
}
    507 
// EVENT_AUDIO_DATA while estimating: keep feeding the endpointer until
// roughly 300ms of audio (kESTIMATION_SAMPLES) has been consumed, then
// switch it to user-input mode and start waiting for speech.
void SpeechRecognition::WaitForEstimation(SpeechEvent* aEvent) {
  SetState(STATE_ESTIMATING);

  mEstimationSamples +=
      ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
  if (mEstimationSamples > kESTIMATION_SAMPLES) {
    mEndpointer.SetUserInputMode();
    SetState(STATE_WAITING_FOR_SPEECH);
  }
}
    518 
// EVENT_AUDIO_DATA while waiting for speech: once the endpointer reports
// speech onset, cancel the no-speech timeout, move to STATE_RECOGNIZING and
// fire "speechstart" at content.
void SpeechRecognition::DetectSpeech(SpeechEvent* aEvent) {
  SetState(STATE_WAITING_FOR_SPEECH);

  ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
  if (mEndpointer.DidStartReceivingSpeech()) {
    mSpeechDetectionTimer->Cancel();
    SetState(STATE_RECOGNIZING);
    DispatchTrustedEvent(u"speechstart"_ns);
  }
}
    529 
// EVENT_AUDIO_DATA while recognizing: when the endpointer decides the
// utterance is complete, fire "speechend" and stop recording.
void SpeechRecognition::WaitForSpeechEnd(SpeechEvent* aEvent) {
  SetState(STATE_RECOGNIZING);

  ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
  if (mEndpointer.speech_input_complete()) {
    DispatchTrustedEvent(u"speechend"_ns);

    // Re-check state: the "speechend" handler may have aborted us (rule 3).
    if (mCurrentState == STATE_RECOGNIZING) {
      // FIXME: StopRecordingAndRecognize should only be called for single
      // shot services for continuous we should just inform the service
      StopRecordingAndRecognize(aEvent);
    }
  }
}
    544 
// Deliver the service's final result to content as a "result" event.
// The session is reset (and "end" fired) BEFORE dispatching "result", so a
// handler that calls start() on us again sees a clean STATE_IDLE session.
void SpeechRecognition::NotifyFinalResult(SpeechEvent* aEvent) {
  ResetAndEnd();

  RootedDictionary<SpeechRecognitionEventInit> init(RootingCx());
  init.mBubbles = true;
  init.mCancelable = false;
  // init.mResultIndex = 0;
  init.mResults = aEvent->mRecognitionResultList;
  init.mInterpretation = JS::NullValue();
  // init.mEmma = nullptr;

  RefPtr<SpeechRecognitionEvent> event =
      SpeechRecognitionEvent::Constructor(this, u"result"_ns, init);
  event->SetTrusted(true);

  DispatchEvent(*event);
}
    562 
    563 void SpeechRecognition::DoNothing(SpeechEvent* aEvent) {}
    564 
// Abort the session without firing an error event ("end" still fires once
// recording has fully stopped). Used for both explicit abort() and the
// error path (AbortError adds the error event afterwards).
void SpeechRecognition::AbortSilently(SpeechEvent* aEvent) {
  if (mRecognitionService) {
    if (mTrack) {
      // This will run Abort on the service just before StopRecording begins
      // shutting the encode thread down.
      mSpeechListener->mRemovedPromise->Then(
          GetCurrentSerialEventTarget(), __func__,
          [service = mRecognitionService] { service->Abort(); });
    } else {
      // Recording hasn't started yet. We can just call Abort().
      mRecognitionService->Abort();
    }
  }

  // Once teardown completes, reset and fire "end". `self` keeps us alive
  // across the async hop.
  StopRecording()->Then(
      GetCurrentSerialEventTarget(), __func__,
      [self = RefPtr<SpeechRecognition>(this), this] { ResetAndEnd(); });

  SetState(STATE_ABORTING);
}
    585 
// Abort the session and then surface the error carried by aEvent to content.
void SpeechRecognition::AbortError(SpeechEvent* aEvent) {
  AbortSilently(aEvent);
  NotifyError(aEvent);
}
    590 
// Dispatch the pre-built error event attached to aEvent (marked trusted).
void SpeechRecognition::NotifyError(SpeechEvent* aEvent) {
  aEvent->mError->SetTrusted(true);

  DispatchEvent(*aEvent->mError);
}
    596 
    597 /**************************************
    598 * Event triggers and other functions *
    599 **************************************/
    600 NS_IMETHODIMP
    601 SpeechRecognition::StartRecording(RefPtr<AudioStreamTrack>& aTrack) {
    602  // hold a reference so that the underlying track doesn't get collected.
    603  mTrack = aTrack;
    604  MOZ_ASSERT(!mTrack->Ended());
    605 
    606  mSpeechListener = SpeechTrackListener::Create(this);
    607  mTrack->AddListener(mSpeechListener);
    608 
    609  nsString blockerName;
    610  blockerName.AppendPrintf("SpeechRecognition %p shutdown", this);
    611  mShutdownBlocker =
    612      MakeAndAddRef<SpeechRecognitionShutdownBlocker>(this, blockerName);
    613  media::MustGetShutdownBarrier()->AddBlocker(
    614      mShutdownBlocker, NS_LITERAL_STRING_FROM_CSTRING(__FILE__), __LINE__,
    615      u"SpeechRecognition shutdown"_ns);
    616 
    617  mEndpointer.StartSession();
    618 
    619  return mSpeechDetectionTimer->Init(this, mSpeechDetectionTimeoutMs,
    620                                     nsITimer::TYPE_ONE_SHOT);
    621 }
    622 
// Tear down recording. Returns a promise resolved once the track listener
// has been removed from the graph and the encode task queue has shut down.
// Idempotent: repeated calls return the same cached promise, and calling it
// when recording never started resolves immediately.
RefPtr<GenericNonExclusivePromise> SpeechRecognition::StopRecording() {
  if (!mTrack) {
    // Recording wasn't started, or has already been stopped.
    if (mStream) {
      // Ensure we don't start recording because a track became available
      // before we get reset.
      mStream->UnregisterTrackListener(mListener);
      mListener = nullptr;
    }
    return GenericNonExclusivePromise::CreateAndResolve(true, __func__);
  }

  if (mStopRecordingPromise) {
    return mStopRecordingPromise;
  }

  mTrack->RemoveListener(mSpeechListener);
  // Only stop the track if we created it via getUserMedia; caller-supplied
  // tracks are left running.
  if (mTrackIsOwned) {
    mTrack->Stop();
  }

  mEndpointer.EndSession();
  DispatchTrustedEvent(u"audioend"_ns);

  // Block shutdown until the speech track listener has been removed from the
  // MSG, as it holds a reference to us, and we reference the world, which we
  // don't want to leak.
  mStopRecordingPromise =
      mSpeechListener->mRemovedPromise
          ->Then(
              GetCurrentSerialEventTarget(), __func__,
              [self = RefPtr<SpeechRecognition>(this), this] {
                SR_LOG("Shutting down encoding thread");
                return mEncodeTaskQueue->BeginShutdown();
              },
              [] {
                // mRemovedPromise is not expected to reject.
                MOZ_CRASH("Unexpected rejection");
                return ShutdownPromise::CreateAndResolve(false, __func__);
              })
          ->Then(
              GetCurrentSerialEventTarget(), __func__,
              [self = RefPtr<SpeechRecognition>(this), this] {
                // Encode queue is down; it is now safe to let the app shut
                // down, so drop the blocker installed by StartRecording.
                media::MustGetShutdownBarrier()->RemoveBlocker(
                    mShutdownBlocker);
                mShutdownBlocker = nullptr;

                MOZ_DIAGNOSTIC_ASSERT(mCurrentState != STATE_IDLE);
                return GenericNonExclusivePromise::CreateAndResolve(true,
                                                                    __func__);
              },
              [] {
                MOZ_CRASH("Unexpected rejection");
                return GenericNonExclusivePromise::CreateAndResolve(false,
                                                                    __func__);
              });
  return mStopRecordingPromise;
}
    680 
    681 NS_IMETHODIMP
    682 SpeechRecognition::Observe(nsISupports* aSubject, const char* aTopic,
    683                           const char16_t* aData) {
    684  MOZ_ASSERT(NS_IsMainThread(), "Observer invoked off the main thread");
    685 
    686  if (!strcmp(aTopic, NS_TIMER_CALLBACK_TOPIC) &&
    687      StateBetween(STATE_IDLE, STATE_WAITING_FOR_SPEECH)) {
    688    DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR,
    689                  SpeechRecognitionErrorCode::No_speech,
    690                  "No speech detected (timeout)");
    691  } else if (!strcmp(aTopic, SPEECH_RECOGNITION_TEST_END_TOPIC)) {
    692    nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
    693    obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC);
    694    obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC);
    695  } else if (StaticPrefs::media_webspeech_test_fake_fsm_events() &&
    696             !strcmp(aTopic, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC)) {
    697    ProcessTestEventRequest(aSubject, nsDependentString(aData));
    698  }
    699 
    700  return NS_OK;
    701 }
    702 
// Handle a synthetic FSM event requested by the test harness. Abort and
// audio-error are handled here; anything else is assumed to be destined for
// the fake recognition service.
void SpeechRecognition::ProcessTestEventRequest(nsISupports* aSubject,
                                                const nsAString& aEventName) {
  if (aEventName.EqualsLiteral("EVENT_ABORT")) {
    Abort();
  } else if (aEventName.EqualsLiteral("EVENT_AUDIO_ERROR")) {
    DispatchError(
        SpeechRecognition::EVENT_AUDIO_ERROR,
        SpeechRecognitionErrorCode::Audio_capture,  // TODO different codes?
        "AUDIO_ERROR test event");
  } else {
    NS_ASSERTION(StaticPrefs::media_webspeech_test_fake_recognition_service(),
                 "Got request for fake recognition service event, but "
                 "media.webspeech.test.fake_recognition_service is unset");

    // let the fake recognition service handle the request
  }
}
    720 
    721 already_AddRefed<SpeechGrammarList> SpeechRecognition::Grammars() const {
    722  RefPtr<SpeechGrammarList> speechGrammarList = mSpeechGrammarList;
    723  return speechGrammarList.forget();
    724 }
    725 
// WebIDL `grammars` setter.
void SpeechRecognition::SetGrammars(SpeechGrammarList& aArg) {
  mSpeechGrammarList = &aArg;
}
    729 
// Simple WebIDL attribute accessors: lang, continuous, interimResults and
// maxAlternatives are plain member-backed properties with no validation.
void SpeechRecognition::GetLang(nsString& aRetVal) const { aRetVal = mLang; }

void SpeechRecognition::SetLang(const nsAString& aArg) { mLang = aArg; }

// `continuous` takes an ErrorResult per the binding signature but never
// throws.
bool SpeechRecognition::GetContinuous(ErrorResult& aRv) const {
  return mContinuous;
}

void SpeechRecognition::SetContinuous(bool aArg, ErrorResult& aRv) {
  mContinuous = aArg;
}

bool SpeechRecognition::InterimResults() const { return mInterimResults; }

void SpeechRecognition::SetInterimResults(bool aArg) { mInterimResults = aArg; }

uint32_t SpeechRecognition::MaxAlternatives() const { return mMaxAlternatives; }

void SpeechRecognition::SetMaxAlternatives(uint32_t aArg) {
  mMaxAlternatives = aArg;
}
    751 
// `serviceURI` is not implemented in this engine; both accessors throw
// NS_ERROR_NOT_IMPLEMENTED to the caller.
void SpeechRecognition::GetServiceURI(nsString& aRetVal,
                                      ErrorResult& aRv) const {
  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
}

void SpeechRecognition::SetServiceURI(const nsAString& aArg, ErrorResult& aRv) {
  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
}
    760 
/**
 * Implements SpeechRecognition.start().
 *
 * Only valid while the state machine is idle. Sets up the recognition
 * service and grammar list, creates the encoder task queue, then attaches
 * to an audio track: either the first live audio track of aStream (when
 * passed) or one obtained asynchronously via getUserMedia(). Finally an
 * EVENT_START is queued for the state machine.
 *
 * @param aStream      optional caller-provided media stream to recognize
 * @param aCallerType  trust level of the caller, forwarded to getUserMedia
 * @param aRv          receives INVALID_STATE_ERR on precondition failures
 */
void SpeechRecognition::Start(const Optional<NonNull<DOMMediaStream>>& aStream,
                              CallerType aCallerType, ErrorResult& aRv) {
  // start() while already started/aborting is an error per the spec.
  if (mCurrentState != STATE_IDLE) {
    aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
    return;
  }

  if (!SetRecognitionService(aRv)) {
    return;
  }

  if (!ValidateAndSetGrammarList(aRv)) {
    return;
  }

  mEncodeTaskQueue =
      TaskQueue::Create(GetMediaThreadPool(MediaThreadType::WEBRTC_WORKER),
                        "WebSpeechEncoderThread");

  nsresult rv;
  rv = mRecognitionService->Initialize(this);
  // NOTE(review): a failed Initialize returns silently without throwing on
  // aRv — the caller only sees the NS_WARN_IF warning.
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return;
  }

  // Audio-only capture; used only on the getUserMedia path below.
  MediaStreamConstraints constraints;
  constraints.mAudio.SetAsBoolean() = true;

  MOZ_ASSERT(!mListener);
  mListener = new TrackListener(this);

  if (aStream.WasPassed()) {
    // Caller supplied the stream: we do not own its tracks.
    mStream = &aStream.Value();
    mTrackIsOwned = false;
    mStream->RegisterTrackListener(mListener);
    nsTArray<RefPtr<AudioStreamTrack>> tracks;
    mStream->GetAudioTracks(tracks);
    // Use the first non-ended audio track, if any.
    for (const RefPtr<AudioStreamTrack>& track : tracks) {
      if (!track->Ended()) {
        NotifyTrackAdded(track);
        break;
      }
    }
  } else {
    // No stream given: capture the microphone ourselves.
    mTrackIsOwned = true;
    nsPIDOMWindowInner* win = GetOwnerWindow();
    if (!win || !win->IsFullyActive()) {
      aRv.ThrowInvalidStateError("The document is not fully active.");
      return;
    }
    AutoNoJSAPI nojsapi;
    // |self| keeps this object alive until the promise settles; the
    // generation counter detects an abort/restart that happened meanwhile.
    RefPtr<SpeechRecognition> self(this);
    MediaManager::Get()
        ->GetUserMedia(win, constraints, aCallerType)
        ->Then(
            GetCurrentSerialEventTarget(), __func__,
            [this, self,
             generation = mStreamGeneration](RefPtr<DOMMediaStream>&& aStream) {
              nsTArray<RefPtr<AudioStreamTrack>> tracks;
              aStream->GetAudioTracks(tracks);
              if (mAborted || mCurrentState != STATE_STARTING ||
                  mStreamGeneration != generation) {
                // We were probably aborted. Exit early, stopping any
                // tracks the capture already produced.
                for (const RefPtr<AudioStreamTrack>& track : tracks) {
                  track->Stop();
                }
                return;
              }
              mStream = std::move(aStream);
              mStream->RegisterTrackListener(mListener);
              for (const RefPtr<AudioStreamTrack>& track : tracks) {
                if (!track->Ended()) {
                  NotifyTrackAdded(track);
                }
              }
            },
            [this, self,
             generation = mStreamGeneration](RefPtr<MediaMgrError>&& error) {
              if (mAborted || mCurrentState != STATE_STARTING ||
                  mStreamGeneration != generation) {
                // We were probably aborted. Exit early.
                return;
              }
              // Map permission denial to "not-allowed"; everything else is
              // reported as an audio-capture failure.
              SpeechRecognitionErrorCode errorCode;

              if (error->mName == MediaMgrError::Name::NotAllowedError) {
                errorCode = SpeechRecognitionErrorCode::Not_allowed;
              } else {
                errorCode = SpeechRecognitionErrorCode::Audio_capture;
              }
              DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR, errorCode,
                            error->mMessage);
            });
  }

  // Kick the state machine regardless of which path attached the audio.
  RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_START);
  NS_DispatchToMainThread(event);
}
    859 
    860 bool SpeechRecognition::SetRecognitionService(ErrorResult& aRv) {
    861  if (!GetOwnerWindow()) {
    862    aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
    863    return false;
    864  }
    865 
    866  // See:
    867  // https://dvcs.w3.org/hg/speech-api/raw-file/tip/webspeechapi.html#dfn-lang
    868  nsAutoString lang;
    869  if (!mLang.IsEmpty()) {
    870    lang = mLang;
    871  } else {
    872    nsCOMPtr<Document> document = GetOwnerWindow()->GetExtantDoc();
    873    if (!document) {
    874      aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
    875      return false;
    876    }
    877    nsCOMPtr<Element> element = document->GetRootElement();
    878    if (!element) {
    879      aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
    880      return false;
    881    }
    882 
    883    nsAutoString lang;
    884    element->GetLang(lang);
    885  }
    886 
    887  auto result = CreateSpeechRecognitionService(GetOwnerWindow(), this, lang);
    888 
    889  if (result.isErr()) {
    890    switch (result.unwrapErr()) {
    891      case ServiceCreationError::ServiceNotFound:
    892        aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
    893        break;
    894      default:
    895        MOZ_CRASH("Unknown error");
    896    }
    897    return false;
    898  }
    899 
    900  mRecognitionService = result.unwrap();
    901  MOZ_DIAGNOSTIC_ASSERT(mRecognitionService);
    902  return true;
    903 }
    904 
    905 bool SpeechRecognition::ValidateAndSetGrammarList(ErrorResult& aRv) {
    906  if (!mSpeechGrammarList) {
    907    aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
    908    return false;
    909  }
    910 
    911  uint32_t grammarListLength = mSpeechGrammarList->Length();
    912  for (uint32_t count = 0; count < grammarListLength; ++count) {
    913    RefPtr<SpeechGrammar> speechGrammar = mSpeechGrammarList->Item(count, aRv);
    914    if (aRv.Failed()) {
    915      return false;
    916    }
    917    if (NS_FAILED(mRecognitionService->ValidateAndSetGrammarList(
    918            speechGrammar.get(), nullptr))) {
    919      aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
    920      return false;
    921    }
    922  }
    923 
    924  return true;
    925 }
    926 
    927 void SpeechRecognition::Stop() {
    928  RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_STOP);
    929  NS_DispatchToMainThread(event);
    930 }
    931 
    932 void SpeechRecognition::Abort() {
    933  if (mAborted) {
    934    return;
    935  }
    936 
    937  mAborted = true;
    938 
    939  RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_ABORT);
    940  NS_DispatchToMainThread(event);
    941 }
    942 
    943 void SpeechRecognition::NotifyTrackAdded(
    944    const RefPtr<MediaStreamTrack>& aTrack) {
    945  if (mTrack) {
    946    return;
    947  }
    948 
    949  RefPtr<AudioStreamTrack> audioTrack = aTrack->AsAudioStreamTrack();
    950  if (!audioTrack) {
    951    return;
    952  }
    953 
    954  if (audioTrack->Ended()) {
    955    return;
    956  }
    957 
    958  StartRecording(audioTrack);
    959 }
    960 
    961 void SpeechRecognition::DispatchError(EventType aErrorType,
    962                                      SpeechRecognitionErrorCode aErrorCode,
    963                                      const nsACString& aMessage) {
    964  MOZ_ASSERT(NS_IsMainThread());
    965  MOZ_ASSERT(aErrorType == EVENT_RECOGNITIONSERVICE_ERROR ||
    966                 aErrorType == EVENT_AUDIO_ERROR,
    967             "Invalid error type!");
    968 
    969  RefPtr<SpeechRecognitionError> srError =
    970      new SpeechRecognitionError(nullptr, nullptr, nullptr);
    971 
    972  srError->InitSpeechRecognitionError(u"error"_ns, true, false, aErrorCode,
    973                                      aMessage);
    974 
    975  RefPtr<SpeechEvent> event = new SpeechEvent(this, aErrorType);
    976  event->mError = srError;
    977  NS_DispatchToMainThread(event);
    978 }
    979 
    980 /*
    981 * Buffer audio samples into mAudioSamplesBuffer until aBufferSize.
    982 * Updates mBufferedSamples and returns the number of samples that were
    983 * buffered.
    984 */
    985 uint32_t SpeechRecognition::FillSamplesBuffer(const int16_t* aSamples,
    986                                              uint32_t aSampleCount) {
    987  MOZ_ASSERT(mBufferedSamples < mAudioSamplesPerChunk);
    988  MOZ_ASSERT(mAudioSamplesBuffer);
    989 
    990  int16_t* samplesBuffer = static_cast<int16_t*>(mAudioSamplesBuffer->Data());
    991  size_t samplesToCopy =
    992      std::min(aSampleCount, mAudioSamplesPerChunk - mBufferedSamples);
    993 
    994  PodCopy(samplesBuffer + mBufferedSamples, aSamples, samplesToCopy);
    995 
    996  mBufferedSamples += samplesToCopy;
    997  return samplesToCopy;
    998 }
    999 
   1000 /*
   1001 * Split a samples buffer starting of a given size into
   1002 * chunks of equal size. The chunks are stored in the array
   1003 * received as argument.
   1004 * Returns the offset of the end of the last chunk that was
   1005 * created.
   1006 */
   1007 uint32_t SpeechRecognition::SplitSamplesBuffer(
   1008    const int16_t* aSamplesBuffer, uint32_t aSampleCount,
   1009    nsTArray<RefPtr<SharedBuffer>>& aResult) {
   1010  uint32_t chunkStart = 0;
   1011 
   1012  while (chunkStart + mAudioSamplesPerChunk <= aSampleCount) {
   1013    CheckedInt<size_t> bufferSize(sizeof(int16_t));
   1014    bufferSize *= mAudioSamplesPerChunk;
   1015    RefPtr<SharedBuffer> chunk = SharedBuffer::Create(bufferSize);
   1016 
   1017    PodCopy(static_cast<short*>(chunk->Data()), aSamplesBuffer + chunkStart,
   1018            mAudioSamplesPerChunk);
   1019 
   1020    aResult.AppendElement(chunk.forget());
   1021    chunkStart += mAudioSamplesPerChunk;
   1022  }
   1023 
   1024  return chunkStart;
   1025 }
   1026 
   1027 AudioSegment* SpeechRecognition::CreateAudioSegment(
   1028    nsTArray<RefPtr<SharedBuffer>>& aChunks) {
   1029  AudioSegment* segment = new AudioSegment();
   1030  for (uint32_t i = 0; i < aChunks.Length(); ++i) {
   1031    RefPtr<SharedBuffer> buffer = aChunks[i];
   1032    const int16_t* chunkData = static_cast<const int16_t*>(buffer->Data());
   1033 
   1034    AutoTArray<const int16_t*, 1> channels;
   1035    channels.AppendElement(chunkData);
   1036    segment->AppendFrames(buffer.forget(), channels, mAudioSamplesPerChunk,
   1037                          PRINCIPAL_HANDLE_NONE);
   1038  }
   1039 
   1040  return segment;
   1041 }
   1042 
/**
 * Off-main-thread entry point for captured PCM audio.
 *
 * Re-chunks the incoming int16 samples into buffers of exactly
 * mAudioSamplesPerChunk samples, carrying any remainder over between calls
 * in mAudioSamplesBuffer, then dispatches the completed chunks to the main
 * thread as an EVENT_AUDIO_DATA SpeechEvent.
 *
 * @param aRecognition main-thread handle the event is delivered to
 * @param aSamples     int16 sample buffer; ownership is taken
 * @param aDuration    number of samples in aSamples
 * @param aProvider    listener that produced the audio, forwarded on the event
 * @param aTrackRate   track sample rate, forwarded on the event
 */
void SpeechRecognition::FeedAudioData(
    nsMainThreadPtrHandle<SpeechRecognition>& aRecognition,
    already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration,
    MediaTrackListener* aProvider, TrackRate aTrackRate) {
  NS_ASSERTION(!NS_IsMainThread(),
               "FeedAudioData should not be called in the main thread");

  // Endpointer expects to receive samples in chunks whose size is a
  // multiple of its frame size.
  // Since we can't assume we will receive the frames in appropriate-sized
  // chunks, we must buffer and split them in chunks of mAudioSamplesPerChunk
  // (a multiple of Endpointer's frame size) before feeding to Endpointer.

  // ensure aSamples is deleted
  RefPtr<SharedBuffer> refSamples = aSamples;

  uint32_t samplesIndex = 0;
  const int16_t* samples = static_cast<int16_t*>(refSamples->Data());
  AutoTArray<RefPtr<SharedBuffer>, 5> chunksToSend;

  // fill up our buffer and make a chunk out of it, if possible
  if (mBufferedSamples > 0) {
    // FillSamplesBuffer copies at most (chunk size - mBufferedSamples)
    // samples, so passing aDuration here is safe.
    samplesIndex += FillSamplesBuffer(samples, aDuration);

    if (mBufferedSamples == mAudioSamplesPerChunk) {
      // The carry-over buffer is now a full chunk; hand it off.
      chunksToSend.AppendElement(mAudioSamplesBuffer.forget());
      mBufferedSamples = 0;
    }
  }

  // create sample chunks of correct size
  if (samplesIndex < aDuration) {
    samplesIndex += SplitSamplesBuffer(samples + samplesIndex,
                                       aDuration - samplesIndex, chunksToSend);
  }

  // buffer remaining samples (less than one chunk's worth) in a fresh
  // carry-over buffer for the next call
  if (samplesIndex < aDuration) {
    mBufferedSamples = 0;
    CheckedInt<size_t> bufferSize(sizeof(int16_t));
    bufferSize *= mAudioSamplesPerChunk;
    mAudioSamplesBuffer = SharedBuffer::Create(bufferSize);

    FillSamplesBuffer(samples + samplesIndex, aDuration - samplesIndex);
  }

  // Ship the full chunks to the state machine; the event owns the segment.
  AudioSegment* segment = CreateAudioSegment(chunksToSend);
  RefPtr<SpeechEvent> event = new SpeechEvent(aRecognition, EVENT_AUDIO_DATA);
  event->mAudioSegment = segment;
  event->mProvider = aProvider;
  event->mTrackRate = aTrackRate;
  NS_DispatchToMainThread(event);
}
   1096 
   1097 const char* SpeechRecognition::GetName(FSMState aId) {
   1098  static const char* names[] = {
   1099      "STATE_IDLE",        "STATE_STARTING",
   1100      "STATE_ESTIMATING",  "STATE_WAITING_FOR_SPEECH",
   1101      "STATE_RECOGNIZING", "STATE_WAITING_FOR_RESULT",
   1102      "STATE_ABORTING",
   1103  };
   1104 
   1105  MOZ_ASSERT(aId < STATE_COUNT);
   1106  MOZ_ASSERT(std::size(names) == STATE_COUNT);
   1107  return names[aId];
   1108 }
   1109 
   1110 const char* SpeechRecognition::GetName(SpeechEvent* aEvent) {
   1111  static const char* names[] = {"EVENT_START",
   1112                                "EVENT_STOP",
   1113                                "EVENT_ABORT",
   1114                                "EVENT_AUDIO_DATA",
   1115                                "EVENT_AUDIO_ERROR",
   1116                                "EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT",
   1117                                "EVENT_RECOGNITIONSERVICE_FINAL_RESULT",
   1118                                "EVENT_RECOGNITIONSERVICE_ERROR"};
   1119 
   1120  MOZ_ASSERT(aEvent->mType < EVENT_COUNT);
   1121  MOZ_ASSERT(std::size(names) == EVENT_COUNT);
   1122  return names[aEvent->mType];
   1123 }
   1124 
   1125 TaskQueue* SpeechRecognition::GetTaskQueueForEncoding() const {
   1126  MOZ_ASSERT(NS_IsMainThread());
   1127  return mEncodeTaskQueue;
   1128 }
   1129 
// Constructs a state-machine event for aRecognition. Wraps the raw pointer
// in an nsMainThreadPtrHolder so the reference is released on the main
// thread even if this Runnable is destroyed elsewhere. The audio segment,
// result list and error start out null and are filled in by the producer
// (e.g. FeedAudioData sets mAudioSegment, mProvider and mTrackRate).
SpeechEvent::SpeechEvent(SpeechRecognition* aRecognition,
                         SpeechRecognition::EventType aType)
    : Runnable("dom::SpeechEvent"),
      mAudioSegment(nullptr),
      mRecognitionResultList(nullptr),
      mError(nullptr),
      mRecognition(new nsMainThreadPtrHolder<SpeechRecognition>(
          "SpeechEvent::SpeechEvent", aRecognition)),
      mType(aType),
      mTrackRate(0) {}
   1140 
// Overload used off the main thread (see FeedAudioData): reuses an existing
// main-thread handle instead of creating a new holder, which would not be
// safe to do from a non-main thread.
SpeechEvent::SpeechEvent(nsMainThreadPtrHandle<SpeechRecognition>& aRecognition,
                         SpeechRecognition::EventType aType)
    : Runnable("dom::SpeechEvent"),
      mAudioSegment(nullptr),
      mRecognitionResultList(nullptr),
      mError(nullptr),
      mRecognition(aRecognition),
      mType(aType),
      mTrackRate(0) {}
   1150 
   1151 SpeechEvent::~SpeechEvent() { delete mAudioSegment; }
   1152 
   1153 NS_IMETHODIMP
   1154 SpeechEvent::Run() {
   1155  mRecognition->ProcessEvent(this);
   1156  return NS_OK;
   1157 }
   1158 
   1159 }  // namespace mozilla::dom