tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

OnlineSpeechRecognitionService.h (4104B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
      3 /* This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #ifndef mozilla_dom_OnlineRecognitionService_h
      8 #define mozilla_dom_OnlineRecognitionService_h
      9 
     10 #include "ContainerWriter.h"
     11 #include "OpusTrackEncoder.h"
     12 #include "nsCOMPtr.h"
     13 #include "nsISpeechRecognitionService.h"
     14 #include "nsIStreamListener.h"
     15 #include "nsTArray.h"
     16 #include "speex/speex_resampler.h"
     17 
     18 #define NS_ONLINE_SPEECH_RECOGNITION_SERVICE_CID \
     19  {0x0ff5ce56,                                   \
     20   0x5b09,                                       \
     21   0x4db8,                                       \
     22   {0xad, 0xc6, 0x82, 0x66, 0xaf, 0x95, 0xf8, 0x64}};
     23 
     24 namespace mozilla {
     25 
     26 namespace ipc {
     27 class PrincipalInfo;
     28 }  // namespace ipc
     29 
     30 /**
     31 * Online implementation of the nsISpeechRecognitionService interface
     32 */
     33 class OnlineSpeechRecognitionService : public nsISpeechRecognitionService,
     34                                       public nsIStreamListener {
     35 public:
     36  // Add XPCOM glue code
     37  NS_DECL_THREADSAFE_ISUPPORTS
     38  NS_DECL_NSISPEECHRECOGNITIONSERVICE
     39  NS_DECL_NSIREQUESTOBSERVER
     40  NS_DECL_NSISTREAMLISTENER
     41 
     42  /**
     43   * Listener responsible for handling the events raised by the TrackEncoder
     44   */
     45  class SpeechEncoderListener : public TrackEncoderListener {
     46   public:
     47    explicit SpeechEncoderListener(OnlineSpeechRecognitionService* aService)
     48        : mService(aService), mOwningThread(AbstractThread::GetCurrent()) {}
     49 
     50    void Started(TrackEncoder* aEncoder) override {}
     51 
     52    void Initialized(TrackEncoder* aEncoder) override {
     53      MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
     54      mService->EncoderInitialized();
     55    }
     56 
     57    void Error(TrackEncoder* aEncoder) override {
     58      MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
     59      mService->EncoderError();
     60    }
     61 
     62   private:
     63    const RefPtr<OnlineSpeechRecognitionService> mService;
     64    const RefPtr<AbstractThread> mOwningThread;
     65  };
     66 
     67  /**
     68   * Default constructs a OnlineSpeechRecognitionService
     69   */
     70  OnlineSpeechRecognitionService();
     71 
     72  /**
     73   * Called by SpeechEncoderListener when the AudioTrackEncoder has been
     74   * initialized.
     75   */
     76  void EncoderInitialized();
     77 
     78  /**
     79   * Called after the AudioTrackEncoder has encoded all data for us to wrap in a
     80   * container and pass along.
     81   */
     82  void EncoderFinished();
     83 
     84  /**
     85   * Called by SpeechEncoderListener when the AudioTrackEncoder has
     86   * encountered an error.
     87   */
     88  void EncoderError();
     89 
     90 private:
     91  /**
     92   * Private destructor to prevent bypassing of reference counting
     93   */
     94  virtual ~OnlineSpeechRecognitionService();
     95 
     96  /** The associated SpeechRecognition */
     97  nsMainThreadPtrHandle<dom::SpeechRecognition> mRecognition;
     98 
     99  /**
    100   * Builds a mock SpeechRecognitionResultList
    101   */
    102  dom::SpeechRecognitionResultList* BuildMockResultList();
    103 
    104  /**
    105   * Method responsible for uploading the audio to the remote endpoint
    106   */
    107  void DoSTT();
    108 
    109  // Encoded and packaged ogg audio data
    110  nsTArray<nsTArray<uint8_t>> mEncodedData;
    111  // Member responsible for holding a reference to the TrackEncoderListener
    112  RefPtr<SpeechEncoderListener> mSpeechEncoderListener;
    113  // MediaQueue fed encoded data by mAudioEncoder
    114  MediaQueue<EncodedFrame> mEncodedAudioQueue;
    115  // Encoder responsible for encoding the frames from pcm to opus which is the
    116  // format supported by our backend
    117  UniquePtr<AudioTrackEncoder> mAudioEncoder;
    118  // Object responsible for wrapping the opus frames into an ogg container
    119  UniquePtr<ContainerWriter> mWriter;
    120  // Member responsible for storing the json string returned by the endpoint
    121  nsCString mBuf;
    122  // Used to calculate a ceiling on the time spent listening.
    123  TimeStamp mFirstIteration;
    124  // flag responsible to control if the user choose to abort
    125  bool mAborted = false;
    126  //  reference to the audio encoder queue
    127  RefPtr<TaskQueue> mEncodeTaskQueue;
    128 };
    129 
    130 }  // namespace mozilla
    131 
    132 #endif