tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

OSXSpeechSynthesizerService.mm (13865B)


      1 /* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset:
      2 * 2 -*- */
      3 /* vim: set ts=2 sw=2 et tw=80: */
      4 /* This Source Code Form is subject to the terms of the Mozilla Public
      5 * License, v. 2.0. If a copy of the MPL was not distributed with this
      6 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      7 
      8 #include "OSXSpeechSynthesizerService.h"
      9 #include "mozilla/Assertions.h"
     10 #include "mozilla/ClearOnShutdown.h"
     11 #include "mozilla/Preferences.h"
     12 #include "mozilla/StaticPrefs_media.h"
     13 #include "mozilla/dom/nsSpeechTask.h"
     14 #include "mozilla/dom/nsSynthVoiceRegistry.h"
     15 #include "nsCocoaUtils.h"
     16 #include "nsISupports.h"
     17 #include "nsIThread.h"
     18 #include "nsObjCExceptions.h"
     19 #include "nsServiceManagerUtils.h"
     20 #include "nsThreadUtils.h"
     21 #include "nsXULAppAPI.h"
     22 
     23 #import <Cocoa/Cocoa.h>
     24 
     25 @class SpeechDelegate;
     26 
     27 // We can escape the default delimiters ("[[" and "]]") by temporarily
     28 // changing the delimiters just before they appear, and changing them back
     29 // just after.
     30 #define DLIM_ESCAPE_START "[[dlim (( ))]]"
     31 #define DLIM_ESCAPE_END "((dlim [[ ]]))"
     32 
     33 using namespace mozilla;
     34 
     // Connects one nsISpeechTask to the NSSpeechSynthesizer that speaks it.
     // The synthesizer's delegate (a SpeechDelegate) calls the On*() notification
     // methods below, which turn them into Dispatch*() calls on the task.
     class SpeechTaskCallback final : public nsISpeechTaskCallback {
      public:
       // aSynth is released in the destructor. aOffsets is cloned; it maps a
       // position in the escaped text handed to the synthesizer back to the
       // position in the caller-supplied text.
       SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
                          const nsTArray<size_t>& aOffsets);

       NS_DECL_CYCLE_COLLECTING_ISUPPORTS
       NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback,
                                                nsISpeechTaskCallback)

       NS_DECL_NSISPEECHTASKCALLBACK

       // Notifications forwarded by SpeechDelegate.
       void OnDidFinishSpeaking();
       void OnError(uint32_t aIndex);
       void OnWillSpeakWord(uint32_t aIndex, uint32_t aLength);

      private:
       virtual ~SpeechTaskCallback();

       // Seconds elapsed since mStartingTime.
       float GetTimeDurationFromStart();

       // Task receiving the events; reset to null once speaking has finished.
       nsCOMPtr<nsISpeechTask> mTask;
       // Synthesizer driving this utterance; released in the destructor.
       NSSpeechSynthesizer* mSpeechSynthesizer;
       // Delegate allocated in the constructor and released in the destructor.
       SpeechDelegate* mDelegate;
       // Taken with TimeStamp::Now() when the callback is constructed.
       TimeStamp mStartingTime;
       // Offset (in the caller-supplied text) of the last word announced.
       uint32_t mCurrentIndex;
       // Escaped-text position -> caller-supplied-text position.
       nsTArray<size_t> mOffsets;
     };
     62 
     // NSSpeechSynthesizerDelegate that forwards synthesizer notifications to a
     // SpeechTaskCallback. mCallback is a plain C++ pointer that this object
     // neither retains nor releases; the callback detaches the delegate from
     // the synthesizer in its destructor.
     @interface SpeechDelegate : NSObject <NSSpeechSynthesizerDelegate> {
      @private
       SpeechTaskCallback* mCallback;
     }

     // Returns the initialized delegate, or nil if the superclass initializer
     // fails.
     - (instancetype)initWithCallback:(SpeechTaskCallback*)aCallback;
     @end

     @implementation SpeechDelegate
     - (instancetype)initWithCallback:(SpeechTaskCallback*)aCallback {
       // Always continue with the object returned by the superclass initializer;
       // it is allowed to return nil or a different instance.
       self = [super init];
       if (self) {
         mCallback = aCallback;
       }
       return self;
     }

     // A word is about to be spoken; aRange is expressed in the string that was
     // handed to the synthesizer.
     - (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
                 willSpeakWord:(NSRange)aRange
                      ofString:(NSString*)aString {
       mCallback->OnWillSpeakWord(aRange.location, aRange.length);
     }

     // Speaking has stopped. aFinishedSpeaking is not inspected.
     - (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
             didFinishSpeaking:(BOOL)aFinishedSpeaking {
       mCallback->OnDidFinishSpeaking();
     }

     // The synthesizer reported an error; only the character index is forwarded.
     - (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
         didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex
                         ofString:(NSString*)aString
                          message:(NSString*)aMessage {
       mCallback->OnError(aCharacterIndex);
     }
     @end
     96 
     // XPCOM boilerplate for SpeechTaskCallback. mTask is the only member listed
     // for the cycle collector.
     NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask);

     NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback)
     NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback)

     // Interfaces answered by QueryInterface.
     NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback)
       NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
       NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
     NS_INTERFACE_MAP_END
    106 
    // Stores the task and synthesizer, clones the offset map, installs a fresh
    // SpeechDelegate on the synthesizer and records the start time.
    SpeechTaskCallback::SpeechTaskCallback(nsISpeechTask* aTask,
                                           NSSpeechSynthesizer* aSynth,
                                           const nsTArray<size_t>& aOffsets)
        : mTask(aTask),
          mSpeechSynthesizer(aSynth),
          mCurrentIndex(0),
          mOffsets(aOffsets.Clone()) {
      // The delegate keeps a raw pointer back to this object.
      SpeechDelegate* delegate = [[SpeechDelegate alloc] initWithCallback:this];
      mDelegate = delegate;
      [mSpeechSynthesizer setDelegate:delegate];

      // Reference point for the elapsed time reported with every event.
      mStartingTime = TimeStamp::Now();
    }
    118 
    // Detaches the delegate before releasing it, then gives up the synthesizer
    // reference that was handed to the constructor.
    SpeechTaskCallback::~SpeechTaskCallback() {
      // Detach first: the delegate holds a raw pointer to this object.
      mSpeechSynthesizer.delegate = nil;
      [mDelegate release];
      [mSpeechSynthesizer release];
    }
    124 
    // nsISpeechTaskCallback: cancel the utterance by stopping the synthesizer.
    // Returns NS_OK, or NS_ERROR_FAILURE when the Objective-C try block bails out.
    NS_IMETHODIMP
    SpeechTaskCallback::OnCancel() {
      NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

      // No event is dispatched to the task from here.
      [mSpeechSynthesizer stopSpeaking];

      return NS_OK;

      NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
    }
    134 
    // nsISpeechTaskCallback: pause the synthesizer immediately and report the
    // pause to the task. Returns NS_ERROR_FAILURE when the task is already gone.
    NS_IMETHODIMP
    SpeechTaskCallback::OnPause() {
      NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

      // The synthesizer is paused before the task check, so it is asked to stop
      // even when there is no task left to notify.
      [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];

      if (mTask) {
        mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
        return NS_OK;
      }

      // When pause() is called in a child process, that process may not have
      // received the end event from the chrome process yet.
      return NS_ERROR_FAILURE;

      NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
    }
    150 
    // nsISpeechTaskCallback: resume a paused synthesizer and report the resume to
    // the task. Returns NS_ERROR_FAILURE when the task is already gone.
    NS_IMETHODIMP
    SpeechTaskCallback::OnResume() {
      NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

      // The synthesizer is resumed before the task check.
      [mSpeechSynthesizer continueSpeaking];

      if (mTask) {
        mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
        return NS_OK;
      }

      // When resume() is called in a child process, that process may not have
      // received the end event from the chrome process yet.
      return NS_ERROR_FAILURE;

      NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
    }
    166 
    // nsISpeechTaskCallback: apply a new volume to the running synthesizer.
    // Any error reported by the property setter is ignored.
    NS_IMETHODIMP
    SpeechTaskCallback::OnVolumeChanged(float aVolume) {
      NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

      NSNumber* volume = [NSNumber numberWithFloat:aVolume];
      [mSpeechSynthesizer setObject:volume
                        forProperty:NSSpeechVolumeProperty
                              error:nil];

      return NS_OK;

      NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
    }
    178 
    // Seconds elapsed since the callback was constructed, narrowed to float.
    float SpeechTaskCallback::GetTimeDurationFromStart() {
      return (TimeStamp::Now() - mStartingTime).ToSeconds();
    }
    183 
    // Called when the synthesizer is about to speak a word. aIndex is a position
    // in the escaped text; it is translated through mOffsets before a "word"
    // boundary event is dispatched to the task.
    void SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex, uint32_t aLength) {
      // An index outside the map leaves the last known position untouched.
      if (aIndex < mOffsets.Length()) {
        mCurrentIndex = mOffsets[aIndex];
      }

      if (mTask) {
        mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(),
                                mCurrentIndex, aLength, 1);
      }
    }
    192 
    // Called when the synthesizer reports an error at aIndex, a position in the
    // escaped text that was handed to it. The index is translated through
    // mOffsets so the task receives a position in the caller-supplied text, the
    // same coordinate space OnWillSpeakWord() reports. Does nothing once the task
    // has been cleared.
    void SpeechTaskCallback::OnError(uint32_t aIndex) {
      if (!mTask) {
        return;
      }

      // An index outside the map cannot be translated; fall back to the last
      // known position, as OnWillSpeakWord() does.
      const uint32_t charIndex =
          aIndex < mOffsets.Length() ? mOffsets[aIndex] : mCurrentIndex;
      mTask->DispatchError(GetTimeDurationFromStart(), charIndex);
    }
    199 
    // Called when the synthesizer stops speaking. Dispatches the end event to the
    // task (if there still is one), then detaches the delegate and drops the
    // task so no further events are sent for this utterance.
    void SpeechTaskCallback::OnDidFinishSpeaking() {
      // mTask is a cycle-collected member and may already have been cleared;
      // every other notification guards against that, so do the same here.
      if (mTask) {
        mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
      }

      // no longer needed
      [mSpeechSynthesizer setDelegate:nil];
      mTask = nullptr;
    }
    206 
    207 namespace mozilla {
    208 namespace dom {
    209 
    // Plain description of one system voice, collected on the speech worker
    // thread and consumed when the voice is registered.
    struct OSXVoice {
      OSXVoice() = default;

      // "urn:moz-tts:osx:" followed by the system voice identifier.
      nsString mUri;
      // Voice name as reported by the system.
      nsString mName;
      // Locale identifier with '_' replaced by '-'.
      nsString mLocale;
      // True for the voice the system reports as its default.
      bool mIsDefault = false;
    };
    218 
    // Runnable that registers a list of voices with the synth voice registry.
    class RegisterVoicesRunnable final : public Runnable {
     public:
      // aList is stored by reference, not copied.
      RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService,
                             nsTArray<OSXVoice>& aList)
          : Runnable("RegisterVoicesRunnable"),
            mSpeechService(aSpeechService),
            mVoices(aList) {}

      NS_IMETHOD Run() override;

     private:
      ~RegisterVoicesRunnable() override = default;

      // This runnable is always dispatched synchronously (the dispatcher waits
      // for it to complete), so holding a strong reference to the service and
      // copying the voice list are unnecessary.
      OSXSpeechSynthesizerService* mSpeechService;
      nsTArray<OSXVoice>& mVoices;
    };
    236 
    // Adds every collected voice to the synth voice registry, marks the system
    // default voice as default, and finally notifies the registry that the voice
    // list changed. Returns the service lookup error when the registry is not
    // available; failures to add an individual voice only produce a warning.
    NS_IMETHODIMP
    RegisterVoicesRunnable::Run() {
      nsresult rv;
      nsCOMPtr<nsISynthVoiceRegistry> registry =
          do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv);
      if (!registry) {
        return rv;
      }

      // Iterate by const reference: an OSXVoice carries three strings, and
      // nothing here needs a private copy of them.
      for (const OSXVoice& voice : mVoices) {
        rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName,
                                voice.mLocale, true, false);
        if (NS_WARN_IF(NS_FAILED(rv))) {
          // Skip this voice but keep registering the others.
          continue;
        }

        if (voice.mIsDefault) {
          registry->SetDefaultVoice(voice.mUri, true);
        }
      }

      registry->NotifyVoicesChanged();

      return NS_OK;
    }
    262 
    // Runnable that enumerates the system voices and hands them to a
    // RegisterVoicesRunnable on the main thread.
    class EnumVoicesRunnable final : public Runnable {
     public:
      explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService)
          : Runnable("EnumVoicesRunnable"), mSpeechService(aSpeechService) {}

      NS_IMETHOD Run() override;

     private:
      ~EnumVoicesRunnable() override = default;

      // Strong reference: keeps the service alive while this runnable exists.
      RefPtr<OSXSpeechSynthesizerService> mSpeechService;
    };
    275 
    // Builds an OSXVoice for every voice NSSpeechSynthesizer reports, then
    // dispatches a RegisterVoicesRunnable to the main thread and waits for it to
    // complete. Returns NS_ERROR_FAILURE when the Objective-C try block bails out.
    NS_IMETHODIMP
    EnumVoicesRunnable::Run() {
      NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

      AutoTArray<OSXVoice, 64> collected;

      NSString* systemDefault = [NSSpeechSynthesizer defaultVoice];
      NSArray* systemVoices = [NSSpeechSynthesizer availableVoices];

      for (NSString* voiceId in systemVoices) {
        NSDictionary* attributes =
            [NSSpeechSynthesizer attributesForVoice:voiceId];

        OSXVoice entry;

        // URI: fixed scheme prefix followed by the voice identifier attribute.
        nsAutoString identifier;
        nsCocoaUtils::GetStringForNSString(attributes[NSVoiceIdentifier],
                                           identifier);
        entry.mUri.AssignLiteral("urn:moz-tts:osx:");
        entry.mUri.Append(identifier);

        nsCocoaUtils::GetStringForNSString(attributes[NSVoiceName], entry.mName);

        // The locale identifier uses '_' as separator; store it with '-'.
        nsCocoaUtils::GetStringForNSString(attributes[NSVoiceLocaleIdentifier],
                                           entry.mLocale);
        entry.mLocale.ReplaceChar('_', '-');

        if ([voiceId isEqualToString:systemDefault]) {
          entry.mIsDefault = true;
        }

        collected.AppendElement(entry);
      }

      // The registering runnable only borrows `collected`, so this call must not
      // return before that runnable has finished.
      RefPtr<RegisterVoicesRunnable> registerTask =
          new RegisterVoicesRunnable(mSpeechService, collected);
      NS_DispatchAndSpinEventLoopUntilComplete("EnumVoicesRunnable"_ns,
                                               GetMainThreadSerialEventTarget(),
                                               registerTask.forget());

      return NS_OK;

      NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
    }
    321 
    // Storage for the process-wide service instance handed out by GetInstance().
    StaticRefPtr<OSXSpeechSynthesizerService>
        OSXSpeechSynthesizerService::sSingleton;

    // Reference counting for the service.
    NS_IMPL_ADDREF(OSXSpeechSynthesizerService)
    NS_IMPL_RELEASE(OSXSpeechSynthesizerService)

    // Interfaces answered by QueryInterface.
    NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService)
      NS_INTERFACE_MAP_ENTRY(nsISpeechService)
      NS_INTERFACE_MAP_ENTRY(nsIObserver)
      NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
    NS_INTERFACE_MAP_END
    333 
    // A new service starts out uninitialized; Init() sets the flag on success.
    OSXSpeechSynthesizerService::OSXSpeechSynthesizerService()
        : mInitialized(false) {
    }
    336 
    // Starts the "SpeechWorker" thread and queues the voice enumeration on it.
    // Returns false (leaving the service uninitialized) when the synth test pref
    // is set, when speech synthesis is disabled, when the thread cannot be
    // created, or when the enumeration cannot be queued.
    bool OSXSpeechSynthesizerService::Init() {
      if (Preferences::GetBool("media.webspeech.synth.test") ||
          !StaticPrefs::media_webspeech_synth_enabled()) {
        // When test is enabled, we shouldn't add OS backend (Bug 1160844)
        return false;
      }

      nsCOMPtr<nsIThread> thread;
      if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) {
        return false;
      }

      // Get all the voices and register in the SynthVoiceRegistry
      nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this);
      nsresult rv = thread->Dispatch(runnable, NS_DISPATCH_NORMAL);
      if (NS_WARN_IF(NS_FAILED(rv))) {
        // Without the enumeration no voice would ever be registered, so do not
        // report the service as initialized.
        return false;
      }

      mInitialized = true;
      return true;
    }
    356 
    // nsISpeechService: speaks aText with the voice named by aUri.
    //
    // Creates a synthesizer for the voice, applies rate, volume and pitch,
    // escapes the synthesizer's command delimiters in aText, hooks a
    // SpeechTaskCallback up to aTask and starts speaking.
    //
    // Returns the error from aTask->Setup() if it fails, NS_ERROR_FAILURE if the
    // synthesizer refuses to start or the Objective-C try block bails out, and
    // NS_OK after DispatchStart() has been sent to the task.
    NS_IMETHODIMP
    OSXSpeechSynthesizerService::Speak(const nsAString& aText,
                                       const nsAString& aUri, float aVolume,
                                       float aRate, float aPitch,
                                       nsISpeechTask* aTask) {
      NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

      MOZ_ASSERT(StringBeginsWith(aUri, u"urn:moz-tts:osx:"_ns),
                 "OSXSpeechSynthesizerService doesn't allow this voice URI");

      // Ownership of this reference passes to the SpeechTaskCallback created
      // below, which releases it in its destructor.
      NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init];
      // strlen("urn:moz-tts:osx:") == 16
      NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16));
      [synth setVoice:identifier];

      // default rate is 180-220
      [synth setObject:[NSNumber numberWithInt:aRate * 200]
           forProperty:NSSpeechRateProperty
                 error:nil];
      // volume allows 0.0-1.0
      [synth setObject:[NSNumber numberWithFloat:aVolume]
           forProperty:NSSpeechVolumeProperty
                 error:nil];
      // Use default pitch value to calculate this: the base pitch is scaled by
      // (aPitch / 2 + 0.5), so aPitch == 1 leaves it unchanged.
      NSNumber* defaultPitch = [synth objectForProperty:NSSpeechPitchBaseProperty
                                                  error:nil];
      if (defaultPitch) {
        int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5);
        [synth setObject:[NSNumber numberWithInt:newPitch]
             forProperty:NSSpeechPitchBaseProperty
                   error:nil];
      }

      nsAutoString escapedText;
      // We need to map the offsets from the given text to the escaped text.
      // The index of the offsets array is the position in the escaped text,
      // the element value is the position in the user-supplied text.
      const size_t textLength = aText.Length();
      nsTArray<size_t> offsets;
      offsets.SetCapacity(textLength);

      // This loop looks for occurrences of "[[" or "]]", escapes them, and
      // populates the offsets array to supply a map to the original offsets.
      for (size_t i = 0; i < textLength; i++) {
        const bool isDelimiter =
            i + 1 < textLength && ((aText[i] == ']' && aText[i + 1] == ']') ||
                                   (aText[i] == '[' && aText[i + 1] == '['));
        if (!isDelimiter) {
          escapedText.Append(aText[i]);
          offsets.AppendElement(i);
          continue;
        }

        // Every position of the inserted escape sequences gets an explicit
        // value (the offset of the neighbouring user character), so a lookup
        // that lands inside them still yields a position in the user's text
        // rather than an unspecified value.
        escapedText.AppendLiteral(DLIM_ESCAPE_START);
        for (size_t n = 0; n < strlen(DLIM_ESCAPE_START); n++) {
          offsets.AppendElement(i);
        }

        escapedText.Append(aText[i]);
        offsets.AppendElement(i);
        escapedText.Append(aText[++i]);
        offsets.AppendElement(i);

        escapedText.AppendLiteral(DLIM_ESCAPE_END);
        for (size_t n = 0; n < strlen(DLIM_ESCAPE_END); n++) {
          offsets.AppendElement(i);
        }
      }

      RefPtr<SpeechTaskCallback> callback =
          new SpeechTaskCallback(aTask, synth, offsets);
      nsresult rv = aTask->Setup(callback);
      NS_ENSURE_SUCCESS(rv, rv);

      NSString* text = nsCocoaUtils::ToNSString(escapedText);
      BOOL success = [synth startSpeakingString:text];
      NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);

      aTask->DispatchStart();
      return NS_OK;

      NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
    }
    430 
    // nsIObserver: no topic is acted upon; every notification is accepted and
    // ignored.
    NS_IMETHODIMP
    OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
                                         const char16_t* aData) {
      return NS_OK;
    }
    436 
    // Returns the process-wide service, creating and initializing it on first
    // use. Main thread only. Returns nullptr outside the default process type and
    // whenever Init() fails.
    OSXSpeechSynthesizerService* OSXSpeechSynthesizerService::GetInstance() {
      MOZ_ASSERT(NS_IsMainThread());

      if (XRE_GetProcessType() != GeckoProcessType_Default) {
        return nullptr;
      }

      if (sSingleton) {
        return sSingleton;
      }

      RefPtr<OSXSpeechSynthesizerService> candidate =
          new OSXSpeechSynthesizerService();
      if (!candidate->Init()) {
        // Nothing is cached, so a later call tries again.
        return nullptr;
      }

      sSingleton = candidate;
      // Have the singleton reference dropped at shutdown.
      ClearOnShutdown(&sSingleton);
      return sSingleton;
    }
    453 
    // Same as GetInstance(), but hands the caller its own reference (or null).
    already_AddRefed<OSXSpeechSynthesizerService>
    OSXSpeechSynthesizerService::GetInstanceForService() {
      RefPtr<OSXSpeechSynthesizerService> owned(GetInstance());
      return owned.forget();
    }
    459 
    460 }  // namespace dom
    461 }  // namespace mozilla