// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

using Microsoft.MixedReality.Toolkit.Input;
using Microsoft.MixedReality.Toolkit.Utilities;
using System.Threading.Tasks;
using Unity.Profiling;
using UnityEngine;

#if UNITY_STANDALONE_WIN || UNITY_WSA || UNITY_EDITOR_WIN
using System.Text;
using UnityEngine.Windows.Speech;
#endif // UNITY_STANDALONE_WIN || UNITY_WSA || UNITY_EDITOR_WIN

namespace Microsoft.MixedReality.Toolkit.Windows.Input
{
    /// <summary>
    /// Input data provider that drives dictation through Unity's <c>DictationRecognizer</c> and records
    /// the spoken audio with <c>UnityEngine.Microphone</c>. Recognition events are forwarded to the input system.
    /// </summary>
    /// <remarks>
    /// On platforms other than Windows standalone / UWP / Windows editor the recording methods complete
    /// immediately and <see cref="AudioClip"/> returns null.
    /// </remarks>
    [MixedRealityDataProvider(
        typeof(IMixedRealityInputSystem),
        SupportedPlatforms.WindowsStandalone | SupportedPlatforms.WindowsUniversal | SupportedPlatforms.WindowsEditor,
        "Windows Dictation Input")]
    [HelpURL("https://docs.microsoft.com/windows/mixed-reality/mrtk-unity/features/input/dictation")]
    public class WindowsDictationInputProvider :
        BaseInputDeviceManager,
        IMixedRealityDictationSystem,
        IMixedRealityCapabilityCheck
    {
        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="registrar">The <see cref="IMixedRealityServiceRegistrar"/> instance that loaded the data provider.</param>
        /// <param name="inputSystem">The <see cref="IMixedRealityInputSystem"/> instance that receives data from this provider.</param>
        /// <param name="name">Friendly name of the service.</param>
        /// <param name="priority">Service priority. Used to determine order of instantiation.</param>
        /// <param name="profile">The service's configuration profile.</param>
        [System.Obsolete("This constructor is obsolete (registrar parameter is no longer required) and will be removed in a future version of the Microsoft Mixed Reality Toolkit.")]
        public WindowsDictationInputProvider(
            IMixedRealityServiceRegistrar registrar,
            IMixedRealityInputSystem inputSystem,
            string name = null,
            uint priority = DefaultPriority,
            BaseMixedRealityProfile profile = null) : this(inputSystem, name, priority, profile)
        {
            Registrar = registrar;
        }

        /// <summary>
        /// Constructor.
        /// </summary>
        /// <param name="inputSystem">The <see cref="IMixedRealityInputSystem"/> instance that receives data from this provider.</param>
        /// <param name="name">Friendly name of the service.</param>
        /// <param name="priority">Service priority. Used to determine order of instantiation.</param>
        /// <param name="profile">The service's configuration profile.</param>
        public WindowsDictationInputProvider(
            IMixedRealityInputSystem inputSystem,
            string name = null,
            uint priority = DefaultPriority,
            BaseMixedRealityProfile profile = null) : base(inputSystem, name, priority, profile) { }

        /// <inheritdoc />
        public bool IsListening { get; private set; } = false;

        #region IMixedRealityCapabilityCheck Implementation

        /// <inheritdoc />
        public bool CheckCapability(MixedRealityCapability capability)
        {
            return (capability == MixedRealityCapability.VoiceDictation);
        }

        #endregion IMixedRealityCapabilityCheck Implementation

        /// <inheritdoc />
        public async void StartRecording(GameObject listener, float initialSilenceTimeout = 5, float autoSilenceTimeout = 20, int recordingTime = 10, string micDeviceName = "")
        {
            await StartRecordingAsync(listener, initialSilenceTimeout, autoSilenceTimeout, recordingTime, micDeviceName);
        }

        /// <inheritdoc />
        public async void StopRecording()
        {
            await StopRecordingAsync();
        }

        private static readonly ProfilerMarker StartRecordingAsyncPerfMarker = new ProfilerMarker("[MRTK] WindowsDictationInputProvider.StartRecordingAsync");

        /// <summary>
        /// Shuts down the phrase recognition system, starts the dictation recognizer and begins recording
        /// from the requested microphone.
        /// </summary>
        /// <param name="listener">Optional object pushed as modal input handler for the duration of the session.</param>
        /// <param name="initialSilenceTimeout">Value assigned to the recognizer's <c>InitialSilenceTimeoutSeconds</c>.</param>
        /// <param name="autoSilenceTimeout">Value assigned to the recognizer's <c>AutoSilenceTimeoutSeconds</c>.</param>
        /// <param name="recordingTime">Length, in seconds, passed to <c>Microphone.Start</c>.</param>
        /// <param name="micDeviceName">Microphone device name passed to <c>Microphone</c>; the default is an empty string.</param>
        /// <remarks>
        /// Logs a warning and returns without side effects when a session is already active or transitioning,
        /// when no input system is available, when not in play mode, or when no recognizer could be created.
        /// </remarks>
        public async Task StartRecordingAsync(GameObject listener = null, float initialSilenceTimeout = 5f, float autoSilenceTimeout = 20f, int recordingTime = 10, string micDeviceName = "")
        {
            using (StartRecordingAsyncPerfMarker.Auto())
            {
#if UNITY_STANDALONE_WIN || UNITY_WSA || UNITY_EDITOR_WIN
                if (IsListening || isTransitioning || Service == null || !Application.isPlaying)
                {
                    Debug.LogWarning("Unable to start recording");
                    return;
                }

                if (dictationRecognizer == null && InputSystemProfile.SpeechCommandsProfile.SpeechRecognizerStartBehavior == AutoStartBehavior.ManualStart)
                {
                    InitializeDictationRecognizer();
                }

                // Initialization can fail (e.g. missing microphone permission) or may never have run.
                // Bail out before touching any state so a later attempt is still possible.
                if (dictationRecognizer == null)
                {
                    Debug.LogWarning("Unable to start recording: the dictation recognizer is not available");
                    return;
                }

                hasFailed = false;
                IsListening = true;
                isTransitioning = true;

                // Create the text buffer before the recognizer starts, so that result events raised
                // while this method is still awaiting never observe a null buffer.
                textSoFar = new StringBuilder();

                if (listener != null)
                {
                    hasListener = true;
                    Service.PushModalInputHandler(listener);
                }

                if (PhraseRecognitionSystem.Status == SpeechSystemStatus.Running)
                {
                    PhraseRecognitionSystem.Shutdown();
                }

                await waitUntilPhraseRecognitionSystemHasStopped;
                Debug.Assert(PhraseRecognitionSystem.Status == SpeechSystemStatus.Stopped);

                // Query the maximum frequency of the requested microphone.
                int minSamplingRate; // Not used.
                deviceName = micDeviceName;
                Microphone.GetDeviceCaps(deviceName, out minSamplingRate, out samplingRate);

                dictationRecognizer.InitialSilenceTimeoutSeconds = initialSilenceTimeout;
                dictationRecognizer.AutoSilenceTimeoutSeconds = autoSilenceTimeout;
                dictationRecognizer.Start();

                await waitUntilDictationRecognizerHasStarted;

                // The recognizer is null here only if the provider was disabled while starting.
                if (dictationRecognizer == null || dictationRecognizer.Status == SpeechSystemStatus.Failed)
                {
                    // Roll back the session state; otherwise IsListening/isTransitioning stay set
                    // and every subsequent start or stop request is rejected.
                    IsListening = false;
                    isTransitioning = false;

                    if (hasListener)
                    {
                        Service?.PopModalInputHandler();
                        hasListener = false;
                    }

                    Service?.RaiseDictationError(inputSource, "Dictation recognizer failed to start!");
                    return;
                }

                Debug.Assert(dictationRecognizer.Status == SpeechSystemStatus.Running);

                // Start recording from the microphone.
                dictationAudioClip = Microphone.Start(deviceName, false, recordingTime, samplingRate);
                isTransitioning = false;
#else
                await Task.CompletedTask;
#endif
            }
        }

        private static readonly ProfilerMarker StopRecordingAsyncPerfMarker = new ProfilerMarker("[MRTK] WindowsDictationInputProvider.StopRecordingAsync");

        /// <summary>
        /// Ends the microphone recording, stops the dictation recognizer and restarts the phrase recognition system.
        /// </summary>
        /// <returns>
        /// The audio clip of the session, or null when no session is active, a transition is in progress,
        /// the application is not playing, or the platform is unsupported.
        /// </returns>
        public async Task<AudioClip> StopRecordingAsync()
        {
            using (StopRecordingAsyncPerfMarker.Auto())
            {
#if UNITY_STANDALONE_WIN || UNITY_WSA || UNITY_EDITOR_WIN
                if (!IsListening || isTransitioning || !Application.isPlaying)
                {
                    Debug.LogWarning("Unable to stop recording");
                    return null;
                }

                IsListening = false;
                isTransitioning = true;

                if (hasListener)
                {
                    Service?.PopModalInputHandler();
                    hasListener = false;
                }

                Microphone.End(deviceName);

                if (dictationRecognizer != null && dictationRecognizer.Status == SpeechSystemStatus.Running)
                {
                    dictationRecognizer.Stop();
                }

                await waitUntilDictationRecognizerHasStopped;
                Debug.Assert(dictationRecognizer == null || dictationRecognizer.Status == SpeechSystemStatus.Stopped);

                PhraseRecognitionSystem.Restart();

                await waitUntilPhraseRecognitionSystemHasStarted;
                Debug.Assert(PhraseRecognitionSystem.Status == SpeechSystemStatus.Running);

                isTransitioning = false;
                return dictationAudioClip;
#else
                await Task.CompletedTask;
                return null;
#endif
            }
        }

#if UNITY_STANDALONE_WIN || UNITY_WSA || UNITY_EDITOR_WIN
        // True once a recognizer failure has been reported from Update, so it is raised only once per session.
        private bool hasFailed;

        // True while a listener pushed by StartRecordingAsync is on the modal input handler stack.
        private bool hasListener;

        // True while StartRecordingAsync / StopRecordingAsync is between its first and last state change.
        private bool isTransitioning;

        private IMixedRealityInputSource inputSource = null;

        /// <summary>
        /// Caches the text currently being displayed in dictation display text.
        /// </summary>
        private StringBuilder textSoFar;

        private string deviceName = string.Empty;

        /// <summary>
        /// The device audio sampling rate.
        /// </summary>
        /// <remarks>Set by UnityEngine.Microphone.</remarks>
        private int samplingRate;

        /// <summary>
        /// String result of the current dictation.
        /// </summary>
        private string dictationResult;

        /// <summary>
        /// Audio clip of the last dictation session.
        /// </summary>
        private AudioClip dictationAudioClip;

        private static DictationRecognizer dictationRecognizer;

        private readonly WaitUntil waitUntilPhraseRecognitionSystemHasStarted = new WaitUntil(() => PhraseRecognitionSystem.Status != SpeechSystemStatus.Stopped);
        private readonly WaitUntil waitUntilPhraseRecognitionSystemHasStopped = new WaitUntil(() => PhraseRecognitionSystem.Status != SpeechSystemStatus.Running);

        // The recognizer reference is cleared when the provider is disabled; treat that as "done waiting"
        // so a pending transition cannot throw from inside the predicate.
        private readonly WaitUntil waitUntilDictationRecognizerHasStarted = new WaitUntil(() => dictationRecognizer == null || dictationRecognizer.Status != SpeechSystemStatus.Stopped);
        private readonly WaitUntil waitUntilDictationRecognizerHasStopped = new WaitUntil(() => dictationRecognizer == null || dictationRecognizer.Status != SpeechSystemStatus.Running);

#if UNITY_EDITOR && UNITY_WSA
        /// <inheritdoc />
        public override void Initialize()
        {
            Toolkit.Utilities.Editor.UWPCapabilityUtility.RequireCapability(
                UnityEditor.PlayerSettings.WSACapability.InternetClient,
                this.GetType());

            Toolkit.Utilities.Editor.UWPCapabilityUtility.RequireCapability(
                UnityEditor.PlayerSettings.WSACapability.Microphone,
                this.GetType());
        }
#endif

        /// <inheritdoc />
        public override void Enable()
        {
            if (!Application.isPlaying) { return; }

            if (Service == null)
            {
                Debug.LogError($"Unable to start {Name}. An Input System is required for this feature.");
                return;
            }

            inputSource = Service.RequestNewGenericInputSource(Name, sourceType: InputSourceType.Voice);
            dictationResult = string.Empty;

            if (dictationRecognizer == null && InputSystemProfile.SpeechCommandsProfile.SpeechRecognizerStartBehavior == AutoStartBehavior.AutoStart)
            {
                InitializeDictationRecognizer();
            }

            // Call the base here to ensure any early exits do not
            // artificially declare the service as enabled.
            base.Enable();
        }

        /// <summary>
        /// Creates the shared dictation recognizer, if none exists, and subscribes this provider to its events.
        /// On failure the provider is disabled and the recognizer reference is cleared.
        /// </summary>
        private void InitializeDictationRecognizer()
        {
            try
            {
                if (dictationRecognizer == null)
                {
                    dictationRecognizer = new DictationRecognizer();

                    dictationRecognizer.DictationHypothesis += DictationRecognizer_DictationHypothesis;
                    dictationRecognizer.DictationResult += DictationRecognizer_DictationResult;
                    dictationRecognizer.DictationComplete += DictationRecognizer_DictationComplete;
                    dictationRecognizer.DictationError += DictationRecognizer_DictationError;
                }
            }
            catch (System.Exception ex)
            {
                // Don't log if the application is currently running in batch mode (for example, when running tests). This failure is expected in this case.
                if (!Application.isBatchMode)
                {
                    Debug.LogWarning($"Failed to start dictation recognizer. Are microphone permissions granted? Exception: {ex}");
                }
                Disable();
                dictationRecognizer = null;
            }
        }

        private static readonly ProfilerMarker UpdatePerfMarker = new ProfilerMarker("[MRTK] WindowsDictationInputProvider.Update");

        /// <inheritdoc />
        public override void Update()
        {
            using (UpdatePerfMarker.Auto())
            {
                if (!Application.isPlaying || Service == null || dictationRecognizer == null) { return; }

                base.Update();

                if (!isTransitioning && IsListening && !Microphone.IsRecording(deviceName) && dictationRecognizer.Status == SpeechSystemStatus.Running)
                {
                    // If the microphone stops as a result of timing out, make sure to manually stop the dictation recognizer.
                    StopRecording();
                }

                if (!hasFailed && dictationRecognizer.Status == SpeechSystemStatus.Failed)
                {
                    hasFailed = true;
                    Service.RaiseDictationError(inputSource, "Dictation recognizer has failed!");
                }
            }
        }

        /// <inheritdoc />
        public override async void Disable()
        {
            if (Application.isPlaying && dictationRecognizer != null)
            {
                if (!isTransitioning && IsListening) { await StopRecordingAsync(); }

                // Take ownership of the shared instance and clear the static reference, so that a later
                // Enable() (or another provider instance) creates a fresh recognizer instead of reusing
                // a disposed one that no longer has any handlers attached.
                DictationRecognizer recognizer = dictationRecognizer;
                dictationRecognizer = null;

                if (recognizer != null)
                {
                    recognizer.DictationHypothesis -= DictationRecognizer_DictationHypothesis;
                    recognizer.DictationResult -= DictationRecognizer_DictationResult;
                    recognizer.DictationComplete -= DictationRecognizer_DictationComplete;
                    recognizer.DictationError -= DictationRecognizer_DictationError;

                    recognizer.Dispose();
                }
            }

            base.Disable();
        }

        /// <inheritdoc />
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                dictationRecognizer?.Dispose();

                // Do not keep a disposed recognizer reachable through the static field.
                dictationRecognizer = null;
            }
        }

        private static readonly ProfilerMarker DictationHypothesisPerfMarker = new ProfilerMarker("[MRTK] WindowsDictationInputProvider.DictationRecognizer_DictationHypothesis");

        /// <summary>
        /// This event is fired while the user is talking. As the recognizer listens, it provides text of what it's heard so far.
        /// </summary>
        /// <param name="text">The currently hypothesized recognition.</param>
        private void DictationRecognizer_DictationHypothesis(string text)
        {
            using (DictationHypothesisPerfMarker.Auto())
            {
                // We don't want to append to textSoFar yet, because the hypothesis may have changed on the next event.
                dictationResult = $"{textSoFar} {text}...";

                Service?.RaiseDictationHypothesis(inputSource, dictationResult);
            }
        }

        private static readonly ProfilerMarker DictationResultPerfMarker = new ProfilerMarker("[MRTK] WindowsDictationInputProvider.DictationRecognizer_DictationResult");

        /// <summary>
        /// This event is fired after the user pauses, typically at the end of a sentence. The full recognized string is returned here.
        /// </summary>
        /// <param name="text">The text that was heard by the recognizer.</param>
        /// <param name="confidence">A representation of how confident (rejected, low, medium, high) the recognizer is of this recognition.</param>
        private void DictationRecognizer_DictationResult(string text, ConfidenceLevel confidence)
        {
            using (DictationResultPerfMarker.Auto())
            {
                // The buffer is cleared by the complete/error handlers; a result delivered after either
                // of them must not throw.
                if (textSoFar == null)
                {
                    textSoFar = new StringBuilder();
                }

                textSoFar.Append($"{text}. ");

                dictationResult = textSoFar.ToString();

                Service?.RaiseDictationResult(inputSource, dictationResult);
            }
        }

        private static readonly ProfilerMarker DictationCompletePerfMarker = new ProfilerMarker("[MRTK] WindowsDictationInputProvider.DictationRecognizer_DictationComplete");

        /// <summary>
        /// This event is fired when the recognizer stops, whether from StopRecording() being called, a timeout occurring, or some other error.
        /// Typically, this will simply return "Complete". In this case, we check to see if the recognizer timed out.
        /// </summary>
        /// <param name="cause">An enumerated reason for the session completing.</param>
        private void DictationRecognizer_DictationComplete(DictationCompletionCause cause)
        {
            using (DictationCompletePerfMarker.Auto())
            {
                // If Timeout occurs, the user has been silent for too long.
                if (cause == DictationCompletionCause.TimeoutExceeded)
                {
                    Microphone.End(deviceName);

                    dictationResult = "Dictation has timed out. Please try again.";
                }

                Service?.RaiseDictationComplete(inputSource, dictationResult, dictationAudioClip);
                textSoFar = null;
                dictationResult = string.Empty;
            }
        }

        private static readonly ProfilerMarker DictationErrorPerfMarker = new ProfilerMarker("[MRTK] WindowsDictationInputProvider.DictationRecognizer_DictationError");

        /// <summary>
        /// This event is fired when an error occurs.
        /// </summary>
        /// <param name="error">The string representation of the error reason.</param>
        /// <param name="hresult">The int representation of the hresult.</param>
        private void DictationRecognizer_DictationError(string error, int hresult)
        {
            using (DictationErrorPerfMarker.Auto())
            {
                dictationResult = $"{error}\nHRESULT: {hresult}";

                Service?.RaiseDictationError(inputSource, dictationResult);
                textSoFar = null;
                dictationResult = string.Empty;
            }
        }
#endif // UNITY_STANDALONE_WIN || UNITY_WSA || UNITY_EDITOR_WIN

        /// <inheritdoc />
        public AudioClip AudioClip
        {
            get
            {
#if UNITY_STANDALONE_WIN || UNITY_WSA || UNITY_EDITOR_WIN
                return dictationAudioClip;
#else
                return null;
#endif
            }
        }
    }
}