Welcome!
This is the community forum for my apps Pythonista and Editorial.
For individual support questions, you can also send an email. If you have a very short question or just want to say hello — I'm @olemoritz on Twitter.
implementing live voice commands?
-
-
2nd part and really enough for today
# NOTE(review): reformatted from the flattened forum paste into runnable form.
# First experiment: tap the microphone input and prepare a speech request.
AVAudioEngine = ObjCClass('AVAudioEngine').alloc().init()
AVAudioSession = ObjCClass('AVAudioSession')
AVAudioRecorder = ObjCClass('AVAudioRecorder')

shared_session = AVAudioSession.sharedInstance()
# FIX: the options argument is an NSUInteger bitmask, not a string;
# AVAudioSessionCategoryOptionDuckOthers has the value 0x2.
category_set = shared_session.setCategory_mode_options_error_(
    ns('AVAudioSessionCategoryRecord'),
    ns('AVAudioSessionModeMeasurement'),
    0x2,  # duckOthers
    None)
setActiveOptions = 0  # 0x1 would be notifyOthersOnDeactivation
shared_session.setActive_withOptions_error_(True, setActiveOptions, None)

inputNode = AVAudioEngine.inputNode()
# Configure the microphone input.
recordingFormat = inputNode.outputFormatForBus_(0)


def handler(_cmd, obj1_ptr, obj2_ptr):
    # obj1_ptr: AVAudioPCMBuffer — audio captured from the output of an AVAudioNode.
    # obj2_ptr: AVAudioTime — the time the buffer was captured.
    if obj1_ptr:
        obj1 = ObjCInstance(obj1_ptr)
        # self.recognitionRequest?.append(buffer)


handler_block = ObjCBlock(handler, restype=None,
                          argtypes=[c_void_p, c_void_p, c_void_p])
inputNode.installTapOnBus_bufferSize_format_block_(0, 1024, recordingFormat,
                                                   handler_block)

AVAudioEngine.prepare()
err_ptr = c_void_p()
AVAudioEngine.startAndReturnError_(byref(err_ptr))
if err_ptr:
    # FIX: was ObjCInstance(err) — 'err' was undefined here (NameError);
    # the NSError pointer to wrap is err_ptr.
    err = ObjCInstance(err_ptr)
    print(err)

# Create and configure the speech recognition request.
# FIX: added .init() — alloc() alone returns an uninitialized object.
recognitionRequest = ObjCClass('SFSpeechAudioBufferRecognitionRequest').alloc().init()
print(dir(recognitionRequest))
recognitionRequest.setShouldReportPartialResults_(True)
And
Fatal Python error: Bus error
Thread 0x000000016fb67000 (most recent call first):
No error if I comment the line
AVAudioEngine.startAndReturnError_(byref(err_ptr))
-
This post is deleted! -
You had an error in one of your constants: the audio session options value should have been 0x2 for the duckOthers option — it is a bitmask, not a string.
Here is a minor mod — I verified the handler gets called, but I don't have speech recognition available to test against.
https://gist.github.com/ad17f52c8944993092f537d963ce1963 -
@JonB Thanks, I'll try to continue today...
-
@JonB Really need help now:
- segmentation fault if no underscore before appendAudioPCMBuffer_(obj1)
- segmentation fault in last line not commented
# NOTE(review): reformatted from the flattened forum paste into runnable form.
from objc_util import *

AVAudioEngine = ObjCClass('AVAudioEngine').alloc().init()
AVAudioSession = ObjCClass('AVAudioSession')
AVAudioRecorder = ObjCClass('AVAudioRecorder')

shared_session = AVAudioSession.sharedInstance()
category_set = shared_session.setCategory_withOptions_error_(
    ns('AVAudioSessionCategoryRecord'),
    0x2,  # duckOthers (bitmask)
    None)
shared_session.setMode_error_(ns('AVAudioSessionModeMeasurement'), None)
setActiveOptions = 0  # 0x1 would be notifyOthersOnDeactivation
shared_session.setActive_withOptions_error_(True, setActiveOptions, None)

inputNode = AVAudioEngine.inputNode()
# Configure the microphone input.
recordingFormat = inputNode.outputFormatForBus_(0)

# Create and configure the speech recognition request.
# FIX: added .init() — alloc() without init() leaves the object
# uninitialized and was the likely cause of the reported segfault.
recognitionRequest = ObjCClass('SFSpeechAudioBufferRecognitionRequest').alloc().init()
print(dir(recognitionRequest))
recognitionRequest.setShouldReportPartialResults_(True)
retain_global(recognitionRequest)


@on_main_thread
def handler_buffer(_cmd, obj1_ptr, obj2_ptr):
    print('handler_buffer')
    # obj1_ptr: AVAudioPCMBuffer — audio captured from the AVAudioNode output.
    # obj2_ptr: AVAudioTime — the time the buffer was captured.
    if obj1_ptr:
        obj1 = ObjCInstance(obj1_ptr)
        # print(str(obj1._get_objc_classname()))  # AVAudioPCMBuffer
        # print(str(obj1.frameLength()))          # 4410
        # Leading underscore bypasses objc_util attribute handling;
        # a plain appendAudioPCMBuffer_ call segfaulted.
        recognitionRequest._appendAudioPCMBuffer_(obj1)


handler_block_buffer = ObjCBlock(handler_buffer, restype=None,
                                 argtypes=[c_void_p, c_void_p, c_void_p])
inputNode.installTapOnBus_bufferSize_format_block_(0, 1024, recordingFormat,
                                                   handler_block_buffer)

AVAudioEngine.prepare()
err_ptr = c_void_p()
AVAudioEngine.startAndReturnError_(byref(err_ptr))
if err_ptr:
    # FIX: was ObjCInstance(err) — 'err' was undefined here (NameError);
    # the NSError pointer to wrap is err_ptr.
    err = ObjCInstance(err_ptr)
    print(err)


@on_main_thread
def handler_recognize(_cmd, obj1_ptr, obj2_ptr):
    print('handler_recognize')
    # obj1_ptr: SFSpeechRecognitionResult — partial or final transcriptions
    #           of the audio content.
    # obj2_ptr: NSError — nil if speech recognition was successful.
    if obj1_ptr:
        obj1 = ObjCInstance(obj1_ptr)
        # print(str(obj1))


handler_block_recognize = ObjCBlock(handler_recognize, restype=None,
                                    argtypes=[c_void_p, c_void_p, c_void_p])
SFSpeechRecognizer = ObjCClass('SFSpeechRecognizer').alloc().init()
recognitionTask = SFSpeechRecognizer.recognitionTaskWithRequest_resultHandler_(
    recognitionRequest, handler_block_recognize)
-
recognitionRequest = ObjCClass('SFSpeechAudioBufferRecognitionRequest').alloc()
Missing .init()?
By the way, you will want AVAudioEngine.stop() handy.
For instance you might want to create a ui.View with a will_close, so that when you are experimenting, you can just close the view to kill the engine. Anyway you will eventually need to show the recognized words. -
@JonB I know for the stop, ui.view, print recognized etc... but I go forward step by step...
I'll try the .init().
Thanks to follow this project, hoping I don't annoy you too much with my problems...
Perhaps, it could be better that I don't (try to) help other people with topics where I'm not a big specialist 😢 -
@JonB ok with the init(),Thanks(almost for @daltonb 😔)
And 🍾 with this imperfect script but a good start for a future app...
See all attributes of SFSpeechRecognitionResult.from objc_util import * import ui import datetime AVAudioEngine = ObjCClass('AVAudioEngine').alloc().init() AVAudioSession = ObjCClass('AVAudioSession') AVAudioRecorder = ObjCClass('AVAudioRecorder') shared_session = AVAudioSession.sharedInstance() category_set= shared_session.setCategory_withOptions_error_( ns('AVAudioSessionCategoryRecord'), 0x2, #duckothers None) shared_session.setMode_error_(ns('AVAudioSessionModeMeasurement'),None) setActiveOptions = 0# notifyOthersOnDeactivation shared_session.setActive_withOptions_error_(True,setActiveOptions,None) inputNode = AVAudioEngine.inputNode() # Configure the microphone input. recordingFormat = inputNode.outputFormatForBus_(0) # Create and configure the speech recognition request. recognitionRequest = ObjCClass('SFSpeechAudioBufferRecognitionRequest').alloc().init() recognitionRequest.setShouldReportPartialResults_(True) retain_global(recognitionRequest) @on_main_thread def handler_buffer(_cmd,obj1_ptr,obj2_ptr): #print('handler_buffer',datetime.datetime.now()) # param1 = AVAudioPCMBuffer # The buffer parameter is a buffer of audio captured # from the output of an AVAudioNode. 
# param2 = AVAudioTime # The when parameter is the time the buffer was captured if obj1_ptr: obj1 = ObjCInstance(obj1_ptr) #print(str(obj1._get_objc_classname())) # AVAudioPCMBuffer #print(str(obj1.frameLength())) # 4410 # segmentation in next line if no "_" before appendAudioPCMBuffer recognitionRequest._appendAudioPCMBuffer_(obj1) handler_block_buffer = ObjCBlock(handler_buffer, restype=None, argtypes=[c_void_p, c_void_p, c_void_p]) inputNode.installTapOnBus_bufferSize_format_block_(0,1024,recordingFormat, handler_block_buffer) AVAudioEngine.prepare() err_ptr = c_void_p() AVAudioEngine.startAndReturnError_(byref(err_ptr)) if err_ptr: err = ObjCInstance(err) print(err) #@on_main_thread def handler_recognize(_cmd,obj1_ptr,obj2_ptr): #print('handler_recognize') # param1 = result # The object containing the partial/final transcriptions # of the audio content. # param2 = error # An error object if a problem occurred. # This parameter is nil if speech recognition was successful. if obj1_ptr: obj1 = ObjCInstance(obj1_ptr) # obj1 is a SFSpeechRecognitionResult print(obj1.bestTranscription().formattedString()) handler_block_recognize = ObjCBlock(handler_recognize, restype=None, argtypes=[c_void_p, c_void_p, c_void_p]) SFSpeechRecognizer = ObjCClass('SFSpeechRecognizer').alloc().init() recognitionTask = SFSpeechRecognizer.recognitionTaskWithRequest_resultHandler_(recognitionRequest, handler_block_recognize) mv = ui.View() b = ui.ButtonItem() b.title = 'stop' def b_stop(sender): AVAudioEngine.stop() recognitionRequest.endAudio() b.action = b_stop mv.right_button_items = (b,) mv.present('sheet')
-
<SFTranscription: 0x281a70960>, formattedString=Okay, segments=( "<SFTranscriptionSegment: 0x283fb3900>, substringRange={0, 4}, timestamp=0, duration=2.94, confidence=0, substring=Okay, alternativeSubstrings=(\n), phoneSequence=, ipaPhoneSequence=" )
-
Cool!
So, does it give you a transcript object for each word? Or one for a whole phrase, etc?
-
@JonB Output when I say, with my French accent, "hello Jon B how are you"
Hello Hello John Hello John B Hello John B Hello John be Hello John be Hall Hello John B Hall Hello John B Hall how Hello John B Hall how are you Hello John big hole how are you Hello John big hole how are you
Edit: Oops, I just reread this post. I swear I said "hello Jon B how are you", but I agree that my accent must be very French — sorry.
-
I'm sure I have to stop other objects besides the engine, because if I rerun, the script crashes with a bus error.
Added at stop button action
recognitionRequest.endAudio()
-
@cvp, muhaha. You and your cheeky French accent! Now I have to check what happens with a Finnish accent. :-)
-
Awesome! @cvp your code-in-progress was really helpful in getting my pattern recognition engine going.. starting to get the hang of this objc_util thing. Looks like we finished around the same time :) Here's my version which is a pretty item-for-item transcription of https://developer.apple.com/documentation/speech/recognizing_speech_in_live_audio?language=objc. Instantiate
Recognizer()
and then you can call start()
and stop()
for live updates. It's only safe to instantiate once currently. The line print(bestTranscription.formattedString())
is where event handling of the recognition updates should occur.from objc_util import * NSLocale = ObjCClass('NSLocale') SFSpeechRecognizer = ObjCClass('SFSpeechRecognizer') AVAudioEngine = ObjCClass('AVAudioEngine') AVAudioSession = ObjCClass('AVAudioSession') SFSpeechAudioBufferRecognitionRequest = ObjCClass('SFSpeechAudioBufferRecognitionRequest') class Recognizer: def __init__(self): locale = NSLocale.alloc().initWithLocaleIdentifier(ns("en-US")) self.speech_recognizer = SFSpeechRecognizer.alloc().initWithLocale(locale) self.audio_engine = AVAudioEngine.new() self.input_node = self.audio_engine.inputNode() self.recognition_request = None self.recognition_task = None def recognitionTaskWithRequest_resultHandler(block_ptr, result_ptr, error_ptr): # TODO: investigate https://forum.omz-software.com/topic/5232/traceback-using-gestures-module/21 is_final = False if not result_ptr is None: result = ObjCInstance(result_ptr) bestTranscription = result.bestTranscription() print(bestTranscription.formattedString()) is_final = result.isFinal() if not error_ptr is None or is_final: if is_final: print("Speech recognition complete.") if error_ptr is not None: error = ObjCInstance(error_ptr) print("Error in recognition task:", error) self.audio_engine.stop() self.input_node.removeTapOnBus_(0) self.recognition_request = None self.recognition_task = None self.recognitionTaskWithRequest_resultHandler = recognitionTaskWithRequest_resultHandler def installTapOnBus_tapBlock(block_ptr, buffer_ptr, when_ptr): buffer = ObjCInstance(buffer_ptr) when = ObjCInstance(when_ptr) if not self.recognition_request is None: self.recognition_request.appendAudioPCMBuffer_(buffer) self.installTapOnBus_tapBlock = installTapOnBus_tapBlock # https://forum.omz-software.com/topic/5380/initialize-search-field-of-a-uidocumentpickerviewcontroller/11 # must stay in scope when in use self.result_handler = ObjCBlock(self.recognitionTaskWithRequest_resultHandler, restype=None, argtypes=[c_void_p, c_void_p, 
c_void_p]) self.tap_block = ObjCBlock(self.installTapOnBus_tapBlock, restype=None, argtypes=[c_void_p, c_void_p, c_void_p]) def start(self): if self.recognition_task is not None: print("Speech recognition already active.") return print("Starting speech recognition.") audio_session = AVAudioSession.sharedInstance() audio_session.setCategory_mode_options_error_(ns('AVAudioSessionCategoryRecord'), ns('AVAudioSessionModeMeasurement'), ns('AVAudioSessionCategoryOptionDuckOthers'), None) audio_session.setActive_withOptions_error_(True, ns('AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation'), None) self.recognition_request = SFSpeechAudioBufferRecognitionRequest.new() if self.recognition_request is None: print("Error: could not create recognition request!") return self.recognition_request.shouldReportPartialResults = True self.recognition_task = self.speech_recognizer.recognitionTaskWithRequest_resultHandler_(self.recognition_request, self.result_handler) recording_format = self.input_node.outputFormatForBus_(0) self.input_node.installTapOnBus_bufferSize_format_block_(0, 1024, recording_format, self.tap_block) self.audio_engine.prepare() err_ptr = c_void_p() # https://forum.omz-software.com/topic/3618/querying-returned-nserror/2 self.audio_engine.startAndReturnError_(byref(err_ptr)) if err_ptr: err = ObjCInstance(err_ptr) print("Error in audio engine:", err) def stop(self): if self.audio_engine.isRunning(): self.audio_engine.stop() if self.recognition_request is not None: self.recognition_request.endAudio()
-
-
Boring!
Hello Hello Hello Hello John B. Nielsen Hello John B. Nielsen Hello John B. Nielsen Hello John B. Nielsen