Welcome!
This is the community forum for my apps Pythonista and Editorial.
For individual support questions, you can also send an email. If you have a very short question or just want to say hello — I'm @olemoritz on Twitter.
implementing live voice commands?
-
@daltonb I could try to translate it to Objective-C in Pythonista, but not sure of the result... nor the delay.
-
@cvp that would be awesome.. even if it doesn't work out I'd love to see a partial result
-
https://github.com/yao23/iOS_Playground/blob/master/SpeechRecognitionPractice/SpeechRecognitionPractice/ViewController.m
is an Objective-C implementation. The tricky bit obviously is getting those blocks implemented in objc_util
-
-
@daltonb, I am tempted to give it a try, but not this week.
-
-
@cvp, the man is fast! :-D
-
@mikael Heh, that is not my code — just found it there; I've only begun trying to modify it...
-
First part (enough for today)
# Part 1: create the audio engine and configure the shared audio session
# for recording.
from objc_util import ObjCClass, ns

AVAudioEngine = ObjCClass('AVAudioEngine').alloc().init()
AVAudioSession = ObjCClass('AVAudioSession')
AVAudioRecorder = ObjCClass('AVAudioRecorder')

shared_session = AVAudioSession.sharedInstance()
# FIX: the options argument of setCategory:mode:options:error: is a
# bitmask, not a string constant —
# AVAudioSessionCategoryOptionDuckOthers == 0x2.
category_set = shared_session.setCategory_mode_options_error_(
    ns('AVAudioSessionCategoryRecord'),
    ns('AVAudioSessionModeMeasurement'),
    0x2,  # duckOthers
    None)

setActiveOptions = 0  # notifyOthersOnDeactivation
shared_session.setActive_withOptions_error_(True, setActiveOptions, None)

inputNode = AVAudioEngine.inputNode()
-
-
2nd part and really enough for today
# Part 2: tap the microphone input and feed captured buffers into a
# speech-recognition request.
from objc_util import ObjCClass, ObjCInstance, ObjCBlock, ns
from ctypes import c_void_p, byref

AVAudioEngine = ObjCClass('AVAudioEngine').alloc().init()
AVAudioSession = ObjCClass('AVAudioSession')
AVAudioRecorder = ObjCClass('AVAudioRecorder')

shared_session = AVAudioSession.sharedInstance()
# FIX: options is a bitmask (AVAudioSessionCategoryOptionDuckOthers == 0x2),
# not a string constant.
category_set = shared_session.setCategory_mode_options_error_(
    ns('AVAudioSessionCategoryRecord'),
    ns('AVAudioSessionModeMeasurement'),
    0x2,  # duckOthers
    None)
setActiveOptions = 0  # notifyOthersOnDeactivation
shared_session.setActive_withOptions_error_(True, setActiveOptions, None)
inputNode = AVAudioEngine.inputNode()

# Configure the microphone input.
recordingFormat = inputNode.outputFormatForBus_(0)


def handler(_cmd, obj1_ptr, obj2_ptr):
    # obj1_ptr -> AVAudioPCMBuffer: audio captured from the output of an
    #             AVAudioNode.
    # obj2_ptr -> AVAudioTime: the time the buffer was captured.
    if obj1_ptr:
        obj1 = ObjCInstance(obj1_ptr)
        # self.recognitionRequest?.append(buffer)


handler_block = ObjCBlock(handler, restype=None,
                          argtypes=[c_void_p, c_void_p, c_void_p])
inputNode.installTapOnBus_bufferSize_format_block_(0, 1024, recordingFormat,
                                                   handler_block)

AVAudioEngine.prepare()
err_ptr = c_void_p()
AVAudioEngine.startAndReturnError_(byref(err_ptr))
if err_ptr:
    # FIX: wrap err_ptr — the original wrapped the (undefined) name "err",
    # which would raise NameError here.
    err = ObjCInstance(err_ptr)
    print(err)

# Create and configure the speech recognition request.
# FIX: .init() is required after .alloc(); without it the request object
# is uninitialized.
recognitionRequest = ObjCClass('SFSpeechAudioBufferRecognitionRequest').alloc().init()
print(dir(recognitionRequest))
recognitionRequest.setShouldReportPartialResults_(True)
And
Fatal Python error: Bus error
Thread 0x000000016fb67000 (most recent call first):
No error if I comment the line
AVAudioEngine.startAndReturnError_(byref(err_ptr))
-
This post is deleted! -
you had some errors on one of your constants (the audiosession options should have been 0x2 for the duckothers option -- this is a mask, not a string)
here is a minor mod -- I verified the handler gets called, but I don't have speech recognition to test against
https://gist.github.com/ad17f52c8944993092f537d963ce1963 -
@JonB Thanks, I'll try to continue today...
-
@JonB Really need help now:
- segmentation fault if no underscore before appendAudioPCMBuffer_(obj1)
- segmentation fault in last line not commented
from objc_util import *

# Set up the audio engine and configure the shared session for recording.
AVAudioEngine = ObjCClass('AVAudioEngine').alloc().init()
AVAudioSession = ObjCClass('AVAudioSession')
AVAudioRecorder = ObjCClass('AVAudioRecorder')
shared_session = AVAudioSession.sharedInstance()
category_set = shared_session.setCategory_withOptions_error_(
    ns('AVAudioSessionCategoryRecord'),
    0x2,  # duckOthers (bitmask, not a string)
    None)
shared_session.setMode_error_(ns('AVAudioSessionModeMeasurement'), None)
setActiveOptions = 0  # notifyOthersOnDeactivation
shared_session.setActive_withOptions_error_(True, setActiveOptions, None)
inputNode = AVAudioEngine.inputNode()

# Configure the microphone input.
recordingFormat = inputNode.outputFormatForBus_(0)

# Create and configure the speech recognition request.
# FIX: .init() after .alloc() — without it the request object is
# uninitialized, which was the source of the reported segfault.
recognitionRequest = ObjCClass('SFSpeechAudioBufferRecognitionRequest').alloc().init()
print(dir(recognitionRequest))
recognitionRequest.setShouldReportPartialResults_(True)
retain_global(recognitionRequest)


@on_main_thread
def handler_buffer(_cmd, obj1_ptr, obj2_ptr):
    print('handler_buffer')
    # obj1_ptr -> AVAudioPCMBuffer: audio captured from the output of an
    #             AVAudioNode.
    # obj2_ptr -> AVAudioTime: the time the buffer was captured.
    if obj1_ptr:
        obj1 = ObjCInstance(obj1_ptr)
        # The leading "_" bypasses objc_util's automatic argument
        # conversion; the call segfaults without it.
        recognitionRequest._appendAudioPCMBuffer_(obj1)


handler_block_buffer = ObjCBlock(handler_buffer, restype=None,
                                 argtypes=[c_void_p, c_void_p, c_void_p])
inputNode.installTapOnBus_bufferSize_format_block_(0, 1024, recordingFormat,
                                                   handler_block_buffer)

AVAudioEngine.prepare()
err_ptr = c_void_p()
AVAudioEngine.startAndReturnError_(byref(err_ptr))
if err_ptr:
    # FIX: wrap err_ptr — the original wrapped the undefined name "err".
    err = ObjCInstance(err_ptr)
    print(err)


@on_main_thread
def handler_recognize(_cmd, obj1_ptr, obj2_ptr):
    print('handler_recognize')
    # obj1_ptr -> SFSpeechRecognitionResult: the partial or final
    #             transcriptions of the audio content.
    # obj2_ptr -> NSError: nil if speech recognition was successful.
    if obj1_ptr:
        obj1 = ObjCInstance(obj1_ptr)
        # print(str(obj1))


handler_block_recognize = ObjCBlock(handler_recognize, restype=None,
                                    argtypes=[c_void_p, c_void_p, c_void_p])
SFSpeechRecognizer = ObjCClass('SFSpeechRecognizer').alloc().init()
recognitionTask = SFSpeechRecognizer.recognitionTaskWithRequest_resultHandler_(
    recognitionRequest, handler_block_recognize)
-
recognitionRequest = ObjCClass('SFSpeechAudioBufferRecognitionRequest').alloc()
Missing .init()?
By the way, you will want AVAudioEngine.stop() handy.
For instance you might want to create a ui.View with a will_close, so that when you are experimenting, you can just close the view to kill the engine. Anyway you will eventually need to show the recognized words. -
@JonB I know for the stop, ui.view, print recognized etc... but I go forward step by step...
I'll try the .init().
Thanks to follow this project, hoping I don't annoy you too much with my problems...
Perhaps, it could be better that I don't (try to) help other people with topics where I'm not a big specialist π’ -
@JonB OK with the init(), thanks (almost for @daltonb)
And cheers with this imperfect script, but a good start for a future app...
# See all attributes of SFSpeechRecognitionResult.
from objc_util import *
import ui
import datetime

# Set up the audio engine and configure the shared session for recording.
AVAudioEngine = ObjCClass('AVAudioEngine').alloc().init()
AVAudioSession = ObjCClass('AVAudioSession')
AVAudioRecorder = ObjCClass('AVAudioRecorder')
shared_session = AVAudioSession.sharedInstance()
category_set = shared_session.setCategory_withOptions_error_(
    ns('AVAudioSessionCategoryRecord'),
    0x2,  # duckOthers (bitmask, not a string)
    None)
shared_session.setMode_error_(ns('AVAudioSessionModeMeasurement'), None)
setActiveOptions = 0  # notifyOthersOnDeactivation
shared_session.setActive_withOptions_error_(True, setActiveOptions, None)
inputNode = AVAudioEngine.inputNode()

# Configure the microphone input.
recordingFormat = inputNode.outputFormatForBus_(0)

# Create and configure the speech recognition request.
recognitionRequest = ObjCClass('SFSpeechAudioBufferRecognitionRequest').alloc().init()
recognitionRequest.setShouldReportPartialResults_(True)
retain_global(recognitionRequest)


@on_main_thread
def handler_buffer(_cmd, obj1_ptr, obj2_ptr):
    # print('handler_buffer', datetime.datetime.now())
    # obj1_ptr -> AVAudioPCMBuffer: audio captured from the output of an
    #             AVAudioNode.
    # obj2_ptr -> AVAudioTime: the time the buffer was captured.
    if obj1_ptr:
        obj1 = ObjCInstance(obj1_ptr)
        # print(str(obj1._get_objc_classname()))  # AVAudioPCMBuffer
        # print(str(obj1.frameLength()))          # 4410
        # The leading "_" bypasses objc_util's automatic argument
        # conversion; the call segfaults without it.
        recognitionRequest._appendAudioPCMBuffer_(obj1)


handler_block_buffer = ObjCBlock(handler_buffer, restype=None,
                                 argtypes=[c_void_p, c_void_p, c_void_p])
inputNode.installTapOnBus_bufferSize_format_block_(0, 1024, recordingFormat,
                                                   handler_block_buffer)

AVAudioEngine.prepare()
err_ptr = c_void_p()
AVAudioEngine.startAndReturnError_(byref(err_ptr))
if err_ptr:
    # FIX: wrap err_ptr — the original wrapped the undefined name "err".
    err = ObjCInstance(err_ptr)
    print(err)


# @on_main_thread
def handler_recognize(_cmd, obj1_ptr, obj2_ptr):
    # print('handler_recognize')
    # obj1_ptr -> SFSpeechRecognitionResult: the partial/final
    #             transcriptions of the audio content.
    # obj2_ptr -> NSError: nil if speech recognition was successful.
    if obj1_ptr:
        obj1 = ObjCInstance(obj1_ptr)
        # obj1 is a SFSpeechRecognitionResult
        print(obj1.bestTranscription().formattedString())


handler_block_recognize = ObjCBlock(handler_recognize, restype=None,
                                    argtypes=[c_void_p, c_void_p, c_void_p])
SFSpeechRecognizer = ObjCClass('SFSpeechRecognizer').alloc().init()
recognitionTask = SFSpeechRecognizer.recognitionTaskWithRequest_resultHandler_(
    recognitionRequest, handler_block_recognize)

# Minimal UI so the engine can be shut down: tapping "stop" (or the
# button) stops the engine and ends the audio stream.
mv = ui.View()
b = ui.ButtonItem()
b.title = 'stop'


def b_stop(sender):
    AVAudioEngine.stop()
    recognitionRequest.endAudio()


b.action = b_stop
mv.right_button_items = (b,)
mv.present('sheet')
-
<SFTranscription: 0x281a70960>, formattedString=Okay, segments=( "<SFTranscriptionSegment: 0x283fb3900>, substringRange={0, 4}, timestamp=0, duration=2.94, confidence=0, substring=Okay, alternativeSubstrings=(\n), phoneSequence=, ipaPhoneSequence=" )
-
Cool!
So, does it give you a transcript object for each word? Or one for a whole phrase, etc?