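Just moving the synthesizer creation outside your loop should do what you want without needing the delegate: a single synthesizer queues the utterances it is given and speaks them one after another, whereas a new synthesizer per utterance lets them all start at once. The delegate would allow you to pause, figure out what is being spoken at a given time, etc., but it isn't needed to prevent overlaps.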

# Speak a list of Japanese phrases one after another with AVSpeechSynthesizer
# (Pythonista, via objc_util).
from objc_util import *

# Phrases to speak, in order.
txt = ['こんにちは', '私はSiriです。']

AVSpeechUtterance = ObjCClass('AVSpeechUtterance')
AVSpeechSynthesizer = ObjCClass('AVSpeechSynthesizer')
AVSpeechSynthesisVoice = ObjCClass('AVSpeechSynthesisVoice')

# Substring looked for in each installed voice's identifier.
# If you have the Japanese Siri voice, replace 'ja-JP' with 'siri_O-ren_ja-JP'.
VOICE_ID_FRAGMENT = 'ja-JP'

voices = AVSpeechSynthesisVoice.speechVoices()
# To list every installed voice, print voices[i].language() and
# voices[i].identifier() for each index i.

# Index of the first voice whose identifier contains the fragment, or None
# when no such voice is installed.
vi = next(
    (
        i
        for i in range(len(voices))
        if VOICE_ID_FRAGMENT in str(voices[i].identifier())
    ),
    None,
)
if vi is None:
    # Fail with a clear message instead of a NameError on `vi` further down.
    raise RuntimeError(
        'No installed speech voice matches %r' % VOICE_ID_FRAGMENT
    )

# Create the synthesizer once, outside the loop, and hand every utterance to
# this same instance so the phrases do not overlap.
synthesizer = AVSpeechSynthesizer.new()
for t in txt:
    utterance = AVSpeechUtterance.speechUtteranceWithString_(t)
    utterance.rate = 0.5
    utterance.useCompactVoice = False
    utterance.voice = voices[vi]
    synthesizer.speakUtterance_(utterance)