generated from deepgram/oss-repo-template
-
Notifications
You must be signed in to change notification settings - Fork 81
/
Copy pathmain.py
148 lines (118 loc) · 5.19 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
# SPDX-License-Identifier: MIT
from dotenv import load_dotenv
from time import sleep
import logging
from deepgram.utils import verboselogs
from deepgram import (
DeepgramClient,
DeepgramClientOptions,
LiveTranscriptionEvents,
LiveOptions,
Microphone,
)
# Runs at import time, before main() constructs any client.
# NOTE(review): presumably this reads a local .env file so the Deepgram API key
# is present in the environment when DeepgramClient() is created - confirm
# against the python-dotenv and Deepgram SDK documentation.
load_dotenv()
# Module-level buffer shared by the transcript and utterance-end handlers in main().
# We will collect the is_final=true messages here so we can use them when the person finishes speaking
is_finals = []
def main():
    """Stream microphone audio to Deepgram and print live transcripts.

    Opens a live-transcription websocket, registers a printing handler for
    each ``LiveTranscriptionEvents`` member used below, streams audio from a
    ``Microphone`` until the user presses Enter, and then shuts down the
    microphone and the connection.

    Finalized transcript fragments are buffered in the module-level
    ``is_finals`` list and flushed as one utterance when a result carries
    ``speech_final`` or when an UtteranceEnd event arrives.

    Returns:
        None. Any ``Exception`` raised along the way is caught and reported
        on stdout instead of propagating.
    """
    try:
        # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
        # config = DeepgramClientOptions(
        #     verbose=verboselogs.DEBUG, options={"keepalive": "true"}
        # )
        # deepgram: DeepgramClient = DeepgramClient("", config)
        # otherwise, use default config
        deepgram: DeepgramClient = DeepgramClient()

        dg_connection = deepgram.listen.websocket.v("1")

        # NOTE(review): handler parameter names (including `open`, which shadows
        # the builtin) are deliberately left unchanged - the SDK may pass these
        # arguments by keyword, so renaming them could break dispatch. Confirm
        # against the SDK before renaming.

        def on_open(self, open, **kwargs):
            print("Connection Open")

        def on_message(self, result, **kwargs):
            sentence = result.channel.alternatives[0].transcript
            if not sentence:
                # Nothing was transcribed for this chunk of audio.
                return

            if not result.is_final:
                # These are useful if you need real time captioning of what is being spoken
                print(f"Interim Results: {sentence}")
                return

            print(f"Message: {result.to_json()}")
            # We need to collect these and concatenate them together when we get a speech_final=true
            # See docs: https://developers.deepgram.com/docs/understand-endpointing-interim-results
            is_finals.append(sentence)

            # Speech Final means we have detected sufficient silence to consider this end of speech
            # Speech final is the lowest latency result as it triggers as soon as the endpointing value has triggered
            if result.speech_final:
                utterance = " ".join(is_finals)
                print(f"Speech Final: {utterance}")
                # Clear in place so the shared module-level list never needs rebinding.
                is_finals.clear()
            else:
                # These are useful if you need real time captioning and update what the Interim Results produced
                print(f"Is Final: {sentence}")

        def on_metadata(self, metadata, **kwargs):
            print(f"Metadata: {metadata}")

        def on_speech_started(self, speech_started, **kwargs):
            print("Speech Started")

        def on_utterance_end(self, utterance_end, **kwargs):
            print("Utterance End")
            # Flush fragments that were finalized but never closed by a speech_final result.
            if is_finals:
                utterance = " ".join(is_finals)
                print(f"Utterance End: {utterance}")
                is_finals.clear()

        def on_close(self, close, **kwargs):
            print("Connection Closed")

        def on_error(self, error, **kwargs):
            print(f"Handled Error: {error}")

        def on_unhandled(self, unhandled, **kwargs):
            print(f"Unhandled Websocket Message: {unhandled}")

        dg_connection.on(LiveTranscriptionEvents.Open, on_open)
        dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
        dg_connection.on(LiveTranscriptionEvents.Metadata, on_metadata)
        dg_connection.on(LiveTranscriptionEvents.SpeechStarted, on_speech_started)
        dg_connection.on(LiveTranscriptionEvents.UtteranceEnd, on_utterance_end)
        dg_connection.on(LiveTranscriptionEvents.Close, on_close)
        dg_connection.on(LiveTranscriptionEvents.Error, on_error)
        dg_connection.on(LiveTranscriptionEvents.Unhandled, on_unhandled)

        options: LiveOptions = LiveOptions(
            model="nova-3",
            language="en-US",
            # Apply smart formatting to the output
            smart_format=True,
            # Raw audio format details
            encoding="linear16",
            channels=1,
            sample_rate=16000,
            # To get UtteranceEnd, the following must be set:
            interim_results=True,
            utterance_end_ms="1000",
            vad_events=True,
            # Time in milliseconds of silence to wait for before finalizing speech
            endpointing=300,
        )

        addons = {
            # Prevent waiting for additional numbers
            "no_delay": "true",
        }

        print("\n\nPress Enter to stop recording...\n\n")
        if dg_connection.start(options, addons=addons) is False:
            print("Failed to connect to Deepgram")
            return

        # From here on the connection is live: make sure it is always closed,
        # even if the microphone cannot be opened or input() is interrupted.
        try:
            # Open a microphone stream on the default input device
            microphone = Microphone(dg_connection.send)
            try:
                # start microphone
                microphone.start()
                # wait until finished
                input("")
            finally:
                # Wait for the microphone to close
                microphone.finish()
        finally:
            # Indicate that we've finished
            dg_connection.finish()

        print("Finished")
        # sleep(30)  # wait 30 seconds to see if there is any additional socket activity
        # print("Really done!")
    except Exception as e:
        print(f"Could not open socket: {e}")
        return
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()