keyword_detection.py
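"""Keyword detection example.

A voice agent whose STT node is overridden so that each final transcript is
checked against a small keyword list; a log line is emitted whenever one of
the keywords is heard. The keyword list below is illustrative.
"""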
import logging
from pathlib import Path
from typing import AsyncIterable, Optional

from dotenv import load_dotenv

from livekit import rtc
from livekit.agents import JobContext, WorkerOptions, cli, stt
from livekit.agents.voice import Agent, AgentSession
from livekit.plugins import openai, deepgram, silero

load_dotenv(dotenv_path=Path(__file__).parent.parent / '.env')

logger = logging.getLogger("listen-and-respond")
logger.setLevel(logging.INFO)


class SimpleAgent(Agent):
    def __init__(self) -> None:
        super().__init__(
            instructions="""
                You are a helpful agent.
            """,
            stt=deepgram.STT(),
            llm=openai.LLM(),
            tts=openai.TTS(),
            vad=silero.VAD.load()
        )

    async def on_enter(self):
        # Greet the user as soon as the agent joins the session.
        self.session.generate_reply()
    async def stt_node(
        self,
        audio: AsyncIterable[rtc.AudioFrame],
        model_settings: Optional[dict] = None,
    ) -> Optional[AsyncIterable[stt.SpeechEvent]]:
        """Wrap the default STT node and log whenever a keyword appears in a final transcript."""
        keywords = ["Shane", "hello", "thanks"]
        parent_stream = super().stt_node(audio, model_settings)

        if parent_stream is None:
            return None

        async def process_stream():
            async for event in parent_stream:
                # Only final transcripts are scanned; every event is forwarded unchanged.
                if event.type == stt.SpeechEventType.FINAL_TRANSCRIPT and event.alternatives:
                    transcript = event.alternatives[0].text
                    for keyword in keywords:
                        if keyword.lower() in transcript.lower():
                            logger.info(f"Keyword detected: '{keyword}'")
                yield event

        return process_stream()


async def entrypoint(ctx: JobContext):
    await ctx.connect()

    session = AgentSession()
    await session.start(
        agent=SimpleAgent(),
        room=ctx.room
    )


if __name__ == "__main__":
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
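
# A minimal way to try this locally, assuming the standard LiveKit Agents CLI
# subcommands and that the Deepgram, OpenAI, and LiveKit credentials are set
# in the ../.env file loaded above:
#
#   python keyword_detection.py dev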