# Text2Speech agent

# You may need to add your working directory to the Python path. To do so, uncomment the following lines:
# import sys
# sys.path.append("/Path/to/directory/agentic-framework")  # Replace with your directory path
 4
# BESSER Agentic Framework (BAF): Hugging Face Text-to-Speech example agent
 6
 7# imports
 8import logging
 9import base64
10
11from baf.core.agent import Agent
12from baf.core.session import Session
13from baf.exceptions.logger import logger
14
15from baf.nlp.text2speech.hf_text2speech import HFText2Speech
16
17from baf.core.file import File
18from baf.library.transition.events.base_events import ReceiveFileEvent
19
20
# Optional: set the framework logger to INFO level
logger.setLevel(logging.INFO)

# Instantiate the agent
agent = Agent('Huggingface Text-to-Speech Agent')

# Read the agent properties from the dedicated configuration file
agent.load_properties('config.yaml')

# Platform the agent communicates through (WebSocket, with use_ui enabled)
websocket_platform = agent.use_websocket_platform(use_ui=True)

# Hugging Face text-to-speech component. Example model names:
#   'facebook/mms-tts-eng'
#   'microsoft/speecht5_tts'
#   'suno/bark-small'
tts = HFText2Speech(agent=agent, model_name="facebook/mms-tts-eng")

# States
initial_state = agent.new_state('initial_state', initial=True)
tts_state = agent.new_state('tts_state')  # handles text messages
tts_file_state = agent.new_state('tts_file_state')  # handles text files uploaded through the UI
44
# State bodies and transitions
46
def initial_body(session: Session):
    """Body of the initial state: reply to the user with a short greeting.

    Args:
        session: The session the greeting is sent to.
    """
    greeting = 'Hi'
    session.reply(greeting)
49
50
# Attach the greeting body to the initial state
initial_state.set_body(initial_body)

# Received files are only accepted when their type is "text/plain"; they lead to the file state
initial_state.when_file_received(allowed_types="text/plain").go_to(tts_file_state)

# When no intent is matched, move to the message state
# (no intents are defined in this file, so presumably every text message ends up here)
initial_state.when_no_intent_matched().go_to(tts_state)
54
55
def tts_body(session: Session):
    """Body of the message state: pass the received message to ``reply_speech``.

    Args:
        session: The session whose current event holds the message to send
            through the WebSocket platform's ``reply_speech``.
    """
    message_text = session.event.message
    websocket_platform.reply_speech(session, message_text)
58
59
# Attach the body to the message state
tts_state.set_body(tts_body)
# Transition back to the initial state (no condition is attached to it)
tts_state.go_to(initial_state)
62
# Executed when a file is received
def tts_file_body(session: Session):
    """Body of the file state: read an uploaded text file aloud.

    Decodes the base64 payload of the file attached to the triggering event
    as UTF-8 text, replies with that text and then passes it to the WebSocket
    platform's ``reply_speech``. If the payload cannot be decoded, or holds no
    text, the user is told so and no speech is requested.

    Args:
        session: The session whose current event carries the received file.
    """
    event: ReceiveFileEvent = session.event
    file: File = event.file

    # Base64-encoded content of the uploaded file.
    # NOTE(review): this reads the private ``_base64`` attribute directly;
    # prefer a public accessor on ``File`` if one is available - confirm.
    encoded_content = file._base64

    try:
        # Decode the base64 string into bytes, then the bytes into text
        file_text = base64.b64decode(encoded_content).decode('utf-8')
    except (TypeError, ValueError):
        # binascii.Error (malformed base64) and UnicodeDecodeError (content is
        # not UTF-8) are both ValueError subclasses; TypeError covers a
        # missing payload. Tell the user instead of failing inside the body.
        session.reply('Sorry, I could not read that file. Please upload a UTF-8 encoded text file.')
        return

    if not file_text.strip():
        # Nothing to synthesize: avoid sending empty input to the TTS model
        session.reply('The file you sent is empty, so there is nothing to read aloud.')
        return

    # Show the extracted text, then send it as speech
    session.reply(file_text)
    websocket_platform.reply_speech(session, file_text)
76
77
# Attach the body to the file state
tts_file_state.set_body(tts_file_body)
# Transition back to the initial state (no condition is attached to it)
tts_file_state.go_to(initial_state)
80
81
# Start the agent only when this file is executed as a script
if __name__ == "__main__":
    agent.run()