Skip to content
Snippets Groups Projects
Commit 76c3fe58 authored by Daniel Engelsons's avatar Daniel Engelsons
Browse files

Untested attempt to dialogflow

parent 3eca7e12
No related branches found
No related tags found
No related merge requests found
...@@ -14,54 +14,95 @@ import queue ...@@ -14,54 +14,95 @@ import queue
from google.oauth2 import service_account from google.oauth2 import service_account
import os import os
import time import time
from threading import Thread
#import scipy.io.wavfile as wavf class DialogFlowCommunicator:
def __init__(self, audio_buffer_node):
self.configure_dialogflow()
#import pyaudio self.audio_buffer = audio_buffer_node
#import wave
#import sys """
#import threading Set configuration of the audio that will be sent to DialogFlow (e.g. language, sample_rate).
"""
''' def configure_dialogflow(self):
def play(): audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16 # uncompressed 16-bit signed little-endian samples
CHUNK = 1024 sample_rate_hertz = 16000
wf = wave.open("out.wav", 'rb') google_application_credentials = "/home/daniel/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json" #rospy.get_param('~google_application_credentials') # Samla ihop alla get_param() till en funktion så de är samlade. Enklare att debugga när man ser alla parametrar som en ros nod använder.
#google_application_credentials = "/workspace/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"
p = pyaudio.PyAudio() self.default_language = "en"
####
print(wf.getsampwidth())
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True)
data = wf.readframes(CHUNK)
while data != '':
stream.write(data)
data = wf.readframes(CHUNK)
stream.stop_stream()
stream.close()
p.terminate()
'''
credentials = service_account.Credentials.from_service_account_file(google_application_credentials)
project_id = credentials.project_id
print("AGENT'S PROJECT ID: " + str(project_id))
session_id = "some_session" # Id of the session, using the same `session_id` between requests allows continuation of the conversation
self.session_client = dialogflow.SessionsClient(credentials=credentials)
self.session_path = self.session_client.session_path(project_id, session_id)
self.audio_config = dialogflow.types.InputAudioConfig( audio_encoding=audio_encoding,
language_code=self.default_language,
sample_rate_hertz=sample_rate_hertz)
class DialogFlowCommunicator:
def __init__(self):
def request_generator(self):
query_input = dialogflow.types.QueryInput(audio_config=self.audio_config)
# The first request contains the configuration.
yield dialogflow.types.StreamingDetectIntentRequest(
session=self.session_path, query_input=query_input
)
print("init")
# Here we are reading small chunks of audio data from a local
# audio file. In practice these chunks should come from
# an audio input device.
with self.audio_buffer as audio_buffer:
print("inside buffer")
while True:
print("new loop")
chunk = audio_buffer.read()
print("read done")
if not chunk:
print("no chunk")
break
print("send chunk")
# The later requests contains audio data.
yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
class AudioQueue(Node): def detect_intent_stream(self):
"""Returns the result of detect intent with streaming audio as input.
Using the same `session_id` between requests allows continuation
of the conversation."""
while True:
requests = self.request_generator()
responses = self.session_client.streaming_detect_intent(requests=requests)
print("=" * 20)
for response in responses:
print(
'Intermediate transcript: "{}".'.format(
response.recognition_result.transcript.encode('utf-8')
)
)
query_result = response.query_result
if query_result.query_text != "":
# Note: The result from the last response is the final transcript along
# with the detected content.
print("=" * 20)
print("Query text: {}".format(query_result.query_text))
print(
"Detected intent: {} (confidence: {})\n".format(
query_result.intent.display_name, query_result.intent_detection_confidence
)
)
print("Fulfillment text: {}\n".format(query_result.fulfillment_text))
class AudioBuffer(Node):
def __init__(self): def __init__(self):
#play() #play()
super().__init__('dialogflow') super().__init__('dialogflow')
self.audio_chunk_queue = queue.Queue()
self.audio_chunk_queue = queue.Queue() # Buffer where callback stores audio samples coming from audio_topic. Audio chunks are later removed from it and sent to Dialogflow
self.max_queue_size = 100*10000
self.buffer_sub = self.create_subscription(Audio, 'microphone_buffer', self.audio_cb, 10) self.buffer_sub = self.create_subscription(Audio, 'microphone_buffer', self.audio_cb, 10)
self.text_sub = self.create_subscription(String, 'text', self.text_cb, 10) self.text_sub = self.create_subscription(String, 'text', self.text_cb, 10)
...@@ -69,144 +110,27 @@ class AudioQueue(Node): ...@@ -69,144 +110,27 @@ class AudioQueue(Node):
self.response_pub = self.create_publisher(Response, 'response', 10) self.response_pub = self.create_publisher(Response, 'response', 10)
self.log = self.get_logger() self.log = self.get_logger()
self.log.warn(self.__class__.__name__+" has not yet been implemented.") # TODO self.log.warn(self.__class__.__name__+" has not yet been implemented.") # TODO
self.configure_dialogflow()
self.closed = True
#self.thread = threading.Thread(target=rclpy.spin, args=(self,), daemon=True)
#self.thread.start()
#rclpy.spin(self)
#while rclpy.ok():
# if len(self.audio_chunk_queue) > 0:
# self.detect_intent_stream()
#self.stream_timer = self.create_timer(0.17, self.detect_intent_stream)
#self.text_cb("potato")
#self.x = 0
#fs=48000
#pygame.mixer.pre_init(fs, -16, 1, 8192)
#pygame.mixer.init()
def __enter__(self): def __enter__(self):
print("Closed = False") print("Closed = False")
self.closed = False self.closed = False
return self return self
def __exit__(self, type, value, traceback): def __exit__(self, type, value, traceback):
print("closed = True") print("closed = True")
self.closed = True self.closed = True
# Signal the generator to terminate so that the client's # Signal the generator to terminate so that the client's
# streaming_recognize method will not block the process termination. # streaming_recognize method will not block the process termination.
self.audio_chunk_queue.put(None) #self.audio_chunk_queue.put(None)
#self.audio_chunk_queue = queue.Queue()
"""
Continuosly stream audio chunks from audio_chunk_queue to Dialogflow. Request generator.
"""
def audio_stream_request_generator(self):
# ----------- Trigg intents with audio (InputAudioConfig)
query_input = dialogflow.types.QueryInput(audio_config=self.audio_config) # input specification, it instructs the speech recognizer how to process the speech audio
#print("hejehejej")
# The first request sends up the configuration of the speech recognition. This request doesn't contain input audio.
yield dialogflow.types.StreamingDetectIntentRequest(session=self.session_path,
query_input=query_input,
single_utterance = True)
self.log.info(str("in generatr"))
while not self.closed:
#print("pop sound")
#self.log.info(str("asdasd"))
rclpy.spin_some(self)
#if self.audio_chunk_queue.empty():
self.log.info(str(self.audio_chunk_queue.qsize()))
chunk = self.audio_chunk_queue.get_nowait()
self.log.info(str(chunk is None))
#chunk = self.audio_chunk_queue.get()
#self.log.info(str("after loop"))
if chunk is None:
self.log.info("Chunk = None, return")
return
yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
self.log.info(str("out of loop"))
return
""" Call request generator function and get intent responses from DialogFlow. """
def detect_intent_stream(self):
while rclpy.ok():
self.log.info(str("new open"))
with self as stream:
rclpy.spin_once(self)
print("1")
requests = stream.audio_stream_request_generator()
print("2")
responses = stream.session_client.streaming_detect_intent(requests)
print("3")
last_response = False
try:
for response in responses:
#print("Response size", len(response.recognition_result.transcript))
self.log.info('Intermediate transcipt: "{}".'.format(response.recognition_result.transcript.encode('utf-8')))
last_response = response
except:
self.log.error("Couldn't print responses.")
return
if last_response:
# Note: The result from the last response is the final transcript along with the detected content.
query_result = last_response.query_result
#rospy.loginfo("Pepper going to say: {}".format(query_result.fulfillment_text.encode('utf-8')))
self.log.info('Pepper going to say: ' + str(query_result.fulfillment_text.encode('utf-8')))
if query_result.query_text == "":
self.log.info("Empty query_text")
#return
#self.publish_response(query_result) # ROS Publish information def read(self):
#self.say_action_client(query_result) # TEXT-TO-SPEECH ROS Action Client if self.audio_chunk_queue.qsize() == 0:
return False
return self.audio_chunk_queue.get_nowait()
"""
Set configuration of the audio that will be sent to DialogFlow (e.g. language, sample_rate).
"""
def configure_dialogflow(self):
audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16 # uncompressed 16-bit signed little-endian samples
sample_rate_hertz = 16000
#####
#print(os.getcwd())
# TODO - lägg till från launch file
#google_application_credentials = "/home/daniel/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json" #rospy.get_param('~google_application_credentials') # Samla ihop alla get_param() till en funktion så de är samlade. Enklare att debugga när man ser alla parametrar som en ros nod använder.
google_application_credentials = "/workspace/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"
self.default_language = "en"
####
credentials = service_account.Credentials.from_service_account_file(google_application_credentials) """ Publishes all necessary information from Dialogflow to a single topic. """
project_id = credentials.project_id # TODO: Rensa bort alla onödiga self variabler. fixat
self.log.info("AGENT'S PROJECT ID: " + str(project_id))
session_id = "some_session" # Id of the session, using the same `session_id` between requests allows continuation of the conversation
self.session_client = dialogflow.SessionsClient(credentials=credentials)
self.session_path = self.session_client.session_path(project_id, session_id)
self.audio_config = dialogflow.types.InputAudioConfig( audio_encoding=audio_encoding,
language_code=self.default_language,
sample_rate_hertz=sample_rate_hertz)
""" Publishes all necessary information from Dialogflow to a single topic. """
def publish_response(self, query_result): def publish_response(self, query_result):
dialogflow_response_msg = DialogflowResponse() dialogflow_response_msg = DialogflowResponse()
# Talsyntes, tablet: # Talsyntes, tablet:
...@@ -249,38 +173,7 @@ class AudioQueue(Node): ...@@ -249,38 +173,7 @@ class AudioQueue(Node):
def audio_cb(self, audio_msg): def audio_cb(self, audio_msg):
#print("Got!") self.audio_chunk_queue.put(audio_msg.data.tobytes())
#self.log.info(audio_msg.data)
#self.log.error("Text callback in Dialogflow has not yet been implemented.") # TODO
if True:
#print(str(audio_msg.data.itemsize))
#print(str(audio_msg.data))
#print(len(str(audio_msg.data)))
#sound_data_interlaced = np.fromstring(str(audio_msg.data), dtype=np.int16)
#print(sound_data_interlaced.shape)
#sound_data = np.reshape(sound_data_interlaced, (4, 48000), 'F')
#print(sound_data.shape)
#self.audio_chunk_queue.append(sound_data[0].tostring())
#print(sound_data[0].tostring())
#self.f = open("demofile2.txt", "a")
#for bit in audio_msg.data:
# self.f.write(str(bit))
#self.f.write("\n")
#self.f.close()
#self.log.info(str(audio_msg.data))
#print(bytearray(audio_msg.data.encode()))
#print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
#print(str(bytearray(audio_msg.data)))
#print(type(str(bytearray(audio_msg.data))))
# HÄÄR!!!
#print("put sound")
#print(audio_msg.data.tobytes())
self.audio_chunk_queue.put(audio_msg.data.tobytes())
else:
self.log.warn("Audio chunk queue is full, flushing!")
#print(self.audio_chunk_queue)
self.audio_chunk_queue = queue.Queue()
''' '''
msg = Response() msg = Response()
...@@ -328,18 +221,89 @@ class AudioQueue(Node): ...@@ -328,18 +221,89 @@ class AudioQueue(Node):
msg.parameters.append(Parameter(key="location", value="Kitchen")) msg.parameters.append(Parameter(key="location", value="Kitchen"))
self.response_pub.publish(msg) self.response_pub.publish(msg)
"""
def audio_stream_request_generator(self):
# ----------- Trigg intents with audio (InputAudioConfig)
query_input = dialogflow.types.QueryInput(audio_config=self.audio_config) # input specification, it instructs the speech recognizer how to process the speech audio
#print("hejehejej")
# The first request sends up the configuration of the speech recognition. This request doesn't contain input audio.
yield dialogflow.types.StreamingDetectIntentRequest(session=self.session_path,
query_input=query_input,
single_utterance = True)
self.log.info(str("in generatr"))
while not self.closed:
#print("pop sound")
#self.log.info(str("asdasd"))
rclpy.spin_some(self)
#if self.audio_chunk_queue.empty():
self.log.info(str(self.audio_chunk_queue.qsize()))
chunk = self.audio_chunk_queue.get_nowait()
self.log.info(str(chunk is None))
#chunk = self.audio_chunk_queue.get()
#self.log.info(str("after loop"))
if chunk is None:
self.log.info("Chunk = None, return")
return
yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
self.log.info(str("out of loop"))
return
def detect_intent_stream(self):
while rclpy.ok():
self.log.info(str("new open"))
with self as stream:
rclpy.spin_once(self)
print("1")
requests = stream.audio_stream_request_generator()
print("2")
responses = stream.session_client.streaming_detect_intent(requests)
print("3")
last_response = False
try:
for response in responses:
#print("Response size", len(response.recognition_result.transcript))
self.log.info('Intermediate transcipt: "{}".'.format(response.recognition_result.transcript.encode('utf-8')))
last_response = response
except:
self.log.error("Couldn't print responses.")
return
if last_response:
# Note: The result from the last response is the final transcript along with the detected content.
query_result = last_response.query_result
#rospy.loginfo("Pepper going to say: {}".format(query_result.fulfillment_text.encode('utf-8')))
self.log.info('Pepper going to say: ' + str(query_result.fulfillment_text.encode('utf-8')))
if query_result.query_text == "":
self.log.info("Empty query_text")
#return
#self.publish_response(query_result) # ROS Publish information
#self.say_action_client(query_result) # TEXT-TO-SPEECH ROS Action Client
"""
def main(args=None): def main(args=None):
rclpy.init(args=args) rclpy.init(args=args)
dialogflow_node = DialogFlowCommunicator()
dialogflow_node.detect_intent_stream() audio_buffer_node = AudioBuffer()
#rclpy.spin(dialogflow_node) dialogflow_communicator = DialogFlowCommunicator(audio_buffer_node)
#thread = threading.Thread(target=rclpy.spin, args=(dialogflow_node,), daemon=True)
#thread.start() process = Thread(target=dialogflow_communicator.detect_intent_stream)
#dialogflow_node.thread.join() process.start()
#dialogflow_node.file.close() rclpy.spin(audio_buffer_node)
dialogflow_node.destroy_node()
audio_buffer_node.destroy_node()
rclpy.shutdown() rclpy.shutdown()
#thread.join() process.join()
if __name__ == '__main__': if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment