Commit 76c3fe58 authored by Daniel Engelsons's avatar Daniel Engelsons

Untested attempt at Dialogflow integration

parent 3eca7e12
@@ -14,54 +14,95 @@ import queue
from google.oauth2 import service_account
import os
import time
from threading import Thread
#import scipy.io.wavfile as wavf
#import pyaudio
#import wave
#import sys
#import threading
'''
def play():
    CHUNK = 1024
    wf = wave.open("out.wav", 'rb')
    p = pyaudio.PyAudio()
    print(wf.getsampwidth())
    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True)
    data = wf.readframes(CHUNK)
    while data != b'':  # readframes returns bytes, so compare against b'' (not '')
        stream.write(data)
        data = wf.readframes(CHUNK)
    stream.stop_stream()
    stream.close()
    p.terminate()
'''
class DialogFlowCommunicator:
    def __init__(self, audio_buffer_node):
        self.configure_dialogflow()
        self.audio_buffer = audio_buffer_node

    def configure_dialogflow(self):
        """
        Set configuration of the audio that will be sent to Dialogflow
        (e.g. language, sample_rate).
        """
        # Uncompressed 16-bit signed little-endian samples.
        audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16
        sample_rate_hertz = 16000
        # TODO: Gather all get_param() calls in one function so they are in one
        # place; easier to debug when you can see every parameter a ROS node uses.
        google_application_credentials = "/home/daniel/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"  # rospy.get_param('~google_application_credentials')
        #google_application_credentials = "/workspace/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"
        self.default_language = "en"
        credentials = service_account.Credentials.from_service_account_file(google_application_credentials)
        project_id = credentials.project_id
        print("AGENT'S PROJECT ID: " + str(project_id))
        # Reusing the same `session_id` between requests allows continuation
        # of the conversation.
        session_id = "some_session"
        self.session_client = dialogflow.SessionsClient(credentials=credentials)
        self.session_path = self.session_client.session_path(project_id, session_id)
        self.audio_config = dialogflow.types.InputAudioConfig(
            audio_encoding=audio_encoding,
            language_code=self.default_language,
            sample_rate_hertz=sample_rate_hertz)
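    # For a quick sanity check of the credentials and session path above, a
    # one-shot text query can be run before attempting audio streaming. A
    # minimal sketch, assuming the same v1 `dialogflow` client API used in this
    # file; the query text and credentials filename are placeholders:
    '''
    credentials = service_account.Credentials.from_service_account_file("cred.json")
    session_client = dialogflow.SessionsClient(credentials=credentials)
    session_path = session_client.session_path(credentials.project_id, "some_session")
    text_input = dialogflow.types.TextInput(text="hello", language_code="en")
    query_input = dialogflow.types.QueryInput(text=text_input)
    response = session_client.detect_intent(session=session_path, query_input=query_input)
    print(response.query_result.fulfillment_text)
    '''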
    def request_generator(self):
        query_input = dialogflow.types.QueryInput(audio_config=self.audio_config)
        # The first request contains the configuration.
        yield dialogflow.types.StreamingDetectIntentRequest(
            session=self.session_path, query_input=query_input
        )
        # The audio chunks come from the AudioBuffer node, which fills its
        # queue from the microphone topic (the Dialogflow sample this is based
        # on reads them from a local audio file instead).
        with self.audio_buffer as audio_buffer:
            while True:
                chunk = audio_buffer.read()
                if not chunk:
                    break
                # The later requests contain the audio data.
                yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
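    # For offline testing, the same first-config-then-audio request pattern can
    # be fed from a WAV file instead of the microphone topic. A minimal sketch,
    # assuming a 16 kHz 16-bit mono file; `file_request_generator` and
    # `test.wav` are hypothetical names:
    '''
    import wave

    def file_request_generator(self, path="test.wav"):
        query_input = dialogflow.types.QueryInput(audio_config=self.audio_config)
        # First request: configuration only.
        yield dialogflow.types.StreamingDetectIntentRequest(
            session=self.session_path, query_input=query_input)
        with wave.open(path, 'rb') as wf:
            while True:
                chunk = wf.readframes(4096)
                if not chunk:  # readframes returns b'' at end of file
                    break
                yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
    '''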
    def detect_intent_stream(self):
        """Returns the result of detect intent with streaming audio as input.
        Using the same `session_id` between requests allows continuation
        of the conversation."""
        while True:
            requests = self.request_generator()
            responses = self.session_client.streaming_detect_intent(requests=requests)
            print("=" * 20)
            for response in responses:
                print('Intermediate transcript: "{}".'.format(
                    response.recognition_result.transcript))
                query_result = response.query_result
                if query_result.query_text != "":
                    # Note: the result from the last response is the final
                    # transcript along with the detected content.
                    print("=" * 20)
                    print("Query text: {}".format(query_result.query_text))
                    print("Detected intent: {} (confidence: {})\n".format(
                        query_result.intent.display_name,
                        query_result.intent_detection_confidence))
                    print("Fulfillment text: {}\n".format(query_result.fulfillment_text))
class AudioBuffer(Node):
    def __init__(self):
        super().__init__('dialogflow')
        # Buffer where the callback stores audio samples coming from the
        # microphone topic; chunks are later popped from it and sent to
        # Dialogflow.
        self.audio_chunk_queue = queue.Queue()
        self.max_queue_size = 100 * 10000
        self.buffer_sub = self.create_subscription(Audio, 'microphone_buffer', self.audio_cb, 10)
        self.text_sub = self.create_subscription(String, 'text', self.text_cb, 10)
@@ -69,144 +110,27 @@ class AudioQueue(Node):
        self.response_pub = self.create_publisher(Response, 'response', 10)
        self.log = self.get_logger()
        self.log.warn(self.__class__.__name__ + " has not yet been implemented.")  # TODO
        self.closed = True
    def __enter__(self):
        self.closed = False
        return self

    def __exit__(self, type, value, traceback):
        self.closed = True
        # Signal the generator to terminate so that the client's
        # streaming_detect_intent call will not block process termination.
        self.audio_chunk_queue.put(None)
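    # The `None` put into the queue by __exit__ acts as a sentinel: whichever
    # generator is draining the queue treats it as a stop signal. A
    # self-contained sketch of the pattern, using only the standard library:
    '''
    import queue

    q = queue.Queue()
    q.put(b"chunk-1")
    q.put(None)  # sentinel: tells the consumer to stop

    while True:
        item = q.get()
        if item is None:
            break
        print("got", item)  # prints: got b'chunk-1'
    '''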
"""
Continuosly stream audio chunks from audio_chunk_queue to Dialogflow. Request generator.
"""
def audio_stream_request_generator(self):
# ----------- Trigg intents with audio (InputAudioConfig)
query_input = dialogflow.types.QueryInput(audio_config=self.audio_config) # input specification, it instructs the speech recognizer how to process the speech audio
#print("hejehejej")
# The first request sends up the configuration of the speech recognition. This request doesn't contain input audio.
yield dialogflow.types.StreamingDetectIntentRequest(session=self.session_path,
query_input=query_input,
single_utterance = True)
self.log.info(str("in generatr"))
while not self.closed:
#print("pop sound")
#self.log.info(str("asdasd"))
rclpy.spin_some(self)
#if self.audio_chunk_queue.empty():
self.log.info(str(self.audio_chunk_queue.qsize()))
chunk = self.audio_chunk_queue.get_nowait()
self.log.info(str(chunk is None))
#chunk = self.audio_chunk_queue.get()
#self.log.info(str("after loop"))
if chunk is None:
self.log.info("Chunk = None, return")
return
yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
self.log.info(str("out of loop"))
return
""" Call request generator function and get intent responses from DialogFlow. """
def detect_intent_stream(self):
while rclpy.ok():
self.log.info(str("new open"))
with self as stream:
rclpy.spin_once(self)
print("1")
requests = stream.audio_stream_request_generator()
print("2")
responses = stream.session_client.streaming_detect_intent(requests)
print("3")
last_response = False
try:
for response in responses:
#print("Response size", len(response.recognition_result.transcript))
self.log.info('Intermediate transcipt: "{}".'.format(response.recognition_result.transcript.encode('utf-8')))
last_response = response
except:
self.log.error("Couldn't print responses.")
return
if last_response:
# Note: The result from the last response is the final transcript along with the detected content.
query_result = last_response.query_result
#self.audio_chunk_queue.put(None)
#rospy.loginfo("Pepper going to say: {}".format(query_result.fulfillment_text.encode('utf-8')))
self.log.info('Pepper going to say: ' + str(query_result.fulfillment_text.encode('utf-8')))
if query_result.query_text == "":
self.log.info("Empty query_text")
#return
#self.publish_response(query_result) # ROS Publish information
#self.say_action_client(query_result) # TEXT-TO-SPEECH ROS Action Client
def read(self):
if self.audio_chunk_queue.qsize() == 0:
return False
"""
Set configuration of the audio that will be sent to DialogFlow (e.g. language, sample_rate).
"""
def configure_dialogflow(self):
audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16 # uncompressed 16-bit signed little-endian samples
sample_rate_hertz = 16000
#####
#print(os.getcwd())
# TODO - lägg till från launch file
#google_application_credentials = "/home/daniel/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json" #rospy.get_param('~google_application_credentials') # Samla ihop alla get_param() till en funktion så de är samlade. Enklare att debugga när man ser alla parametrar som en ros nod använder.
google_application_credentials = "/workspace/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"
self.default_language = "en"
####
return self.audio_chunk_queue.get_nowait()
credentials = service_account.Credentials.from_service_account_file(google_application_credentials)
project_id = credentials.project_id # TODO: Rensa bort alla onödiga self variabler. fixat
self.log.info("AGENT'S PROJECT ID: " + str(project_id))
session_id = "some_session" # Id of the session, using the same `session_id` between requests allows continuation of the conversation
self.session_client = dialogflow.SessionsClient(credentials=credentials)
self.session_path = self.session_client.session_path(project_id, session_id)
self.audio_config = dialogflow.types.InputAudioConfig( audio_encoding=audio_encoding,
language_code=self.default_language,
sample_rate_hertz=sample_rate_hertz)
""" Publishes all necessary information from Dialogflow to a single topic. """
""" Publishes all necessary information from Dialogflow to a single topic. """
def publish_response(self, query_result):
dialogflow_response_msg = DialogflowResponse()
# Talsyntes, tablet:
@@ -249,38 +173,7 @@
    def audio_cb(self, audio_msg):
        # Guard against unbounded growth; flush if the queue gets too large.
        if self.audio_chunk_queue.qsize() < self.max_queue_size:
            self.audio_chunk_queue.put(audio_msg.data.tobytes())
        else:
            self.log.warn("Audio chunk queue is full, flushing!")
            self.audio_chunk_queue = queue.Queue()
            self.audio_chunk_queue.put(audio_msg.data.tobytes())
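    # A bounded queue could enforce max_queue_size directly instead of flushing
    # by hand. A sketch of one possible drop-oldest policy (not the behaviour
    # of the callback above):
    '''
    self.audio_chunk_queue = queue.Queue(maxsize=self.max_queue_size)

    def audio_cb(self, audio_msg):
        try:
            self.audio_chunk_queue.put_nowait(audio_msg.data.tobytes())
        except queue.Full:
            self.log.warn("Audio chunk queue is full, dropping oldest chunk")
            self.audio_chunk_queue.get_nowait()  # discard the oldest chunk
            self.audio_chunk_queue.put_nowait(audio_msg.data.tobytes())
    '''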
    '''
    msg = Response()
@@ -328,18 +221,89 @@ class AudioQueue(Node):
    msg.parameters.append(Parameter(key="location", value="Kitchen"))
    self.response_pub.publish(msg)
    """
    def audio_stream_request_generator(self):
        # ----------- Trigger intents with audio (InputAudioConfig).
        # The query input specification instructs the speech recognizer how to
        # process the speech audio.
        query_input = dialogflow.types.QueryInput(audio_config=self.audio_config)
        # The first request sends only the speech recognition configuration;
        # it doesn't contain input audio.
        yield dialogflow.types.StreamingDetectIntentRequest(session=self.session_path,
                                                            query_input=query_input,
                                                            single_utterance=True)
        self.log.info("in generator")
        while not self.closed:
            rclpy.spin_some(self)
            self.log.info(str(self.audio_chunk_queue.qsize()))
            chunk = self.audio_chunk_queue.get_nowait()
            self.log.info(str(chunk is None))
            if chunk is None:
                self.log.info("Chunk = None, return")
                return
            yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
        self.log.info("out of loop")
        return

    def detect_intent_stream(self):
        while rclpy.ok():
            self.log.info("new open")
            with self as stream:
                rclpy.spin_once(self)
                requests = stream.audio_stream_request_generator()
                responses = stream.session_client.streaming_detect_intent(requests)
                last_response = False
                try:
                    for response in responses:
                        self.log.info('Intermediate transcript: "{}".'.format(
                            response.recognition_result.transcript))
                        last_response = response
                except Exception:
                    self.log.error("Couldn't print responses.")
                    return
                if last_response:
                    # Note: the result from the last response is the final
                    # transcript along with the detected content.
                    query_result = last_response.query_result
                    self.log.info('Pepper going to say: ' + str(query_result.fulfillment_text))
                    if query_result.query_text == "":
                        self.log.info("Empty query_text")
                    #self.publish_response(query_result)  # ROS publish information
                    #self.say_action_client(query_result)  # text-to-speech ROS action client
"""
def main(args=None):
    rclpy.init(args=args)
    audio_buffer_node = AudioBuffer()
    dialogflow_communicator = DialogFlowCommunicator(audio_buffer_node)
    # Run the Dialogflow streaming loop in a background thread while the ROS
    # executor spins the buffer node in the main thread.
    process = Thread(target=dialogflow_communicator.detect_intent_stream)
    process.start()
    rclpy.spin(audio_buffer_node)
    audio_buffer_node.destroy_node()
    rclpy.shutdown()
    process.join()
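# An alternative arrangement is to spin the node in a daemon thread and block
# on the Dialogflow loop in the main thread instead. A sketch, equivalent in
# spirit to the version above:
'''
def main(args=None):
    rclpy.init(args=args)
    audio_buffer_node = AudioBuffer()
    dialogflow_communicator = DialogFlowCommunicator(audio_buffer_node)
    spin_thread = Thread(target=rclpy.spin, args=(audio_buffer_node,), daemon=True)
    spin_thread.start()
    dialogflow_communicator.detect_intent_stream()  # blocks until shutdown
    audio_buffer_node.destroy_node()
    rclpy.shutdown()
'''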
if __name__ == '__main__':
......