diff --git a/src/lhw_nlp/lhw_nlp/dialogflow.py b/src/lhw_nlp/lhw_nlp/dialogflow.py
index f1c59fdc6c0b328a9075bb9d9200507a45fadecc..6c3416d5427d8d664ea9f7fd3d97ade69f6c168b 100644
--- a/src/lhw_nlp/lhw_nlp/dialogflow.py
+++ b/src/lhw_nlp/lhw_nlp/dialogflow.py
@@ -14,54 +14,95 @@ import queue
 from google.oauth2 import service_account
 import os
 import time
+from threading import Thread
 
-#import scipy.io.wavfile as wavf
-
-
-#import pyaudio
-#import wave
-#import sys
-#import threading
-
-'''
-def play():
-    CHUNK = 1024
-
-    wf = wave.open("out.wav", 'rb')
-
-    p = pyaudio.PyAudio()
-
-    print(wf.getsampwidth())
-
-    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
-                    channels=wf.getnchannels(),
-                    rate=wf.getframerate(),
-                    output=True)
-
-    data = wf.readframes(CHUNK)
-
-    while data != '':
-        stream.write(data)
-        data = wf.readframes(CHUNK)
-
-    stream.stop_stream()
-    stream.close()
-
-    p.terminate()
-'''
+class DialogFlowCommunicator:
+    def __init__(self, audio_buffer_node):
+        self.configure_dialogflow()
+        self.audio_buffer = audio_buffer_node
+
+    def configure_dialogflow(self):
+        """Set the configuration of the audio that will be sent to Dialogflow (e.g. language, sample rate)."""
+        audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16  # uncompressed 16-bit signed little-endian samples
+        sample_rate_hertz = 16000
+
+        # TODO: read this from a launch parameter, and gather all get_param() calls in one
+        # function; it is easier to debug when every parameter a ROS node uses is visible in one place.
+        google_application_credentials = "/home/daniel/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"  # rospy.get_param('~google_application_credentials')
+        #google_application_credentials = "/workspace/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"
+        self.default_language = "en"
+
+        credentials = service_account.Credentials.from_service_account_file(google_application_credentials)
+        project_id = credentials.project_id
+        print("AGENT'S PROJECT ID: " + str(project_id))
+        session_id = "some_session"  # Id of the session; using the same `session_id` between requests allows continuation of the conversation
+
+        self.session_client = dialogflow.SessionsClient(credentials=credentials)
+        self.session_path = self.session_client.session_path(project_id, session_id)
+        self.audio_config = dialogflow.types.InputAudioConfig(audio_encoding=audio_encoding,
+                                                              language_code=self.default_language,
+                                                              sample_rate_hertz=sample_rate_hertz)
 
-class DialogFlowCommunicator:
-    def __init__(self):
+    def request_generator(self):
+        query_input = dialogflow.types.QueryInput(audio_config=self.audio_config)
+        # The first request contains only the configuration, no audio.
+        yield dialogflow.types.StreamingDetectIntentRequest(
+            session=self.session_path, query_input=query_input
+        )
+        # Subsequent chunks come from the AudioBuffer node, which is fed by the
+        # microphone topic (not from a local audio file as in the API sample).
+        with self.audio_buffer as audio_buffer:
+            while True:
+                chunk = audio_buffer.read()
+                if not chunk:
+                    break
+                # The subsequent requests carry the audio data.
+                yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
 
-class AudioQueue(Node):
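+    # For reference, the request sequence request_generator() yields looks
+    # roughly like this (payloads are illustrative, not captured output):
+    #
+    #   StreamingDetectIntentRequest(session=..., query_input=QueryInput(audio_config=...))
+    #   StreamingDetectIntentRequest(input_audio=b'\x12\x34...')  # chunk 1
+    #   StreamingDetectIntentRequest(input_audio=b'\x56\x78...')  # chunk 2
+    #   ...until read() returns a falsy value and the stream ends.
+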
+    def detect_intent_stream(self):
+        """Returns the result of detect intent with streaming audio as input.
+
+        Using the same `session_id` between requests allows continuation
+        of the conversation."""
+        while True:
+            requests = self.request_generator()
+            responses = self.session_client.streaming_detect_intent(requests=requests)
+
+            print("=" * 20)
+            query_result = None
+            for response in responses:
+                print(
+                    'Intermediate transcript: "{}".'.format(
+                        response.recognition_result.transcript
+                    )
+                )
+                query_result = response.query_result
+
+            if query_result and query_result.query_text != "":
+                # Note: the result from the last response is the final transcript
+                # along with the detected content.
+                print("=" * 20)
+                print("Query text: {}".format(query_result.query_text))
+                print(
+                    "Detected intent: {} (confidence: {})\n".format(
+                        query_result.intent.display_name, query_result.intent_detection_confidence
+                    )
+                )
+                print("Fulfillment text: {}\n".format(query_result.fulfillment_text))
+
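+# For reference, the fields read from the final streaming response above, with
+# illustrative values (the field names are from the Dialogflow v2 API):
+#
+#   query_result.query_text                  -> "turn on the kitchen lights"
+#   query_result.intent.display_name         -> "lights.on"
+#   query_result.intent_detection_confidence -> 0.87
+#   query_result.fulfillment_text            -> "Okay, turning the lights on."
+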
+class AudioBuffer(Node):
     def __init__(self):
         #play()
         super().__init__('dialogflow')
-
-        self.audio_chunk_queue = queue.Queue() # Buffer where callback stores audio samples coming from audio_topic. Audio chunks are later removed from it and sent to Dialogflow
-        self.max_queue_size = 100*10000
+        self.audio_chunk_queue = queue.Queue()
 
         self.buffer_sub = self.create_subscription(Audio, 'microphone_buffer', self.audio_cb, 10)
         self.text_sub = self.create_subscription(String, 'text', self.text_cb, 10)
@@ -69,144 +110,27 @@ class AudioQueue(Node):
         self.response_pub = self.create_publisher(Response, 'response', 10)
         self.log = self.get_logger()
         self.log.warn(self.__class__.__name__+" has not yet been implemented.") # TODO
-
-        self.configure_dialogflow()
-
-        self.closed = True
-
-        #self.thread = threading.Thread(target=rclpy.spin, args=(self,), daemon=True)
-        #self.thread.start()
-
-        #rclpy.spin(self)
-        #while rclpy.ok():
-        #    if len(self.audio_chunk_queue) > 0:
-        #        self.detect_intent_stream()
-        #self.stream_timer = self.create_timer(0.17, self.detect_intent_stream)
-        #self.text_cb("potato")
-
-        #self.x = 0
-        #fs=48000
-        #pygame.mixer.pre_init(fs, -16, 1, 8192)
-        #pygame.mixer.init()
-
+
     def __enter__(self):
         print("Closed = False")
         self.closed = False
         return self
 
-
     def __exit__(self, type, value, traceback):
         print("closed = True")
         self.closed = True
         # Signal the generator to terminate so that the client's
         # streaming_recognize method will not block the process termination.
-        self.audio_chunk_queue.put(None)
-        #self.audio_chunk_queue = queue.Queue()
-
-    """
-    Continuosly stream audio chunks from audio_chunk_queue to Dialogflow. Request generator.
-    """
-    def audio_stream_request_generator(self):
-        # ----------- Trigg intents with audio (InputAudioConfig)
-        query_input = dialogflow.types.QueryInput(audio_config=self.audio_config) # input specification, it instructs the speech recognizer how to process the speech audio
-        #print("hejehejej")
-        # The first request sends up the configuration of the speech recognition. This request doesn't contain input audio.
-        yield dialogflow.types.StreamingDetectIntentRequest(session=self.session_path,
-                                                            query_input=query_input,
-                                                            single_utterance = True)
-        self.log.info(str("in generatr"))
-        while not self.closed:
-            #print("pop sound")
-            #self.log.info(str("asdasd"))
-            rclpy.spin_some(self)
-            #if self.audio_chunk_queue.empty():
-            self.log.info(str(self.audio_chunk_queue.qsize()))
-
-            chunk = self.audio_chunk_queue.get_nowait()
-            self.log.info(str(chunk is None))
-            #chunk = self.audio_chunk_queue.get()
-            #self.log.info(str("after loop"))
-
-            if chunk is None:
-                self.log.info("Chunk = None, return")
-                return
-
-            yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
-        self.log.info(str("out of loop"))
-        return
-
-    """ Call request generator function and get intent responses from DialogFlow. """
-    def detect_intent_stream(self):
-
-        while rclpy.ok():
-            self.log.info(str("new open"))
-            with self as stream:
-                rclpy.spin_once(self)
-
-                print("1")
-                requests = stream.audio_stream_request_generator()
-                print("2")
-                responses = stream.session_client.streaming_detect_intent(requests)
-                print("3")
-                last_response = False
-                try:
-                    for response in responses:
-                        #print("Response size", len(response.recognition_result.transcript))
-                        self.log.info('Intermediate transcipt: "{}".'.format(response.recognition_result.transcript.encode('utf-8')))
-                        last_response = response
-                except:
-                    self.log.error("Couldn't print responses.")
-                    return
-                if last_response:
-                    # Note: The result from the last response is the final transcript along with the detected content.
-                    query_result = last_response.query_result
+        #self.audio_chunk_queue.put(None)
 
-                    #rospy.loginfo("Pepper going to say: {}".format(query_result.fulfillment_text.encode('utf-8')))
-                    self.log.info('Pepper going to say: ' + str(query_result.fulfillment_text.encode('utf-8')))
-                    if query_result.query_text == "":
-
-                        self.log.info("Empty query_text")
-                        #return
-
-                    #self.publish_response(query_result) # ROS Publish information
-                    #self.say_action_client(query_result) # TEXT-TO-SPEECH ROS Action Client
-
+    def read(self):
+        """Pop one audio chunk without blocking; returns False when the buffer is empty."""
+        try:
+            return self.audio_chunk_queue.get_nowait()
+        except queue.Empty:
+            return False
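+
+    # The chunks handed to Dialogflow are the raw bytes queued by audio_cb
+    # below; with the LINEAR16 config above they must be 16-bit signed
+    # little-endian mono at 16 kHz. Publisher-side sketch (illustrative only,
+    # assumes a numpy int16 frame from the microphone node):
+    #
+    #   samples = np.asarray(frame, dtype=np.int16)  # one 16 kHz mono frame
+    #   pub.publish(Audio(data=samples))             # arrives in audio_cb
+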
-    """
-    Set configuration of the audio that will be sent to DialogFlow (e.g. language, sample_rate).
-    """
-    def configure_dialogflow(self):
-        audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16 # uncompressed 16-bit signed little-endian samples
-        sample_rate_hertz = 16000
-
-        #####
-        #print(os.getcwd())
-        # TODO - lägg till från launch file
-        #google_application_credentials = "/home/daniel/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json" #rospy.get_param('~google_application_credentials') # Samla ihop alla get_param() till en funktion så de är samlade. Enklare att debugga när man ser alla parametrar som en ros nod använder.
-        google_application_credentials = "/workspace/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"
-        self.default_language = "en"
-        ####
-
-        credentials = service_account.Credentials.from_service_account_file(google_application_credentials)
-        project_id = credentials.project_id # TODO: Rensa bort alla onödiga self variabler. fixat
-        self.log.info("AGENT'S PROJECT ID: " + str(project_id))
-        session_id = "some_session" # Id of the session, using the same `session_id` between requests allows continuation of the conversation
-
-        self.session_client = dialogflow.SessionsClient(credentials=credentials)
-        self.session_path = self.session_client.session_path(project_id, session_id)
-        self.audio_config = dialogflow.types.InputAudioConfig( audio_encoding=audio_encoding,
-                                                               language_code=self.default_language,
-                                                               sample_rate_hertz=sample_rate_hertz)
-
-    """ Publishes all necessary information from Dialogflow to a single topic. """
+    """ Publishes all necessary information from Dialogflow to a single topic. """
     def publish_response(self, query_result):
         dialogflow_response_msg = DialogflowResponse()
         # Talsyntes, tablet:
@@ -249,38 +173,7 @@ class AudioQueue(Node):
 
     def audio_cb(self, audio_msg):
-        #print("Got!")
-        #self.log.info(audio_msg.data)
-        #self.log.error("Text callback in Dialogflow has not yet been implemented.") # TODO
-        if True:
-            #print(str(audio_msg.data.itemsize))
-            #print(str(audio_msg.data))
-            #print(len(str(audio_msg.data)))
-            #sound_data_interlaced = np.fromstring(str(audio_msg.data), dtype=np.int16)
-            #print(sound_data_interlaced.shape)
-            #sound_data = np.reshape(sound_data_interlaced, (4, 48000), 'F')
-            #print(sound_data.shape)
-            #self.audio_chunk_queue.append(sound_data[0].tostring())
-            #print(sound_data[0].tostring())
-            #self.f = open("demofile2.txt", "a")
-            #for bit in audio_msg.data:
-            #    self.f.write(str(bit))
-            #self.f.write("\n")
-            #self.f.close()
-            #self.log.info(str(audio_msg.data))
-            #print(bytearray(audio_msg.data.encode()))
-            #print("\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n")
-            #print(str(bytearray(audio_msg.data)))
-            #print(type(str(bytearray(audio_msg.data))))
-            # HÄÄR!!!
-            #print("put sound")
-            #print(audio_msg.data.tobytes())
-            self.audio_chunk_queue.put(audio_msg.data.tobytes())
-        else:
-            self.log.warn("Audio chunk queue is full, flushing!")
-            #print(self.audio_chunk_queue)
-            self.audio_chunk_queue = queue.Queue()
-
+        self.audio_chunk_queue.put(audio_msg.data.tobytes())
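+        # Threading note: audio_cb runs on the rclpy spin thread (see main()),
+        # while read() is polled from the Dialogflow thread; queue.Queue is
+        # thread-safe, so this handoff needs no extra locking.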
 
     '''
     msg = Response()
@@ -328,18 +221,89 @@ class AudioQueue(Node):
         msg.parameters.append(Parameter(key="location", value="Kitchen"))
         self.response_pub.publish(msg)
+    """
+    def audio_stream_request_generator(self):
+        # ----------- Trigg intents with audio (InputAudioConfig)
+        query_input = dialogflow.types.QueryInput(audio_config=self.audio_config) # input specification, it instructs the speech recognizer how to process the speech audio
+        #print("hejehejej")
+        # The first request sends up the configuration of the speech recognition. This request doesn't contain input audio.
+        yield dialogflow.types.StreamingDetectIntentRequest(session=self.session_path,
+                                                            query_input=query_input,
+                                                            single_utterance = True)
+        self.log.info(str("in generatr"))
+        while not self.closed:
+            #print("pop sound")
+            #self.log.info(str("asdasd"))
+            rclpy.spin_some(self)
+            #if self.audio_chunk_queue.empty():
+            self.log.info(str(self.audio_chunk_queue.qsize()))
+
+            chunk = self.audio_chunk_queue.get_nowait()
+            self.log.info(str(chunk is None))
+            #chunk = self.audio_chunk_queue.get()
+            #self.log.info(str("after loop"))
+
+            if chunk is None:
+                self.log.info("Chunk = None, return")
+                return
+
+            yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
+        self.log.info(str("out of loop"))
+        return
+
+    def detect_intent_stream(self):
+
+        while rclpy.ok():
+            self.log.info(str("new open"))
+            with self as stream:
+                rclpy.spin_once(self)
+
+                print("1")
+                requests = stream.audio_stream_request_generator()
+                print("2")
+                responses = stream.session_client.streaming_detect_intent(requests)
+                print("3")
+                last_response = False
+                try:
+                    for response in responses:
+                        #print("Response size", len(response.recognition_result.transcript))
+                        self.log.info('Intermediate transcipt: "{}".'.format(response.recognition_result.transcript.encode('utf-8')))
+                        last_response = response
+                except:
+                    self.log.error("Couldn't print responses.")
+                    return
+                if last_response:
+                    # Note: The result from the last response is the final transcript along with the detected content.
+                    query_result = last_response.query_result
+
+                    #rospy.loginfo("Pepper going to say: {}".format(query_result.fulfillment_text.encode('utf-8')))
+                    self.log.info('Pepper going to say: ' + str(query_result.fulfillment_text.encode('utf-8')))
+                    if query_result.query_text == "":
+
+                        self.log.info("Empty query_text")
+                        #return
+
+                    #self.publish_response(query_result) # ROS Publish information
+                    #self.say_action_client(query_result) # TEXT-TO-SPEECH ROS Action Client
+
+    """
+
 def main(args=None):
     rclpy.init(args=args)
-    dialogflow_node = DialogFlowCommunicator()
-    dialogflow_node.detect_intent_stream()
-    #rclpy.spin(dialogflow_node)
-    #thread = threading.Thread(target=rclpy.spin, args=(dialogflow_node,), daemon=True)
-    #thread.start()
-    #dialogflow_node.thread.join()
-    #dialogflow_node.file.close()
-    dialogflow_node.destroy_node()
+
+    audio_buffer_node = AudioBuffer()
+    dialogflow_communicator = DialogFlowCommunicator(audio_buffer_node)
+
+    # Daemon thread: detect_intent_stream() loops forever, so it must not
+    # block interpreter shutdown once spinning stops.
+    dialogflow_thread = Thread(target=dialogflow_communicator.detect_intent_stream, daemon=True)
+    dialogflow_thread.start()
+    rclpy.spin(audio_buffer_node)
+
+    audio_buffer_node.destroy_node()
     rclpy.shutdown()
-    #thread.join()
 
 
 if __name__ == '__main__':