Commit 76c3fe58 authored by Daniel Engelsons's avatar Daniel Engelsons

Untested attempt at Dialogflow integration

parent 3eca7e12
@@ -14,54 +14,95 @@ import queue
from google.oauth2 import service_account
import os
import time
from threading import Thread
#import scipy.io.wavfile as wavf
#import pyaudio
#import wave
#import sys
#import threading
'''
def play():
    CHUNK = 1024
    wf = wave.open("out.wav", 'rb')
    p = pyaudio.PyAudio()
    print(wf.getsampwidth())
    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(),
                    rate=wf.getframerate(),
                    output=True)
    data = wf.readframes(CHUNK)
    while data != b'':  # readframes returns bytes, so compare against b'' (not '')
        stream.write(data)
        data = wf.readframes(CHUNK)
    stream.stop_stream()
    stream.close()
    p.terminate()
'''
class DialogFlowCommunicator:
    def __init__(self, audio_buffer_node):
        self.configure_dialogflow()
        self.audio_buffer = audio_buffer_node

    def configure_dialogflow(self):
        """
        Set configuration of the audio that will be sent to Dialogflow
        (e.g. language, sample_rate).
        """
        # Uncompressed 16-bit signed little-endian samples.
        audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16
        sample_rate_hertz = 16000
        # TODO: Gather all get_param() calls in one function so they are in one
        # place; easier to debug when you can see every parameter a ROS node uses.
        google_application_credentials = "/home/daniel/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"  # rospy.get_param('~google_application_credentials')
        #google_application_credentials = "/workspace/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"
        self.default_language = "en"
        credentials = service_account.Credentials.from_service_account_file(google_application_credentials)
        project_id = credentials.project_id
        print("AGENT'S PROJECT ID: " + str(project_id))
        # Reusing the same `session_id` between requests allows continuation
        # of the conversation.
        session_id = "some_session"
        self.session_client = dialogflow.SessionsClient(credentials=credentials)
        self.session_path = self.session_client.session_path(project_id, session_id)
        self.audio_config = dialogflow.types.InputAudioConfig(
            audio_encoding=audio_encoding,
            language_code=self.default_language,
            sample_rate_hertz=sample_rate_hertz)
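    # For a quick sanity check of the credentials and session path above, a
    # one-shot text query can be run before attempting audio streaming. A
    # minimal sketch, assuming the same v1 `dialogflow` client API used in this
    # file; the query text and credentials filename are placeholders:
    '''
    credentials = service_account.Credentials.from_service_account_file("cred.json")
    session_client = dialogflow.SessionsClient(credentials=credentials)
    session_path = session_client.session_path(credentials.project_id, "some_session")
    text_input = dialogflow.types.TextInput(text="hello", language_code="en")
    query_input = dialogflow.types.QueryInput(text=text_input)
    response = session_client.detect_intent(session=session_path, query_input=query_input)
    print(response.query_result.fulfillment_text)
    '''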
    def request_generator(self):
        query_input = dialogflow.types.QueryInput(audio_config=self.audio_config)
        # The first request contains the configuration.
        yield dialogflow.types.StreamingDetectIntentRequest(
            session=self.session_path, query_input=query_input
        )
        # The audio chunks come from the AudioBuffer node, which fills its
        # queue from the microphone topic (the Dialogflow sample this is based
        # on reads them from a local audio file instead).
        with self.audio_buffer as audio_buffer:
            while True:
                chunk = audio_buffer.read()
                if not chunk:
                    break
                # The later requests contain the audio data.
                yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
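    # For offline testing, the same first-config-then-audio request pattern can
    # be fed from a WAV file instead of the microphone topic. A minimal sketch,
    # assuming a 16 kHz 16-bit mono file; `file_request_generator` and
    # `test.wav` are hypothetical names:
    '''
    import wave

    def file_request_generator(self, path="test.wav"):
        query_input = dialogflow.types.QueryInput(audio_config=self.audio_config)
        # First request: configuration only.
        yield dialogflow.types.StreamingDetectIntentRequest(
            session=self.session_path, query_input=query_input)
        with wave.open(path, 'rb') as wf:
            while True:
                chunk = wf.readframes(4096)
                if not chunk:  # readframes returns b'' at end of file
                    break
                yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
    '''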
    def detect_intent_stream(self):
        """Returns the result of detect intent with streaming audio as input.
        Using the same `session_id` between requests allows continuation
        of the conversation."""
        while True:
            requests = self.request_generator()
            responses = self.session_client.streaming_detect_intent(requests=requests)
            print("=" * 20)
            for response in responses:
                print('Intermediate transcript: "{}".'.format(
                    response.recognition_result.transcript))
                query_result = response.query_result
                if query_result.query_text != "":
                    # Note: the result from the last response is the final
                    # transcript along with the detected content.
                    print("=" * 20)
                    print("Query text: {}".format(query_result.query_text))
                    print("Detected intent: {} (confidence: {})\n".format(
                        query_result.intent.display_name,
                        query_result.intent_detection_confidence))
                    print("Fulfillment text: {}\n".format(query_result.fulfillment_text))
class AudioBuffer(Node):
    def __init__(self):
        super().__init__('dialogflow')
        # Buffer where the callback stores audio samples coming from the
        # microphone topic; chunks are later popped from it and sent to
        # Dialogflow.
        self.audio_chunk_queue = queue.Queue()
        self.max_queue_size = 100 * 10000
        self.buffer_sub = self.create_subscription(Audio, 'microphone_buffer', self.audio_cb, 10)
        self.text_sub = self.create_subscription(String, 'text', self.text_cb, 10)
@@ -69,144 +110,27 @@ class AudioQueue(Node):
        self.response_pub = self.create_publisher(Response, 'response', 10)
        self.log = self.get_logger()
        self.log.warn(self.__class__.__name__ + " has not yet been implemented.")  # TODO
        self.closed = True
    def __enter__(self):
        self.closed = False
        return self

    def __exit__(self, type, value, traceback):
        self.closed = True
        # Signal the generator to terminate so that the client's
        # streaming_detect_intent call will not block process termination.
        self.audio_chunk_queue.put(None)
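    # The `None` put into the queue by __exit__ acts as a sentinel: whichever
    # generator is draining the queue treats it as a stop signal. A
    # self-contained sketch of the pattern, using only the standard library:
    '''
    import queue

    q = queue.Queue()
    q.put(b"chunk-1")
    q.put(None)  # sentinel: tells the consumer to stop

    while True:
        item = q.get()
        if item is None:
            break
        print("got", item)  # prints: got b'chunk-1'
    '''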
"""
Continuosly stream audio chunks from audio_chunk_queue to Dialogflow. Request generator.
"""
def audio_stream_request_generator(self):
# ----------- Trigg intents with audio (InputAudioConfig)
query_input = dialogflow.types.QueryInput(audio_config=self.audio_config) # input specification, it instructs the speech recognizer how to process the speech audio
#print("hejehejej")
# The first request sends up the configuration of the speech recognition. This request doesn't contain input audio.
yield dialogflow.types.StreamingDetectIntentRequest(session=self.session_path,
query_input=query_input,
single_utterance = True)
self.log.info(str("in generatr"))
while not self.closed:
#print("pop sound")
#self.log.info(str("asdasd"))
rclpy.spin_some(self)
#if self.audio_chunk_queue.empty():
self.log.info(str(self.audio_chunk_queue.qsize()))
chunk = self.audio_chunk_queue.get_nowait()
self.log.info(str(chunk is None))
#chunk = self.audio_chunk_queue.get()
#self.log.info(str("after loop"))
if chunk is None:
self.log.info("Chunk = None, return")
return
yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
self.log.info(str("out of loop"))
return
""" Call request generator function and get intent responses from DialogFlow. """
def detect_intent_stream(self):
while rclpy.ok():
self.log.info(str("new open"))
with self as stream:
rclpy.spin_once(self)
print("1")
requests = stream.audio_stream_request_generator()
print("2")
responses = stream.session_client.streaming_detect_intent(requests)
print("3")
last_response = False
try:
for response in responses:
#print("Response size", len(response.recognition_result.transcript))
self.log.info('Intermediate transcipt: "{}".'.format(response.recognition_result.transcript.encode('utf-8')))
last_response = response
except:
self.log.error("Couldn't print responses.")
return
if last_response:
# Note: The result from the last response is the final transcript along with the detected content.
query_result = last_response.query_result
#self.audio_chunk_queue.put(None)
#rospy.loginfo("Pepper going to say: {}".format(query_result.fulfillment_text.encode('utf-8')))
self.log.info('Pepper going to say: ' + str(query_result.fulfillment_text.encode('utf-8')))
if query_result.query_text == "":
self.log.info("Empty query_text")
#return
#self.publish_response(query_result) # ROS Publish information
#self.say_action_client(query_result) # TEXT-TO-SPEECH ROS Action Client
def read(self):
if self.audio_chunk_queue.qsize() == 0:
return False
"""
Set configuration of the audio that will be sent to DialogFlow (e.g. language, sample_rate).
"""
def configure_dialogflow(self):
audio_encoding = dialogflow.enums.AudioEncoding.AUDIO_ENCODING_LINEAR_16 # uncompressed 16-bit signed little-endian samples
sample_rate_hertz = 16000
#####
#print(os.getcwd())
# TODO - lägg till från launch file
#google_application_credentials = "/home/daniel/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json" #rospy.get_param('~google_application_credentials') # Samla ihop alla get_param() till en funktion så de är samlade. Enklare att debugga när man ser alla parametrar som en ros nod använder.
google_application_credentials = "/workspace/liu-home-wreckers/src/lhw_nlp/lhw_nlp/cred.json"
self.default_language = "en"
####
return self.audio_chunk_queue.get_nowait()
credentials = service_account.Credentials.from_service_account_file(google_application_credentials)
project_id = credentials.project_id # TODO: Rensa bort alla onödiga self variabler. fixat
self.log.info("AGENT'S PROJECT ID: " + str(project_id))
session_id = "some_session" # Id of the session, using the same `session_id` between requests allows continuation of the conversation
self.session_client = dialogflow.SessionsClient(credentials=credentials)
self.session_path = self.session_client.session_path(project_id, session_id)
self.audio_config = dialogflow.types.InputAudioConfig( audio_encoding=audio_encoding,
language_code=self.default_language,
sample_rate_hertz=sample_rate_hertz)
""" Publishes all necessary information from Dialogflow to a single topic. """
""" Publishes all necessary information from Dialogflow to a single topic. """
def publish_response(self, query_result):
dialogflow_response_msg = DialogflowResponse()
# Talsyntes, tablet:
@@ -249,38 +173,7 @@
    def audio_cb(self, audio_msg):
        # Guard against unbounded growth; flush if the queue gets too large.
        if self.audio_chunk_queue.qsize() < self.max_queue_size:
            self.audio_chunk_queue.put(audio_msg.data.tobytes())
        else:
            self.log.warn("Audio chunk queue is full, flushing!")
            self.audio_chunk_queue = queue.Queue()
            self.audio_chunk_queue.put(audio_msg.data.tobytes())
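    # A bounded queue could enforce max_queue_size directly instead of flushing
    # by hand. A sketch of one possible drop-oldest policy (not the behaviour
    # of the callback above):
    '''
    self.audio_chunk_queue = queue.Queue(maxsize=self.max_queue_size)

    def audio_cb(self, audio_msg):
        try:
            self.audio_chunk_queue.put_nowait(audio_msg.data.tobytes())
        except queue.Full:
            self.log.warn("Audio chunk queue is full, dropping oldest chunk")
            self.audio_chunk_queue.get_nowait()  # discard the oldest chunk
            self.audio_chunk_queue.put_nowait(audio_msg.data.tobytes())
    '''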
    '''
    msg = Response()
@@ -328,18 +221,89 @@ class AudioQueue(Node):
    msg.parameters.append(Parameter(key="location", value="Kitchen"))
    self.response_pub.publish(msg)
    """
    def audio_stream_request_generator(self):
        # ----------- Trigger intents with audio (InputAudioConfig).
        # The query input specification instructs the speech recognizer how to
        # process the speech audio.
        query_input = dialogflow.types.QueryInput(audio_config=self.audio_config)
        # The first request sends only the speech recognition configuration;
        # it doesn't contain input audio.
        yield dialogflow.types.StreamingDetectIntentRequest(session=self.session_path,
                                                            query_input=query_input,
                                                            single_utterance=True)
        self.log.info("in generator")
        while not self.closed:
            rclpy.spin_some(self)
            self.log.info(str(self.audio_chunk_queue.qsize()))
            chunk = self.audio_chunk_queue.get_nowait()
            self.log.info(str(chunk is None))
            if chunk is None:
                self.log.info("Chunk = None, return")
                return
            yield dialogflow.types.StreamingDetectIntentRequest(input_audio=chunk)
        self.log.info("out of loop")
        return

    def detect_intent_stream(self):
        while rclpy.ok():
            self.log.info("new open")
            with self as stream:
                rclpy.spin_once(self)
                requests = stream.audio_stream_request_generator()
                responses = stream.session_client.streaming_detect_intent(requests)
                last_response = False
                try:
                    for response in responses:
                        self.log.info('Intermediate transcript: "{}".'.format(
                            response.recognition_result.transcript))
                        last_response = response
                except Exception:
                    self.log.error("Couldn't print responses.")
                    return
                if last_response:
                    # Note: the result from the last response is the final
                    # transcript along with the detected content.
                    query_result = last_response.query_result
                    self.log.info('Pepper going to say: ' + str(query_result.fulfillment_text))
                    if query_result.query_text == "":
                        self.log.info("Empty query_text")
                    #self.publish_response(query_result)  # ROS publish information
                    #self.say_action_client(query_result)  # text-to-speech ROS action client
"""
def main(args=None):
    rclpy.init(args=args)
    audio_buffer_node = AudioBuffer()
    dialogflow_communicator = DialogFlowCommunicator(audio_buffer_node)
    # Run the Dialogflow streaming loop in a background thread while the ROS
    # executor spins the buffer node in the main thread.
    process = Thread(target=dialogflow_communicator.detect_intent_stream)
    process.start()
    rclpy.spin(audio_buffer_node)
    audio_buffer_node.destroy_node()
    rclpy.shutdown()
    process.join()
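# An alternative arrangement is to spin the node in a daemon thread and block
# on the Dialogflow loop in the main thread instead. A sketch, equivalent in
# spirit to the version above:
'''
def main(args=None):
    rclpy.init(args=args)
    audio_buffer_node = AudioBuffer()
    dialogflow_communicator = DialogFlowCommunicator(audio_buffer_node)
    spin_thread = Thread(target=rclpy.spin, args=(audio_buffer_node,), daemon=True)
    spin_thread.start()
    dialogflow_communicator.detect_intent_stream()  # blocks until shutdown
    audio_buffer_node.destroy_node()
    rclpy.shutdown()
'''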
if __name__ == '__main__':
......