spr

2026-06-19 08:12:36 +02:00 · 2018-08-27 11:45:22 +02:00
parent 8016a3dc92
commit ae5c3ba538
4 changed files with 177 additions and 18 deletions
@@ -5,6 +5,8 @@ from pydub import AudioSegment
 def save_audio(filename, base64_string):
    decoded = None
    orig_filename = filename[:-4]+"_orig"
+    with open("latestBase64","wb") as f:
+        f.write(base64_string)
    try:
        decoded = base64.b64decode(base64_string)
    except TypeError:
@@ -16,9 +18,10 @@ def save_audio(filename, base64_string):
    AudioSegment.from_file(orig_filename).export(filename,format="wav")
    return b"SUCCESS"

-def save_audio_chain(filenames, base64_strings):
-    compleAudio = None
-    for fname in filenames:
+def save_audio_chain(file_str_tupels):
+    completeAudio = None
+    for fname, base64_string in file_str_tupels:
+        print("Filename: {}".format(fname))
        decoded = None
        orig_filename = fname[:-4]+"_orig"
        try:
@@ -27,14 +30,32 @@ def save_audio_chain(filenames, base64_strings):
            return b"ERROR_INVALID_ENCODING_64"
        with open(orig_filename,"wb") as f:
            f.write(decoded)
-        if compleAudio == None:
+        if completeAudio == None:
            completeAudio = AudioSegment.from_file(orig_filename)
        else:
-            completeAudio += [AudioSegment.from_file(orig_filename)]
-    completeAudio.export(filenames[0],format="wav")
+            completeAudio += AudioSegment.from_file(orig_filename)
+    if not completeAudio:
+        return b"ERROR_AUDIO_CONCAT_FAILED"
+    else:
+        completeAudio.export(file_str_tupels[0][0],format="wav")
+        return b"SUCCESS"

 def save_transcript(filename, transcript):
    """Write ``transcript`` to the sidecar file ``filename + "_transcript"``.
    The file is opened in text mode "w", so an existing transcript file is
    replaced. Returns None.
    """
    # A former ``if os.path.isfile(filename): pass`` guard did nothing and was
    # dropped; the transcript is written whether or not ``filename`` exists.
    with open(filename + "_transcript","w") as f:
        f.write(transcript)
+
+def get_transcript(filename):
+    """Return the transcript stored for ``filename``, or None if there is none.
+
+    The transcript is read from the sidecar file ``filename + "_transcript"``.
+    None is returned when ``filename`` itself is not an existing file, or when
+    the sidecar file does not exist.
+    """
+    if not os.path.isfile(filename):
+        return None
+    try:
+        with open(filename + "_transcript","r") as f:
+            return f.read()
+    except FileNotFoundError:
+        # ``filename`` exists but no transcript file has been written for it;
+        # report "no transcript" the same way as a missing ``filename``
+        # instead of raising.
+        return None
+
+def filelist():
+    """Placeholder for a file listing; currently always returns an empty string."""
+    listing = ""  # nothing is collected yet
+    return listing
+
+def fileinfo(filename):
+    """Placeholder for per-file information; ``filename`` is ignored and "" is returned."""
+    info = ""  # no information is gathered yet
+    return info
+
+def copy_to_output(filename):
+    """Placeholder: nothing is copied; ``filename`` is ignored and "" is returned."""
+    result = ""  # no copy is performed yet
+    return result
@@ -5,15 +5,34 @@ MAIN_DIR = b"data/"

 def parse_request(data):
    ''' parse request and call correct function '''
-    
+    #return b"DUMMY"
+    print(data.split(b",")[0])
    # echo/test connection #
    cleared_data = is_data_type(b"ECHOREQUEST,",data)
    if cleared_data:
        return cleared_data
+
+    # reply transcript #
+    cleared_data = is_data_type(b"GET_TRANSCRIPT,",data)
+    if cleared_data:
+        filename = data.decode("utf-8")
+        return filesystem.get_transcript().encode("utf-8")
+
+    # get single file info #
+    cleared_data = is_data_type(b"GET_FILEINFO,",data)
+    if cleared_data:
+        filename = data.decode("utf-8")
+        return filesystem.fileinfo(filename).encode("utf-8")
+
+    # get info for all files #
+    cleared_data = is_data_type(b"GET_FILEINFO_ALL,",data)
+    if cleared_data:
+        return filesystem.filelist().encode("utf-8")
    
    # handle audio transmission #
    cleared_data = is_data_type(b"AUDIO_TRANSMISSION,",data)
    if cleared_data:
+        print("Handling audio transmission")
        filename = None
        try:
            filename, base64_string = cleared_data.split(b',')
@@ -27,17 +46,20 @@ def parse_request(data):

    # handle a chain of audiotransmissions #
    if data.startswith(b"CHAIN_AUDIO_TRANSMISSION"):
-        files = []
-        base64_strings = []
-        for el in data.split(b"|"):
+        file_str_tuples = []
+        arr = data.split(b"|")
+        for el in arr[1:-1]:
            filename, base64_string = el.split(b',')
            filename = MAIN_DIR + filename.split(b"/")[-1] + b".wav"
            filename = filename.decode("utf-8")
-            files += [filename]
-            base64_strings += [base64_string]
-        filesystem.save_audio_chain(files,base64_string);
-        speech.async_create_transcript(files[0])
-        return b"SUCCESS"
+            file_str_tuples += [(filename,base64_string)]
+
+        if len(file_str_tuples) < 2: # a chain has 2 or more elements
+            return bytes("ERROR_INVALID_NUMBER_FILES_{}".format(len(file_str_tuples)),"utf-8")
+
+        ret = filesystem.save_audio_chain(file_str_tuples);
+        speech.async_create_transcript(file_str_tuples[0][0])
+        return ret
        

    # other shit
@@ -80,3 +102,4 @@ def recive_transcribe_request(audiofile):

 def android_unittest_transcribe_request(audiofile):
    ''' Entry point for requests from the Android unit tests, which append a special keyword; such requests are meant to be dummy handled. '''
+    # Not implemented yet: ``audiofile`` is ignored and None is returned.
+    pass
@@ -4,7 +4,11 @@ import os.path
 import filesystem
 import log

+# Backend switches read by analyse(): USE_FREE is checked first and selects
+# free_google_backend(); otherwise USE_PAID selects paid_google_backend().
+# NOTE(review): with both set to False analyse() assigns no result before its
+# final ``return string`` — confirm that combination is never used.
+USE_FREE=False
+USE_PAID=True
+
 def async_create_transcript(filename):
+    """Start an ``mp.Process`` that runs ``create_and_save_transcript(filename)``.
+
+    The process is started but not joined here, so this call does not wait
+    for the transcript to be finished. Returns None.
+    """
+    # NOTE(review): ``mp`` is presumably the multiprocessing module; its
+    # import is outside the visible part of the file — confirm.
+    print("Creating transcript..")
    mp.Process(target=create_and_save_transcript,args=(filename,)).start()

 def create_and_save_transcript(filename):
@@ -18,12 +22,21 @@ def analyse(filename):
        audio = recognizer.record(source)

    try:
-        string = recognizer.recognize_google(audio,language="de-DE")
+        if USE_FREE:
+            string = free_google_backend(recognizer, audio)
+        elif USE_PAID:
+            string = paid_google_backend(recognizer,audio)
    except spr.UnknownValueError:
        log.log("Audio file is broken or not an audio file")
-        return None
+        return "ERROR_AUDIO_FILE_INVALID"
    except spr.RequestError as e:
        log.log("Could not connect to google API: {}".format(e))
-        return None
+        return "ERROR_API_FAILURE"

    return string
+
+def free_google_backend(recognizer, audio):
+    """Return ``recognizer.recognize_google(audio)`` with the language fixed to de-DE.
+
+    Exceptions raised by the recognizer are not caught here; they propagate
+    to the caller.
+    """
+    transcript = recognizer.recognize_google(audio, language="de-DE")
+    return transcript
+
+def paid_google_backend(recognizer, audio):
+    """Placeholder for the paid Google backend; not implemented yet.
+
+    Both arguments are ignored and None is returned.
+    """
+    # NOTE(review): analyse() selects this backend when USE_FREE is False and
+    # USE_PAID is True, so it then returns None instead of a transcript.
+    return None
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Speech API sample application using the REST API for async
+batch processing.
+
+Example usage:
+    python transcribe_async.py resources/audio.raw
+    python transcribe_async.py gs://cloud-samples-tests/speech/vr.flac
+"""
+
+import argparse
+import io
+
+
+# [START speech_transcribe_async]
+def transcribe_file(speech_file):
+    """Submit a local audio file for long-running recognition and print the result.
+
+    The file is read completely in binary mode and sent via
+    ``client.long_running_recognize`` with LINEAR16 encoding, a sample rate
+    of 16000 Hz and language code en-US. The function then waits on the
+    operation result (``timeout=90``) and prints transcript and confidence
+    of the first alternative of every result. Nothing is returned.
+    """
+    from google.cloud import speech
+    from google.cloud.speech import enums
+    from google.cloud.speech import types
+    speech_client = speech.SpeechClient()
+
+    # [START speech_python_migration_async_request]
+    with io.open(speech_file, 'rb') as handle:
+        raw_bytes = handle.read()
+
+    recognition_audio = types.RecognitionAudio(content=raw_bytes)
+    linear16 = enums.RecognitionConfig.AudioEncoding.LINEAR16
+    recognition_config = types.RecognitionConfig(
+        encoding=linear16,
+        sample_rate_hertz=16000,
+        language_code='en-US')
+
+    # [START speech_python_migration_async_response]
+    pending = speech_client.long_running_recognize(
+        recognition_config, recognition_audio)
+    # [END speech_python_migration_async_request]
+
+    print('Waiting for operation to complete...')
+    outcome = pending.result(timeout=90)
+
+    # Every result covers one consecutive portion of the audio; walking all
+    # of them yields the transcript of the whole file. alternatives[0] is the
+    # most likely alternative for that portion.
+    for portion in outcome.results:
+        print(u'Transcript: {}'.format(portion.alternatives[0].transcript))
+        print('Confidence: {}'.format(portion.alternatives[0].confidence))
+    # [END speech_python_migration_async_response]
+# [END speech_transcribe_async]
+
+
+# [START speech_transcribe_async_gcs]
+def transcribe_gcs(gcs_uri):
+    """Submit the audio at ``gcs_uri`` for long-running recognition and print the result.
+
+    The audio is referenced by URI (not uploaded from here) and recognized
+    with language code de-DE; encoding and sample rate are not set. The
+    function waits on the operation result (``timeout=90``) and prints
+    transcript and confidence of the first alternative of every result.
+    Nothing is returned.
+    """
+    from google.cloud import speech
+    from google.cloud.speech import enums
+    from google.cloud.speech import types
+    speech_client = speech.SpeechClient()
+
+    remote_audio = types.RecognitionAudio(uri=gcs_uri)
+    recognition_config = types.RecognitionConfig(
+        #encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
+        #sample_rate_hertz=16000,
+        language_code='de-DE')
+
+    pending = speech_client.long_running_recognize(
+        recognition_config, remote_audio)
+
+    print('Waiting for operation to complete...')
+    outcome = pending.result(timeout=90)
+
+    # Every result covers one consecutive portion of the audio; walking all
+    # of them yields the transcript of the whole file. alternatives[0] is the
+    # most likely alternative for that portion.
+    for portion in outcome.results:
+        print(u'Transcript: {}'.format(portion.alternatives[0].transcript))
+        print('Confidence: {}'.format(portion.alternatives[0].confidence))
+# [END speech_transcribe_async_gcs]
+
+
+if __name__ == '__main__':
+    # Command-line entry: one positional ``path``; gs:// paths are handled
+    # by transcribe_gcs, everything else by transcribe_file.
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=__doc__)
+    cli.add_argument(
+        'path', help='File or GCS path for audio file to be recognized')
+    target = cli.parse_args().path
+    handler = transcribe_gcs if target.startswith('gs://') else transcribe_file
+    handler(target)