spr

2026-06-19 10:22:38 +02:00 · 2018-08-27 11:45:22 +02:00
parent 8016a3dc92
commit ae5c3ba538
4 changed files with 177 additions and 18 deletions
@@ -5,6 +5,8 @@ from pydub import AudioSegment
 def save_audio(filename, base64_string):
    decoded = None
    orig_filename = filename[:-4]+"_orig"
    with open("latestBase64","wb") as f:
        f.write(base64_string)
    try:
        decoded = base64.b64decode(base64_string)
    except TypeError:
@@ -16,9 +18,10 @@ def save_audio(filename, base64_string):
    AudioSegment.from_file(orig_filename).export(filename,format="wav")
    return b"SUCCESS"
-def save_audio_chain(filenames, base64_strings):
+def save_audio_chain(file_str_tupels):
-    compleAudio = None
+    completeAudio = None
-    for fname in filenames:
+    for fname, base64_string in file_str_tupels:
        print("Filename: {}".format(fname))
        decoded = None
        orig_filename = fname[:-4]+"_orig"
        try:
@@ -27,14 +30,32 @@ def save_audio_chain(filenames, base64_strings):
            return b"ERROR_INVALID_ENCODING_64"
        with open(orig_filename,"wb") as f:
            f.write(decoded)
-        if compleAudio == None:
+        if completeAudio == None:
            completeAudio = AudioSegment.from_file(orig_filename)
        else:
-            completeAudio += [AudioSegment.from_file(orig_filename)]
+            completeAudio += AudioSegment.from_file(orig_filename)
-    completeAudio.export(filenames[0],format="wav")
+    if not completeAudio:
        return b"ERROR_AUDIO_CONCAT_FAILED"
    else:
        completeAudio.export(file_str_tupels[0][0],format="wav")
        return b"SUCCESS"
 def save_transcript(filename, transcript):
    ''' write transcript to the file "<filename>_transcript"

        filename   -- path of the audio file the transcript belongs to
        transcript -- text to store; an existing transcript file is
                      overwritten
    '''
    # The former `if os.path.isfile(filename): pass` check had no effect and
    # was dropped: the transcript is written whether or not the audio file
    # exists.
    with open(filename + "_transcript", "w") as f:
        f.write(transcript)
 def get_transcript(filename):
    ''' return the stored transcript of an audio file, or None

        filename -- path of the audio file; the transcript is read from
                    "<filename>_transcript"

        None is returned when the audio file does not exist or when its
        transcript file has not been written (yet).
    '''
    if not os.path.isfile(filename):
        return None
    try:
        with open(filename + "_transcript", "r") as f:
            return f.read()
    except FileNotFoundError:
        # The audio file can exist before its transcript does; report
        # "no transcript" the same way as for a missing audio file.
        return None
 def filelist():
    ''' placeholder: not implemented yet, always returns an empty string '''
    return ""
 def fileinfo(filename):
    ''' placeholder: not implemented yet, ignores filename and always
        returns an empty string '''
    return ""
 def copy_to_output(filename):
    ''' placeholder: not implemented yet, ignores filename and always
        returns an empty string '''
    return ""
@@ -5,15 +5,34 @@ MAIN_DIR = b"data/"
 def parse_request(data):
    ''' parse request and call correct function '''
-    
+    #return b"DUMMY"
    print(data.split(b",")[0])
    # echo/test connection #
    cleared_data = is_data_type(b"ECHOREQUEST,",data)
    if cleared_data:
        return cleared_data
    # reply transcript #
    cleared_data = is_data_type(b"GET_TRANSCRIPT,",data)
    if cleared_data:
        filename = data.decode("utf-8")
        return filesystem.get_transcript().encode("utf-8")
    # get single file info #
    cleared_data = is_data_type(b"GET_FILEINFO,",data)
    if cleared_data:
        filename = data.decode("utf-8")
        return filesystem.fileinfo(filename).encode("utf-8")
    # get info for all files #
    cleared_data = is_data_type(b"GET_FILEINFO_ALL,",data)
    if cleared_data:
        return filesystem.filelist().encode("utf-8")
    # handle audio transmission #
    cleared_data = is_data_type(b"AUDIO_TRANSMISSION,",data)
    if cleared_data:
        print("Handling audio transmission")
        filename = None
        try:
            filename, base64_string = cleared_data.split(b',')
@@ -27,17 +46,20 @@ def parse_request(data):
    # handle a chain of audiotransmissions #
    if data.startswith(b"CHAIN_AUDIO_TRANSMISSION"):
-        files = []
+        file_str_tuples = []
-        base64_strings = []
+        arr = data.split(b"|")
-        for el in data.split(b"|"):
+        for el in arr[1:-1]:
            filename, base64_string = el.split(b',')
            filename = MAIN_DIR + filename.split(b"/")[-1] + b".wav"
            filename = filename.decode("utf-8")
-            files += [filename]
+            file_str_tuples += [(filename,base64_string)]
-            base64_strings += [base64_string]
+
-        filesystem.save_audio_chain(files,base64_string);
+        if len(file_str_tuples) < 2: # a chain has 2 or more elements
-        speech.async_create_transcript(files[0])
+            return bytes("ERROR_INVALID_NUMBER_FILES_{}".format(len(file_str_tuples)),"utf-8")
-        return b"SUCCESS"
+
        ret = filesystem.save_audio_chain(file_str_tuples);
        speech.async_create_transcript(file_str_tuples[0][0])
        return ret
    # everything else
@@ -80,3 +102,4 @@ def recive_transcribe_request(audiofile):
 def android_unittest_transcribe_request(audiofile):
    ''' Dummy handler for requests coming from the Android unit tests, which
        append a special keyword to their requests. Currently a no-op:
        audiofile is ignored and None is returned. '''
    pass
@@ -4,7 +4,11 @@ import os.path
 import filesystem
 import log
 # Backend switches read by analyse(): USE_FREE is checked first and selects
 # free_google_backend(); otherwise USE_PAID selects paid_google_backend().
 # NOTE(review): with both flags False neither branch in analyse() assigns a
 # result -- confirm that configuration is never used.
 USE_FREE=False
 USE_PAID=True
 def async_create_transcript(filename):
    ''' announce the job on stdout, then start
        create_and_save_transcript(filename) through mp.Process and return
        without waiting for it '''
    print("Creating transcript..")
    worker = mp.Process(target=create_and_save_transcript, args=(filename,))
    worker.start()
 def create_and_save_transcript(filename):
@@ -18,12 +22,21 @@ def analyse(filename):
        audio = recognizer.record(source)
    try:
-        string = recognizer.recognize_google(audio,language="de-DE")
+        if USE_FREE:
            string = free_google_backend(recognizer, audio)
        elif USE_PAID:
            string = paid_google_backend(recognizer,audio)
    except spr.UnknownValueError:
        log.log("Audio file is broken or not an audio file")
-        return None
+        return "ERROR_AUDIO_FILE_INVALID"
    except spr.RequestError as e:
        log.log("Could not connect to google API: {}".format(e))
-        return None
+        return "ERROR_API_FAILURE"
    return string
 def free_google_backend(recognizer, audio):
    ''' pass audio to recognizer.recognize_google with language "de-DE" and
        return whatever that call returns '''
    transcript = recognizer.recognize_google(audio, language="de-DE")
    return transcript
 def paid_google_backend(recognizer, audio):
    ''' placeholder for the paid backend: not implemented yet, ignores both
        arguments and returns None '''
    pass
@@ -0,0 +1,102 @@
 #!/usr/bin/env python
 # Copyright 2017 Google Inc. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #      http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Google Cloud Speech API sample application using the REST API for async
 batch processing.
 Example usage:
    python transcribe_async.py resources/audio.raw
    python transcribe_async.py gs://cloud-samples-tests/speech/vr.flac
 """
 import argparse
 import io
 # [START speech_transcribe_async]
 def transcribe_file(speech_file):
    """Run long-running recognition on a local audio file and print results.

    The file at ``speech_file`` is read completely into memory and submitted
    as LINEAR16 / 16000 Hz / en-US audio. The function waits on the
    operation with ``timeout=90`` and then prints the transcript and
    confidence of the first alternative of every result.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    speech_client = speech.SpeechClient()
    # [START speech_python_migration_async_request]
    with io.open(speech_file, 'rb') as handle:
        raw_bytes = handle.read()
    recognition_audio = types.RecognitionAudio(content=raw_bytes)
    linear16 = enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US')
    # [START speech_python_migration_async_response]
    pending = speech_client.long_running_recognize(
        recognition_config, recognition_audio)
    # [END speech_python_migration_async_request]
    print('Waiting for operation to complete...')
    outcome = pending.result(timeout=90)
    # Every result covers a consecutive portion of the audio; walking all of
    # them yields the transcript of the whole file.
    for portion in outcome.results:
        # Alternative 0 is the most likely one for this portion.
        best = portion.alternatives[0]
        print(u'Transcript: {}'.format(best.transcript))
        print('Confidence: {}'.format(best.confidence))
    # [END speech_python_migration_async_response]
 # [END speech_transcribe_async]
 # [START speech_transcribe_async_gcs]
 def transcribe_gcs(gcs_uri):
    """Run long-running recognition on audio stored at ``gcs_uri`` and print results.

    The audio is referenced by URI rather than uploaded, with language
    de-DE. The function waits on the operation with ``timeout=90`` and then
    prints the transcript and confidence of the first alternative of every
    result.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    speech_client = speech.SpeechClient()
    recognition_audio = types.RecognitionAudio(uri=gcs_uri)
    # encoding / sample_rate_hertz are not passed here (the FLAC / 16000
    # settings were commented out); only the language is set.
    recognition_config = types.RecognitionConfig(language_code='de-DE')
    pending = speech_client.long_running_recognize(
        recognition_config, recognition_audio)
    print('Waiting for operation to complete...')
    outcome = pending.result(timeout=90)
    # Every result covers a consecutive portion of the audio; walking all of
    # them yields the transcript of the whole file.
    for portion in outcome.results:
        # Alternative 0 is the most likely one for this portion.
        best = portion.alternatives[0]
        print(u'Transcript: {}'.format(best.transcript))
        print('Confidence: {}'.format(best.confidence))
 # [END speech_transcribe_async_gcs]
 if __name__ == '__main__':
    # Command-line entry point: takes one positional path. gs:// URIs are
    # handed to transcribe_gcs, anything else to transcribe_file.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    target = cli.parse_args().path
    if target.startswith('gs://'):
        transcribe_gcs(target)
    else:
        transcribe_file(target)