chain audio implemented

2026-04-26 21:42:30 +02:00 · 2018-08-22 05:40:51 +02:00
parent e6412946e8
commit 8016a3dc92
5 changed files with 58 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *.swp
+data/
 __pycache__/
 thermoscan-speech-android-app//
--- a/python-server/data/.empty
+++ b/python-server/data/.empty
--- a/python-server/filesystem.py
+++ b/python-server/filesystem.py
@@ -0,0 +1,40 @@
+import base64
+import os.path
+from pydub import AudioSegment
+
+def save_audio(filename, base64_string):
+    """Decode a base64 audio payload and store it as a WAV file.
+
+    The decoded bytes are first written unchanged to a side file named
+    after ``filename`` with its last four characters dropped and "_orig"
+    appended; pydub then loads that side file and exports it to
+    ``filename`` in WAV format.
+
+    Args:
+        filename: Target path of the WAV file (presumably ends in ".wav",
+            since the last four characters are cut off for the side file).
+        base64_string: Base64-encoded audio data.
+
+    Returns:
+        b"SUCCESS" after the export, or b"ERROR_INVALID_ENCODING_64" when
+        the payload cannot be base64-decoded.
+    """
+    orig_filename = filename[:-4] + "_orig"
+    try:
+        decoded = base64.b64decode(base64_string)
+    except (TypeError, ValueError):
+        # Python 3 reports malformed base64 as binascii.Error, which is a
+        # ValueError subclass; TypeError covers non-str/bytes input.
+        return b"ERROR_INVALID_ENCODING_64"
+    # NOTE: an already existing file at `filename` is overwritten.
+    with open(orig_filename, "wb") as f:
+        f.write(decoded)
+    AudioSegment.from_file(orig_filename).export(filename, format="wav")
+    return b"SUCCESS"
+
+def save_audio_chain(filenames, base64_strings):
+    """Decode several base64 audio payloads and join them into one WAV file.
+
+    Every payload is written unchanged to a side file named after the
+    matching entry of ``filenames`` (last four characters dropped, "_orig"
+    appended) and loaded with pydub. The segments are concatenated in the
+    given order and the result is exported as WAV to ``filenames[0]``.
+
+    Args:
+        filenames: Target paths, one per payload; the first one receives
+            the combined audio.
+        base64_strings: Base64-encoded payloads, parallel to ``filenames``.
+
+    Returns:
+        b"SUCCESS" after the export, or b"ERROR_INVALID_ENCODING_64" as
+        soon as one payload cannot be base64-decoded.
+
+    Raises:
+        ValueError: If ``filenames`` is empty or the two sequences differ
+            in length.
+    """
+    if not filenames:
+        raise ValueError("save_audio_chain needs at least one filename")
+    if len(filenames) != len(base64_strings):
+        raise ValueError("filenames and base64_strings differ in length")
+    complete_audio = None
+    for fname, base64_string in zip(filenames, base64_strings):
+        orig_filename = fname[:-4] + "_orig"
+        try:
+            decoded = base64.b64decode(base64_string)
+        except (TypeError, ValueError):
+            # binascii.Error (malformed base64) is a ValueError subclass.
+            return b"ERROR_INVALID_ENCODING_64"
+        with open(orig_filename, "wb") as f:
+            f.write(decoded)
+        segment = AudioSegment.from_file(orig_filename)
+        if complete_audio is None:
+            complete_audio = segment
+        else:
+            # Adding two AudioSegments appends the right one to the left.
+            complete_audio += segment
+    complete_audio.export(filenames[0], format="wav")
+    return b"SUCCESS"
+
+def save_transcript(filename, transcript):
+    """Write a transcript next to its audio file.
+
+    The text is stored in a file named ``filename`` + "_transcript",
+    replacing any previous content.
+
+    Args:
+        filename: Path of the audio file the transcript belongs to.
+        transcript: Transcribed text, or None when there is nothing to
+            save; in that case no file is created.
+    """
+    if transcript is None:
+        return
+    # Explicit UTF-8 so non-ASCII text is written the same on every locale.
+    with open(filename + "_transcript", "w", encoding="utf-8") as f:
+        f.write(transcript)
--- a/python-server/server_interface.py
+++ b/python-server/server_interface.py
@@ -25,6 +25,21 @@ def parse_request(data):
        speech.async_create_transcript(filename)
        return ret

+    # handle a chain of audiotransmissions #
+    if data.startswith(b"CHAIN_AUDIO_TRANSMISSION"):
+        files = []
+        base64_strings = []
+        for el in data.split(b"|"):
+            filename, base64_string = el.split(b',')
+            filename = MAIN_DIR + filename.split(b"/")[-1] + b".wav"
+            filename = filename.decode("utf-8")
+            files += [filename]
+            base64_strings += [base64_string]
+        filesystem.save_audio_chain(files,base64_string);
+        speech.async_create_transcript(files[0])
+        return b"SUCCESS"
+        
+
    # other shit
    return b"UNRECOGNIZED_SERVER_OPTION\n"
      
--- a/python-server/speech.py
+++ b/python-server/speech.py
@@ -9,7 +9,7 @@ def async_create_transcript(filename):

 def create_and_save_transcript(filename):
    transcript = analyse(filename)
-    filesystem.save_transcript(transcript)
+    filesystem.save_transcript(filename, transcript)

 def analyse(filename):
    ''' returns the transcripted audio, or None if the analysis fails '''
@@ -18,7 +18,7 @@ def analyse(filename):
        audio = recognizer.record(source)

    try:
-        string = recognizer.recognize_google(audio)
+        string = recognizer.recognize_google(audio,language="de-DE")
    except spr.UnknownValueError:
        log.log("Audio file is broken or not an audio file")
        return None