diff --git a/.gitignore b/.gitignore index aafc56f..dbf4d04 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.swp +data/ __pycache__/ thermoscan-speech-android-app// diff --git a/python-server/data/.empty b/python-server/data/.empty deleted file mode 100644 index e69de29..0000000 diff --git a/python-server/filesystem.py b/python-server/filesystem.py new file mode 100644 index 0000000..0ca7ac0 --- /dev/null +++ b/python-server/filesystem.py @@ -0,0 +1,40 @@ +import base64 +import os.path +from pydub import AudioSegment + +def save_audio(filename, base64_string): + decoded = None + orig_filename = filename[:-4]+"_orig" + try: + decoded = base64.b64decode(base64_string) + except TypeError: + return b"ERROR_INVALID_ENCODING_64" + #if os.path.isfile(filename): + # return b"ERROR_FILE_EXISTS" + with open(orig_filename,"wb") as f: + f.write(decoded) + AudioSegment.from_file(orig_filename).export(filename,format="wav") + return b"SUCCESS" + +def save_audio_chain(filenames, base64_strings): + compleAudio = None + for fname in filenames: + decoded = None + orig_filename = fname[:-4]+"_orig" + try: + decoded = base64.b64decode(base64_string) + except TypeError: + return b"ERROR_INVALID_ENCODING_64" + with open(orig_filename,"wb") as f: + f.write(decoded) + if compleAudio == None: + completeAudio = AudioSegment.from_file(orig_filename) + else: + completeAudio += [AudioSegment.from_file(orig_filename)] + completeAudio.export(filenames[0],format="wav") + +def save_transcript(filename, transcript): + if os.path.isfile(filename): + pass + with open(filename + "_transcript","w") as f: + f.write(transcript) diff --git a/python-server/server_interface.py b/python-server/server_interface.py index b9fba50..ece52e3 100644 --- a/python-server/server_interface.py +++ b/python-server/server_interface.py @@ -25,6 +25,21 @@ def parse_request(data): speech.async_create_transcript(filename) return ret + # handle a chain of audiotransmissions # + if data.startswith(b"CHAIN_AUDIO_TRANSMISSION"): + files = [] + base64_strings = [] + for el in data.split(b"|"): + filename, base64_string = el.split(b',') + filename = MAIN_DIR + filename.split(b"/")[-1] + b".wav" + filename = filename.decode("utf-8") + files += [filename] + base64_strings += [base64_string] + filesystem.save_audio_chain(files,base64_string); + speech.async_create_transcript(files[0]) + return b"SUCCESS" + + # other shit return b"UNRECOGNIZED_SERVER_OPTION\n" diff --git a/python-server/speech.py b/python-server/speech.py index 753b34a..387bfce 100644 --- a/python-server/speech.py +++ b/python-server/speech.py @@ -9,7 +9,7 @@ def async_create_transcript(filename): def create_and_save_transcript(filename): transcript = analyse(filename) - filesystem.save_transcript(transcript) + filesystem.save_transcript(filename, transcript) def analyse(filename): ''' returns the transcripted audio, or None if the analysis fails ''' @@ -18,7 +18,7 @@ def analyse(filename): audio = recognizer.record(source) try: - string = recognizer.recognize_google(audio) + string = recognizer.recognize_google(audio,language="de-DE") except spr.UnknownValueError: log.log("Audio file is broken or not an audio file") return None