mirror of
https://github.com/FAUSheppy/ths-speech
synced 2025-12-07 14:51:35 +01:00
chain audio implemented
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
|||||||
*.swp
|
*.swp
|
||||||
|
data/
|
||||||
__pycache__/
|
__pycache__/
|
||||||
thermoscan-speech-android-app//
|
thermoscan-speech-android-app//
|
||||||
|
|||||||
40
python-server/filesystem.py
Normal file
40
python-server/filesystem.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
import base64
|
||||||
|
import os.path
|
||||||
|
from pydub import AudioSegment
|
||||||
|
|
||||||
|
def save_audio(filename, base64_string):
    """Decode a base64 audio upload and store it as a WAV file.

    The decoded bytes are first written unchanged to a sibling file named
    ``filename[:-4] + "_orig"``. That raw file is then loaded with pydub and
    exported to ``filename`` in WAV format. An already existing ``filename``
    is overwritten.

    Args:
        filename: Target path of the WAV file. Its last four characters
            (presumably the ".wav" extension -- confirm against the caller)
            are stripped to build the name of the raw copy.
        base64_string: Base64-encoded audio data (bytes or ASCII str).

    Returns:
        b"SUCCESS" once the WAV file has been exported, or
        b"ERROR_INVALID_ENCODING_64" if the payload cannot be decoded.
    """
    try:
        decoded = base64.b64decode(base64_string)
    except (TypeError, ValueError):
        # Malformed base64 raises binascii.Error (a ValueError subclass);
        # TypeError covers payloads that are neither bytes-like nor str.
        return b"ERROR_INVALID_ENCODING_64"

    # Keep the untouched upload next to the converted file.
    orig_filename = filename[:-4] + "_orig"
    with open(orig_filename, "wb") as f:
        f.write(decoded)

    # pydub detects the input format itself; the output is always WAV.
    AudioSegment.from_file(orig_filename).export(filename, format="wav")
    return b"SUCCESS"
|
||||||
|
|
||||||
|
def save_audio_chain(filenames, base64_strings):
    """Decode several base64 audio uploads and join them into one WAV file.

    Every payload is written unchanged to ``fname[:-4] + "_orig"`` for its
    matching entry in ``filenames``. The parts are then concatenated in the
    given order and exported as WAV to ``filenames[0]``.

    All payloads are decoded before anything is written, so an invalid
    part does not leave a half-written chain on disk.

    Args:
        filenames: Target paths, one per audio part. The last four
            characters of each (presumably ".wav" -- confirm against the
            caller) are stripped to build the name of the raw copy.
        base64_strings: Base64-encoded audio parts, in the same order and
            of the same length as ``filenames``.

    Returns:
        b"SUCCESS" once the combined WAV file has been exported,
        b"ERROR_INVALID_CHAIN" if the chain is empty or the two sequences
        differ in length, or b"ERROR_INVALID_ENCODING_64" if any payload
        cannot be decoded.
    """
    if not filenames or len(filenames) != len(base64_strings):
        return b"ERROR_INVALID_CHAIN"

    decoded_parts = []
    for encoded in base64_strings:
        try:
            decoded_parts.append(base64.b64decode(encoded))
        except (TypeError, ValueError):
            # Malformed base64 raises binascii.Error (a ValueError
            # subclass); TypeError covers non bytes-like payloads.
            return b"ERROR_INVALID_ENCODING_64"

    complete_audio = None
    for fname, decoded in zip(filenames, decoded_parts):
        # Keep the untouched upload next to the converted file.
        orig_filename = fname[:-4] + "_orig"
        with open(orig_filename, "wb") as f:
            f.write(decoded)

        segment = AudioSegment.from_file(orig_filename)
        if complete_audio is None:
            complete_audio = segment
        else:
            # Adding two AudioSegments appends the second to the first.
            complete_audio = complete_audio + segment

    # The combined recording is stored under the first part's name.
    complete_audio.export(filenames[0], format="wav")
    return b"SUCCESS"
|
||||||
|
|
||||||
|
def save_transcript(filename, transcript):
    """Write a transcript to ``filename + "_transcript"``.

    An existing transcript file is overwritten. The file is written as
    UTF-8 so non-ASCII characters (e.g. German umlauts) do not depend on
    the platform's default encoding.

    Args:
        filename: Path of the audio file the transcript belongs to; the
            transcript is stored beside it with a "_transcript" suffix.
        transcript: The transcribed text, or None when no transcript is
            available. With None nothing is written and no file is
            created.

    Returns:
        None.
    """
    if transcript is None:
        # Nothing to store; avoid leaving an empty transcript file behind.
        return

    with open(filename + "_transcript", "w", encoding="utf-8") as f:
        f.write(transcript)
|
||||||
@@ -25,6 +25,21 @@ def parse_request(data):
|
|||||||
speech.async_create_transcript(filename)
|
speech.async_create_transcript(filename)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
# handle a chain of audiotransmissions #
|
||||||
|
if data.startswith(b"CHAIN_AUDIO_TRANSMISSION"):
|
||||||
|
files = []
|
||||||
|
base64_strings = []
|
||||||
|
for el in data.split(b"|"):
|
||||||
|
filename, base64_string = el.split(b',')
|
||||||
|
filename = MAIN_DIR + filename.split(b"/")[-1] + b".wav"
|
||||||
|
filename = filename.decode("utf-8")
|
||||||
|
files += [filename]
|
||||||
|
base64_strings += [base64_string]
|
||||||
|
filesystem.save_audio_chain(files,base64_string);
|
||||||
|
speech.async_create_transcript(files[0])
|
||||||
|
return b"SUCCESS"
|
||||||
|
|
||||||
|
|
||||||
# other shit
|
# other shit
|
||||||
return b"UNRECOGNIZED_SERVER_OPTION\n"
|
return b"UNRECOGNIZED_SERVER_OPTION\n"
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ def async_create_transcript(filename):
|
|||||||
|
|
||||||
def create_and_save_transcript(filename):
|
def create_and_save_transcript(filename):
|
||||||
transcript = analyse(filename)
|
transcript = analyse(filename)
|
||||||
filesystem.save_transcript(transcript)
|
filesystem.save_transcript(filename, transcript)
|
||||||
|
|
||||||
def analyse(filename):
|
def analyse(filename):
|
||||||
''' returns the transcripted audio, or None if the analysis fails '''
|
''' returns the transcripted audio, or None if the analysis fails '''
|
||||||
@@ -18,7 +18,7 @@ def analyse(filename):
|
|||||||
audio = recognizer.record(source)
|
audio = recognizer.record(source)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
string = recognizer.recognize_google(audio)
|
string = recognizer.recognize_google(audio,language="de-DE")
|
||||||
except spr.UnknownValueError:
|
except spr.UnknownValueError:
|
||||||
log.log("Audio file is broken or not an audio file")
|
log.log("Audio file is broken or not an audio file")
|
||||||
return None
|
return None
|
||||||
|
|||||||
Reference in New Issue
Block a user