mirror of
https://github.com/FAUSheppy/ths-speech
synced 2025-12-06 08:41:35 +01:00
chain audio implemented
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
||||
*.swp
|
||||
data/
|
||||
__pycache__/
|
||||
thermoscan-speech-android-app//
|
||||
|
||||
40
python-server/filesystem.py
Normal file
40
python-server/filesystem.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import base64
|
||||
import os.path
|
||||
from pydub import AudioSegment
|
||||
|
||||
def save_audio(filename, base64_string):
    """Decode a base64 audio payload and store it as a WAV file.

    The decoded bytes are first written unchanged to
    ``filename[:-4] + "_orig"`` and then converted through pydub into a WAV
    file at ``filename``.

    Args:
        filename: Target path of the WAV file.
            NOTE(review): the ``[:-4]`` slice assumes a four-character
            extension such as ``.wav`` -- confirm against callers.
        base64_string: Base64-encoded audio data (bytes or ASCII str).

    Returns:
        ``b"SUCCESS"`` once the WAV file has been exported, or
        ``b"ERROR_INVALID_ENCODING_64"`` if the payload is not valid base64.
    """
    orig_filename = filename[:-4] + "_orig"

    try:
        decoded = base64.b64decode(base64_string)
    except (TypeError, ValueError):
        # b64decode signals malformed data with binascii.Error, which is a
        # ValueError subclass; TypeError covers non bytes-like input.
        return b"ERROR_INVALID_ENCODING_64"

    # NOTE: an already existing file at `filename` is overwritten; no
    # existence check is performed.
    with open(orig_filename, "wb") as f:
        f.write(decoded)

    # pydub detects the source format and re-encodes it as WAV.
    AudioSegment.from_file(orig_filename).export(filename, format="wav")
    return b"SUCCESS"
|
||||
|
||||
def save_audio_chain(filenames, base64_strings):
    """Decode several base64 audio payloads and join them into one WAV file.

    Each payload is written unchanged to ``fname[:-4] + "_orig"`` for its
    matching entry in ``filenames``. The pieces are then concatenated in
    order and exported as a single WAV file at ``filenames[0]``.

    Args:
        filenames: Target paths, one per payload; the first one receives the
            combined audio.
        base64_strings: Base64-encoded audio payloads, in the same order as
            ``filenames``.

    Returns:
        ``b"SUCCESS"`` once the combined WAV file has been exported, or
        ``b"ERROR_INVALID_ENCODING_64"`` if any payload is not valid base64.

    Raises:
        ValueError: If ``filenames`` is empty or the two sequences differ in
            length.
    """
    if not filenames or len(filenames) != len(base64_strings):
        raise ValueError("filenames and base64_strings must be non-empty "
                         "and of equal length")

    # Decode everything before touching the disk so that one bad payload
    # does not leave a partially written chain behind.
    decoded_parts = []
    for payload in base64_strings:
        try:
            decoded_parts.append(base64.b64decode(payload))
        except (TypeError, ValueError):
            # binascii.Error (malformed base64) is a ValueError subclass.
            return b"ERROR_INVALID_ENCODING_64"

    complete_audio = None
    for fname, decoded in zip(filenames, decoded_parts):
        orig_filename = fname[:-4] + "_orig"
        with open(orig_filename, "wb") as f:
            f.write(decoded)

        segment = AudioSegment.from_file(orig_filename)
        if complete_audio is None:
            complete_audio = segment
        else:
            # AudioSegment + AudioSegment appends the right-hand segment.
            complete_audio = complete_audio + segment

    complete_audio.export(filenames[0], format="wav")
    return b"SUCCESS"
|
||||
|
||||
def save_transcript(filename, transcript):
    """Write a transcript to ``filename + "_transcript"``.

    Args:
        filename: Path of the audio file the transcript belongs to; the
            transcript path is derived by appending ``"_transcript"``.
        transcript: Text to store, or ``None`` when no transcript could be
            produced.

    Returns:
        None. Nothing is written when ``transcript`` is ``None``.
    """
    if transcript is None:
        # Opening the file first would truncate it and then fail in
        # f.write(None) with a TypeError, so bail out before touching disk.
        return

    with open(filename + "_transcript", "w") as f:
        f.write(transcript)
|
||||
@@ -25,6 +25,21 @@ def parse_request(data):
|
||||
speech.async_create_transcript(filename)
|
||||
return ret
|
||||
|
||||
# handle a chain of audio transmissions #
|
||||
if data.startswith(b"CHAIN_AUDIO_TRANSMISSION"):
|
||||
files = []
|
||||
base64_strings = []
|
||||
for el in data.split(b"|"):
|
||||
filename, base64_string = el.split(b',')
|
||||
filename = MAIN_DIR + filename.split(b"/")[-1] + b".wav"
|
||||
filename = filename.decode("utf-8")
|
||||
files += [filename]
|
||||
base64_strings += [base64_string]
|
||||
filesystem.save_audio_chain(files,base64_string);
|
||||
speech.async_create_transcript(files[0])
|
||||
return b"SUCCESS"
|
||||
|
||||
|
||||
# fallback: the request type was not recognized
|
||||
return b"UNRECOGNIZED_SERVER_OPTION\n"
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ def async_create_transcript(filename):
|
||||
|
||||
def create_and_save_transcript(filename):
    """Run ``analyse`` on *filename* and pass the result to the filesystem layer.

    Args:
        filename: Path of the audio file to transcribe.
    """
    # analyse() returns None when the recognition fails.
    transcript = analyse(filename)
    # save_transcript takes (filename, transcript); the earlier one-argument
    # call would raise TypeError for the missing parameter.
    filesystem.save_transcript(filename, transcript)
|
||||
|
||||
def analyse(filename):
|
||||
''' returns the transcripted audio, or None if the analysis fails '''
|
||||
@@ -18,7 +18,7 @@ def analyse(filename):
|
||||
audio = recognizer.record(source)
|
||||
|
||||
try:
|
||||
string = recognizer.recognize_google(audio)
|
||||
string = recognizer.recognize_google(audio,language="de-DE")
|
||||
except spr.UnknownValueError:
|
||||
log.log("Audio file is broken or not an audio file")
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user