mirror of
https://github.com/FAUSheppy/ths-speech
synced 2025-12-08 00:31:36 +01:00
spr
This commit is contained in:
@@ -5,6 +5,8 @@ from pydub import AudioSegment
|
|||||||
def save_audio(filename, base64_string):
|
def save_audio(filename, base64_string):
|
||||||
decoded = None
|
decoded = None
|
||||||
orig_filename = filename[:-4]+"_orig"
|
orig_filename = filename[:-4]+"_orig"
|
||||||
|
with open("latestBase64","wb") as f:
|
||||||
|
f.write(base64_string)
|
||||||
try:
|
try:
|
||||||
decoded = base64.b64decode(base64_string)
|
decoded = base64.b64decode(base64_string)
|
||||||
except TypeError:
|
except TypeError:
|
||||||
@@ -16,9 +18,10 @@ def save_audio(filename, base64_string):
|
|||||||
AudioSegment.from_file(orig_filename).export(filename,format="wav")
|
AudioSegment.from_file(orig_filename).export(filename,format="wav")
|
||||||
return b"SUCCESS"
|
return b"SUCCESS"
|
||||||
|
|
||||||
def save_audio_chain(filenames, base64_strings):
|
def save_audio_chain(file_str_tupels):
|
||||||
compleAudio = None
|
completeAudio = None
|
||||||
for fname in filenames:
|
for fname, base64_string in file_str_tupels:
|
||||||
|
print("Filename: {}".format(fname))
|
||||||
decoded = None
|
decoded = None
|
||||||
orig_filename = fname[:-4]+"_orig"
|
orig_filename = fname[:-4]+"_orig"
|
||||||
try:
|
try:
|
||||||
@@ -27,14 +30,32 @@ def save_audio_chain(filenames, base64_strings):
|
|||||||
return b"ERROR_INVALID_ENCODING_64"
|
return b"ERROR_INVALID_ENCODING_64"
|
||||||
with open(orig_filename,"wb") as f:
|
with open(orig_filename,"wb") as f:
|
||||||
f.write(decoded)
|
f.write(decoded)
|
||||||
if compleAudio == None:
|
if completeAudio == None:
|
||||||
completeAudio = AudioSegment.from_file(orig_filename)
|
completeAudio = AudioSegment.from_file(orig_filename)
|
||||||
else:
|
else:
|
||||||
completeAudio += [AudioSegment.from_file(orig_filename)]
|
completeAudio += AudioSegment.from_file(orig_filename)
|
||||||
completeAudio.export(filenames[0],format="wav")
|
if not completeAudio:
|
||||||
|
return b"ERROR_AUDIO_CONCAT_FAILED"
|
||||||
|
else:
|
||||||
|
completeAudio.export(file_str_tupels[0][0],format="wav")
|
||||||
|
return b"SUCCESS"
|
||||||
|
|
||||||
def save_transcript(filename, transcript):
    """Store *transcript* next to the audio file *filename*.

    The text is written to ``filename + "_transcript"``; an existing
    transcript file at that path is overwritten.

    Args:
        filename: path of the audio file the transcript belongs to.
        transcript: transcript text to write.
    """
    # The previous existence check on `filename` had an empty body (`pass`)
    # and never influenced the write, so it has been dropped.
    with open(filename + "_transcript", "w") as out:
        out.write(transcript)
|
||||||
|
|
||||||
|
def get_transcript(filename):
    """Return the stored transcript for the audio file *filename*.

    The transcript is read from ``filename + "_transcript"``.

    Args:
        filename: path of the audio file whose transcript is requested.

    Returns:
        The transcript text, or ``None`` when the audio file does not exist
        or no transcript file has been written for it.
    """
    if not os.path.isfile(filename):
        return None

    try:
        with open(filename + "_transcript", "r") as src:
            return src.read()
    except FileNotFoundError:
        # The audio file can exist without a transcript file; report
        # "no transcript" instead of raising.
        return None
|
||||||
|
|
||||||
|
def filelist():
    """Placeholder for the file listing; currently always returns ``""``."""
    listing = ""
    return listing
|
||||||
|
|
||||||
|
def fileinfo(filename):
    """Placeholder for per-file info; ignores *filename* and returns ``""``."""
    info = ""
    return info
|
||||||
|
|
||||||
|
def copy_to_output(filename):
    """Placeholder, not implemented yet; ignores *filename*, returns ``""``."""
    result = ""
    return result
|
||||||
|
|||||||
@@ -5,15 +5,34 @@ MAIN_DIR = b"data/"
|
|||||||
|
|
||||||
def parse_request(data):
|
def parse_request(data):
|
||||||
''' parse request and call correct function '''
|
''' parse request and call correct function '''
|
||||||
|
#return b"DUMMY"
|
||||||
|
print(data.split(b",")[0])
|
||||||
# echo/test connection #
|
# echo/test connection #
|
||||||
cleared_data = is_data_type(b"ECHOREQUEST,",data)
|
cleared_data = is_data_type(b"ECHOREQUEST,",data)
|
||||||
if cleared_data:
|
if cleared_data:
|
||||||
return cleared_data
|
return cleared_data
|
||||||
|
|
||||||
|
# reply transcript #
|
||||||
|
cleared_data = is_data_type(b"GET_TRANSCRIPT,",data)
|
||||||
|
if cleared_data:
|
||||||
|
filename = data.decode("utf-8")
|
||||||
|
return filesystem.get_transcript().encode("utf-8")
|
||||||
|
|
||||||
|
# get single file info #
|
||||||
|
cleared_data = is_data_type(b"GET_FILEINFO,",data)
|
||||||
|
if cleared_data:
|
||||||
|
filename = data.decode("utf-8")
|
||||||
|
return filesystem.fileinfo(filename).encode("utf-8")
|
||||||
|
|
||||||
|
# get single file info #
|
||||||
|
cleared_data = is_data_type(b"GET_FILEINFO_ALL,",data)
|
||||||
|
if cleared_data:
|
||||||
|
return filesystem.filelist().encode("utf-8")
|
||||||
|
|
||||||
# handle audio transmission #
|
# handle audio transmission #
|
||||||
cleared_data = is_data_type(b"AUDIO_TRANSMISSION,",data)
|
cleared_data = is_data_type(b"AUDIO_TRANSMISSION,",data)
|
||||||
if cleared_data:
|
if cleared_data:
|
||||||
|
print("Handling audio transmission")
|
||||||
filename = None
|
filename = None
|
||||||
try:
|
try:
|
||||||
filename, base64_string = cleared_data.split(b',')
|
filename, base64_string = cleared_data.split(b',')
|
||||||
@@ -27,17 +46,20 @@ def parse_request(data):
|
|||||||
|
|
||||||
# handle a chain of audiotransmissions #
|
# handle a chain of audiotransmissions #
|
||||||
if data.startswith(b"CHAIN_AUDIO_TRANSMISSION"):
|
if data.startswith(b"CHAIN_AUDIO_TRANSMISSION"):
|
||||||
files = []
|
file_str_tuples = []
|
||||||
base64_strings = []
|
arr = data.split(b"|")
|
||||||
for el in data.split(b"|"):
|
for el in arr[1:-1]:
|
||||||
filename, base64_string = el.split(b',')
|
filename, base64_string = el.split(b',')
|
||||||
filename = MAIN_DIR + filename.split(b"/")[-1] + b".wav"
|
filename = MAIN_DIR + filename.split(b"/")[-1] + b".wav"
|
||||||
filename = filename.decode("utf-8")
|
filename = filename.decode("utf-8")
|
||||||
files += [filename]
|
file_str_tuples += [(filename,base64_string)]
|
||||||
base64_strings += [base64_string]
|
|
||||||
filesystem.save_audio_chain(files,base64_string);
|
if len(file_str_tuples) < 2: # a chain has 2 or more elements
|
||||||
speech.async_create_transcript(files[0])
|
return bytes("ERROR_INVALID_NUMBER_FILES_{}".format(len(file_str_tuples)),"utf-8")
|
||||||
return b"SUCCESS"
|
|
||||||
|
ret = filesystem.save_audio_chain(file_str_tuples);
|
||||||
|
speech.async_create_transcript(file_str_tuples[0][0])
|
||||||
|
return ret
|
||||||
|
|
||||||
|
|
||||||
# other shit
|
# other shit
|
||||||
@@ -80,3 +102,4 @@ def recive_transcribe_request(audiofile):
|
|||||||
|
|
||||||
def android_unittest_transcribe_request(audiofile):
    ''' the android unittests append a special keyword, such requests are
        dummy handled: nothing is done with audiofile and None is returned '''
    return None
|
||||||
|
|||||||
@@ -4,7 +4,11 @@ import os.path
|
|||||||
import filesystem
|
import filesystem
|
||||||
import log
|
import log
|
||||||
|
|
||||||
|
# Backend switches read by analyse(): USE_FREE is checked first, USE_PAID
# only when USE_FREE is false.
USE_FREE = False
USE_PAID = True
|
||||||
|
|
||||||
def async_create_transcript(filename):
    """Start a separate process that runs create_and_save_transcript(filename).

    Returns immediately after the process has been started; the process
    handle is not kept.
    """
    print("Creating transcript..")
    worker = mp.Process(target=create_and_save_transcript, args=(filename,))
    worker.start()
|
||||||
|
|
||||||
def create_and_save_transcript(filename):
|
def create_and_save_transcript(filename):
|
||||||
@@ -18,12 +22,21 @@ def analyse(filename):
|
|||||||
audio = recognizer.record(source)
|
audio = recognizer.record(source)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
string = recognizer.recognize_google(audio,language="de-DE")
|
if USE_FREE:
|
||||||
|
string = free_google_backend(recognizer, audio)
|
||||||
|
elif USE_PAID:
|
||||||
|
string = paid_google_backend(recognizer,audio)
|
||||||
except spr.UnknownValueError:
|
except spr.UnknownValueError:
|
||||||
log.log("Audio file is broken or not an audio file")
|
log.log("Audio file is broken or not an audio file")
|
||||||
return None
|
return "ERROR_AUDIO_FILE_INVALID"
|
||||||
except spr.RequestError as e:
|
except spr.RequestError as e:
|
||||||
log.log("Could not connect to google API: {}".format(e))
|
log.log("Could not connect to google API: {}".format(e))
|
||||||
return None
|
return "ERROR_API_FAILURE"
|
||||||
|
|
||||||
return string
|
return string
|
||||||
|
|
||||||
|
def free_google_backend(recognizer, audio, language="de-DE"):
    """Transcribe *audio* via ``recognizer.recognize_google``.

    Args:
        recognizer: object providing ``recognize_google(audio, language=...)``.
        audio: audio data, passed through to the recognizer unchanged.
        language: language tag handed to the recognizer; defaults to
            ``"de-DE"``, the value that was previously hard-coded.

    Returns:
        Whatever ``recognize_google`` returns. Exceptions raised by the
        recognizer propagate to the caller.
    """
    return recognizer.recognize_google(audio, language=language)
|
||||||
|
|
||||||
|
def paid_google_backend(recognizer, audio):
    """Placeholder for the paid backend; not implemented, returns ``None``."""
    return None
|
||||||
|
|||||||
102
python-server/transcribe_async.py
Normal file
102
python-server/transcribe_async.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
# Copyright 2017 Google Inc. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
"""Google Cloud Speech API sample application using the REST API for async
|
||||||
|
batch processing.
|
||||||
|
|
||||||
|
Example usage:
|
||||||
|
python transcribe_async.py resources/audio.raw
|
||||||
|
python transcribe_async.py gs://cloud-samples-tests/speech/vr.flac
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import io
|
||||||
|
|
||||||
|
|
||||||
|
# [START speech_transcribe_async]
|
||||||
|
def transcribe_file(speech_file):
    """Run long-running (asynchronous) recognition on a local audio file.

    The file at *speech_file* is read as bytes and submitted with a
    LINEAR16 / 16000 Hz / en-US configuration. After waiting up to 90
    seconds for the operation, the transcript and confidence of the first
    alternative of every result are printed.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    speech_client = speech.SpeechClient()

    # [START speech_python_migration_async_request]
    with io.open(speech_file, 'rb') as handle:
        raw_audio = handle.read()

    recognition_audio = types.RecognitionAudio(content=raw_audio)
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='en-US')

    # [START speech_python_migration_async_response]
    pending = speech_client.long_running_recognize(
        recognition_config, recognition_audio)
    # [END speech_python_migration_async_request]

    print('Waiting for operation to complete...')
    outcome = pending.result(timeout=90)

    # Every result covers a consecutive portion of the audio; walking all of
    # them yields the transcript of the whole file.
    for segment in outcome.results:
        # The first alternative is the most likely one for this portion.
        top = segment.alternatives[0]
        print(u'Transcript: {}'.format(top.transcript))
        print('Confidence: {}'.format(top.confidence))
    # [END speech_python_migration_async_response]
|
||||||
|
# [END speech_transcribe_async]
|
||||||
|
|
||||||
|
|
||||||
|
# [START speech_transcribe_async_gcs]
|
||||||
|
def transcribe_gcs(gcs_uri):
    """Run long-running (asynchronous) recognition on audio at *gcs_uri*.

    Only the language (de-DE) is configured; encoding and sample rate are
    not set in the request. After waiting up to 90 seconds for the
    operation, the transcript and confidence of the first alternative of
    every result are printed.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    speech_client = speech.SpeechClient()

    recognition_audio = types.RecognitionAudio(uri=gcs_uri)
    # encoding (FLAC) and sample_rate_hertz (16000) are deliberately left
    # out of the configuration here.
    recognition_config = types.RecognitionConfig(language_code='de-DE')

    pending = speech_client.long_running_recognize(
        recognition_config, recognition_audio)

    print('Waiting for operation to complete...')
    outcome = pending.result(timeout=90)

    # Every result covers a consecutive portion of the audio; walking all of
    # them yields the transcript of the whole file.
    for segment in outcome.results:
        # The first alternative is the most likely one for this portion.
        top = segment.alternatives[0]
        print(u'Transcript: {}'.format(top.transcript))
        print('Confidence: {}'.format(top.confidence))
|
||||||
|
# [END speech_transcribe_async_gcs]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line entry: one positional path; gs:// URIs go to the GCS
    # variant, everything else is treated as a local file.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    target = cli.parse_args().path
    if target.startswith('gs://'):
        transcribe_gcs(target)
    else:
        transcribe_file(target)
|
||||||
Reference in New Issue
Block a user