mirror of
https://github.com/FAUSheppy/ths-speech
synced 2025-12-06 07:51:34 +01:00
spr
This commit is contained in:
@@ -5,6 +5,8 @@ from pydub import AudioSegment
|
||||
def save_audio(filename, base64_string):
|
||||
decoded = None
|
||||
orig_filename = filename[:-4]+"_orig"
|
||||
with open("latestBase64","wb") as f:
|
||||
f.write(base64_string)
|
||||
try:
|
||||
decoded = base64.b64decode(base64_string)
|
||||
except TypeError:
|
||||
@@ -16,9 +18,10 @@ def save_audio(filename, base64_string):
|
||||
AudioSegment.from_file(orig_filename).export(filename,format="wav")
|
||||
return b"SUCCESS"
|
||||
|
||||
def save_audio_chain(filenames, base64_strings):
|
||||
compleAudio = None
|
||||
for fname in filenames:
|
||||
def save_audio_chain(file_str_tupels):
|
||||
completeAudio = None
|
||||
for fname, base64_string in file_str_tupels:
|
||||
print("Filename: {}".format(fname))
|
||||
decoded = None
|
||||
orig_filename = fname[:-4]+"_orig"
|
||||
try:
|
||||
@@ -27,14 +30,32 @@ def save_audio_chain(filenames, base64_strings):
|
||||
return b"ERROR_INVALID_ENCODING_64"
|
||||
with open(orig_filename,"wb") as f:
|
||||
f.write(decoded)
|
||||
if compleAudio == None:
|
||||
if completeAudio == None:
|
||||
completeAudio = AudioSegment.from_file(orig_filename)
|
||||
else:
|
||||
completeAudio += [AudioSegment.from_file(orig_filename)]
|
||||
completeAudio.export(filenames[0],format="wav")
|
||||
completeAudio += AudioSegment.from_file(orig_filename)
|
||||
if not completeAudio:
|
||||
return b"ERROR_AUDIO_CONCAT_FAILED"
|
||||
else:
|
||||
completeAudio.export(file_str_tupels[0][0],format="wav")
|
||||
return b"SUCCESS"
|
||||
|
||||
def save_transcript(filename, transcript):
    """Write *transcript* to the sidecar file ``<filename>_transcript``.

    Args:
        filename: Path of the audio file the transcript belongs to; the
            transcript is stored under the same path with a
            ``_transcript`` suffix appended.
        transcript: Text to store. An existing transcript file is
            overwritten.
    """
    # Written unconditionally: whether the audio file itself exists is not
    # checked here.
    with open(filename + "_transcript", "w") as f:
        f.write(transcript)
|
||||
|
||||
def get_transcript(filename):
    """Return the stored transcript for *filename*, or ``None`` if absent.

    Args:
        filename: Path of the audio file; its transcript is read from
            ``<filename>_transcript``.

    Returns:
        The transcript text, or ``None`` when either the audio file or its
        transcript file does not exist.
    """
    transcript_path = filename + "_transcript"
    # Check the transcript file as well as the audio file: testing only the
    # audio file lets open() raise FileNotFoundError when no transcript has
    # been written for it yet.
    if not (os.path.isfile(filename) and os.path.isfile(transcript_path)):
        return None
    with open(transcript_path, "r") as f:
        return f.read()
|
||||
|
||||
def filelist():
    """Return the file listing; currently a stub that always gives ``""``."""
    listing = ""
    return listing
|
||||
|
||||
def fileinfo(filename):
    """Return info about *filename*; currently a stub that gives ``""``.

    The argument is accepted but not used.
    """
    info = ""
    return info
|
||||
|
||||
def copy_to_output(filename):
    """Stub: nothing is copied yet; always returns ``""``.

    The argument is accepted but not used.
    """
    result = ""
    return result
|
||||
|
||||
@@ -5,15 +5,34 @@ MAIN_DIR = b"data/"
|
||||
|
||||
def parse_request(data):
|
||||
''' parse request and call correct function '''
|
||||
|
||||
#return b"DUMMY"
|
||||
print(data.split(b",")[0])
|
||||
# echo/test connection #
|
||||
cleared_data = is_data_type(b"ECHOREQUEST,",data)
|
||||
if cleared_data:
|
||||
return cleared_data
|
||||
|
||||
# reply transcript #
|
||||
cleared_data = is_data_type(b"GET_TRANSCRIPT,",data)
|
||||
if cleared_data:
|
||||
filename = data.decode("utf-8")
|
||||
return filesystem.get_transcript().encode("utf-8")
|
||||
|
||||
# get single file info #
|
||||
cleared_data = is_data_type(b"GET_FILEINFO,",data)
|
||||
if cleared_data:
|
||||
filename = data.decode("utf-8")
|
||||
return filesystem.fileinfo(filename).encode("utf-8")
|
||||
|
||||
# get info for all files #
|
||||
cleared_data = is_data_type(b"GET_FILEINFO_ALL,",data)
|
||||
if cleared_data:
|
||||
return filesystem.filelist().encode("utf-8")
|
||||
|
||||
# handle audio transmission #
|
||||
cleared_data = is_data_type(b"AUDIO_TRANSMISSION,",data)
|
||||
if cleared_data:
|
||||
print("Handling audio transmission")
|
||||
filename = None
|
||||
try:
|
||||
filename, base64_string = cleared_data.split(b',')
|
||||
@@ -27,17 +46,20 @@ def parse_request(data):
|
||||
|
||||
# handle a chain of audiotransmissions #
|
||||
if data.startswith(b"CHAIN_AUDIO_TRANSMISSION"):
|
||||
files = []
|
||||
base64_strings = []
|
||||
for el in data.split(b"|"):
|
||||
file_str_tuples = []
|
||||
arr = data.split(b"|")
|
||||
for el in arr[1:-1]:
|
||||
filename, base64_string = el.split(b',')
|
||||
filename = MAIN_DIR + filename.split(b"/")[-1] + b".wav"
|
||||
filename = filename.decode("utf-8")
|
||||
files += [filename]
|
||||
base64_strings += [base64_string]
|
||||
filesystem.save_audio_chain(files,base64_string);
|
||||
speech.async_create_transcript(files[0])
|
||||
return b"SUCCESS"
|
||||
file_str_tuples += [(filename,base64_string)]
|
||||
|
||||
if len(file_str_tuples) < 2: # a chain has 2 or more elements
|
||||
return bytes("ERROR_INVALID_NUMBER_FILES_{}".format(len(file_str_tuples)),"utf-8")
|
||||
|
||||
ret = filesystem.save_audio_chain(file_str_tuples);
|
||||
speech.async_create_transcript(file_str_tuples[0][0])
|
||||
return ret
|
||||
|
||||
|
||||
# other shit
|
||||
@@ -80,3 +102,4 @@ def recive_transcribe_request(audiofile):
|
||||
|
||||
def android_unittest_transcribe_request(audiofile):
    """Dummy handler for transcribe requests from the Android unit tests.

    Those tests append a special keyword to their requests. Nothing is done
    with ``audiofile`` and ``None`` is returned.
    """
    return None
|
||||
|
||||
@@ -4,7 +4,11 @@ import os.path
|
||||
import filesystem
|
||||
import log
|
||||
|
||||
USE_FREE=False
|
||||
USE_PAID=True
|
||||
|
||||
def async_create_transcript(filename):
    """Start a background worker that transcribes *filename*.

    Prints a progress message, then starts ``create_and_save_transcript``
    with ``filename`` through ``mp.Process`` and returns without joining it.
    (``mp`` is imported outside this view; presumably ``multiprocessing``.)
    """
    print("Creating transcript..")
    worker = mp.Process(target=create_and_save_transcript, args=(filename,))
    worker.start()
|
||||
|
||||
def create_and_save_transcript(filename):
|
||||
@@ -18,12 +22,21 @@ def analyse(filename):
|
||||
audio = recognizer.record(source)
|
||||
|
||||
try:
|
||||
string = recognizer.recognize_google(audio,language="de-DE")
|
||||
if USE_FREE:
|
||||
string = free_google_backend(recognizer, audio)
|
||||
elif USE_PAID:
|
||||
string = paid_google_backend(recognizer,audio)
|
||||
except spr.UnknownValueError:
|
||||
log.log("Audio file is broken or not an audio file")
|
||||
return None
|
||||
return "ERROR_AUDIO_FILE_INVALID"
|
||||
except spr.RequestError as e:
|
||||
log.log("Could not connect to google API: {}".format(e))
|
||||
return None
|
||||
return "ERROR_API_FAILURE"
|
||||
|
||||
return string
|
||||
|
||||
def free_google_backend(recognizer, audio):
    """Transcribe *audio* through the recognizer's ``recognize_google``.

    The language is fixed to ``de-DE``. Whatever ``recognize_google``
    returns is passed back unchanged; its exceptions propagate to the
    caller.
    """
    transcript = recognizer.recognize_google(audio, language="de-DE")
    return transcript
|
||||
|
||||
def paid_google_backend(recognizer, audio):
    """Placeholder for the paid backend: not implemented, returns ``None``.

    Both arguments are accepted but not used.
    """
    return None
|
||||
|
||||
102
python-server/transcribe_async.py
Normal file
102
python-server/transcribe_async.py
Normal file
@@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Copyright 2017 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Google Cloud Speech API sample application using the REST API for async
|
||||
batch processing.
|
||||
|
||||
Example usage:
|
||||
python transcribe_async.py resources/audio.raw
|
||||
python transcribe_async.py gs://cloud-samples-tests/speech/vr.flac
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import io
|
||||
|
||||
|
||||
# [START speech_transcribe_async]
|
||||
def transcribe_file(speech_file, language_code='en-US',
                    sample_rate_hertz=16000, timeout=90):
    """Transcribe the given audio file asynchronously.

    Reads the whole file into memory, submits it as LINEAR16 audio through
    ``SpeechClient.long_running_recognize``, waits for the operation result
    and prints the first alternative's transcript and confidence for each
    result. Nothing is returned.

    Args:
        speech_file: Path of the local audio file to send.
        language_code: Language tag placed in the recognition config.
        sample_rate_hertz: Sample rate declared in the recognition config.
        timeout: Seconds to wait for the operation result.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    # [START speech_python_migration_async_request]
    with io.open(speech_file, 'rb') as audio_file:
        content = audio_file.read()

    audio = types.RecognitionAudio(content=content)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code)

    # [START speech_python_migration_async_response]
    operation = client.long_running_recognize(config, audio)
    # [END speech_python_migration_async_request]

    print('Waiting for operation to complete...')
    response = operation.result(timeout=timeout)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        best = result.alternatives[0]
        print(u'Transcript: {}'.format(best.transcript))
        print('Confidence: {}'.format(best.confidence))
    # [END speech_python_migration_async_response]
|
||||
# [END speech_transcribe_async]
|
||||
|
||||
|
||||
# [START speech_transcribe_async_gcs]
|
||||
def transcribe_gcs(gcs_uri, language_code='de-DE', timeout=90):
    """Asynchronously transcribes the audio file specified by the gcs_uri.

    Submits the URI through ``SpeechClient.long_running_recognize``, waits
    for the operation result and prints the first alternative's transcript
    and confidence for each result. Nothing is returned.

    Args:
        gcs_uri: URI of the audio object (the command-line entry point
            passes paths starting with ``gs://``).
        language_code: Language tag placed in the recognition config.
        timeout: Seconds to wait for the operation result.
    """
    from google.cloud import speech
    from google.cloud.speech import types
    client = speech.SpeechClient()

    audio = types.RecognitionAudio(uri=gcs_uri)
    # NOTE(review): encoding and sample_rate_hertz are deliberately left
    # unset here; presumably the service infers them for the uploaded
    # format - confirm before relying on it.
    config = types.RecognitionConfig(language_code=language_code)

    operation = client.long_running_recognize(config, audio)

    print('Waiting for operation to complete...')
    response = operation.result(timeout=timeout)

    # Each result is for a consecutive portion of the audio. Iterate through
    # them to get the transcripts for the entire audio file.
    for result in response.results:
        # The first alternative is the most likely one for this portion.
        best = result.alternatives[0]
        print(u'Transcript: {}'.format(best.transcript))
        print('Confidence: {}'.format(best.confidence))
|
||||
# [END speech_transcribe_async_gcs]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: one positional path, dispatched on whether
    # it starts with the gs:// scheme.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    target = cli.parse_args().path
    handler = transcribe_gcs if target.startswith('gs://') else transcribe_file
    handler(target)
|
||||
Reference in New Issue
Block a user