This commit is contained in:
2018-08-27 11:45:22 +02:00
parent 8016a3dc92
commit ae5c3ba538
4 changed files with 177 additions and 18 deletions

View File

@@ -5,6 +5,8 @@ from pydub import AudioSegment
def save_audio(filename, base64_string):
decoded = None
orig_filename = filename[:-4]+"_orig"
with open("latestBase64","wb") as f:
f.write(base64_string)
try:
decoded = base64.b64decode(base64_string)
except TypeError:
@@ -16,9 +18,10 @@ def save_audio(filename, base64_string):
AudioSegment.from_file(orig_filename).export(filename,format="wav")
return b"SUCCESS"
def save_audio_chain(filenames, base64_strings):
compleAudio = None
for fname in filenames:
def save_audio_chain(file_str_tupels):
completeAudio = None
for fname, base64_string in file_str_tupels:
print("Filename: {}".format(fname))
decoded = None
orig_filename = fname[:-4]+"_orig"
try:
@@ -27,14 +30,32 @@ def save_audio_chain(filenames, base64_strings):
return b"ERROR_INVALID_ENCODING_64"
with open(orig_filename,"wb") as f:
f.write(decoded)
if compleAudio == None:
if completeAudio == None:
completeAudio = AudioSegment.from_file(orig_filename)
else:
completeAudio += [AudioSegment.from_file(orig_filename)]
completeAudio.export(filenames[0],format="wav")
completeAudio += AudioSegment.from_file(orig_filename)
if not completeAudio:
return b"ERROR_AUDIO_CONCAT_FAILED"
else:
completeAudio.export(file_str_tupels[0][0],format="wav")
return b"SUCCESS"
def save_transcript(filename, transcript):
    """Store ``transcript`` in the sidecar file ``<filename>_transcript``.

    Args:
        filename: Path the transcript belongs to. The text is written to a
            file with the same path plus the suffix ``_transcript``.
        transcript: Text to write.

    An already existing transcript file is overwritten. No check is made
    that ``filename`` itself exists; the transcript is written regardless.
    """
    with open(filename + "_transcript", "w") as f:
        f.write(transcript)
def get_transcript(filename):
    """Return the stored transcript for ``filename``, or ``None``.

    Args:
        filename: Path of the file whose transcript is requested. The text
            is read from ``<filename>_transcript``.

    Returns:
        The content of the transcript file, or ``None`` when ``filename``
        is not an existing file or no transcript file exists for it.
    """
    if not os.path.isfile(filename):
        return None
    try:
        with open(filename + "_transcript", "r") as f:
            return f.read()
    except FileNotFoundError:
        # The file can exist before its transcript has been written;
        # report that the same way as a missing file instead of raising.
        return None
def filelist():
    """Return the file listing; placeholder that always yields ``""``."""
    listing = ""
    return listing
def fileinfo(filename):
    """Return info for ``filename``; placeholder that always yields ``""``.

    ``filename`` is accepted but not inspected.
    """
    info = ""
    return info
def copy_to_output(filename):
    """Placeholder: performs no copy and always returns ``""``.

    ``filename`` is accepted but not used.
    """
    result = ""
    return result

View File

@@ -5,15 +5,34 @@ MAIN_DIR = b"data/"
def parse_request(data):
''' parse request and call correct function '''
#return b"DUMMY"
print(data.split(b",")[0])
# echo/test connection #
cleared_data = is_data_type(b"ECHOREQUEST,",data)
if cleared_data:
return cleared_data
# reply transcript #
cleared_data = is_data_type(b"GET_TRANSCRIPT,",data)
if cleared_data:
filename = data.decode("utf-8")
return filesystem.get_transcript().encode("utf-8")
# get single file info #
cleared_data = is_data_type(b"GET_FILEINFO,",data)
if cleared_data:
filename = data.decode("utf-8")
return filesystem.fileinfo(filename).encode("utf-8")
# get single file info #
cleared_data = is_data_type(b"GET_FILEINFO_ALL,",data)
if cleared_data:
return filesystem.filelist().encode("utf-8")
# handle audio transmission #
cleared_data = is_data_type(b"AUDIO_TRANSMISSION,",data)
if cleared_data:
print("Handling audio transmission")
filename = None
try:
filename, base64_string = cleared_data.split(b',')
@@ -27,17 +46,20 @@ def parse_request(data):
# handle a chain of audiotransmissions #
if data.startswith(b"CHAIN_AUDIO_TRANSMISSION"):
files = []
base64_strings = []
for el in data.split(b"|"):
file_str_tuples = []
arr = data.split(b"|")
for el in arr[1:-1]:
filename, base64_string = el.split(b',')
filename = MAIN_DIR + filename.split(b"/")[-1] + b".wav"
filename = filename.decode("utf-8")
files += [filename]
base64_strings += [base64_string]
filesystem.save_audio_chain(files,base64_string);
speech.async_create_transcript(files[0])
return b"SUCCESS"
file_str_tuples += [(filename,base64_string)]
if len(file_str_tuples) < 2: # a chain has 2 or more elements
return bytes("ERROR_INVALID_NUMBER_FILES_{}".format(len(file_str_tuples)),"utf-8")
ret = filesystem.save_audio_chain(file_str_tuples);
speech.async_create_transcript(file_str_tuples[0][0])
return ret
# other shit
@@ -80,3 +102,4 @@ def recive_transcribe_request(audiofile):
def android_unittest_transcribe_request(audiofile):
    """Dummy handler for transcribe requests sent by the Android unit tests.

    Those tests append a special keyword to their requests. Such requests
    are only dummy handled: ``audiofile`` is ignored and ``None`` is
    returned.
    """
    return None

View File

@@ -4,7 +4,11 @@ import os.path
import filesystem
import log
# Backend switches read by analyse(): USE_FREE is tested first and selects
# free_google_backend(); USE_PAID is only consulted when USE_FREE is false
# and selects paid_google_backend().
USE_FREE=False
USE_PAID=True
def async_create_transcript(filename):
    """Kick off transcript creation for ``filename`` without waiting for it.

    A new ``mp.Process`` is started that runs
    ``create_and_save_transcript(filename)``. This function returns as soon
    as the process has been started; it does not join it.
    """
    print("Creating transcript..")
    worker = mp.Process(target=create_and_save_transcript, args=(filename,))
    worker.start()
def create_and_save_transcript(filename):
@@ -18,12 +22,21 @@ def analyse(filename):
audio = recognizer.record(source)
try:
string = recognizer.recognize_google(audio,language="de-DE")
if USE_FREE:
string = free_google_backend(recognizer, audio)
elif USE_PAID:
string = paid_google_backend(recognizer,audio)
except spr.UnknownValueError:
log.log("Audio file is broken or not an audio file")
return None
return "ERROR_AUDIO_FILE_INVALID"
except spr.RequestError as e:
log.log("Could not connect to google API: {}".format(e))
return None
return "ERROR_API_FAILURE"
return string
def free_google_backend(recognizer, audio, language="de-DE"):
    """Transcribe ``audio`` through ``recognizer.recognize_google``.

    Args:
        recognizer: Object providing ``recognize_google(audio, language=...)``.
        audio: Audio data passed through unchanged to the recognizer.
        language: Language tag forwarded to the recognizer. Defaults to
            ``"de-DE"``, the value that was previously hard-coded, so
            existing two-argument calls behave as before.

    Returns:
        Whatever ``recognizer.recognize_google`` returns. Exceptions raised
        by the recognizer propagate to the caller.
    """
    return recognizer.recognize_google(audio, language=language)
def paid_google_backend(recognizer, audio):
    """Placeholder for the paid backend; not implemented.

    Both arguments are ignored and ``None`` is returned.
    """
    return None

View File

@@ -0,0 +1,102 @@
#!/usr/bin/env python
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Google Cloud Speech API sample application using the REST API for async
batch processing.
Example usage:
python transcribe_async.py resources/audio.raw
python transcribe_async.py gs://cloud-samples-tests/speech/vr.flac
"""
import argparse
import io
# [START speech_transcribe_async]
def transcribe_file(speech_file):
    """Run long-running recognition on a local audio file and print it.

    The file at ``speech_file`` is read as bytes and submitted with a
    LINEAR16 / 16000 Hz / ``en-US`` configuration. The call blocks for up to
    90 seconds on the operation result, then prints the first alternative's
    transcript and confidence for every result.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    speech_client = speech.SpeechClient()

    # [START speech_python_migration_async_request]
    with io.open(speech_file, 'rb') as audio_file:
        raw_bytes = audio_file.read()

    recognition_audio = types.RecognitionAudio(content=raw_bytes)
    linear16 = enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US')

    # [START speech_python_migration_async_response]
    operation = speech_client.long_running_recognize(
        recognition_config, recognition_audio)
    # [END speech_python_migration_async_request]

    print('Waiting for operation to complete...')
    response = operation.result(timeout=90)

    # Results cover consecutive portions of the audio; walking all of them
    # yields the transcript of the whole file.
    for result in response.results:
        # Alternative 0 is the most likely one for this portion.
        best = result.alternatives[0]
        print(u'Transcript: {}'.format(best.transcript))
        print('Confidence: {}'.format(best.confidence))
    # [END speech_python_migration_async_response]
# [END speech_transcribe_async]
# [START speech_transcribe_async_gcs]
def transcribe_gcs(gcs_uri):
    """Run long-running recognition on the audio at ``gcs_uri`` and print it.

    Only the language (``de-DE``) is configured; no encoding or sample rate
    is passed. The call blocks for up to 90 seconds on the operation result,
    then prints the first alternative's transcript and confidence for every
    result.
    """
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    speech_client = speech.SpeechClient()

    recognition_audio = types.RecognitionAudio(uri=gcs_uri)
    recognition_config = types.RecognitionConfig(language_code='de-DE')

    operation = speech_client.long_running_recognize(
        recognition_config, recognition_audio)

    print('Waiting for operation to complete...')
    response = operation.result(timeout=90)

    # Results cover consecutive portions of the audio; walking all of them
    # yields the transcript of the whole file.
    for result in response.results:
        # Alternative 0 is the most likely one for this portion.
        best = result.alternatives[0]
        print(u'Transcript: {}'.format(best.transcript))
        print('Confidence: {}'.format(best.confidence))
# [END speech_transcribe_async_gcs]
if __name__ == '__main__':
    # Command-line entry point: one positional argument, the audio location.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    target = cli.parse_args().path

    # gs:// URIs go to the GCS variant, everything else is read as a local file.
    handler = transcribe_gcs if target.startswith('gs://') else transcribe_file
    handler(target)