spr

2026-01-22 08:17:39 +01:00 · 2018-08-27 11:45:22 +02:00
parent 8016a3dc92
commit ae5c3ba538
4 changed files with 177 additions and 18 deletions
--- a/python-server/transcribe_async.py
+++ b/python-server/transcribe_async.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+
+# Copyright 2017 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Google Cloud Speech API sample application using the REST API for async
+batch processing.
+
+Example usage:
+    python transcribe_async.py resources/audio.raw
+    python transcribe_async.py gs://cloud-samples-tests/speech/vr.flac
+"""
+
+import argparse
+import io
+
+
+# [START speech_transcribe_async]
+def transcribe_file(speech_file):
+    """Send a local audio file for asynchronous transcription and print it."""
+    from google.cloud import speech
+    from google.cloud.speech import enums
+    from google.cloud.speech import types
+    client = speech.SpeechClient()
+
+    # [START speech_python_migration_async_request]
+    with io.open(speech_file, 'rb') as stream:
+        raw_audio = stream.read()
+
+    audio = types.RecognitionAudio(content=raw_audio)
+    linear16 = enums.RecognitionConfig.AudioEncoding.LINEAR16
+    config = types.RecognitionConfig(
+        encoding=linear16, sample_rate_hertz=16000, language_code='en-US')
+
+    # [START speech_python_migration_async_response]
+    operation = client.long_running_recognize(config, audio)
+    # [END speech_python_migration_async_request]
+
+    print('Waiting for operation to complete...')
+    response = operation.result(timeout=90)
+
+    # Every entry in the response covers one consecutive portion of the
+    # audio, so printing them in order yields the transcript of the file.
+    for chunk in response.results:
+        # alternatives[0] is the most likely hypothesis for this portion.
+        top = chunk.alternatives[0]
+        print(u'Transcript: {}'.format(top.transcript))
+        print('Confidence: {}'.format(top.confidence))
+    # [END speech_python_migration_async_response]
+# [END speech_transcribe_async]
+
+
+# [START speech_transcribe_async_gcs]
+def transcribe_gcs(gcs_uri):
+    """Asynchronously transcribe the audio file stored at ``gcs_uri``.
+
+    Prints each result's top transcript and confidence (90 s timeout).
+    """
+    from google.cloud import speech
+    from google.cloud.speech import types
+    client = speech.SpeechClient()
+
+    audio = types.RecognitionAudio(uri=gcs_uri)
+    # Encoding and sample rate are not set: the Speech API treats them as
+    # optional for FLAC/WAV input. Set both explicitly for headerless audio.
+    config = types.RecognitionConfig(language_code='de-DE')
+
+    operation = client.long_running_recognize(config, audio)
+
+    print('Waiting for operation to complete...')
+    response = operation.result(timeout=90)
+
+    # Each result covers a consecutive portion of the audio.
+    for result in response.results:
+        best = result.alternatives[0]  # first alternative is the most likely
+        print(u'Transcript: {}'.format(best.transcript))
+        print('Confidence: {}'.format(best.confidence))
+# [END speech_transcribe_async_gcs]
+
+
+if __name__ == '__main__':
+    # Reuse the module docstring, with its usage examples, as the help text.
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=__doc__)
+    cli.add_argument(
+        'path', help='File or GCS path for audio file to be recognized')
+    target = cli.parse_args().path
+    # gs:// paths go to the GCS transcriber; anything else is a local file.
+    handler = transcribe_gcs if target.startswith('gs://') else transcribe_file
+    handler(target)