Greetings! I am attempting to transcribe audio-files using OpenAI’s Whisper model. For audio files larger than approximately 400 KB, my Temporal Workflow fails with an error message “Failure exceeds size limit.”
In the following activities, the first two work perfectly. However, the last one fails:
@activity.defn
async def write_to_redpanda(file_name: str) -> None:
producer = KafkaProducer(bootstrap_servers=REDPANDA_BROKERS)
try:
# Produce the file name as a message to the Redpanda topic
future = producer.send(TOPIC_NAME, value=file_name.encode("utf-8"))
record_metadata = future.get(timeout=10) # Wait for the write acknowledgement
print(f"File name '{file_name}' written to Redpanda topic '{TOPIC_NAME}'")
print(f"Topic: {record_metadata.topic}, Partition: {record_metadata.partition}, Offset: {record_metadata.offset}")
except Exception as e:
print(f"Error writing file name to Redpanda: {str(e)}")
raise
finally:
producer.close()
@activity.defn
async def upload_to_minio(file_name: str) -> None:
# Create a MinIO client
minio_client = Minio(
MINIO_ENDPOINT,
access_key=MINIO_ACCESS_KEY,
secret_key=MINIO_SECRET_KEY,
secure=False # Set to True if using HTTPS
)
try:
# Ensure the bucket exists
if not minio_client.bucket_exists(AUDIO_BUCKET):
minio_client.make_bucket(AUDIO_BUCKET)
# Upload the file to MinIO
minio_client.fput_object(
bucket_name=AUDIO_BUCKET,
object_name=file_name,
file_path=file_name
)
print(f"File '{file_name}' uploaded to MinIO bucket '{AUDIO_BUCKET}'")
except Exception as e:
print(f"Error uploading file to MinIO: {str(e)}")
raise
@activity.defn
async def transcribe_audio(file_name: str) -> None:
# Initialize the Whisper processor
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
# Initialize the Whisper model
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")
# Create a MinIO client
minio_client = Minio(
MINIO_ENDPOINT,
access_key=MINIO_ACCESS_KEY,
secret_key=MINIO_SECRET_KEY,
secure=False
)
try:
# Fetch the audio file from MinIO
audio_object = minio_client.get_object(AUDIO_BUCKET, file_name)
audio_data = io.BytesIO(audio_object.read())
# Transcribe the audio using Whisper
audio_array = audio_data.getvalue()
input_features = processor(audio_array, return_tensors="pt").input_features
predicted_ids = model.generate(input_features)
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
# Save the transcription to MinIO
transcription_file_name = f"{file_name}.txt"
minio_client.put_object(
TRANSCRIPTION_BUCKET,
transcription_file_name,
io.BytesIO(transcription.encode("utf-8")),
length=len(transcription),
content_type="text/plain",
)
# Send success message to Redpanda
producer = KafkaProducer(bootstrap_servers=REDPANDA_BROKERS)
success_message = f"Transcription successful for {file_name}"
producer.send(TOPIC_NAME, success_message.encode("utf-8"))
producer.flush()
except Exception as e:
# Send failure message to Redpanda
producer = KafkaProducer(bootstrap_servers=REDPANDA_BROKERS)
failure_message = f"Transcription failed for {file_name}: {str(e)}"
producer.send(TOPIC_NAME, failure_message.encode("utf-8"))
producer.flush()
raise
Thank you so much for your consideration.
Best regards,
Sina