Failure exceeds size limit

Greetings! I am attempting to transcribe audio-files using OpenAI’s Whisper model. For audio files larger than approximately 400 KB, my Temporal Workflow fails with an error message “Failure exceeds size limit.”

In the following activities, the first two work perfectly. However, the last one fails:

@activity.defn
async def write_to_redpanda(file_name: str) -> None:
    producer = KafkaProducer(bootstrap_servers=REDPANDA_BROKERS)
    
    try:
        # Produce the file name as a message to the Redpanda topic
        future = producer.send(TOPIC_NAME, value=file_name.encode("utf-8"))
        record_metadata = future.get(timeout=10)  # Wait for the write acknowledgement
        
        print(f"File name '{file_name}' written to Redpanda topic '{TOPIC_NAME}'")
        print(f"Topic: {record_metadata.topic}, Partition: {record_metadata.partition}, Offset: {record_metadata.offset}")
    
    except Exception as e:
        print(f"Error writing file name to Redpanda: {str(e)}")
        raise
    
    finally:
        producer.close()


@activity.defn
async def upload_to_minio(file_name: str) -> None:
    # Create a MinIO client
    minio_client = Minio(
        MINIO_ENDPOINT,
        access_key=MINIO_ACCESS_KEY,
        secret_key=MINIO_SECRET_KEY,
        secure=False  # Set to True if using HTTPS
    )

    try:
        # Ensure the bucket exists
        if not minio_client.bucket_exists(AUDIO_BUCKET):
            minio_client.make_bucket(AUDIO_BUCKET)

        # Upload the file to MinIO
        minio_client.fput_object(
            bucket_name=AUDIO_BUCKET,
            object_name=file_name,
            file_path=file_name
        )

        print(f"File '{file_name}' uploaded to MinIO bucket '{AUDIO_BUCKET}'")

    except Exception as e:
        print(f"Error uploading file to MinIO: {str(e)}")
        raise


@activity.defn
async def transcribe_audio(file_name: str) -> None:

    # Initialize the Whisper processor
    processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
    # Initialize the Whisper model
    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")

    # Create a MinIO client
    minio_client = Minio(
        MINIO_ENDPOINT,
        access_key=MINIO_ACCESS_KEY,
        secret_key=MINIO_SECRET_KEY,
        secure=False
    )

    try:
        # Fetch the audio file from MinIO
        audio_object = minio_client.get_object(AUDIO_BUCKET, file_name)
        audio_data = io.BytesIO(audio_object.read())

        # Transcribe the audio using Whisper
        audio_array = audio_data.getvalue()
        input_features = processor(audio_array, return_tensors="pt").input_features
        predicted_ids = model.generate(input_features)
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

        # Save the transcription to MinIO
        transcription_file_name = f"{file_name}.txt"
        minio_client.put_object(
            TRANSCRIPTION_BUCKET,
            transcription_file_name,
            io.BytesIO(transcription.encode("utf-8")),
            length=len(transcription),
            content_type="text/plain",
        )

        # Send success message to Redpanda
        producer = KafkaProducer(bootstrap_servers=REDPANDA_BROKERS)
        success_message = f"Transcription successful for {file_name}"
        producer.send(TOPIC_NAME, success_message.encode("utf-8"))
        producer.flush()

    except Exception as e:
        # Send failure message to Redpanda
        producer = KafkaProducer(bootstrap_servers=REDPANDA_BROKERS)
        failure_message = f"Transcription failed for {file_name}: {str(e)}"
        producer.send(TOPIC_NAME, failure_message.encode("utf-8"))
        producer.flush()
        raise

Thank you so much for your consideration.

Best regards,
Sina