Failure exceeds size limit

Sina_Sheikholeslami · March 30, 2024, 9:33pm

Greetings! I am attempting to transcribe audio files using OpenAI’s Whisper model. For audio files larger than approximately 400 KB, my Temporal Workflow fails with the error message “Failure exceeds size limit.”

Of the following three activities, the first two work perfectly; the last one (transcribe_audio), however, fails:

@activity.defn
async def write_to_redpanda(file_name: str) -> None:
    """Publish ``file_name`` to the Redpanda topic and wait for the ack.

    The name is UTF-8 encoded and sent to ``TOPIC_NAME`` through a
    ``KafkaProducer`` connected to ``REDPANDA_BROKERS``. The producer is
    closed when the activity finishes, whether it succeeded or not.

    Args:
        file_name: Name of the file to announce on the topic.

    Raises:
        Exception: Any error raised while sending or waiting for the
            acknowledgement is printed and then re-raised unchanged.
    """
    kafka_producer = KafkaProducer(bootstrap_servers=REDPANDA_BROKERS)

    try:
        payload = file_name.encode("utf-8")
        pending_write = kafka_producer.send(TOPIC_NAME, value=payload)
        # Wait at most 10 seconds for the write acknowledgement.
        ack = pending_write.get(timeout=10)

        print(f"File name '{file_name}' written to Redpanda topic '{TOPIC_NAME}'")
        print(f"Topic: {ack.topic}, Partition: {ack.partition}, Offset: {ack.offset}")
    except Exception as e:
        print(f"Error writing file name to Redpanda: {str(e)}")
        raise
    finally:
        kafka_producer.close()


@activity.defn
async def upload_to_minio(file_name: str) -> None:
    """Upload the local file ``file_name`` to the ``AUDIO_BUCKET`` bucket.

    The bucket is created first when it does not exist yet. The object is
    stored under the same name as the local path that is read.

    Args:
        file_name: Local path of the file; also used as the object name.

    Raises:
        Exception: Any error from the bucket check, bucket creation or
            upload is printed and then re-raised unchanged.
    """
    # secure=False means plain HTTP; switch to True when using HTTPS.
    client = Minio(
        MINIO_ENDPOINT,
        access_key=MINIO_ACCESS_KEY,
        secret_key=MINIO_SECRET_KEY,
        secure=False,
    )

    try:
        bucket_present = client.bucket_exists(AUDIO_BUCKET)
        if not bucket_present:
            client.make_bucket(AUDIO_BUCKET)

        client.fput_object(
            bucket_name=AUDIO_BUCKET,
            object_name=file_name,
            file_path=file_name,
        )

        print(f"File '{file_name}' uploaded to MinIO bucket '{AUDIO_BUCKET}'")
    except Exception as e:
        print(f"Error uploading file to MinIO: {str(e)}")
        raise


def _publish_status(message: str) -> None:
    """Send ``message`` to the Redpanda topic, then close the producer."""
    producer = KafkaProducer(bootstrap_servers=REDPANDA_BROKERS)
    try:
        producer.send(TOPIC_NAME, message.encode("utf-8"))
        producer.flush()
    finally:
        # Always release the producer, even when the send itself fails.
        producer.close()


@activity.defn
async def transcribe_audio(file_name: str) -> None:
    """Transcribe an audio object from MinIO with Whisper and store the text.

    The object ``file_name`` is read from ``AUDIO_BUCKET``, run through the
    ``openai/whisper-tiny`` processor and model, and the resulting text is
    written to ``TRANSCRIPTION_BUCKET`` as ``"<file_name>.txt"``. A success or
    failure message is published to the Redpanda topic.

    Args:
        file_name: Name of the audio object in ``AUDIO_BUCKET``.

    Raises:
        Exception: Any error raised while fetching, transcribing, storing or
            publishing is reported to Redpanda and then re-raised unchanged.
    """
    # Upper bound on the error text forwarded to Redpanda, so an exception
    # whose message embeds a large payload cannot produce an oversized record.
    max_error_chars = 2000

    # Initialize the Whisper processor and model
    processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
    model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny")

    # Create a MinIO client
    minio_client = Minio(
        MINIO_ENDPOINT,
        access_key=MINIO_ACCESS_KEY,
        secret_key=MINIO_SECRET_KEY,
        secure=False
    )

    try:
        # Fetch the audio file from MinIO and hand the HTTP connection back
        # to the pool as soon as the body has been read.
        audio_object = minio_client.get_object(AUDIO_BUCKET, file_name)
        try:
            audio_bytes = audio_object.read()
        finally:
            audio_object.close()
            audio_object.release_conn()

        # Transcribe the audio using Whisper
        # NOTE(review): the processor receives the raw file bytes here;
        # presumably it expects decoded audio samples instead — confirm
        # against the transformers documentation and decode first if so.
        input_features = processor(audio_bytes, return_tensors="pt").input_features
        predicted_ids = model.generate(input_features)
        transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

        # Save the transcription to MinIO. The length must be the number of
        # encoded bytes, not characters, or non-ASCII text is mis-sized.
        transcription_bytes = transcription.encode("utf-8")
        transcription_file_name = f"{file_name}.txt"
        minio_client.put_object(
            TRANSCRIPTION_BUCKET,
            transcription_file_name,
            io.BytesIO(transcription_bytes),
            length=len(transcription_bytes),
            content_type="text/plain",
        )

        # Send success message to Redpanda
        _publish_status(f"Transcription successful for {file_name}")

    except Exception as e:
        # Send failure message to Redpanda, keeping the error text bounded.
        error_text = str(e)
        if len(error_text) > max_error_chars:
            error_text = error_text[:max_error_chars] + "... [truncated]"
        _publish_status(f"Transcription failed for {file_name}: {error_text}")
        # NOTE(review): the exception is re-raised unchanged; if its message
        # embeds the audio payload, that may be what exceeds Temporal's
        # failure size limit — confirm and raise a compact error if needed.
        raise

Thank you so much for your consideration.

Best regards,
Sina

Topic		Replies	Views
GRPC message size limit Community Support	7	9697	December 7, 2020
What is the best way to exchange large amounts of data between Activities without running into "Complete result exceeds size limit" error? Community Support go-sdk	4	6958	April 18, 2024
RESOURCE_EXHAUSTED: Received message larger than max (4853862 vs. 4194304) Community Support go-sdk	11	5807	December 27, 2020
Find cause of "Complete result exceeds size limit" error Community Support	5	7188	May 28, 2023
Insight on Complete result exceeds size limit Community Support s3	6	4693	April 12, 2023

Failure exceeds size limit

Related topics