Skip to content

Google Vision API

Google Vision API Complete Reference

This section provides comprehensive examples for all Google Vision API endpoints available in Vision Studio, including both Imagen and Gemini models, as well as VEO for video generation.

Prerequisites

import requests
import json
import base64

# Your API endpoint and key
API_URL = "http://localhost:8527/api/v1/google-vision"
API_KEY = "your-api-key"

headers = {
    "X-API-Key": API_KEY,
    "Content-Type": "application/json"
}

Imagen API Endpoints

1. Generate Images

Generate high-quality images using Google's Imagen models with various configurations.

Basic Image Generation
# Request payload for the generate-image endpoint: two 16:9 images, no watermark.
data = {
    "prompt": "A serene mountain landscape with a crystal-clear lake reflecting snow-capped peaks",
    "number_of_images": 2,
    "aspect_ratio": "16:9",
    # NOTE(review): this "basic" example names the *fast* model variant while
    # use_fast_model is False — confirm whether the standard model ID was intended.
    "model_name": "imagen-4.0-fast-generate-preview-06-06",
    "use_fast_model": False,
    "add_watermark": False,
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "ALLOW_ADULT"
}

# POST the payload as JSON, authenticated via the X-API-Key header.
response = requests.post(
    f"{API_URL}/image/generation/generate-image",
    headers=headers,
    json=data
)
High-Quality Generation
data = {
    "prompt": "Ultra-detailed portrait of a wise old wizard with intricate robes and magical artifacts",
    "number_of_images": 1,
    "aspect_ratio": "3:4",
    "model_name": "imagen-4.0-ultra-generate-preview-06-06",  # Ultra model for highest quality
    "use_fast_model": False,
    "add_watermark": True,
    "safety_filter_level": "BLOCK_LOW_AND_ABOVE",
    "person_generation": "ALLOW_ALL"
}

response = requests.post(
    f"{API_URL}/image/generation/generate-image",
    headers=headers,
    json=data
)
Fast Generation
data = {
    "prompt": "Modern minimalist office space with natural lighting",
    "number_of_images": 4,
    "aspect_ratio": "1:1",
    "use_fast_model": True,  # Override model_name with fast generation
    "add_watermark": False,
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "DONT_ALLOW"
}

response = requests.post(
    f"{API_URL}/image/generation/generate-image",
    headers=headers,
    json=data
)

2. Generate with Image Guidance

Create images using reference images for style or subject guidance.

Style Guidance
data = {
    "guidance_type": "style",
    "ref_image_1_url": "https://example.com/artistic_style_reference.jpg",
    "ref_image_1_description": "impressionist painting style",  # 5-6 words max
    "prompt": "A bustling city street scene during golden hour",
    "aspect_ratio": "16:9",
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "ALLOW_ADULT"
}

response = requests.post(
    f"{API_URL}/image/generation/generate-with-image-guidance",
    headers=headers,
    json=data
)
Subject Guidance with Multiple References
data = {
    "guidance_type": "subject",
    "ref_image_1_url": "https://example.com/product_photo.jpg",
    "ref_image_1_description": "modern wireless headphones design",
    "ref_image_2_url": "https://example.com/additional_angle.jpg",
    "ref_image_2_description": "side profile view",
    "subject_type": "SUBJECT_TYPE_PRODUCT",
    "prompt": "Display the product in a premium lifestyle setting with dramatic lighting",
    "aspect_ratio": "4:3",
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "ALLOW_ADULT"
}

response = requests.post(
    f"{API_URL}/image/generation/generate-with-image-guidance",
    headers=headers,
    json=data
)
Animal Subject Guidance
data = {
    "guidance_type": "subject",
    "ref_image_1_url": "https://example.com/golden_retriever.jpg",
    "ref_image_1_description": "friendly golden retriever face",
    "subject_type": "SUBJECT_TYPE_ANIMAL",
    "prompt": "Show the dog playing in a beautiful autumn park with falling leaves",
    "aspect_ratio": "1:1",
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "ALLOW_ALL"
}

response = requests.post(
    f"{API_URL}/image/generation/generate-with-image-guidance",
    headers=headers,
    json=data
)

3. Generate Variations

Apply controlled editing to existing images using different control types.

Canny Edge Control
data = {
    "image_url": "https://example.com/original_photo.jpg",
    "prompt": "Transform into a cyberpunk-style neon cityscape while maintaining the same composition",
    "control_type": "CONTROL_TYPE_CANNY",
    "number_of_images": 2,
    "aspect_ratio": "16:9",
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "ALLOW_ADULT"
}

response = requests.post(
    f"{API_URL}/image/generation/generate-variation",
    headers=headers,
    json=data
)
Scribble Control
data = {
    "image_url": "https://example.com/sketch_or_drawing.jpg",
    "prompt": "Convert this sketch into a photorealistic landscape with dramatic sky and lighting",
    "control_type": "CONTROL_TYPE_SCRIBBLE",
    "number_of_images": 1,
    "aspect_ratio": "3:4",
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "DONT_ALLOW"
}

response = requests.post(
    f"{API_URL}/image/generation/generate-variation",
    headers=headers,
    json=data
)

Image Editing

1. Inpainting

Perform advanced inpainting operations to insert or remove content.

Insert Content with Foreground Masking
data = {
    "operation_type": "insert",
    "image_url": "https://example.com/room_scene.jpg",
    "prompt": "elegant leather armchair with warm lighting",
    "mask_mode": "MASK_MODE_FOREGROUND",
    "mask_dilation": 0.15,
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "DONT_ALLOW"
}

response = requests.post(
    f"{API_URL}/image/editing/inpaint",
    headers=headers,
    json=data
)
Remove Content with Semantic Classes
data = {
    "operation_type": "remove",
    "image_url": "https://example.com/street_scene.jpg",
    "removal_mode": "semantic",
    "segmentation_classes": [20, 21],  # Vehicle classes - see Google's segmentation class IDs
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "ALLOW_ALL"
}

response = requests.post(
    f"{API_URL}/image/editing/inpaint",
    headers=headers,
    json=data
)
Remove Content with Prompt
data = {
    "operation_type": "remove",
    "image_url": "https://example.com/photo_with_objects.jpg",
    "prompt": "remove the red car from the driveway",
    "removal_mode": "prompt",
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "ALLOW_ALL"
}

response = requests.post(
    f"{API_URL}/image/editing/inpaint",
    headers=headers,
    json=data
)
Insert with User-Provided Mask
data = {
    "operation_type": "insert",
    "image_url": "https://example.com/base_image.jpg",
    "prompt": "beautiful tropical plants and flowers",
    "mask_mode": "MASK_MODE_USER_PROVIDED",
    "mask_dilation": 0.08,
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "DONT_ALLOW"
}

response = requests.post(
    f"{API_URL}/image/editing/inpaint",
    headers=headers,
    json=data
)

2. Generate Background

Replace image backgrounds while preserving foreground subjects.

data = {
    "image_url": "https://example.com/portrait_subject.jpg",
    "prompt": "magical fantasy forest with glowing mushrooms and ethereal lighting",
    "seed": 12345,
    "number_of_images": 3,
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "ALLOW_ALL"
}

response = requests.post(
    f"{API_URL}/image/editing/generate-background",
    headers=headers,
    json=data
)

3. Expand Image (Outpainting)

Expand images by adding content around the edges with precise positioning control.

Center Positioning
data = {
    "image_url": "https://example.com/portrait.jpg",
    "prompt": "beautiful mountain landscape with pine trees",
    "target_width": 2000,
    "target_height": 2000,
    "number_of_images": 1,
    "mask_dilation": 0.05,
    "position": "center",
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "ALLOW_ALL"
}

response = requests.post(
    f"{API_URL}/image/editing/expand-image",
    headers=headers,
    json=data
)
Bottom-Center Positioning for Sky Extension
data = {
    "image_url": "https://example.com/landscape.jpg",
    "prompt": "dramatic stormy clouds and lightning in the sky",
    "target_width": 1920,
    "target_height": 1080,
    "number_of_images": 2,
    "mask_dilation": 0.03,
    "position": "bottom-center",  # Original image at bottom, expand upward
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "DONT_ALLOW"
}

response = requests.post(
    f"{API_URL}/image/editing/expand-image",
    headers=headers,
    json=data
)
Custom Padding Control
data = {
    "image_url": "https://example.com/subject.jpg",
    "prompt": "modern architectural environment with clean lines",
    "target_width": 2500,
    "target_height": 2500,
    "number_of_images": 1,
    "mask_dilation": 0.04,
    "padding_left": 0.2,
    "padding_right": 0.1,
    "padding_top": 0.15,
    "padding_bottom": 0.25,
    "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
    "person_generation": "ALLOW_ADULT"
}

response = requests.post(
    f"{API_URL}/image/editing/expand-image",
    headers=headers,
    json=data
)

Gemini 2.0 Flash Endpoints

1. Generate Images

Generate images using Gemini 2.0 Flash with optional input images for composition.

Text-Only Generation
data = {
    "prompt": "Create 2 images of a futuristic city with flying cars and neon lights",
    "safety_method": "PROBABILITY",
    "safety_category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "safety_threshold": "BLOCK_MEDIUM_AND_ABOVE"
}

response = requests.post(
    f"{API_URL}/image/generation/gemini-generate-image",
    headers=headers,
    json=data
)
Image Composition
data = {
    "prompt": "Combine these images into an artistic collage with a vintage aesthetic",
    "image_urls": [
        "https://example.com/photo1.jpg",
        "https://example.com/photo2.jpg",
        "gs://cloud-samples-data/generative-ai/image/sample.jpg"  # GCS URI supported
    ],
    "safety_method": "PROBABILITY",
    "safety_category": "HARM_CATEGORY_HARASSMENT",
    "safety_threshold": "BLOCK_LOW_AND_ABOVE"
}

response = requests.post(
    f"{API_URL}/image/generation/gemini-generate-image",
    headers=headers,
    json=data
)
High Safety Configuration
data = {
    "prompt": "Generate a family-friendly cartoon illustration of animals in a forest",
    "safety_method": "SEVERITY",
    "safety_category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "safety_threshold": "BLOCK_LOW_AND_ABOVE"  # Strictest setting
}

response = requests.post(
    f"{API_URL}/image/generation/gemini-generate-image",
    headers=headers,
    json=data
)

2. Generate Image Sequences

Create rich content that mixes text explanations with generated images.

Tutorial with Visual Steps
data = {
    "prompt": "Explain how to plant a garden with step-by-step images showing each stage from soil preparation to harvesting",
    "safety_method": "PROBABILITY",
    "safety_category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "safety_threshold": "BLOCK_MEDIUM_AND_ABOVE"
}

response = requests.post(
    f"{API_URL}/image/generation/gemini-generate-image-sequence",
    headers=headers,
    json=data
)
Story with Illustrations
data = {
    "prompt": "Tell a short adventure story about a brave mouse exploring an enchanted forest, with illustrations for key scenes",
    "safety_method": "PROBABILITY",
    "safety_category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "safety_threshold": "BLOCK_MEDIUM_AND_ABOVE"
}

response = requests.post(
    f"{API_URL}/image/generation/gemini-generate-image-sequence",
    headers=headers,
    json=data
)
Scientific Explanation
data = {
    "prompt": "Explain the water cycle with detailed diagrams showing evaporation, condensation, precipitation, and collection",
    "safety_method": "SEVERITY",
    "safety_category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "safety_threshold": "BLOCK_ONLY_HIGH"
}

response = requests.post(
    f"{API_URL}/image/generation/gemini-generate-image-sequence",
    headers=headers,
    json=data
)

3. Edit Images

Edit images using text prompts with support for single-step or multi-turn editing.

Single-Step Editing
data = {
    "image_url": "https://example.com/portrait.jpg",
    "edit_prompt": "Add sunglasses and change the background to a beach scene with palm trees",
    "safety_method": "PROBABILITY",
    "safety_category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "safety_threshold": "BLOCK_MEDIUM_AND_ABOVE"
}

response = requests.post(
    f"{API_URL}/image/editing/gemini-edit-image",
    headers=headers,
    json=data
)
Multi-Turn Sequential Editing
data = {
    "image_url": "https://example.com/landscape.jpg",
    "edit_steps": "Add dramatic clouds to the sky, Increase color saturation, Add a rainbow arc, Apply vintage film grain effect",
    "safety_method": "PROBABILITY",
    "safety_category": "HARM_CATEGORY_DANGEROUS_CONTENT",
    "safety_threshold": "BLOCK_MEDIUM_AND_ABOVE"
}

response = requests.post(
    f"{API_URL}/image/editing/gemini-edit-image",
    headers=headers,
    json=data
)
GCS URI Support
data = {
    "image_url": "gs://cloud-samples-data/generative-ai/image/mirror.png",
    "edit_prompt": "Convert to black and white and add vintage film grain effect",
    "safety_method": "SEVERITY",
    "safety_category": "HARM_CATEGORY_HATE_SPEECH",
    "safety_threshold": "BLOCK_LOW_AND_ABOVE"
}

response = requests.post(
    f"{API_URL}/image/editing/gemini-edit-image",
    headers=headers,
    json=data
)

VEO Video Generation

Generate Videos

Create videos using Google's VEO models with various configurations.

Basic Video Generation
data = {
    "prompt": "A peaceful lake surrounded by autumn trees with gentle ripples on the water surface",
    "number_of_videos": 1,
    "aspect_ratio": "16:9",
    "duration": 10,  # seconds
    "person_generation": "allow",
    "enhance_prompt": True
}

# Note: VEO requires special authentication
# NOTE(review): these headers are identical to the standard `headers` dict from
# the Prerequisites section — confirm what extra credential, if any, VEO needs.
veo_headers = {
    "X-API-Key": API_KEY,
    "Content-Type": "application/json"
}

response = requests.post(
    f"{API_URL}/video/generation/generate-video",
    headers=veo_headers,
    json=data
)
Video with Starting Image
data = {
    "prompt": "Transform this still image into a dynamic scene with flowing water and moving clouds",
    "start_image_url": "https://example.com/landscape.jpg",
    "number_of_videos": 2,
    "aspect_ratio": "9:16",  # Portrait for mobile
    "duration": 5,
    "person_generation": "dont_allow",
    "enhance_prompt": False
}

response = requests.post(
    f"{API_URL}/video/generation/generate-video",
    headers=veo_headers,
    json=data
)
Multiple Video Variations
data = {
    "prompt": "A bustling city street at night with neon lights reflecting on wet pavement",
    "number_of_videos": 4,
    "aspect_ratio": "16:9",
    "duration": 10,
    "person_generation": "allow_adult",
    "enhance_prompt": True
}

response = requests.post(
    f"{API_URL}/video/generation/generate-video",
    headers=veo_headers,
    json=data
)

Advanced Example: Concurrent Image Generation

Here's a simplified example of making multiple concurrent requests using asyncio and aiohttp for the Imagen generate-image endpoint:

import asyncio
import aiohttp
import time

async def generate_image_async(session, prompt, request_id):
    """Generate a single image asynchronously.

    Posts one request to the generate-image endpoint through ``session``.
    Request failures are reported in the returned dict rather than raised,
    so many calls can be fanned out with ``asyncio.gather`` without one
    failure aborting the rest.

    Args:
        session: An open ``aiohttp.ClientSession`` used to send the request.
        prompt: Text prompt sent as the ``"prompt"`` field of the payload.
        request_id: Caller-chosen identifier echoed back in the result.

    Returns:
        A dict with ``request_id``, ``prompt``, ``status`` and ``duration``
        (seconds). When the response body was decoded it also carries
        ``result`` (the decoded JSON); otherwise it carries ``error`` (a
        non-empty message). ``status`` is the HTTP status code, or ``-1``
        when no usable response was obtained.
    """
    url = f"{API_URL}/image/generation/generate-image"

    payload = {
        "prompt": prompt,
        "number_of_images": 1,
        "aspect_ratio": "1:1",
        "use_fast_model": True,  # Use fast model for better concurrency
        "add_watermark": False,
        "safety_filter_level": "BLOCK_MEDIUM_AND_ABOVE",
        "person_generation": "ALLOW_ADULT"
    }

    request_headers = {
        "X-API-Key": API_KEY,
        "Content-Type": "application/json"
    }

    # perf_counter is monotonic, so durations are immune to wall-clock jumps.
    started = time.perf_counter()
    http_status = -1  # Stays -1 until a response has actually arrived.

    try:
        async with session.post(url, headers=request_headers, json=payload) as response:
            http_status = response.status
            result = await response.json()

            return {
                "request_id": request_id,
                "prompt": prompt,
                "status": http_status,
                "duration": time.perf_counter() - started,
                "result": result
            }
    except Exception as e:
        # Deliberately broad: this coroutine must not raise, so that one bad
        # request cannot abort a surrounding asyncio.gather().
        # Some exceptions (e.g. asyncio.TimeoutError) have an empty message;
        # fall back to the exception type so the error is never blank.
        detail = str(e) or type(e).__name__
        if http_status != -1:
            # A response arrived but its body could not be decoded as JSON
            # (e.g. an HTML error page) — keep the real status in the message.
            detail = f"HTTP {http_status}: {detail}"

        return {
            "request_id": request_id,
            "prompt": prompt,
            # Preserve a real error status, but never report 200 without a
            # "result" payload: callers treat 200 as "result is present".
            "status": http_status if http_status != 200 else -1,
            "duration": time.perf_counter() - started,
            "error": detail
        }

def _report_result(result):
    """Print the outcome of one request and return True if it succeeded.

    A request counts as successful only when the HTTP status is 200 *and*
    the decoded body has a truthy ``"success"`` field.
    """
    request_id = result["request_id"]

    if result["status"] != 200:
        error_msg = result.get('error', f"HTTP {result['status']}")
        print(f"❌ Request {request_id}: Failed - {error_msg}")
        return False

    # NOTE(review): the body schema ("success", "message", "data.urls") is
    # taken from how this example reads it — confirm against the API.
    body = result.get("result")
    if not isinstance(body, dict):
        print(f"❌ Request {request_id}: API error - unexpected response body")
        return False
    if not body.get("success"):
        print(f"❌ Request {request_id}: API error - {body.get('message', 'unknown error')}")
        return False

    data = body.get("data")
    urls = data.get("urls") if isinstance(data, dict) else None
    first_url = urls[0] if urls else "(no URL returned)"

    print(f"✅ Request {request_id}: Generated image in {result['duration']:.3f}s")
    print(f"   Prompt: {result['prompt'][:50]}...")
    print(f"   Image URL: {first_url}")
    return True


async def generate_multiple_images_concurrently():
    """Generate multiple images concurrently with different prompts.

    Sends one generate-image request per prompt over a shared
    ``aiohttp.ClientSession``, waits for all of them, then prints a
    per-request report and a summary (success/failure counts, average
    duration of successful requests, overall throughput).

    Each request is counted exactly once: as successful, or as failed.
    """
    prompts = [
        "A majestic mountain peak at sunset with golden light",
        "A serene beach with crystal clear turquoise water",
        "A mystical forest path covered in morning mist",
        "A modern city skyline reflected in a calm river",
        "A cozy coffee shop interior with warm lighting"
    ]

    # Configure session for concurrent requests
    timeout = aiohttp.ClientTimeout(total=120)  # 2 minute timeout
    connector = aiohttp.TCPConnector(limit=10)  # Allow up to 10 concurrent connections

    async with aiohttp.ClientSession(connector=connector, timeout=timeout) as session:
        # Create one coroutine per prompt; gather() schedules them together.
        tasks = [
            generate_image_async(session, prompt, i)
            for i, prompt in enumerate(prompts)
        ]

        print(f"Starting {len(tasks)} concurrent image generation requests...")
        # perf_counter is monotonic, unlike the wall clock.
        start_time = time.perf_counter()

        results = await asyncio.gather(*tasks)

        total_time = time.perf_counter() - start_time
        print(f"Completed all requests in {total_time:.3f} seconds")

    # Process results. Durations are collected only for genuinely successful
    # requests so the average below matches the "successful" count.
    successful_durations = []
    failed = 0

    for result in results:
        if _report_result(result):
            successful_durations.append(result["duration"])
        else:
            failed += 1

    successful = len(successful_durations)
    print(f"\nSummary: {successful} successful, {failed} failed")
    if successful > 0:
        avg_duration = sum(successful_durations) / successful
        print(f"Average request time: {avg_duration:.3f}s")
        if total_time > 0:
            print(f"Requests per second: {len(results) / total_time:.2f}")

# Run the concurrent generation
if __name__ == "__main__":
    asyncio.run(generate_multiple_images_concurrently())

This concurrent example demonstrates:

- Making multiple image generation requests simultaneously
- Proper session configuration with timeouts and connection limits
- Error handling for both HTTP errors and API errors
- Performance metrics calculation
- Using the fast model for better concurrency performance

The concurrent approach significantly reduces total processing time when generating multiple images compared to sequential requests.


Best Practices

  1. Use appropriate model versions: Choose between standard, fast, and ultra models based on your quality vs. speed requirements
  2. Optimize prompts: Be specific and detailed in your prompts for better results
  3. Handle safety filters: Implement proper error handling for content safety violations
  4. Manage concurrent requests: Use connection pooling and appropriate timeouts for concurrent operations
  5. Monitor usage: Track API usage and response times to optimize performance
  6. Image formats: Support both HTTP/HTTPS URLs and GCS URIs where applicable
  7. Async operations: Use async/await patterns for better performance in concurrent scenarios