Skip to main content
The Rust SDK is currently in alpha. The API is stable, but some features and interfaces may change based on user feedback.
The Contents API enables you to extract clean, structured content from web pages with optional AI-powered processing, including summarization and structured data extraction.

Basic Usage

use valyu::{ValyuClient, ContentsRequest};

/// Fetches one Wikipedia page through the Contents API and prints a short report:
/// how many URLs were processed, then the title, length and a content preview
/// for every returned result.
///
/// # Errors
///
/// Propagates any error returned by `client.contents` to the caller via `?`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = ValyuClient::new("your-api-key");

    let request = ContentsRequest::new(vec![
        "https://en.wikipedia.org/wiki/Machine_learning".to_string(),
    ]);

    let response = client.contents(&request).await?;

    println!("Processed {}/{} URLs",
        response.urls_processed.unwrap_or(0),
        response.urls_requested.unwrap_or(0));

    if let Some(results) = &response.results {
        for result in results {
            println!("Title: {}", result.title.as_deref().unwrap_or("Untitled"));
            println!("Content length: {} characters", result.length.unwrap_or(0));
            if let Some(content) = &result.content {
                // `content` is a JSON value: use the raw text when it is a string,
                // otherwise fall back to its JSON rendering.
                let text = content
                    .as_str()
                    .map_or_else(|| content.to_string(), str::to_owned);
                // Print only the first 200 characters; `chars()` keeps the cut on a
                // character boundary so multi-byte text cannot cause a panic.
                let preview: String = text.chars().take(200).collect();
                println!("Content preview: {}...", preview);
            }
        }
    }

    Ok(())
}

Parameters

URLs (Required)

| Parameter | Type | Description |
| --- | --- | --- |
| `urls` | `Vec<String>` | Array of URLs to process (maximum 10 URLs) |

Builder Methods (Optional)

| Method | Type | Description | Default |
| --- | --- | --- | --- |
| `with_response_length()` | `impl Into<String>` | Content length: "short" (25k), "medium" (50k), "large" (100k), "max" | "short" |
| `with_custom_response_length()` | `i32` | Custom character limit (1K-1M) | None |
| `with_extract_effort()` | `impl Into<String>` | Processing effort: "normal", "high", or "auto" | "normal" |
| `with_summary()` | `bool` | Enable/disable default summarization | false |
| `with_summary_instructions()` | `impl Into<String>` | Custom summary instructions | None |
| `with_summary_schema()` | `serde_json::Value` | JSON schema for structured extraction | None |
| `with_max_price_dollars()` | `f64` | Maximum price in dollars | None |

Response Format

/// Top-level result of a `client.contents(&request)` call.
///
/// Most fields are optional; the examples on this page read them with
/// `unwrap_or(...)` defaults.
pub struct ContentsResponse {
    // Whether the request as a whole succeeded; the examples check this before using `results`.
    pub success: bool,
    // Failure description; the error-handling example prints it when `success` is false.
    pub error: Option<String>,
    // NOTE(review): presumably an identifier for this request/transaction — confirm.
    pub tx_id: Option<String>,
    // Number of URLs submitted (the denominator in "Processed x/y URLs").
    pub urls_requested: Option<i32>,
    // Number of URLs processed (the numerator in "Processed x/y URLs").
    pub urls_processed: Option<i32>,
    // Number of URLs that failed; a value above zero indicates a partial failure.
    pub urls_failed: Option<i32>,
    // Extraction results; the examples iterate these to print title, URL and length.
    pub results: Option<Vec<ContentResult>>,
    // Cost of the request in dollars.
    pub total_cost_dollars: Option<f64>,
    // NOTE(review): presumably the character count summed over all results — confirm.
    pub total_characters: Option<i32>,
}

/// A single entry of `ContentsResponse::results`.
pub struct ContentResult {
    // URL of the page; the examples print "N/A" when it is absent.
    pub url: Option<String>,
    // Page title; the examples print "Untitled" when it is absent.
    pub title: Option<String>,
    pub content: Option<serde_json::Value>,  // String for raw, object for structured
    // Content length, reported by the examples in characters.
    pub length: Option<i32>,
    // NOTE(review): meaning of `source` is not shown on this page — confirm.
    pub source: Option<String>,
    // The error-handling example only prints `content` as a summary when this is `Some(true)`.
    pub summary_success: Option<bool>,
    // NOTE(review): meaning of `data_type` is not shown on this page — confirm.
    pub data_type: Option<String>,
    // NOTE(review): typed as arbitrary JSON, so the shape may vary — confirm.
    pub image_url: Option<serde_json::Value>,
    // NOTE(review): presumably a ready-made citation string for the page — confirm.
    pub citation: Option<String>,
}

Parameter Examples

Basic Content Extraction

Extract clean content without AI processing:
// Two pages with default options: no summary and no schema are requested.
let request = ContentsRequest::new(vec![
    String::from("https://www.python.org"),
    String::from("https://nodejs.org"),
]);

let response = client.contents(&request).await?;

// `Option::iter` + `flatten` walks the results when present and does nothing otherwise.
for page in response.results.iter().flatten() {
    let title = page.title.as_deref().unwrap_or("Untitled");
    let chars = page.length.unwrap_or(0);
    println!("{}: {} characters", title, chars);
}

AI Summary

Get automatic AI summaries of the extracted content:
// Request the default AI summary at "medium" response length.
let request = ContentsRequest::new(vec![
    "https://en.wikipedia.org/wiki/Artificial_intelligence".to_string(),
])
.with_summary(true)
.with_response_length("medium");

let response = client.contents(&request).await?;

if let Some(results) = &response.results {
    if let Some(first) = results.first() {
        if let Some(content) = &first.content {
            // `content` is a `serde_json::Value`. Formatting a string value with `{}`
            // would emit JSON (surrounding quotes, `\n` escapes), so print the raw
            // text when it is a string and fall back to JSON for anything else.
            match content.as_str() {
                Some(summary) => println!("AI Summary: {}", summary),
                None => println!("AI Summary: {}", content),
            }
        }
    }
}

Custom Summary Instructions

Provide specific instructions for AI summarization:
// The page to summarize.
let urls = vec![String::from("https://techcrunch.com/latest-ai-news")];

// Custom instructions, "medium" response length and "high" extraction effort.
let request = ContentsRequest::new(urls)
    .with_summary_instructions("Summarize the main AI trends mentioned in exactly 3 bullet points")
    .with_response_length("medium")
    .with_extract_effort("high");

let response = client.contents(&request).await?;

Structured Data Extraction

Extract specific data points using JSON schema:
use serde_json::json;

// JSON schema describing the fields to extract; only the name and industry are required.
let company_schema = json!({
    "type": "object",
    "properties": {
        "company_name": {"type": "string", "description": "The name of the company"},
        "industry": {
            "type": "string",
            "enum": ["tech", "finance", "healthcare", "retail", "other"],
            "description": "Primary industry sector"
        },
        "key_products": {
            "type": "array",
            "items": {"type": "string"},
            "maxItems": 5,
            "description": "Main products or services"
        },
        "founded_year": {"type": "number", "description": "Year the company was founded"}
    },
    "required": ["company_name", "industry"]
});

let urls = vec![String::from("https://en.wikipedia.org/wiki/OpenAI")];
let request = ContentsRequest::new(urls)
    .with_summary_schema(company_schema)
    .with_response_length("max")
    .with_extract_effort("high");

let response = client.contents(&request).await?;

// Content of the first result, if the response carries one.
let extracted = response
    .results
    .as_ref()
    .and_then(|items| items.first())
    .and_then(|item| item.content.as_ref());

if let Some(data) = extracted {
    println!("Extracted data: {}", data);
}

Response Length Control

Control the amount of content extracted per URL:
// Predefined lengths

// "short": 25k chars
let short = ContentsRequest::new(urls.clone())
    .with_response_length("short");

// "medium": 50k chars
let medium = ContentsRequest::new(urls.clone())
    .with_response_length("medium");

// "large": 100k chars
let large = ContentsRequest::new(urls.clone())
    .with_response_length("large");

// "max": no limit
let max = ContentsRequest::new(urls.clone())
    .with_response_length("max");

// Custom length: 15k chars. This is the last use of `urls`, so it is moved rather than cloned.
let custom = ContentsRequest::new(urls)
    .with_custom_response_length(15000);

Extract Effort Levels

Control the extraction quality and processing intensity:
// "normal" is the default effort level and the fastest.
let normal = ContentsRequest::new(urls.clone())
    .with_extract_effort("normal");

// "high" gives enhanced quality for complex layouts and JS-heavy pages.
let high = ContentsRequest::new(urls.clone())
    .with_extract_effort("high");

// "auto" selects the effort level intelligently.
// This is the last use of `urls`, so it is moved rather than cloned.
let auto = ContentsRequest::new(urls)
    .with_extract_effort("auto");

Use Case Examples

Research Paper Analysis

Build an AI-powered academic research assistant that extracts and analyzes research papers:
use serde_json::json;
use valyu::{ValyuClient, ContentsRequest};

/// Extracts structured metadata from the research paper at `paper_url`.
///
/// Sends a schema-driven Contents request ("max" response length, "high"
/// extraction effort), pretty-prints the extracted JSON and returns a copy of it.
///
/// Returns `Ok(None)` when the response is not marked successful or when the
/// first result carries no content.
///
/// # Errors
///
/// Propagates errors from `client.contents` and from JSON pretty-printing.
async fn analyze_research_paper(
    client: &ValyuClient,
    paper_url: &str,
) -> Result<Option<serde_json::Value>, Box<dyn std::error::Error>> {
    let paper_schema = json!({
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "authors": {"type": "array", "items": {"type": "string"}},
            "abstract": {"type": "string"},
            "key_contributions": {
                "type": "array",
                "items": {"type": "string"},
                "maxItems": 5,
                "description": "Main contributions of the research"
            },
            "methodology": {
                "type": "string",
                "description": "Research methodology and approach"
            },
            "results_summary": {
                "type": "string",
                "description": "Summary of key findings and results"
            },
            "implications": {
                "type": "string",
                "description": "Broader implications and significance"
            }
        },
        "required": ["title", "abstract", "key_contributions", "methodology"]
    });

    let urls = vec![String::from(paper_url)];
    let request = ContentsRequest::new(urls)
        .with_summary_schema(paper_schema)
        .with_response_length("max")
        .with_extract_effort("high");

    let response = client.contents(&request).await?;

    // Guard clause: an unsuccessful response yields no analysis.
    if !response.success {
        return Ok(None);
    }

    // Content of the first result, if there is one.
    let analysis = response
        .results
        .as_ref()
        .and_then(|items| items.first())
        .and_then(|item| item.content.as_ref());

    match analysis {
        Some(paper) => {
            println!("=== Research Paper Analysis ===");
            println!("{}", serde_json::to_string_pretty(paper)?);
            Ok(Some(paper.clone()))
        }
        None => Ok(None),
    }
}

Technical Documentation Processor

Build a documentation analysis tool that extracts API information:
use serde_json::json;

// Schema for one parameter of an API endpoint.
let parameter_schema = json!({
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "type": {"type": "string"},
        "required": {"type": "boolean"},
        "description": {"type": "string"}
    }
});

// Schema for one API endpoint, including its list of parameters.
let endpoint_schema = json!({
    "type": "object",
    "properties": {
        "method": {"type": "string"},
        "path": {"type": "string"},
        "description": {"type": "string"},
        "parameters": {
            "type": "array",
            "items": parameter_schema
        }
    }
});

// Schema for the authentication section.
let authentication_schema = json!({
    "type": "object",
    "properties": {
        "method": {"type": "string"},
        "description": {"type": "string"}
    }
});

// Full schema, composed from the pieces above.
let docs_schema = json!({
    "type": "object",
    "properties": {
        "documentation_overview": {
            "type": "string",
            "description": "Overview of what the documentation covers"
        },
        "api_endpoints": {
            "type": "array",
            "items": endpoint_schema
        },
        "authentication": authentication_schema,
        "rate_limits": {"type": "string"}
    },
    "required": ["documentation_overview", "api_endpoints"]
});

let urls = vec![String::from("https://docs.example.com/api-reference")];
let request = ContentsRequest::new(urls)
    .with_summary_schema(docs_schema)
    .with_extract_effort("high")
    .with_response_length("large");

let response = client.contents(&request).await?;

Multiple URLs Processing

// Three pages handled in a single request.
let urls = vec![
    String::from("https://www.valyu.ai/"),
    String::from("https://docs.valyu.ai/overview"),
    String::from("https://www.valyu.ai/blogs/ai-search-article"),
];

// Same summary instructions for every page, with a 2.0 dollar price cap.
let request = ContentsRequest::new(urls)
    .with_summary_instructions("Provide key takeaways in bullet points")
    .with_max_price_dollars(2.0);

let response = client.contents(&request).await?;

// Missing counters and cost are reported as zero.
let processed = response.urls_processed.unwrap_or(0);
let requested = response.urls_requested.unwrap_or(0);
let cost = response.total_cost_dollars.unwrap_or(0.0);

println!("Processed {}/{} URLs", processed, requested);
println!("Cost: ${:.4}", cost);

Error Handling

use valyu::{ValyuClient, ContentsRequest, ValyuError};

let request = ContentsRequest::new(vec!["https://example.com".to_string()]);

match client.contents(&request).await {
    // The call went through but the API reported a failure. Handled as its own
    // arm (instead of an early `return;`) so the snippet compiles both in
    // unit-returning and in `Result`-returning functions.
    Ok(response) if !response.success => {
        eprintln!("Contents extraction failed: {:?}", response.error);
    }
    Ok(response) => {
        // Check for partial failures
        if let Some(failed) = response.urls_failed {
            if failed > 0 {
                println!("{} of {} URLs failed",
                    failed,
                    response.urls_requested.unwrap_or(0));
            }
        }

        // Process successful results
        if let Some(results) = &response.results {
            for (i, result) in results.iter().enumerate() {
                println!("Result {}:", i + 1);
                println!("  Title: {}", result.title.as_deref().unwrap_or("Untitled"));
                println!("  URL: {}", result.url.as_deref().unwrap_or("N/A"));
                println!("  Length: {} characters", result.length.unwrap_or(0));

                if result.summary_success.unwrap_or(false) {
                    if let Some(content) = &result.content {
                        // `content` is a `serde_json::Value`: print string summaries
                        // as raw text (no JSON quotes or escapes) and anything else
                        // as JSON.
                        match content.as_str() {
                            Some(summary) => println!("  Summary: {}", summary),
                            None => println!("  Summary: {}", content),
                        }
                    }
                }
            }
        }
    }
    Err(ValyuError::InvalidApiKey) => eprintln!("Invalid API key"),
    Err(ValyuError::RateLimitExceeded) => eprintln!("Rate limit exceeded"),
    Err(e) => eprintln!("Error: {}", e),
}