Skip to main content
The Rust SDK is in alpha. The API is stable, but some interfaces may change based on feedback.
Extract clean, structured content from web pages, with optional AI summarization and structured data extraction.

Wire up Valyu Contents extraction with the Rust SDK (alpha).

Open in Cursor

Basic usage

use valyu::{ValyuClient, ContentsRequest};

/// Minimal end-to-end example: extract one page and print a per-URL summary.
///
/// Builds a client, submits a single-URL `ContentsRequest`, then reports how
/// many URLs were processed and the title and length of each returned result.
/// Any error from the request is propagated to the caller via `?`.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let client = ValyuClient::new("your-api-key");

    let urls = vec!["https://en.wikipedia.org/wiki/Machine_learning".to_string()];
    let request = ContentsRequest::new(urls);

    let response = client.contents(&request).await?;

    // Counters are optional in the response; missing values are shown as 0.
    let processed = response.urls_processed.unwrap_or(0);
    let requested = response.urls_requested.unwrap_or(0);
    println!("Processed {}/{} URLs", processed, requested);

    // `results` is optional: iterating the Option and flattening visits every
    // result when present and nothing when it is `None`.
    for page in response.results.iter().flatten() {
        let title = page.title.as_deref().unwrap_or("Untitled");
        let chars = page.length.unwrap_or(0);
        println!("{} ({} chars)", title, chars);
    }

    Ok(())
}

Common patterns

use serde_json::json;
use valyu::ContentsRequest;

// Pattern 1: default AI summary, medium-sized response.
let summary_req = ContentsRequest::new(urls.clone())
    .with_summary(true)
    .with_response_length("medium");

// Pattern 2: summary driven by a custom instruction.
let instructed_req = ContentsRequest::new(urls.clone())
    .with_summary_instructions("Summarize the main trends in 3 bullet points")
    .with_extract_effort("high");

// Pattern 3: structured extraction - supply a JSON schema and get back
// structured fields instead of free text.
let company_schema = json!({
    "type": "object",
    "properties": {
        "company_name": {"type": "string"},
        "industry": {"type": "string"},
        "founded_year": {"type": "number"}
    },
    "required": ["company_name"]
});
let company_urls = vec!["https://en.wikipedia.org/wiki/OpenAI".to_string()];
let structured_req = ContentsRequest::new(company_urls)
    .with_summary_schema(company_schema)
    .with_extract_effort("high");

// Only the structured-extraction request is sent here; the first two
// requests above are shown for illustration.
let response = client.contents(&structured_req).await?;
Use with_extract_effort("high") for JS-heavy pages. To control how much content is returned per URL, use with_response_length() with "short" (25k characters), "medium" (50k), "large" (100k), or "max", or set an exact limit with with_custom_response_length(i32).
For arXiv, PubMed Central, bioRxiv, medRxiv, and ChemRxiv papers, Valyu serves clean, processed markdown (with figures and equations) from its academic index when your plan covers the source; otherwise it falls back to the live crawler. Pass the paper's URL (an arXiv /pdf/ link or a DOI works best) or its bare ID. See Academic Papers.

Reference

urls (Vec<String>, required) - URLs to process (max 10).
| Method | Type | Description | Default |
| --- | --- | --- | --- |
| `with_response_length()` | `impl Into<String>` | `"short"` (25k), `"medium"` (50k), `"large"` (100k), `"max"` | `"short"` |
| `with_custom_response_length()` | `i32` | Custom character limit (1K-1M) | `None` |
| `with_extract_effort()` | `impl Into<String>` | `"normal"`, `"high"`, or `"auto"` | `"auto"` |
| `with_summary()` | `bool` | Enable default summarization | `false` |
| `with_summary_instructions()` | `impl Into<String>` | Custom summary instructions | `None` |
| `with_summary_schema()` | `serde_json::Value` | JSON schema for structured extraction | `None` |
| `with_max_price_dollars()` | `f64` | Maximum price in dollars | `None` |
/// Response to a contents-extraction request covering one or more URLs.
///
/// Most fields are optional; the examples on this page fall back to `0` for
/// missing counters and skip result handling when `results` is `None`.
pub struct ContentsResponse {
    /// `false` when the extraction as a whole failed; see `error` for details.
    pub success: bool,
    /// Error information for the request; the error-handling example prints
    /// this when `success` is `false`.
    pub error: Option<String>,
    /// Identifier for this request (presumably a transaction ID — confirm
    /// against the API reference).
    pub tx_id: Option<String>,
    /// Number of URLs requested.
    pub urls_requested: Option<i32>,
    /// Number of URLs processed.
    pub urls_processed: Option<i32>,
    /// Number of URLs that failed.
    pub urls_failed: Option<i32>,
    /// Extraction results; see `ContentResult` for the per-result fields.
    pub results: Option<Vec<ContentResult>>,
    /// Total cost in dollars (presumably for the whole request — confirm).
    pub total_cost_dollars: Option<f64>,
    /// Total character count (presumably summed over all results — confirm).
    pub total_characters: Option<i32>,
}

/// A single extraction result inside `ContentsResponse::results`.
///
/// Every field is optional. Check `status` before relying on `content`:
/// failed results carry an `error` instead.
pub struct ContentResult {
    /// URL associated with this result (presumably the URL that was
    /// processed — confirm).
    pub url: Option<String>,
    /// Title of the result; the examples display "Untitled" when it is absent.
    pub title: Option<String>,
    /// Extracted content. Kept as a generic JSON value because its shape
    /// depends on the request.
    pub content: Option<serde_json::Value>,  // String for raw, object for structured
    /// Length of the result; the examples print it as a character count.
    pub length: Option<i32>,
    /// Source of the result (exact values are not shown here — see the API
    /// reference).
    pub source: Option<String>,
    /// Outcome for this result.
    pub status: Option<String>,              // "success" | "failed"
    /// Failure detail for this result.
    pub error: Option<String>,               // Present when status is "failed"
    /// Presumably reports whether summarization/structured extraction
    /// succeeded for this result — confirm against the API reference.
    pub summary_success: Option<bool>,
    /// Kind of data returned (possible values are not shown here).
    pub data_type: Option<String>,
    /// Image URL information; typed as a generic JSON value, so the exact
    /// shape is not fixed by this struct.
    pub image_url: Option<serde_json::Value>,
    // Academic-index results (arXiv, PubMed, etc.) also populate:
    /// DOI of the paper.
    pub doi: Option<String>,
    /// Authors of the paper.
    pub authors: Option<Vec<String>>,
    /// Citation count of the paper.
    pub citation_count: Option<i32>,
    /// Category of the source.
    pub source_type: Option<String>,         // e.g. "paper"
}
use valyu::{ContentsRequest, ValyuError};

// Error handling happens at three levels:
//   1. transport/API errors surfaced as `Err(ValyuError)`,
//   2. a request-level failure reported via `response.success == false`,
//   3. per-URL failures reported on individual results (`status == "failed"`).
let request = ContentsRequest::new(vec!["https://example.com".to_string()]);

match client.contents(&request).await {
    // The call returned, but the API reported that the extraction failed.
    Ok(response) if !response.success => {
        eprintln!("Extraction failed: {:?}", response.error);
        // NOTE: a bare `return;` requires the enclosing function to return `()`.
        return;
    }
    Ok(response) => {
        // Only mention failures when at least one URL actually failed.
        if let Some(failed) = response.urls_failed.filter(|&count| count > 0) {
            println!("{} URLs failed", failed);
        }

        for result in response.results.iter().flatten() {
            // A failed result carries an error instead of usable content, so
            // report it on stderr rather than printing "Untitled (0 chars)".
            if result.status.as_deref() == Some("failed") {
                eprintln!("{} failed: {}",
                    result.url.as_deref().unwrap_or("<unknown URL>"),
                    result.error.as_deref().unwrap_or("unknown error"));
                continue;
            }

            println!("{} ({} chars)",
                result.title.as_deref().unwrap_or("Untitled"),
                result.length.unwrap_or(0));
        }
    }
    Err(ValyuError::InvalidApiKey) => eprintln!("Invalid API key"),
    Err(ValyuError::RateLimitExceeded) => eprintln!("Rate limit exceeded"),
    Err(e) => eprintln!("Error: {}", e),
}