The Rust SDK is currently in alpha. The API is stable, but some features and interfaces may change based on user feedback.
The Contents API enables you to extract clean, structured content from web pages with optional AI-powered processing, including summarization and structured data extraction.
Basic Usage
use valyu::{ValyuClient, ContentsRequest};
/// Fetches a single page through the Contents API and prints a short report
/// (title, length and a bounded content preview) for every returned result.
///
/// # Errors
///
/// Returns whatever error `client.contents` reports for the request.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Upper bound on how many characters of each result are echoed to the terminal.
    const PREVIEW_CHARS: usize = 200;

    let client = ValyuClient::new("your-api-key");
    let request = ContentsRequest::new(vec![
        "https://en.wikipedia.org/wiki/Machine_learning".to_string(),
    ]);

    let response = client.contents(&request).await?;

    println!(
        "Processed {}/{} URLs",
        response.urls_processed.unwrap_or(0),
        response.urls_requested.unwrap_or(0)
    );

    // `results` is optional; flattening it iterates zero times when it is absent.
    for result in response.results.iter().flatten() {
        println!("Title: {}", result.title.as_deref().unwrap_or("Untitled"));
        println!("Content length: {} characters", result.length.unwrap_or(0));

        if let Some(content) = &result.content {
            // `content` is a JSON value: use the raw text when it is a string,
            // otherwise fall back to its serialized JSON form.
            let rendered;
            let text = match content.as_str() {
                Some(raw) => raw,
                None => {
                    rendered = content.to_string();
                    rendered.as_str()
                }
            };
            // Truncate on character boundaries so multi-byte text is never split.
            let preview: String = text.chars().take(PREVIEW_CHARS).collect();
            println!("Content preview: {}...", preview);
        }
    }

    Ok(())
}
Parameters
URLs (Required)
| Parameter | Type | Description |
|---|---|---|
| `urls` | `Vec<String>` | Array of URLs to process (maximum 10 URLs) |
Builder Methods (Optional)
| Method | Type | Description | Default |
|---|---|---|---|
| `with_response_length()` | `impl Into<String>` | Content length: `"short"` (25k), `"medium"` (50k), `"large"` (100k), `"max"` (no limit) | `"short"` |
| `with_custom_response_length()` | `i32` | Custom character limit (1K-1M) | `None` |
| `with_extract_effort()` | `impl Into<String>` | Processing effort: `"normal"`, `"high"`, or `"auto"` | `"normal"` |
| `with_summary()` | `bool` | Enable/disable default summarization | `false` |
| `with_summary_instructions()` | `impl Into<String>` | Custom summary instructions | `None` |
| `with_summary_schema()` | `serde_json::Value` | JSON schema for structured extraction | `None` |
| `with_max_price_dollars()` | `f64` | Maximum price in dollars | `None` |
/// Top-level response returned by `client.contents(...)`.
pub struct ContentsResponse {
    /// `true` when the request as a whole succeeded.
    pub success: bool,
    /// Failure description; the examples print it when `success` is `false`.
    pub error: Option<String>,
    /// Presumably the identifier of this request/transaction (confirm against the API reference).
    pub tx_id: Option<String>,
    /// Number of URLs that were submitted.
    pub urls_requested: Option<i32>,
    /// Number of URLs that were processed.
    pub urls_processed: Option<i32>,
    /// Number of URLs that failed; a non-zero value indicates a partial failure.
    pub urls_failed: Option<i32>,
    /// One entry per returned page.
    pub results: Option<Vec<ContentResult>>,
    /// Cost of the request in dollars.
    pub total_cost_dollars: Option<f64>,
    /// Presumably the character count summed over all results (confirm against the API reference).
    pub total_characters: Option<i32>,
}
/// Extraction result for a single URL.
pub struct ContentResult {
    /// URL of the page this result belongs to.
    pub url: Option<String>,
    /// Page title, when one is available.
    pub title: Option<String>,
    /// Extracted content: a JSON string for raw output, a JSON object for
    /// structured (schema-driven) output.
    pub content: Option<serde_json::Value>,
    /// Content length in characters.
    pub length: Option<i32>,
    /// Presumably where the content originated (confirm against the API reference).
    pub source: Option<String>,
    /// Whether summarization succeeded; the examples only print a summary when this is `Some(true)`.
    pub summary_success: Option<bool>,
    /// Presumably the kind of data returned (confirm against the API reference).
    pub data_type: Option<String>,
    /// Image information as untyped JSON; its exact shape is not shown here.
    pub image_url: Option<serde_json::Value>,
    /// Presumably a ready-made citation for the page (confirm against the API reference).
    pub citation: Option<String>,
}
Parameter Examples
Extract clean content without AI processing:
// Plain extraction: no builder options are set on the request.
let urls = vec![
    "https://www.python.org".to_string(),
    "https://nodejs.org".to_string(),
];
let request = ContentsRequest::new(urls);
let response = client.contents(&request).await?;

// `results` is optional; flattening it iterates zero times when it is absent.
for page in response.results.iter().flatten() {
    let title = page.title.as_deref().unwrap_or("Untitled");
    let length = page.length.unwrap_or(0);
    println!("{}: {} characters", title, length);
}
AI Summary
Get automatic AI summaries of the extracted content:
// Turn on the default summary and ask for the "medium" response length.
let urls = vec!["https://en.wikipedia.org/wiki/Artificial_intelligence".to_string()];
let request = ContentsRequest::new(urls)
    .with_summary(true)
    .with_response_length("medium");
let response = client.contents(&request).await?;

// Walk results -> first entry -> content, stopping at the first missing piece.
let summary = response
    .results
    .as_deref()
    .and_then(|results| results.first())
    .and_then(|first| first.content.as_ref());
if let Some(content) = summary {
    println!("AI Summary: {}", content);
}
Custom Summary Instructions
Provide specific instructions for AI summarization:
// Free-form guidance that shapes the generated summary.
let instructions = "Summarize the main AI trends mentioned in exactly 3 bullet points";
let urls = vec!["https://techcrunch.com/latest-ai-news".to_string()];

let request = ContentsRequest::new(urls)
    .with_summary_instructions(instructions)
    .with_response_length("medium")
    .with_extract_effort("high");
let response = client.contents(&request).await?;
Extract specific data points using JSON schema:
use serde_json::json;
// JSON schema describing the fields to pull out of the page.
let company_schema = json!({
    "type": "object",
    "properties": {
        "company_name": {
            "type": "string",
            "description": "The name of the company"
        },
        "industry": {
            "type": "string",
            "enum": ["tech", "finance", "healthcare", "retail", "other"],
            "description": "Primary industry sector"
        },
        "key_products": {
            "type": "array",
            "items": {"type": "string"},
            "maxItems": 5,
            "description": "Main products or services"
        },
        "founded_year": {
            "type": "number",
            "description": "Year the company was founded"
        }
    },
    "required": ["company_name", "industry"]
});

let urls = vec!["https://en.wikipedia.org/wiki/OpenAI".to_string()];
let request = ContentsRequest::new(urls)
    .with_summary_schema(company_schema)
    .with_response_length("max")
    .with_extract_effort("high");
let response = client.contents(&request).await?;

// Walk results -> first entry -> content, stopping at the first missing piece.
let extracted = response
    .results
    .as_deref()
    .and_then(|results| results.first())
    .and_then(|first| first.content.as_ref());
if let Some(content) = extracted {
    println!("Extracted data: {}", content);
}
Response Length Control
Control the amount of content extracted per URL:
// Named presets; each caps the amount of content extracted per URL.
// "short": 25k characters.
let short = ContentsRequest::new(urls.clone())
    .with_response_length("short");
// "medium": 50k characters.
let medium = ContentsRequest::new(urls.clone())
    .with_response_length("medium");
// "large": 100k characters.
let large = ContentsRequest::new(urls.clone())
    .with_response_length("large");
// "max": no limit.
let max = ContentsRequest::new(urls.clone())
    .with_response_length("max");

// Explicit character budget instead of a preset (15k characters here).
let custom = ContentsRequest::new(urls)
    .with_custom_response_length(15000);
Control the extraction quality and processing intensity:
// "normal" is the default and the fastest option.
let normal = ContentsRequest::new(urls.clone())
    .with_extract_effort("normal");

// "high" gives enhanced quality for complex layouts and JS-heavy pages.
let high = ContentsRequest::new(urls.clone())
    .with_extract_effort("high");

// "auto" selects the effort level automatically.
let auto = ContentsRequest::new(urls)
    .with_extract_effort("auto");
Use Case Examples
Research Paper Analysis
Build an AI-powered academic research assistant that extracts and analyzes research papers:
use serde_json::json;
use valyu::{ValyuClient, ContentsRequest};
/// Extracts a structured analysis of the research paper at `paper_url`.
///
/// Sends a single-URL contents request that carries a JSON schema (title,
/// authors, abstract, key contributions, methodology, results summary and
/// implications) with the `"max"` response length and `"high"` extract effort.
///
/// # Returns
///
/// * `Ok(Some(content))` - the content of the first result, which is also
///   pretty-printed to stdout.
/// * `Ok(None)` - the response reported failure (the reported error is written
///   to stderr) or contained no result with content.
///
/// # Errors
///
/// Propagates errors from `client.contents` and from pretty-printing the
/// extracted JSON.
async fn analyze_research_paper(
    client: &ValyuClient,
    paper_url: &str,
) -> Result<Option<serde_json::Value>, Box<dyn std::error::Error>> {
    let paper_schema = json!({
        "type": "object",
        "properties": {
            "title": {"type": "string"},
            "authors": {
                "type": "array",
                "items": {"type": "string"}
            },
            "abstract": {"type": "string"},
            "key_contributions": {
                "type": "array",
                "items": {"type": "string"},
                "maxItems": 5,
                "description": "Main contributions of the research"
            },
            "methodology": {
                "type": "string",
                "description": "Research methodology and approach"
            },
            "results_summary": {
                "type": "string",
                "description": "Summary of key findings and results"
            },
            "implications": {
                "type": "string",
                "description": "Broader implications and significance"
            }
        },
        "required": ["title", "abstract", "key_contributions", "methodology"]
    });

    let request = ContentsRequest::new(vec![paper_url.to_string()])
        .with_summary_schema(paper_schema)
        .with_response_length("max")
        .with_extract_effort("high");

    let response = client.contents(&request).await?;

    if !response.success {
        // Report the API-provided reason instead of silently returning `None`,
        // so callers can tell a failed request from an empty result.
        eprintln!("Contents extraction failed: {:?}", response.error);
        return Ok(None);
    }

    // Walk results -> first entry -> content, stopping at the first missing piece.
    let first_content = response
        .results
        .as_deref()
        .and_then(|results| results.first())
        .and_then(|first| first.content.as_ref());

    match first_content {
        Some(content) => {
            println!("=== Research Paper Analysis ===");
            println!("{}", serde_json::to_string_pretty(content)?);
            Ok(Some(content.clone()))
        }
        None => Ok(None),
    }
}
Technical Documentation Processor
Build a documentation analysis tool that extracts API information:
use serde_json::json;
// Schema of one parameter accepted by an endpoint.
let parameter_schema = json!({
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "type": {"type": "string"},
        "required": {"type": "boolean"},
        "description": {"type": "string"}
    }
});

// Schema of one API endpoint, including its parameter list.
let endpoint_schema = json!({
    "type": "object",
    "properties": {
        "method": {"type": "string"},
        "path": {"type": "string"},
        "description": {"type": "string"},
        "parameters": {
            "type": "array",
            "items": parameter_schema
        }
    }
});

// Schema of the authentication section.
let authentication_schema = json!({
    "type": "object",
    "properties": {
        "method": {"type": "string"},
        "description": {"type": "string"}
    }
});

// Full schema, assembled from the pieces above.
let docs_schema = json!({
    "type": "object",
    "properties": {
        "documentation_overview": {
            "type": "string",
            "description": "Overview of what the documentation covers"
        },
        "api_endpoints": {
            "type": "array",
            "items": endpoint_schema
        },
        "authentication": authentication_schema,
        "rate_limits": {"type": "string"}
    },
    "required": ["documentation_overview", "api_endpoints"]
});

let urls = vec!["https://docs.example.com/api-reference".to_string()];
let request = ContentsRequest::new(urls)
    .with_summary_schema(docs_schema)
    .with_extract_effort("high")
    .with_response_length("large");
let response = client.contents(&request).await?;
Multiple URLs Processing
// Several pages in one request, with a cap on the price in dollars.
let urls = vec![
    "https://www.valyu.ai/".to_string(),
    "https://docs.valyu.ai/overview".to_string(),
    "https://www.valyu.ai/blogs/ai-search-article".to_string(),
];
let request = ContentsRequest::new(urls)
    .with_summary_instructions("Provide key takeaways in bullet points")
    .with_max_price_dollars(2.0);
let response = client.contents(&request).await?;

// Missing counters and cost are reported as zero.
let processed = response.urls_processed.unwrap_or(0);
let requested = response.urls_requested.unwrap_or(0);
let cost = response.total_cost_dollars.unwrap_or(0.0);
println!("Processed {}/{} URLs", processed, requested);
println!("Cost: ${:.4}", cost);
Error Handling
use valyu::{ValyuClient, ContentsRequest, ValyuError};
let request = ContentsRequest::new(vec!["https://example.com".to_string()]);
let outcome = client.contents(&request).await;

match outcome {
    // Errors returned by the call itself.
    Err(ValyuError::InvalidApiKey) => eprintln!("Invalid API key"),
    Err(ValyuError::RateLimitExceeded) => eprintln!("Rate limit exceeded"),
    Err(e) => eprintln!("Error: {}", e),

    // The call returned a response, which may still report a failure.
    Ok(response) => {
        if !response.success {
            eprintln!("Contents extraction failed: {:?}", response.error);
            return;
        }

        // Partial failure: some URLs failed even though the request succeeded.
        let failed = response.urls_failed.unwrap_or(0);
        if failed > 0 {
            let requested = response.urls_requested.unwrap_or(0);
            println!("{} of {} URLs failed", failed, requested);
        }

        // Report every returned result, numbered from 1.
        for (index, page) in response.results.iter().flatten().enumerate() {
            println!("Result {}:", index + 1);
            println!(" Title: {}", page.title.as_deref().unwrap_or("Untitled"));
            println!(" URL: {}", page.url.as_deref().unwrap_or("N/A"));
            println!(" Length: {} characters", page.length.unwrap_or(0));

            // Only show content as a summary when summarization succeeded.
            if let (Some(true), Some(content)) = (page.summary_success, &page.content) {
                println!(" Summary: {}", content);
            }
        }
    }
}