Valyu integrates seamlessly with LlamaIndex as a comprehensive tool spec, allowing you to enhance your AI agents and RAG applications with real-time web search and proprietary data sources. The integration provides LLM-ready context from multiple sources including web pages, academic journals, financial data, and more. The package includes two main functions:
search(): Deep search operations with comprehensive parameter control
get_contents(): Extract clean content from specific URLs
The most powerful way to use Valyu is within LlamaIndex agents, where the AI can dynamically decide when and how to search:
# Example: expose the Valyu tool spec to an OpenAI agent, then run one
# search-style query and one URL-extraction query through the agent.
import os

from llama_index.agent.openai import OpenAIAgent
from llama_index.tools.valyu import ValyuToolSpec

# Set API keys
os.environ["VALYU_API_KEY"] = "your-valyu-api-key"
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"

# Initialize Valyu tool with comprehensive configuration
valyu_tool = ValyuToolSpec(
    api_key=os.environ["VALYU_API_KEY"],
    max_price=100,  # Maximum cost (optional - adjusts automatically if not provided)
    fast_mode=True,  # Enable fast mode for quicker responses
    # Contents API configuration
    contents_summary=True,  # Enable AI summarization for content extraction
    contents_extract_effort="normal",  # Extraction thoroughness
    contents_response_length="medium",  # Content length per URL
)

# Create OpenAI agent with Valyu tools
agent = OpenAIAgent.from_tools(
    valyu_tool.to_tool_list(),
    verbose=True,
)

# Example 1: Deep search query
print("=== Search Example ===")
search_response = agent.chat(
    "What are the key considerations and empirical evidence for implementing statistical arbitrage strategies using cointegrated pairs trading, specifically focusing on the optimal lookback period for calculating correlation coefficients and the impact of transaction costs on strategy profitability in high-frequency trading environments?"
)
print(search_response)

# Example 2: URL content extraction
print("\n=== URL Content Extraction Example ===")
content_response = agent.chat(
    "Please extract and summarize the content from these URLs: https://arxiv.org/abs/1706.03762 and https://en.wikipedia.org/wiki/Transformer_(machine_learning_model)"
)
print(content_response)
The ValyuToolSpec supports extensive configuration during initialization:
# Example: configure ValyuToolSpec at construction time, then call search()
# directly with per-query parameters (search type, result count, date range).
from llama_index.tools.valyu import ValyuToolSpec

# Initialize with comprehensive configuration
valyu_tool = ValyuToolSpec(
    api_key="your-api-key",
    verbose=True,
    # Search API parameters (set at initialization)
    max_price=100,  # Maximum cost in dollars for search operations
    relevance_threshold=0.5,  # Minimum relevance score (0.0-1.0)
    fast_mode=False,  # Quality vs speed trade-off
    included_sources=["arxiv.org", "pubmed.ncbi.nlm.nih.gov"],  # Include specific sources
    excluded_sources=["example.com", "example.org"],  # Exclude sources
    response_length="medium",  # "short", "medium", "large", "max", or int
    country_code="US",  # 2-letter ISO country code for geo-bias
    # Contents API parameters
    contents_summary=True,  # Enable AI summarization
    contents_extract_effort="high",  # "normal", "high", or "auto"
    contents_response_length="large",  # Content length per URL
)

# Search with time filtering (parameters set per search)
results = valyu_tool.search(
    query="quantum computing breakthroughs 2024",
    search_type="all",  # "all", "web", or "proprietary"
    max_num_results=10,
    start_date="2024-01-01",  # Time-filtered search
    end_date="2024-12-31",
    fast_mode=None,  # Uses tool default (False in this case)
)
# Example: build two agents from differently configured Valyu tool specs,
# one tuned for academic research and one for current-events analysis.
import os

from llama_index.agent.openai import OpenAIAgent
from llama_index.tools.valyu import ValyuToolSpec

# Create specialized research agent
research_tool = ValyuToolSpec(
    api_key=os.environ["VALYU_API_KEY"],
    max_price=100,
    included_sources=["arxiv.org", "pubmed.ncbi.nlm.nih.gov", "ieee.org"],
    response_length="large",
    relevance_threshold=0.7,
    fast_mode=False,  # Prioritize quality for research
)

research_agent = OpenAIAgent.from_tools(
    research_tool.to_tool_list(),
    verbose=True,
    system_prompt="You are a research specialist. Use Valyu to find authoritative academic sources and provide well-cited answers. Focus on peer-reviewed papers and scholarly articles.",
)

# Create analysis agent for current events
analysis_tool = ValyuToolSpec(
    api_key=os.environ["VALYU_API_KEY"],
    max_price=100,
    excluded_sources=["example.com", "example.org", "example.net"],
    fast_mode=True,  # Faster for current data
    response_length="medium",
    country_code="US",
)

analysis_agent = OpenAIAgent.from_tools(
    analysis_tool.to_tool_list(),
    verbose=True,
    system_prompt="You are a market analyst. Use current data to provide insights and recommendations. Focus on authoritative news sources and financial data.",
)

# Use agents for different purposes
print("=== Research Agent Example ===")
research_response = research_agent.chat(
    "Find recent papers on transformer architecture improvements and summarize key innovations"
)
print(research_response)

print("\n=== Analysis Agent Example ===")
analysis_response = analysis_agent.chat(
    "Analyze current market trends in AI chip demand and semiconductor industry"
)
print(analysis_response)
# Example: a financial research agent backed by a Valyu tool spec that is
# configured for fast searches and high-effort content extraction.
import os

from llama_index.agent.openai import OpenAIAgent
from llama_index.tools.valyu import ValyuToolSpec

# Create financial research agent with tailored configuration
financial_tool = ValyuToolSpec(
    api_key=os.environ["VALYU_API_KEY"],
    max_price=100,
    fast_mode=True,  # Financial data changes rapidly
    excluded_sources=["example.com", "example.org"],  # Exclude specific domains
    response_length="medium",
    country_code="US",  # US financial markets focus
    # Content extraction for financial reports
    contents_summary=True,
    contents_extract_effort="high",
    contents_response_length="large",
)

financial_agent = OpenAIAgent.from_tools(
    financial_tool.to_tool_list(),
    verbose=True,
    system_prompt="""You are a financial research assistant. Use Valyu to search for:
    - Real-time market data and news
    - Academic research on financial models
    - Economic indicators and analysis
    - Financial reports and regulatory filings

    Always cite your sources and provide context about data recency.""",
)

# Query financial markets
response = financial_agent.chat(
    "What are the latest developments in cryptocurrency regulation and their impact on institutional adoption? Include both recent news and academic research on the topic."
)
print(response)
# Example: give an AgentWorkflow a system prompt that tells the model how to
# choose Valyu search parameters and how to format its answers.
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.tools.valyu import ValyuToolSpec

# NOTE(review): `_require_env` is not defined in this snippet; presumably a
# helper that returns a required environment variable. Confirm it exists, or
# use os.environ["VALYU_API_KEY"] as the other examples do.
valyu_tool = ValyuToolSpec(api_key=_require_env("VALYU_API_KEY"))

# Optimize agent behavior with good system messages
system_message = """You are an AI research assistant with access to Valyu search.

SEARCH GUIDELINES:
- Use search_type="proprietary" for academic/scientific queries
- Use search_type="web" for current events and general web content
- Use search_type="news" for news articles only
- Use search_type="all" for comprehensive research
- Set higher relevance_threshold (0.6+) for precise results
- Use async/await patterns with AgentWorkflow for better performance
- Do not use search operators (e.g., site:, OR, AND, quotes). Use natural keyword queries instead.
- Always cite sources from search results

RESPONSE FORMAT:
- Provide direct answers based on search results
- Include source citations with URLs when available
- Mention publication dates for time-sensitive information
- Indicate if information might be outdated
"""

# NOTE(review): `llm` is not defined in this snippet; supply an already
# configured LLM instance before running.
agent = AgentWorkflow.from_tools_or_functions(
    tools_or_functions=valyu_tool.to_tool_list(),
    llm=llm,
    system_prompt=system_message,
)
For complete query writing guidelines, see the Prompting Guide.
max_price: Maximum cost in dollars for search operations. Only applies when provided. If not provided, adjusts automatically based on search type and max number of results.