Skip to main content
The Datasources API provides a tool manifest for AI agents to discover available data sources. Use it to understand what datasources are available before making search requests with included_sources or excluded_sources.

Basic Usage

from valyu import Valyu

# Create the client with no arguments
valyu = Valyu()

# Request the full manifest (no category filter)
response = valyu.datasources()
all_sources = response.datasources

print(f"Found {len(all_sources)} datasources")
for source in all_sources:
    print(f"{source.id}: {source.name} ({source.category})")

Methods

datasources()

List all available datasources with optional category filtering.
# Pass a category id to restrict the listing to that category
response = valyu.datasources(category="research")

Parameters

| Parameter | Type | Description | Default |
| --- | --- | --- | --- |
| category | str \| None | Filter by category (see categories below) | None |

Available Categories

| Category | Description |
| --- | --- |
| research | Academic papers (arXiv, PubMed, bioRxiv) |
| healthcare | Clinical trials, drug info, health data |
| markets | Stocks, crypto, forex, ETFs |
| company | SEC filings, earnings, insider trades |
| economic | FRED, BLS, World Bank data |
| predictions | Polymarket, Kalshi |
| transportation | UK Rail, ship tracking |
| legal | Case law, legislation |
| politics | Parliamentary data |
| patents | Global patent filings |

datasources_categories()

List all available categories with dataset counts.
response = valyu.datasources_categories()

# One summary line per category
for category in response.categories:
    summary = f"{category.id}: {category.name} ({category.dataset_count} datasets)"
    print(summary)

Response Format

DatasourcesResponse

class DatasourcesResponse:
    """Response object documented for ``valyu.datasources()``."""
    success: bool                  # Examples check this before using ``datasources``
    error: Optional[str]           # Examples print this as the reason when ``success`` is false
    datasources: List[Datasource]  # The listed datasource entries

class Datasource:
    """A single datasource entry, as listed in ``DatasourcesResponse.datasources``.

    The ``id`` is the value the examples pass to ``included_sources`` when
    searching.
    """
    id: str                              # e.g., "valyu/valyu-arxiv"
    name: str                            # e.g., "Arxiv"
    description: str                     # Full description
    category: str                        # e.g., "research"
    type: Optional[str]                  # e.g., "paper", "dataset"
    modality: Optional[List[str]]        # e.g., ["text", "images"]
    topics: Optional[List[str]]          # e.g., ["Research Papers", "Physics"]
    languages: Optional[List[str]]       # e.g., ["English"]
    source: Optional[str]                # Data provider
    example_queries: Optional[List[str]] # Sample queries for few-shot prompting
    pricing: Optional[DatasourcePricing] # Cost information
    response_schema: Optional[dict]      # JSON schema for responses
    update_frequency: Optional[str]      # e.g., "Monthly", "Quarterly"
    size: Optional[int]                  # Number of records
    coverage: Optional[DatasourceCoverage] # Date range coverage

class DatasourcePricing:
    """Cost information attached to a datasource through ``Datasource.pricing``."""
    cpm: float  # Cost per million tokens

class DatasourceCoverage:
    """Date range covered by a datasource (see ``Datasource.coverage``)."""
    start_date: Optional[str]  # Start of the covered range; string format is not shown here
    end_date: Optional[str]    # End of the covered range; string format is not shown here

DatasourceCategoriesResponse

class DatasourceCategoriesResponse:
    """Response object documented for ``valyu.datasources_categories()``."""
    success: bool                       # Examples check this before using ``categories``
    error: Optional[str]                # Examples print this as the reason when ``success`` is false
    categories: List[DatasourceCategory]  # The listed category entries

class DatasourceCategory:
    """One category entry; its ``id`` is what examples pass as ``category=``."""
    id: str              # e.g., "research"
    name: str            # e.g., "Research & Academic"
    description: Optional[str]  # Optional description of the category
    dataset_count: int   # Number of datasources in category

Use Case Examples

Dynamic Source Discovery for AI Agents

Build agents that discover relevant datasources at runtime:
def find_relevant_sources(query_domain: str) -> List[str]:
    """Find datasources relevant to a query domain.

    Args:
        query_domain: One of "academic", "medical", "financial",
            "corporate" or "economic".

    Returns:
        IDs of the datasources in the matching category, or an empty list
        when the domain is not recognised or the API call reports failure.
    """
    # Map query domains to categories
    domain_to_category = {
        "academic": "research",
        "medical": "healthcare",
        "financial": "markets",
        "corporate": "company",
        "economic": "economic",
    }

    category = domain_to_category.get(query_domain)
    if category is None:
        # An unmapped domain must not fall through to category=None: that is
        # the "no filter" default and would return every datasource rather
        # than the relevant ones.
        return []

    valyu = Valyu()
    response = valyu.datasources(category=category)

    if not response.success:
        return []
    return [ds.id for ds in response.datasources]

# Use discovered sources in search
academic_sources = find_relevant_sources("academic")
search_response = valyu.search(
    "transformer architecture improvements",
    included_sources=academic_sources,
)

Few-Shot Prompting with Example Queries

Use example_queries from datasources to improve search quality:
def get_example_queries(category: str) -> List[str]:
    """Collect sample queries from a category's datasources.

    At most the first two example queries of each datasource are kept.
    Returns an empty list when the API call reports failure.
    """
    client = Valyu()
    listing = client.datasources(category=category)

    if not listing.success:
        return []

    return [
        query
        for source in listing.datasources
        if source.example_queries
        for query in source.example_queries[:2]
    ]

# Get examples for research queries
research_examples = get_example_queries("research")
print("Example research queries:")
for query in research_examples:
    print(f"  - {query}")

Cost Estimation

Estimate costs before making search requests:
def estimate_search_cost(category: str) -> dict:
    """Summarise the pricing of the datasources in a category.

    Returns ``{"error": ...}`` when the API call reports failure. Otherwise
    returns the count of priced sources, their average/min/max CPM (all 0
    when no source carries pricing) and the per-source details.
    """
    client = Valyu()
    listing = client.datasources(category=category)

    if not listing.success:
        return {"error": listing.error}

    # Only datasources that carry pricing information contribute
    priced = [
        {"source": src.id, "cpm": src.pricing.cpm, "name": src.name}
        for src in listing.datasources
        if src.pricing
    ]
    rates = [entry["cpm"] for entry in priced]

    if rates:
        average = sum(rates) / len(rates)
        lowest = min(rates)
        highest = max(rates)
    else:
        # No priced sources: report zeros instead of dividing by zero
        average = lowest = highest = 0

    return {
        "sources": len(priced),
        "average_cpm": average,
        "min_cpm": lowest,
        "max_cpm": highest,
        "details": priced,
    }

# Check costs for financial data
costs = estimate_search_cost("markets")
if "error" in costs:
    # A failed lookup returns only {"error": ...}, so there is no
    # 'average_cpm' key to format.
    print(f"Error estimating costs: {costs['error']}")
else:
    print(f"Average CPM for markets: ${costs['average_cpm']:.2f}")

List All Sources by Category

Get a complete overview of available data:
def list_all_sources():
    """Print every datasource, grouped under a heading for its category.

    Prints an error line and stops if the category listing fails. A category
    whose datasource listing fails keeps its heading but shows no entries.
    """
    client = Valyu()

    # Fetch the category list up front
    overview = client.datasources_categories()
    if not overview.success:
        print(f"Error: {overview.error}")
        return

    divider = "-" * 40
    for group in overview.categories:
        print(f"\n{group.name} ({group.dataset_count} sources)")
        print(divider)

        # Fetch the datasources belonging to this category
        listing = client.datasources(category=group.id)
        if not listing.success:
            continue

        for entry in listing.datasources:
            if entry.pricing:
                price_label = f"${entry.pricing.cpm:.1f} CPM"
            else:
                price_label = "N/A"
            print(f"  {entry.id}: {entry.name} [{price_label}]")

# Print the overview of all categories and their datasources
list_all_sources()

Error Handling

response = valyu.datasources(category="research")

if response.success:
    research = response.datasources
    print(f"Found {len(research)} research datasources")
    for source in research:
        print(f"  - {source.id}: {source.name}")
else:
    # The failure reason is carried on the response object
    print(f"Error fetching datasources: {response.error}")

Using with Search API

Once you’ve discovered relevant datasources, use them with the Search API:
# Discover research datasources
research_listing = valyu.datasources(category="research")
research_sources = [source.id for source in research_listing.datasources]

# Use them in a search
results = valyu.search(
    "latest transformer architecture improvements",
    included_sources=research_sources,
    max_num_results=10,
)
For more information on filtering by sources, see the Source Filtering Guide.