Skip to main content
The Datasources API provides a tool manifest for AI agents to discover available data sources. Use it to understand what datasources are available before making search requests with includedSources or excludedSources.

Basic Usage

import { Valyu } from "valyu-js";

const valyu = new Valyu();

// List all available datasources
const response = await valyu.datasources.list();

// `datasources` is an optional field of the response, so fall back to an
// empty list rather than printing "Found undefined datasources".
const allDatasources = response.datasources ?? [];

console.log(`Found ${allDatasources.length} datasources`);
for (const ds of allDatasources) {
  console.log(`${ds.id}: ${ds.name} (${ds.category})`);
}

Methods

datasources.list()

List all available datasources with optional category filtering.
// Passing `category` restricts the listing to datasources in that category.
const response = await valyu.datasources.list({ category: "research" });

Parameters

| Parameter | Type | Description | Default |
| --- | --- | --- | --- |
| category | DatasourceCategoryId | Filter by category (see categories below) | undefined |

Available Categories

| Category | Description |
| --- | --- |
| research | Academic papers (arXiv, PubMed, bioRxiv) |
| healthcare | Clinical trials, drug info, health data |
| markets | Stocks, crypto, forex, ETFs |
| company | SEC filings, earnings, insider trades |
| economic | FRED, BLS, World Bank data |
| predictions | Polymarket, Kalshi |
| transportation | UK Rail, ship tracking |
| legal | Case law, legislation |
| politics | Parliamentary data |
| patents | Global patent filings |

datasources.categories()

List all available categories with dataset counts.
// Fetch every category together with its dataset count.
const response = await valyu.datasources.categories();

// Nothing is printed when the response carries no categories.
for (const cat of response.categories ?? []) {
  console.log(`${cat.id}: ${cat.name} (${cat.dataset_count} datasets)`);
}

Response Format

DatasourcesListResponse

/**
 * Result of `datasources.list()`.
 *
 * Callers in this guide check `success` first and read `error` when it is
 * false; `datasources` is optional and must be guarded before use.
 */
interface DatasourcesListResponse {
  success: boolean;
  error?: string;
  datasources?: Datasource[];
}

/**
 * A single entry of the datasource manifest returned by `datasources.list()`.
 *
 * `id` is the value passed to search via `includedSources` in the examples
 * of this guide.
 */
interface Datasource {
  id: string;                              // e.g., "valyu/valyu-arxiv"
  name: string;                            // e.g., "Arxiv"
  description: string;                     // Full description
  category: DatasourceCategoryId;          // e.g., "research"
  type: string;                            // e.g., "paper", "dataset"
  modality: DatasourceModality[];          // e.g., ["text", "images"]
  topics: string[];                        // e.g., ["Research Papers", "Physics"]
  languages?: string[];                    // e.g., ["English"]
  source?: string;                         // Data provider
  example_queries: string[];               // Sample queries for few-shot prompting
  pricing: DatasourcePricing;              // Cost information
  response_schema?: Record<string, any>;   // JSON schema for responses
  update_frequency?: string;               // e.g., "Monthly", "Quarterly"
  size?: number;                           // Number of records
  coverage?: DatasourceCoverage;           // Date range coverage
}

/** Pricing information attached to a datasource. */
interface DatasourcePricing {
  // NOTE(review): "CPM" conventionally means cost per thousand (mille);
  // confirm whether the unit really is "per million tokens".
  cpm: number;  // Cost per million tokens
}

/**
 * Date range covered by a datasource.
 *
 * Either bound may be omitted or `null`.
 * NOTE(review): the date string format is not specified here — confirm
 * before parsing.
 */
interface DatasourceCoverage {
  start_date?: string | null;
  end_date?: string | null;
}

/**
 * Category identifiers accepted by the `category` filter of
 * `datasources.list()` and reported on each `Datasource`.
 */
type DatasourceCategoryId =
  | "research"
  | "healthcare"
  | "patents"
  | "markets"
  | "company"
  | "economic"
  | "predictions"
  | "transportation"
  | "legal"
  | "politics";

/** Content modalities a datasource can list in its `modality` array. */
type DatasourceModality = "text" | "images" | "tabular";

DatasourcesCategoriesResponse

/**
 * Result of `datasources.categories()`.
 *
 * `categories` is optional and must be guarded before use; `error` is
 * optional as well.
 */
interface DatasourcesCategoriesResponse {
  success: boolean;
  error?: string;
  categories?: DatasourceCategory[];
}

/**
 * Summary of one datasource category.
 *
 * `id` is typed as a plain `string`, so it has to be narrowed or cast before
 * being passed where a `DatasourceCategoryId` is expected.
 */
interface DatasourceCategory {
  id: string;              // e.g., "research"
  name: string;            // e.g., "Research & Academic"
  description: string;
  dataset_count: number;   // Number of datasources in category
}

Use Case Examples

Dynamic Source Discovery for AI Agents

Build agents that discover relevant datasources at runtime:
/**
 * Resolves the IDs of the datasources relevant to a high-level query domain.
 *
 * The domain is translated to a datasource category and the matching
 * datasources are fetched with `datasources.list()`.
 *
 * @param queryDomain - Domain label: "academic", "medical", "financial",
 *   "corporate" or "economic". Any other value has no mapping and is sent as
 *   `category: undefined`, i.e. without a category filter.
 * @returns The `id` of every listed datasource, or an empty array when the
 *   response reports failure or carries no datasources.
 */
async function findRelevantSources(queryDomain: string): Promise<string[]> {
  const valyu = new Valyu();

  // Map query domains to categories.
  // A Map is used instead of a plain object because `queryDomain` is an
  // arbitrary string: on an object literal, inherited keys such as
  // "constructor" or "toString" would resolve to functions and be sent as
  // the category.
  const domainToCategory: ReadonlyMap<string, DatasourceCategoryId> = new Map<
    string,
    DatasourceCategoryId
  >([
    ["academic", "research"],
    ["medical", "healthcare"],
    ["financial", "markets"],
    ["corporate", "company"],
    ["economic", "economic"],
  ]);

  const category = domainToCategory.get(queryDomain);
  const response = await valyu.datasources.list({ category });

  if (response.success && response.datasources) {
    return response.datasources.map(ds => ds.id);
  }
  return [];
}

// Feed the discovered datasource IDs into a search request
const sources = await findRelevantSources("academic");
const searchOptions = { includedSources: sources };
const searchResponse = await valyu.search("transformer architecture improvements", searchOptions);

Few-Shot Prompting with Example Queries

Use example_queries from datasources to improve search quality:
/**
 * Gathers sample queries from the datasources of one category.
 *
 * @param category - Category whose datasources are listed.
 * @returns Up to two example queries per datasource, in listing order; an
 *   empty array when the response reports failure or has no datasources.
 */
async function getExampleQueries(category: DatasourceCategoryId): Promise<string[]> {
  const client = new Valyu();
  const { success, datasources } = await client.datasources.list({ category });

  if (!success || !datasources) {
    return [];
  }

  // Keep at most the first two samples of each datasource; entries without
  // `example_queries` contribute nothing.
  return datasources.flatMap(source =>
    source.example_queries ? source.example_queries.slice(0, 2) : []
  );
}

// Fetch and print the sample queries of the research category
const researchExamples = await getExampleQueries("research");
console.log("Example research queries:");
for (const example of researchExamples) {
  console.log(`  - ${example}`);
}

Cost Estimation

Estimate costs before making search requests:
/**
 * Aggregated CPM figures for the datasources of one category, as produced by
 * `estimateSearchCost`.
 */
interface CostEstimate {
  sources: number;     // Number of datasources that carried pricing
  averageCpm: number;  // Mean CPM across those datasources (0 when none)
  minCpm: number;      // Lowest CPM (0 when none)
  maxCpm: number;      // Highest CPM (0 when none)
  details: Array<{ source: string; cpm: number; name: string }>;  // Per-datasource id, CPM and name
}

/**
 * Summarises the CPM pricing of every datasource in a category.
 *
 * @param category - Category whose datasources are listed.
 * @returns A `CostEstimate` covering the datasources that carry pricing, or
 *   `{ error }` when the listing reports failure ("Unknown error" if the
 *   response has no error text). With no priced datasources all figures are 0.
 */
async function estimateSearchCost(category: DatasourceCategoryId): Promise<CostEstimate | { error: string }> {
  const client = new Valyu();
  const listing = await client.datasources.list({ category });

  if (!listing.success) {
    return { error: listing.error || "Unknown error" };
  }

  // Single pass: collect the per-source details and the running aggregates.
  const details: CostEstimate["details"] = [];
  let total = 0;
  let lowest = Infinity;
  let highest = -Infinity;

  for (const ds of listing.datasources || []) {
    if (!ds.pricing) {
      continue; // datasources without pricing are left out of the estimate
    }
    const { cpm } = ds.pricing;
    details.push({ source: ds.id, cpm, name: ds.name });
    total += cpm;
    lowest = Math.min(lowest, cpm);
    highest = Math.max(highest, cpm);
  }

  const hasPriced = details.length > 0;

  return {
    sources: details.length,
    averageCpm: hasPriced ? total / details.length : 0,
    minCpm: hasPriced ? lowest : 0,
    maxCpm: hasPriced ? highest : 0,
    details
  };
}

// Estimate pricing for the markets category; the `in` check narrows the
// result to the success shape before the average is read.
const costs = await estimateSearchCost("markets");
if ("averageCpm" in costs) {
  const formattedCpm = costs.averageCpm.toFixed(2);
  console.log(`Average CPM for markets: $${formattedCpm}`);
}

List All Sources by Category

Get a complete overview of available data:
/**
 * Prints an overview of every category and the datasources it contains.
 *
 * Categories are fetched first; for each one a header is printed, followed
 * by one line per datasource with its id, name and CPM ("N/A" when the
 * datasource has no pricing). Failures are written to `console.error`:
 * a failed category lookup aborts the overview, a failed datasource listing
 * is reported and the loop moves on to the next category.
 */
async function listAllSources(): Promise<void> {
  const valyu = new Valyu();

  // Get categories first
  const categoriesResponse = await valyu.datasources.categories();

  if (!categoriesResponse.success || !categoriesResponse.categories) {
    // `error` is optional, so supply a fallback instead of logging
    // "Error: undefined" when categories are missing without an error text.
    console.error(`Error: ${categoriesResponse.error ?? "No categories returned"}`);
    return;
  }

  // Categories are processed one at a time so each header is immediately
  // followed by its own datasource lines.
  for (const cat of categoriesResponse.categories) {
    console.log(`\n${cat.name} (${cat.dataset_count} sources)`);
    console.log("-".repeat(40));

    // Get datasources for this category.
    // `cat.id` is typed as `string`, hence the cast to the category union.
    const sourcesResponse = await valyu.datasources.list({
      category: cat.id as DatasourceCategoryId
    });

    if (!sourcesResponse.success) {
      // Report the failure rather than leaving an unexplained empty section.
      console.error(
        `  Error fetching ${cat.id} datasources: ${sourcesResponse.error ?? "Unknown error"}`
      );
      continue;
    }

    for (const ds of sourcesResponse.datasources ?? []) {
      const pricing = ds.pricing ? `$${ds.pricing.cpm.toFixed(1)} CPM` : "N/A";
      console.log(`  ${ds.id}: ${ds.name} [${pricing}]`);
    }
  }
}

// Run the overview and wait for all output to be printed.
await listAllSources();

Error Handling

const response = await valyu.datasources.list({ category: "research" });

if (!response.success) {
  // `error` is optional; fall back to a fixed text so the log never reads
  // "Error fetching datasources: undefined".
  console.error(`Error fetching datasources: ${response.error || "Unknown error"}`);
} else {
  // `datasources` is optional even on success, so default to an empty list
  // instead of printing "Found undefined research datasources".
  const researchDatasources = response.datasources ?? [];
  console.log(`Found ${researchDatasources.length} research datasources`);
  for (const ds of researchDatasources) {
    console.log(`  - ${ds.id}: ${ds.name}`);
  }
}

Using with Search API

Once you’ve discovered relevant datasources, use them with the Search API:
// Discover research datasources; an absent list yields no source IDs
const datasources = await valyu.datasources.list({ category: "research" });
const researchSources = (datasources.datasources ?? []).map(ds => ds.id);

// Restrict the search to the discovered sources
const searchOptions = {
  includedSources: researchSources,
  maxNumResults: 10
};
const results = await valyu.search("latest transformer architecture improvements", searchOptions);
For more information on filtering by sources, see the Source Filtering Guide.