Skip to content

Historical Data API

The qd.data() function provides a fluent interface for fetching historical market data.

Basic Usage

import qldata as qd

# Minimal query
df = qd.data("BTCUSDT", source="binance").last(30).resolution("1h").get()

# Full query with all options
df = (
    qd.data(
        "BTCUSDT",              # Symbol(s)
        source="binance",       # Exchange
        category="spot"         # Market type
    )
    .last(30, "days")       # Time range
    .resolution("1h")       # Bar resolution
    .clean()                # Clean data
    .fill_forward()         # Fill gaps
    .get()                  # Execute
)

Function Signature

data module-attribute

data = UnifiedAPI()

Query Builder Methods

.last(n, unit="days")

Set the time range to the last N time units from now.

# Last 30 days (default unit)
.last(30)

# Last 7 days
.last(7, "days")

# Last 24 hours
.last(24, "hours")

# Last 60 minutes
.last(60, "minutes")

Parameters:

Parameter Type Default Description
n int - Number of time units
unit str "days" Time unit: "days", "hours", "minutes"

.range(start, end)

Set an explicit date range.

from datetime import datetime

# Specific date range
.range(
    datetime(2024, 1, 1),
    datetime(2024, 1, 31)
)

# With timezone
from datetime import timezone
.range(
    datetime(2024, 1, 1, tzinfo=timezone.utc),
    datetime(2024, 1, 31, tzinfo=timezone.utc)
)

Parameters:

Parameter Type Description
start datetime Start of range (inclusive)
end datetime End of range (inclusive)

Timezone Handling

If no timezone is provided, UTC is assumed. All returned timestamps are in UTC.


.resolution(timeframe)

Set the bar/candle resolution.

# Using string shortcuts
.resolution("1m")   # 1-minute
.resolution("5m")   # 5-minute
.resolution("15m")  # 15-minute
.resolution("1h")   # 1-hour
.resolution("4h")   # 4-hour
.resolution("1d")   # Daily
.resolution("1w")   # Weekly

# Using Timeframe enum
from qldata import Timeframe
.resolution(Timeframe.HOUR_1)

Supported Resolutions:

String Timeframe Enum Description
"1m" Timeframe.MINUTE_1 1 minute
"3m" Timeframe.MINUTE_3 3 minutes
"5m" Timeframe.MINUTE_5 5 minutes
"15m" Timeframe.MINUTE_15 15 minutes
"30m" Timeframe.MINUTE_30 30 minutes
"1h" Timeframe.HOUR_1 1 hour
"2h" Timeframe.HOUR_2 2 hours
"4h" Timeframe.HOUR_4 4 hours
"6h" Timeframe.HOUR_6 6 hours
"8h" Timeframe.HOUR_8 8 hours
"12h" Timeframe.HOUR_12 12 hours
"1d" Timeframe.DAY_1 1 day
"3d" Timeframe.DAY_3 3 days
"1w" Timeframe.WEEK_1 1 week
"1M" Timeframe.MONTH_1 1 month

.clean(**kwargs)

Apply adaptive data cleaning.

# Basic cleaning (sorts, deduplicates)
.clean()

# Aggressive cleaning
.clean(
    remove_invalid_prices=True,  # Remove zero/negative prices
    validate_ohlc=True,          # Validate OHLC relationships
    remove_outliers=True,        # Remove statistical outliers
    dropna_subset=None           # Columns to check for NaN
)

Parameters:

Parameter Type Default Description
remove_invalid_prices bool False Remove rows with zero or negative prices
validate_ohlc bool False Ensure high ≥ max(open, close) and low ≤ min(open, close)
remove_outliers bool False Remove statistical outliers (>3σ)
dropna_subset list[str] None Columns to check for NaN (default: OHLCV)

.fill_forward() / .fill_backward() / .interpolate()

Fill missing values.

# Forward fill (use last known value)
.fill_forward()

# Backward fill (use next known value)
.fill_backward()

# Linear interpolation
.interpolate()
.interpolate(method="linear")   # Default
.interpolate(method="time")     # Time-weighted

.resample(timeframe)

Resample bars to a different timeframe.

# Fetch 1-minute data, resample to hourly
df = qd.data("BTCUSDT", source="binance") \
    .last(1, "days") \
    .resolution("1m") \
    .resample("1h") \
    .get()

Upsampling Not Supported

You can only resample to larger timeframes (e.g., 1m → 1h). Resampling to smaller timeframes is not supported.


.get(**kwargs)

Execute the query and return data.

# Basic execution
df = query.get()

# With options
df = query.get(
    cache=True,         # Use disk cache
    validate=True,      # Validate data
    parallel=False,     # Parallel download (multi-symbol)
    workers=4           # Worker count for parallel
)

Parameters:

Parameter Type Default Description
cache bool Config default Use disk caching
validate bool Config default Validate returned data
parallel bool False Parallel download for multi-symbol
workers int 4 Number of parallel workers

Returns:

  • Single symbol: pandas.DataFrame with OHLCV data
  • Multiple symbols: dict[str, pandas.DataFrame]

Multi-Symbol Queries

Fetch data for multiple symbols at once:

# List of symbols
data = qd.data(["BTCUSDT", "ETHUSDT", "SOLUSDT"], source="binance") \
    .last(7) \
    .resolution("1d") \
    .get()

# Returns: {"BTCUSDT": df1, "ETHUSDT": df2, "SOLUSDT": df3}
for symbol, df in data.items():
    print(f"{symbol}: {len(df)} bars")

Parallel Downloads

For many symbols, use parallel downloads:

# Parallel with 4 workers
data = qd.data(symbols, source="binance") \
    .last(30) \
    .resolution("1h") \
    .get(parallel=True, workers=4)

Optimal Worker Count

  • For most cases, 4-8 workers is optimal
  • More workers may hit rate limits faster
  • Rate limiting is handled automatically

Return Value Format

Single Symbol

Returns a pandas.DataFrame:

df = qd.data("BTCUSDT", source="binance").last(7).resolution("1d").get()

print(df.columns)
# Index(['open', 'high', 'low', 'close', 'volume'], dtype='object')

print(df.index)
# DatetimeIndex(['2024-11-28 00:00:00+00:00', ...], dtype='datetime64[ns, UTC]', name='timestamp')

Multiple Symbols

Returns a dict[str, DataFrame]:

data = qd.data(["BTCUSDT", "ETHUSDT"], source="binance").last(7).resolution("1d").get()

print(type(data))
# <class 'dict'>

print(data.keys())
# dict_keys(['BTCUSDT', 'ETHUSDT'])

Examples

Example 1: Basic Data Fetch

import qldata as qd

# Fetch last 30 days of hourly BTC data
df = qd.data("BTCUSDT", source="binance", category="spot") \
    .last(30) \
    .resolution("1h") \
    .get()

print(f"Fetched {len(df)} bars")
print(f"Date range: {df.index[0]} to {df.index[-1]}")
print(df.head())

Example 2: Clean Data Pipeline

import qldata as qd

# Fetch and clean data
df = qd.data("BTCUSDT", source="binance") \
    .last(30) \
    .resolution("1h") \
    .clean(
        remove_invalid_prices=True,
        remove_outliers=True
    ) \
    .fill_forward() \
    .get()

# Verify no missing values
assert df.isna().sum().sum() == 0

Example 3: Resample Minute to Hourly

import qldata as qd

# Fetch 1-minute data and resample to hourly
df = qd.data("BTCUSDT", source="binance") \
    .last(1, "days") \
    .resolution("1m") \
    .clean() \
    .resample("1h") \
    .get()

print(f"Resampled to {len(df)} hourly bars")

Example 4: Multi-Symbol Comparison

import qldata as qd

# Fetch same timeframe for multiple symbols
symbols = ["BTCUSDT", "ETHUSDT", "SOLUSDT", "BNBUSDT"]

data = qd.data(symbols, source="binance") \
    .last(7) \
    .resolution("1d") \
    .clean() \
    .get(parallel=True, workers=4)

# Compare total return over the period (first close to last close)
for symbol, df in data.items():
    returns = (df['close'].iloc[-1] / df['close'].iloc[0] - 1) * 100
    print(f"{symbol}: {returns:+.2f}%")

Example 5: Futures vs Spot

import qldata as qd

# Compare spot and futures prices
spot = qd.data("BTCUSDT", source="binance", category="spot") \
    .last(7) \
    .resolution("1h") \
    .get()

futures = qd.data("BTCUSDT", source="binance", category="usdm") \
    .last(7) \
    .resolution("1h") \
    .get()

# Calculate basis (futures - spot)
basis = futures['close'] - spot['close']
print(f"Average basis: ${basis.mean():.2f}")

See Also