Could you help me define a function that guarantees I'm not bypassing the API limits? I tried lots of things with ChatGPT's help, but couldn't come up with anything that doesn't raise a "read operation timed out" error.
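For context, each chunk in the function below boils down to a single Datagrid request like the sketch here (the imports are from the lseg.data library and assume the session credentials are already configured; "VOD.L" is just a placeholder instrument, and the ld_connect() used in the full code is my own helper that opens the session and prints "LSEG connected"):

import lseg.data as ld
from lseg.data.content import fundamental_and_reference

ld.open_session()
df = fundamental_and_reference.Definition(
    universe=["VOD.L"],  # placeholder ticker
    fields=["TR.CLOSEPRICE.date", "TR.CLOSEPRICE", "TR.Volume"],
    parameters={"SDate": 0, "EDate": -10, "FRQ": "d"},  # 10 days, same as the connection test below
).get_data().data.df
ld.close_session()

The full code I have so far: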
import time
import pandas as pd
import lseg.data as ld
from lseg.data.content import fundamental_and_reference
# ld_connect() is defined elsewhere in my notebook; it (re)opens the LSEG session
# and prints "LSEG connected".

def get_lseg_data(universe: list, fields: list, parameters: dict, version: str = "1.0.2"):
    ticker_list = list(universe)
    max_data_points = 8000
    timeout_limit = 300
    est_proc_time_per_point = 0.002

    # RESPECT API DOCUMENTATION: max 10,000 data points per request.
    # Calculate actual data points per ticker based on the parameters.
    days_requested = abs(parameters.get("EDate", 0)) if parameters.get("EDate") else 252  # default ~1 year
    data_points_per_ticker = len(fields) * days_requested
    print(f"Data points per ticker: {data_points_per_ticker}")

    # BALANCE SPEED VS RELIABILITY - use the maximum possible data points.
    max_data_points_limit = 9800  # use 98% of the 10,000 limit for safety
    calculated_max_items = max(1, max_data_points_limit // data_points_per_ticker)
    print(f"Calculation: {max_data_points_limit} ÷ {data_points_per_ticker} = {calculated_max_items} tickers max")

    # Use the smaller of the calculated limit or the original calculation.
    if version <= "1.0.2":
        original_max_items = max_data_points // len(fields) if len(fields) > 0 else 50
    else:
        max_data_points_per_chunk = (timeout_limit * 0.8) / (est_proc_time_per_point * len(fields))
        original_max_items = int(max_data_points_per_chunk // len(fields)) if len(fields) > 0 else 50
    print(f"Original calculation gives: {original_max_items} tickers")

    max_items = min(calculated_max_items, original_max_items)
    print(f"Taking minimum: min({calculated_max_items}, {original_max_items}) = {max_items}")

    # Remove the artificial cap that was limiting us.
    # max_items = max(1, min(max_items, 3))  # this line was capping at 3
    max_items = max(1, max_items)  # just ensure it's at least 1

    # RESPECT API RATE LIMIT: 5 requests per second = 0.2 s minimum gap,
    # but be conservative to avoid other issues.
    base_sleep = 2  # 2 seconds = 0.5 requests/second (well under the 5/second limit)

    total_chunks = (len(ticker_list) + max_items - 1) // max_items
    print(f"Using {max_items} tickers per chunk ({max_items * data_points_per_ticker:,} data points)")
    print(f"Total chunks needed: {total_chunks}")
    print(f"Estimated time: {(total_chunks * base_sleep) / 60:.1f} minutes")

    results = []
    problematic_tickers = []

    # Test the connection with a small request first.
    print("Testing connection with minimal request...")
    try:
        test_chunk = ticker_list[:1]  # just 1 ticker
        test_df = fundamental_and_reference.Definition(
            universe=test_chunk,
            fields=fields[:1],  # just 1 field
            parameters={"SDate": 0, "EDate": -10, "FRQ": "d"}  # just 10 days
        ).get_data().data.df
        print("Connection test successful - proceeding with full load...")
        time.sleep(5)
    except Exception as e:
        print(f"Connection test failed: {str(e)[:100]}...")
        print("Trying to reconnect...")
        try:
            ld.close_session()
            time.sleep(5)
            ld_connect()
            print("Reconnected successfully")
        except Exception:
            print("Reconnection failed - continuing anyway...")
            time.sleep(5)
            ld_connect()
    for i in range(0, len(ticker_list), max_items):
        chunk = ticker_list[i:i + max_items]
        chunk_num = i // max_items + 1

        # SINGLE TRY - if it fails, just mark the chunk for the retry round.
        try:
            df_chunk = fundamental_and_reference.Definition(
                universe=chunk,
                fields=fields,
                parameters=parameters
            ).get_data().data.df
            results.append(df_chunk)
            print(f"Loaded chunk {chunk_num} ({len(chunk)} tickers). Waiting {base_sleep:.1f}s.")
        except Exception as e:
            error_str = str(e)
            print(f"Chunk {chunk_num} failed: {error_str[:100]}...")
            time.sleep(base_sleep)
            # Reconnect in case the failure was an authentication/session problem.
            try:
                ld.close_session()
                time.sleep(base_sleep * 2)
                ld_connect()
                print("Reconnected - will retry this chunk")
                time.sleep(base_sleep)
                # Immediate retry after reconnection
                df_chunk = fundamental_and_reference.Definition(
                    universe=chunk,
                    fields=fields,
                    parameters=parameters
                ).get_data().data.df
                results.append(df_chunk)
                print(f"Chunk {chunk_num} successful after reconnection")
            except Exception as e2:
                print(f"Reconnection retry failed: {str(e2)[:50]}...")
                problematic_tickers.extend(chunk)

        # Normal sleep between chunks
        if i + max_items < len(ticker_list):
            time.sleep(base_sleep)

    try:
        ld.close_session()
    except Exception:
        print("Error closing session.")

    if results:
        df = pd.concat(results, ignore_index=False)
        print(f"Retrieved data for {len(df)} records.")
        return df, problematic_tickers
    else:
        print("No data retrieved.")
        return None, problematic_tickers

def load_with_single_retry(tickers, fields, parameters):
    """
    Simple: first round at full speed, second round for the failures only.
    """
    print(f"Loading {len(tickers)} tickers (keeping your current speed)...")

    # FIRST ROUND - your current speed
    data, failed_tickers = get_lseg_data(
        universe=tickers,
        fields=fields,
        parameters=parameters
    )
    success_count = len(tickers) - len(failed_tickers)
    print(f"First round: {success_count}/{len(tickers)} successful")

    # SECOND ROUND - only if there are failures
    if failed_tickers:
        print(f"\nRetry round for {len(failed_tickers)} failed tickers...")
        print("Waiting 60s for API recovery...")
        time.sleep(60)  # let the API recover

        retry_data, still_failed = get_lseg_data(
            universe=failed_tickers,
            fields=fields,
            parameters=parameters
        )

        # Combine the successful data
        if data is not None and retry_data is not None:
            data = pd.concat([data, retry_data], ignore_index=False)
        elif retry_data is not None:
            data = retry_data

        retry_success = len(failed_tickers) - len(still_failed)
        print(f"Retry round: {retry_success}/{len(failed_tickers)} successful")
        if still_failed:
            print(f"Final failures: {len(still_failed)} tickers")
            print(f"  Sample: {still_failed[:5]}...")
        return data, still_failed
    else:
        print("Perfect! No retries needed.")
        return data, []


# USAGE - replace the manual retry code with this:
"""
data, final_failed = load_with_single_retry(
    tickers=tickers,
    fields=['TR.CLOSEPRICE.date', 'TR.CLOSEPRICE', 'TR.Volume'],
    parameters={"SDate": 0, "EDate": -1100, "FRQ": "d"}
)
"""
This is the output I get:

Loading 928 tickers (keeping your current speed)...
Data points per ticker: 3300
Calculation: 9800 ÷ 3300 = 2 tickers max
Original calculation gives: 2666 tickers
Taking minimum: min(2, 2666) = 2
Using 2 tickers per chunk (6,600 data points)
Total chunks needed: 464
Estimated time: 15.5 minutes
Testing connection with minimal request...
Connection test failed: No default session created yet. Please create a session first!...
Trying to reconnect...
LSEG connected
Reconnected successfully
LSEG connected
An error occurred while requesting URL('https://api.refinitiv.com/data/datagrid/beta1/').
ReadTimeout('The read operation timed out')
Chunk 1 failed: The read operation timed out...
LSEG connected
Reconnected - will retry this chunk
An error occurred while requesting URL('https://api.refinitiv.com/data/datagrid/beta1/').
ReadTimeout('The read operation timed out')
Reconnection retry failed: The read operation timed out...
Loaded chunk 2 (2 tickers). Waiting 2.0s.
An error occurred while requesting URL('https://api.refinitiv.com/data/datagrid/beta1/').
ReadTimeout('The read operation timed out')
Chunk 3 failed: The read operation timed out...
LSEG connected
Reconnected - will retry this chunk
An error occurred while requesting URL('https://api.refinitiv.com/data/datagrid/beta1/').
ReadTimeout('The read operation timed out')
Reconnection retry failed: The read operation timed out...
/usr/local/lib/python3.11/dist-packages/lseg/data/_tools/_dataframe.py:177: FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
Loaded chunk 4 (2 tickers). Waiting 2.0s.
Loaded chunk 5 (2 tickers). Waiting 2.0s.
Loaded chunk 6 (2 tickers). Waiting 2.0s.