API timeout

Could you help me define a function that guarantees I'm not exceeding the API limits? I tried lots of things with the help of ChatGPT, but couldn't arrive at something that doesn't raise a "read timeout" error.

import time
import pandas as pd

# Assumed imports for the LSEG Data Library objects used below (not shown in the original
# snippet); ld_connect() is the session helper defined further down in this thread.
import lseg.data as ld
from lseg.data.content import fundamental_and_reference

def get_lseg_data(universe: list, fields: list, parameters: dict, version: str = "1.0.2"):
    ticker_list = list(universe)
    max_data_points = 8000
    timeout_limit = 300
    est_proc_time_per_point = 0.002

    # RESPECT API DOCUMENTATION: Max 10,000 data points per request
    # Calculate actual data points per ticker based on parameters
    days_requested = abs(parameters.get("EDate", 0)) if parameters.get("EDate") else 252  # Default ~1 year
    data_points_per_ticker = len(fields) * days_requested

    print(f"📊 Data points per ticker: {data_points_per_ticker}")

    # BALANCE SPEED VS RELIABILITY - use maximum possible data points
    max_data_points_limit = 9800  # Use 98% of 10,000 limit for safety
    calculated_max_items = max(1, max_data_points_limit // data_points_per_ticker)

    print(f"🔢 Calculation: {max_data_points_limit} ÷ {data_points_per_ticker} = {calculated_max_items} tickers max")

    # Use the smaller of calculated limit or original calculation
    if version <= "1.0.2":
        original_max_items = max_data_points // len(fields) if len(fields) > 0 else 50
    else:
        max_data_points_per_chunk = (timeout_limit * 0.8) / (est_proc_time_per_point * len(fields))
        original_max_items = int(max_data_points_per_chunk // len(fields)) if len(fields) > 0 else 50

    print(f"🔢 Original calculation gives: {original_max_items} tickers")

    max_items = min(calculated_max_items, original_max_items)
    print(f"🔢 Taking minimum: min({calculated_max_items}, {original_max_items}) = {max_items}")

    # Remove the artificial cap that's limiting us
    # max_items = max(1, min(max_items, 3))  # This line was capping at 3
    max_items = max(1, max_items)  # Just ensure it's at least 1

    # RESPECT API RATE LIMIT: 5 requests per second = 0.2s minimum gap
    # But let's be conservative to avoid other issues
    base_sleep = 2  # 2 seconds = 0.5 requests/second (well under 5/second limit)

    total_chunks = (len(ticker_list) + max_items - 1) // max_items
    print(f"🎯 Using {max_items} tickers per chunk ({max_items * data_points_per_ticker:,} data points)")
    print(f"📈 Total chunks needed: {total_chunks}")
    print(f"⏱️ Estimated time: {(total_chunks * base_sleep) / 60:.1f} minutes")

    results = []
    problematic_tickers = []

    # Test connection with a small request first
    print("🔍 Testing connection with minimal request...")
    try:
        test_chunk = ticker_list[:1]  # Just 1 ticker
        test_df = fundamental_and_reference.Definition(
            universe=test_chunk,
            fields=fields[:1],  # Just 1 field
            parameters={"SDate": 0, "EDate": -10, "FRQ": "d"}  # Just 10 days
        ).get_data().data.df
        print("✅ Connection test successful - proceeding with full load...")
        time.sleep(5)
    except Exception as e:
        print(f"❌ Connection test failed: {str(e)[:100]}...")
        print("🔄 Trying to reconnect...")
        try:
            ld.close_session()
            time.sleep(5)
            ld_connect()
            print("✅ Reconnected successfully")
        except:
            print("❌ Reconnection failed - continuing anyway...")
            time.sleep(5)
            ld_connect()

    for i in range(0, len(ticker_list), max_items):
        chunk = ticker_list[i:i + max_items]
        chunk_num = i // max_items + 1

        # SINGLE TRY - if it fails, just mark for retry round
        try:
            df_chunk = fundamental_and_reference.Definition(
                universe=chunk,
                fields=fields,
                parameters=parameters
            ).get_data().data.df
            results.append(df_chunk)
            print(f"✓ Loaded chunk {chunk_num} ({len(chunk)} tickers). Waiting {base_sleep:.1f}s.")

        except Exception as e:
            error_str = str(e)
            print(f"⚠ Chunk {chunk_num} failed: {error_str[:100]}...")
            time.sleep(base_sleep)

            # Check for authentication errors
            try:
                ld.close_session()
                time.sleep(base_sleep * 2)
                ld_connect()
                print(f"✓ Reconnected - will retry this chunk")
                time.sleep(base_sleep)

                # Immediate retry after reconnection
                df_chunk = fundamental_and_reference.Definition(
                    universe=chunk,
                    fields=fields,
                    parameters=parameters
                ).get_data().data.df
                results.append(df_chunk)
                print(f"✓ Chunk {chunk_num} successful after reconnection")

            except Exception as e2:
                print(f"⚠ Reconnection retry failed: {str(e2)[:50]}...")
                problematic_tickers.extend(chunk)

        # Normal sleep between chunks
        if i + max_items < len(ticker_list):
            time.sleep(base_sleep)

    try:
        ld.close_session()
    except:
        print("Error closing session.")

    if results:
        df = pd.concat(results, ignore_index=False)
        print(f"Retrieved data for {len(df)} records.")
        return df, problematic_tickers
    else:
        print("No data retrieved.")
        return None, problematic_tickers


def load_with_single_retry(tickers, fields, parameters):
    """
    Simple: First round at full speed, second round for failures only
    """
    print(f"🚀 Loading {len(tickers)} tickers (keeping your current speed)...")

    # FIRST ROUND - your current speed
    data, failed_tickers = get_lseg_data(
        universe=tickers,
        fields=fields,
        parameters=parameters
    )

    success_count = len(tickers) - len(failed_tickers)
    print(f"✅ First round: {success_count}/{len(tickers)} successful")

    # SECOND ROUND - only if there are failures
    if failed_tickers:
        print(f"\n🔄 Retry round for {len(failed_tickers)} failed tickers...")
        print("⏳ Waiting 60s for API recovery...")
        time.sleep(60)  # Let API recover

        retry_data, still_failed = get_lseg_data(
            universe=failed_tickers,
            fields=fields,
            parameters=parameters
        )

        # Combine successful data
        if data is not None and retry_data is not None:
            data = pd.concat([data, retry_data], ignore_index=False)
        elif retry_data is not None:
            data = retry_data

        retry_success = len(failed_tickers) - len(still_failed)
        print(f"✅ Retry round: {retry_success}/{len(failed_tickers)} successful")

        if still_failed:
            print(f"❌ Final failures: {len(still_failed)} tickers")
            print(f"   Sample: {still_failed[:5]}...")

        return data, still_failed

    else:
        print("🎉 Perfect! No retries needed.")
        return data, []


# USAGE - replace your manual retry code with this:
"""
data, final_failed = load_with_single_retry(
    tickers=tickers,
    fields=['TR.CLOSEPRICE.date', 'TR.CLOSEPRICE', 'TR.Volume'],
    parameters={"SDate": 0, "EDate": -1100, "FRQ": "d"}
)
"""

Output from running this over 928 tickers:

🚀 Loading 928 tickers (keeping your current speed)...
📊 Data points per ticker: 3300
🔢 Calculation: 9800 ÷ 3300 = 2 tickers max
🔢 Original calculation gives: 2666 tickers
🔢 Taking minimum: min(2, 2666) = 2
🎯 Using 2 tickers per chunk (6,600 data points)
📈 Total chunks needed: 464
⏱️ Estimated time: 15.5 minutes
🔍 Testing connection with minimal request...
❌ Connection test failed: No default session created yet. Please create a session first!...
🔄 Trying to reconnect...
LSEG connected
✅ Reconnected successfully
LSEG connected
An error occurred while requesting URL('https://api.refinitiv.com/data/datagrid/beta1/').
ReadTimeout('The read operation timed out')
⚠ Chunk 1 failed: The read operation timed out...
LSEG connected
✓ Reconnected - will retry this chunk
An error occurred while requesting URL('https://api.refinitiv.com/data/datagrid/beta1/').
ReadTimeout('The read operation timed out')
⚠ Reconnection retry failed: The read operation timed out...
✓ Loaded chunk 2 (2 tickers). Waiting 2.0s.
An error occurred while requesting URL('https://api.refinitiv.com/data/datagrid/beta1/').
ReadTimeout('The read operation timed out')
⚠ Chunk 3 failed: The read operation timed out...
LSEG connected
✓ Reconnected - will retry this chunk
An error occurred while requesting URL('https://api.refinitiv.com/data/datagrid/beta1/').
ReadTimeout('The read operation timed out')
⚠ Reconnection retry failed: The read operation timed out...
/usr/local/lib/python3.11/dist-packages/lseg/data/_tools/_dataframe.py:177:FutureWarning: Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
✓ Loaded chunk 4 (2 tickers). Waiting 2.0s.
✓ Loaded chunk 5 (2 tickers). Waiting 2.0s.
✓ Loaded chunk 6 (2 tickers). Waiting 2.0s.

Answers

  • Hello @augusto_krappa

    The following error message does not look like an API limit error:

    An error occurred while requesting URL('https://api.refinitiv.com/data/datagrid/beta1/').
    ReadTimeout('The read operation timed out')
    

    It looks like a connection timeout between the API and the server, or a server-side timeout.

    If it were the API limit (you can find more detail here), the message would look like one of these:

    • Error Code 429 | Client Error: Too Many Requests
    • Error Code 400 | Backend error.

    You may try increasing the timeout value in the library configuration file. Please see how to do it in my colleague's answer on this old post.
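
    For reference, a minimal sketch of what the configuration file could look like. The file name (lseg-data.config.json in the working directory) and the key layout are assumptions on my part, mirroring the refinitiv-data configuration scheme, so please verify them against the library documentation:

    {
        "http": {
            "request-timeout": 60
        }
    }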

  • Could you help me "merge" the solution from the referred post into my code, please? I'm a bit confused by the many methods available.



    my code:
    def ld_connect(APP_KEY=os.environ['REFINITIV_EDP_API'],
                   USER=os.environ['REFINITIV_USER'],
                   PASS=os.environ['REFINITIV_PASSWORD']):
        session = ld.session.platform.Definition(
            app_key=APP_KEY,
            grant=ld.session.platform.GrantPassword(
                username=USER,
                password=PASS
            ),
            signon_control=True
        ).get_session()

        session.open()
        ld.session.set_default(session)
        print("LSEG connected")
        return session

    ……
    df_chunk = fundamental_and_reference.Definition(
        universe=chunk,
        fields=fields,
        parameters=parameters
    ).get_data().data.df


    ————————————————————————-


    referred post:



    config = rd.get_config()
    config.set_param("logs.transports.console.enabled", True)
    config.set_param("logs.level", "debug")
    config.set_param("http.request-timeout", 60)
    rd.open_session()
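
    One possible way to merge the two is to raise the timeout inside ld_connect before the session is opened. This is only a sketch, not a confirmed solution: it assumes that lseg.data (ld) exposes the same get_config()/set_param() interface that refinitiv.data (rd) uses in the referred post, and the 120-second value is just an example:

    import os
    import lseg.data as ld

    def ld_connect(APP_KEY=os.environ['REFINITIV_EDP_API'],
                   USER=os.environ['REFINITIV_USER'],
                   PASS=os.environ['REFINITIV_PASSWORD']):
        # Assumption: ld.get_config() mirrors rd.get_config() from the referred post
        config = ld.get_config()
        config.set_param("http.request-timeout", 120)  # HTTP read timeout in seconds

        session = ld.session.platform.Definition(
            app_key=APP_KEY,
            grant=ld.session.platform.GrantPassword(
                username=USER,
                password=PASS
            ),
            signon_control=True
        ).get_session()

        session.open()
        ld.session.set_default(session)
        print("LSEG connected")
        return session

    The chunked fundamental_and_reference.Definition(...).get_data() calls stay unchanged; only the session setup gains the timeout override.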