Hello,
I am running Eikon API through Python to retrieve quarterly historical information on certain indices from 2000-2022.
However, my extract has certain unexpected empty cells for common fields like Market Cap and Total Return which are available through Datastream (Excel).
Below is an example count of missing data with the FTSE100
The empty cells are more pronounced with the S&P 500 around 36 to 14 missing market cap info, hence the more the stock the more the missing data.
Below is my current code for your reference
#THIS CODE GENERATES OUTPUT ON A QUARTERLY BASIS FOR SELECTED FIELDS AND INDICES, SPECIFICALLY RELATED TO ESG SCORES
import eikon as ek
import refinitiv.data as rd
import concurrent.futures
from retry import retry
import pandas as pd
from tqdm import tqdm
from datetime import datetime
from dateutil.relativedelta import relativedelta
#The API key is obtained from Eikon's platform and might change occasionally (check before running the code)
ek.set_app_key('Enter key here')
rd.open_session()
# Specifying the desired months and days (quarters in our case)
month_days = [('01-01', '04-01', '07-01', '10-01')]
# Specifying the desired years(the range will not include the last year)
years = range(2000, 2023)
# Create a Pandas Excel, the file will be exported with all results
excel_filename = 'ESG_SCORES_QUARTERLY.xlsx'
# Add retry decorator to fetch_data function
@retry(tries=3, delay=2, backoff=2)
def fetch_data(selected_date, formatted_EDate):
try:
return rd.get_data(
universe=[f'0#.FTSE({selected_date.replace("-", "")})'],
fields=[
'TR.CommonName',
'TR.ISINCode',
f'TR.EnvironmentPillarScore(SDate={selected_date})',
f'TR.SocialPillarScore(SDate={selected_date})',
f'TR.GovernancePillarScore(SDate={selected_date})',
f'TR.TRESGScore(SDate={selected_date})',
f'TR.TRESGScoreGrade(SDate={selected_date})',
f'TR.CompanyMarketCap(SDate={selected_date})',
f'TR.CompanyMarketCap.Currency',
f'TR.PriceClose(SDate={selected_date})',
f'TR.OPENPRICE(SDate={selected_date})',
f'TR.SharesOutstandingCommonTotal(SDate={selected_date})',
f'TR.TotalReturn(SDate={selected_date},EDate={formatted_EDate})',
]
)
except Exception as e:
print(f"Error fetching data for {selected_date}: {e}")
if hasattr(e, 'response'):
print(f"API Response: {e.response}")
return pd.DataFrame()
# Use ThreadPoolExecutor with adjusted max_workers
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
# Create ExcelWriter object outside the loop
with pd.ExcelWriter(excel_filename, engine='xlsxwriter') as writer:
for year in years:
all_dfs = []
for month_day in month_days:
for md in month_day:
selected_date = f'{year}-{md}'
selected_date_dt = pd.to_datetime(selected_date)
EDate = selected_date_dt + relativedelta(days=90)
formatted_EDate = EDate.strftime('%Y-%m-%d')
all_dates = [selected_date]
batch_size = 5 # Adjust the batch size based on the API rate limits
for i in range(0, len(all_dates), batch_size):
batch_dates = all_dates[i:i+batch_size]
future_to_date = {executor.submit(fetch_data, date, formatted_EDate): date for date in tqdm(batch_dates, desc=f'Year: {year}, Batch: {i//batch_size + 1}/{len(all_dates)//batch_size + 1}')}
for future in concurrent.futures.as_completed(future_to_date):
date = future_to_date[future]
try:
df = future.result()
if not df.empty:
df['Extract Date'] = date
df['TR-End Date'] = formatted_EDate
all_dfs.append(df)
except Exception as e:
print(f"Error processing data for {date}: {e}")
result_df = pd.concat(all_dfs, ignore_index=True)
result_df.to_excel(writer, sheet_name=str(year), index=False)
# Display a message
print(f'Data saved to {excel_filename}')
#Some stock indices codes, We add 0# plus the indices code to get the constituents
#FTSE100 - 0#.FTSE
#FTSE250 - 0#.FTMC
#FTSE350 - 0#.FTLC
#S&P500 - 0#.SPX
#EUROSTOXX - 0#.STOXXE