I have this working code to identify when is the first period that the constituent lists has data. It correctly states that the oldest constituent list avaialble for S&P 500 (list code LS&PCOMP+MMYY) is 0989 or September 1989. How do I apply this code to multiple constituent codes for around 90 indices?
from datetime import datetime
from dateutil.relativedelta import relativedelta
import pandas as pd
def find_earliest_constituent_date(index_prefix, start_mmyy):
current_date = datetime.strptime(start_mmyy, "%m%y")
last_valid_date = None
while True:
mmyy = current_date.strftime("%m%y")
ticker = f"{index_prefix}{mmyy}|L"
print(f"Checking: {ticker}")
try:
result = ds.get_data(tickers=ticker, fields=["MNEM"], kind=0)
# Check if Value column contains any actual data (not NA or empty)
if isinstance(result, pd.DataFrame) and "Value" in result.columns:
values = result["Value"].dropna().astype(str).str.strip()
if not values.eq("NA").all() and not values.eq("").all():
last_valid_date = current_date
current_date -= relativedelta(months=1)
continue
# All values are NA or empty — stop and return last valid
if last_valid_date:
earliest_mmyy = last_valid_date.strftime("%m%y")
print(f"\n✅ Earliest available constituent list: {index_prefix}{earliest_mmyy}|L")
return earliest_mmyy
else:
print("⚠️ No valid data found at all.")
return None
except Exception as e:
print(f"Error querying {ticker}: {e}")
return None
Example usage
find_earliest_constituent_date("LS&PCOMP", "0191") # Start from Jan 1991