Hi all, I am trying to retrieve a list of fields for a set of bond CUSIP codes, but I am getting inconsistent output. First, when I retrieve the data, the dates in the index do not run from 2010-01-31 to 2024-12-31 as requested; instead they start in 1968 and run until 2024. Additionally, there are lots of missing values in the DataFrame where data should be present. Can you please help me with this query? Here is the code:
import os
# Chunking parameters
chunk_size = 10  # Number of instruments to process in each chunk
sleep_time = 2  # Time (in seconds) to wait between API calls

# Backup file paths
backup_data_file = 'backup_all_data.csv'
retrieved_cusips_file = 'retrieved_cusips.csv'

# Load progress if backup files exist.
if os.path.exists(backup_data_file):
    # The backup is written with index=True from a frame whose columns have
    # two levels (the later reshaping step reads column level 1 and stacks
    # level 0). Read it back the same way; a plain read_csv would flatten the
    # header and turn the dates into an ordinary column, so frames appended
    # after a resume would no longer line up with the restored data.
    # NOTE(review): a backup written by an earlier run that was itself
    # resumed with a flat read may already be malformed - delete it and
    # start over if this read fails.
    all_data = pd.read_csv(
        backup_data_file,
        header=[0, 1],
        index_col=0,
        parse_dates=True,
    )
else:
    all_data = pd.DataFrame()

if os.path.exists(retrieved_cusips_file):
    # Force string dtype: identifiers made only of digits (or containing an
    # 'E') would otherwise be parsed as numbers and never match the entries
    # of cusip_bonds, so finished instruments would be downloaded again.
    retrieved_cusips = pd.read_csv(
        retrieved_cusips_file, dtype={'cusip': str}
    )['cusip'].tolist()
else:
    retrieved_cusips = []

# Compute remaining cusips to process, keeping the order of cusip_bonds.
# A set makes each membership test O(1) instead of scanning the list.
already_retrieved = set(retrieved_cusips)
cusips_remaining = [
    cusip for cusip in cusip_bonds if cusip not in already_retrieved
]
# Process in chunks, checkpointing to disk after every successful chunk so an
# interrupted run can be resumed.
# Ceiling division: `len // chunk_size + 1` over-counts by one whenever the
# number of remaining instruments is an exact multiple of chunk_size.
total_chunks = -(-len(cusips_remaining) // chunk_size)

for i in range(0, len(cusips_remaining), chunk_size):
    chunk = cusips_remaining[i:i + chunk_size]
    chunk_number = i // chunk_size + 1
    try:
        # Retrieve data for the current chunk.
        # NOTE(review): TR.FIIssueDate, TR.FiMaturityDate, TR.NICouponRate and
        # TR.IssueSizeUSD look like static reference fields rather than time
        # series. Requesting them through get_history may be what produces
        # index dates outside start/end and mostly-empty rows - confirm
        # against the data library documentation and consider fetching them
        # with a separate reference-data request.
        df = ld.get_history(
            universe=chunk,
            fields=[
                'TR.FIIssueDate',
                'TR.FiMaturityDate',
                'TR.NICouponRate',
                'EFF_DURTN',
                'TR.IssueSizeUSD',
                'TR.CA.AmtOutstanding',
                'TR.Volatility200D',
                'TR.OASAnalytics',
            ],
            interval="monthly",
            start='2010-01-31',
            end='2024-12-31'
        )

        # The last chunk can hold a single instrument.
        # NOTE(review): get_history is believed to return single-level
        # (field-only) columns in that case - confirm. Wrapping keeps every
        # chunk in the same two-level column layout; it is a no-op when the
        # columns are already two-level.
        if len(chunk) == 1 and not isinstance(df.columns, pd.MultiIndex):
            df = pd.concat({chunk[0]: df}, axis=1)

        # Append the new instruments as additional columns, aligned on the index.
        all_data = pd.concat([all_data, df], axis=1)

        # Update retrieved_cusips list
        retrieved_cusips.extend(chunk)

        # Save current progress
        all_data.to_csv(backup_data_file, index=True)
        pd.DataFrame({'cusip': retrieved_cusips}).to_csv(retrieved_cusips_file, index=True)

        # Display progress
        print(f"Processed chunk {chunk_number} of {total_chunks}")

        # Sleep between chunks
        time.sleep(sleep_time)
    except Exception as e:
        # Stop at the first failure; everything up to the previous chunk has
        # already been written to the backup files.
        print(f"Error occurred while processing chunk {chunk_number}: {e}")
        print("Progress saved. You can resume from the last successful chunk.")
        break
# Transform the combined DataFrame into 2D panel data format once every
# requested instrument has been retrieved. A set comparison is used instead
# of comparing lengths so that duplicates in cusip_bonds or stale entries in
# the checkpoint cannot make the check pass or fail by accident.
if set(cusip_bonds) <= set(retrieved_cusips):
    # Move column level 0 into the row index, leaving one row per
    # (index value, level-0 label) and one column per level-1 label.
    panel_data = all_data.stack(level=0)

    # Name only the two index levels and keep the column labels that stack()
    # produced. stack() can reorder the remaining columns, so assigning a
    # positional list of names taken from the original column order could
    # attach the wrong name to a column.
    panel_data.index = panel_data.index.set_names(['Date', 'cusip_bonds'])
    panel_data.columns.name = None
    panel_data = panel_data.reset_index()

    # Ensure the Date column is sorted from oldest to newest; the identifier
    # is a secondary key so the row order within a date is deterministic.
    panel_data['Date'] = pd.to_datetime(panel_data['Date'])  # Convert to datetime if not already
    panel_data = panel_data.sort_values(by=['Date', 'cusip_bonds']).reset_index(drop=True)

    # Save the final DataFrame to a CSV file
    panel_data.to_csv('final_bond_chars.csv', index=True)
    print("Data processing complete. File saved as 'final_bond_chars.csv'.")
else:
    print("Processing incomplete. Backup saved. Resume later.")
Thanks