Why is this not working?

import os, time, pandas as pd
import refinitiv.data as rd
from refinitiv.data.content import filings as rfil

# --- Configuration -----------------------------------------------------------
# Input CSV that main() loads with pandas; it must contain the two columns
# named below.
CSV_FILE_PATH = "/Users/prince/Downloads/nwsheet.csv"
# Column holding the display name; main() derives the per-company folder name
# from it.
COMPANY_NAME_COLUMN = "Company Name"
COMPANY_ID_COLUMN = "Identifier" # RICs like AAPL.OQ, RELI.NS, etc.
# Root folder for downloads; one sub-folder per company is created beneath it.
DOWNLOAD_DIR = "/Users/prince/Downloads/x"
# Inclusive range of years that main() iterates over for every company.
START_YEAR, END_YEAR = 2015, 2023
# Passed to rd.open_session(app_key=...). Avoid committing a real key.
LSEG_API_KEY = "" # or set env var RD_APP_KEY
# Value interpolated into the MidLevelCategory term of the search query.
# NOTE(review): the mapping 1 == "Annual" is the author's assumption - confirm
# against the Filings API documentation.
ANNUAL_REPORT_CATEGORY_ID = 1 # MidLevelCategory: 1 == "Annual"

def ensure_dir(p):
    """Create directory ``p`` (and any missing parents) if it does not exist.

    Args:
        p: Path of the directory to create.

    Raises:
        OSError: If ``p`` exists but is not a directory, or cannot be created.
    """
    # exist_ok=True removes the check-then-create race of the original
    # ``if not os.path.exists(p): os.makedirs(p)`` pattern.
    os.makedirs(p, exist_ok=True)

def filings_query(ric: str, y: int) -> str:
    """Build the search expression for one instrument and one calendar year.

    Args:
        ric: Instrument code inserted after ``RIC:``.
        y: Calendar year; the range spans ``y-01-01`` through ``y-12-31``.

    Returns:
        A single string of ``field:value`` terms joined with ``AND``,
        restricted to ``ANNUAL_REPORT_CATEGORY_ID``.
    """
    # NOTE(review): this field:value/AND syntax and the field names (RIC,
    # DateFrom, DateTo, MidLevelCategory) are the author's assumption. The
    # published Filings examples use a GraphQL expression instead - confirm
    # that the endpoint accepts this form.
    return (
        f'RIC:{ric} AND DateFrom:{y}-01-01 AND DateTo:{y}-12-31 '
        f'AND MidLevelCategory:{ANNUAL_REPORT_CATEGORY_ID}'
    )

def _download_annual_report(ric, year, company_dir, safe):
    """Search for one company-year and save the first matching document.

    Prints a one-line status for every outcome. Exceptions raised by the
    library or the file system propagate to the caller.

    Args:
        ric: Instrument code used in the search query.
        year: Calendar year to search.
        company_dir: Existing directory the file is written into.
        safe: File-system-safe company name used as the file-name prefix.
    """
    query = filings_query(ric, year)

    # 1) Search the Filings catalog.
    # NOTE(review): the documented refinitiv.data filings interface goes
    # through filings.search.Definition(...).get_data() and
    # filings.retrieval.Definition(...). Confirm that search() and
    # get_document() are callable in the installed version; if they are not,
    # every year will report an error here.
    search_resp = rfil.search(query=query, top=5)
    results = getattr(search_resp, "data", None)
    df_res = getattr(results, "df", pd.DataFrame())

    if df_res is None or df_res.empty:
        print(f"  {year}: no annual report found")
        return

    # Pick the first candidate (filter further on title/language if needed).
    row = df_res.iloc[0]
    # Column naming is assumed to differ across library versions, so both
    # spellings are tried.
    doc_id = row.get("documentId") or row.get("DocumentId")
    title = row.get("documentTitle") or row.get("DocumentTitle") or "Annual Report"

    if not doc_id:
        print(f"  {year}: search returned rows without documentId")
        return

    out_path = os.path.join(company_dir, f"{safe}_Annual_Report_{year}.pdf")

    # 2) Download the document binary; the payload attribute is assumed to be
    # .data.raw or .data.content depending on the library version.
    doc_resp = rfil.get_document(document_id=doc_id)
    binary = getattr(doc_resp.data, "raw", None) or getattr(doc_resp.data, "content", None)

    if not binary:
        print(f"  {year}: could not retrieve document content for {doc_id}")
        return

    with open(out_path, "wb") as f:
        f.write(binary)
    print(f"  {year}: downloaded '{title}' -> {out_path}")


def main():
    """Download one annual-report document per company per year.

    Reads the company list from ``CSV_FILE_PATH``. For every identifier and
    every year from ``START_YEAR`` to ``END_YEAR`` (inclusive) it searches the
    filings catalog and writes the first hit under
    ``DOWNLOAD_DIR/<company>/``. A failure for one year is printed and does
    not stop the remaining years or companies. The data session is closed on
    every exit path.
    """
    rd.open_session(app_key=LSEG_API_KEY)  # or just rd.open_session() if RD_APP_KEY env var set
    try:
        ensure_dir(DOWNLOAD_DIR)

        try:
            df = pd.read_csv(CSV_FILE_PATH)
            ids = df[COMPANY_ID_COLUMN].astype(str).tolist()
            names = df[COMPANY_NAME_COLUMN].astype(str).tolist()
        except FileNotFoundError:
            print(f"CSV not found: {CSV_FILE_PATH}")
            return
        except KeyError as e:
            print(f"Missing column in CSV: {e}")
            return

        print(f"Found {len(ids)} companies")

        for ric, name in zip(ids, names):
            # astype(str) turns a missing cell into the literal string "nan".
            if not ric or ric == "nan":
                continue

            safe = name.replace(" ", "_").replace("/", "_")
            company_dir = os.path.join(DOWNLOAD_DIR, safe)
            ensure_dir(company_dir)
            print(f"\nProcessing {name} ({ric})")

            for year in range(START_YEAR, END_YEAR + 1):
                try:
                    _download_annual_report(ric, year, company_dir, safe)
                except Exception as e:
                    # Best effort: report the failure and move on.
                    print(f"  {year}: error: {e}")

                # Throttle after every attempt, including the ones that
                # found nothing, since a request was still made.
                time.sleep(0.75)
    finally:
        # Close the session even on the early returns above.
        # NOTE(review): confirm rd.get_default_session exists in the installed
        # library version.
        sess = rd.get_default_session()
        if sess and getattr(sess, "is_open", False):
            rd.close_session()

# Run the downloader only when this file is executed as a script, not when it
# is imported. The forum markup stripped the double underscores from the
# original guard, which made it raise NameError.
if __name__ == "__main__":
    main()

Answers

  • Jirapongse
    Jirapongse ✭✭✭✭✭

    @Prince

    Thank you for reaching out to us.

    You can refer to the sample code in the article "Using AI modeling to interpret 10-Q filings". It uses a GraphQL expression.

    To the best of my knowledge, the Filing APIs are compatible with the Platform session. They may not function properly in the Desktop session due to permission limitations.

    You can reach out directly to your LSEG account or sales team to verify your permissions.