"""Download daily "Trade - Open Interest" Tick History extracts from DataScope Select.

For every weekday in a date range the script authenticates, creates a
temporary Time-and-Sales report template plus an immediate schedule for one
instrument list, waits for the extraction, downloads the data and notes files,
and finally removes the temporary schedule and template again.

Usage:
    script.py <symbol> <start YYYY-MM-DD> <end YYYY-MM-DD> <credentials-json>
"""
import argparse as ap
import json
import os
import pandas as pd
import requests
import shutil
import sys
import time
import threading
from datetime import datetime, timedelta, date
from functools import partial
from io import StringIO
from multiprocessing import Pool

# Symbol name -> instrument list id (sent as "ListId" when scheduling).
# NOTE(review): the outer key looks like an account/user id -- confirm.
instrument_mapping = {
    "9028621": {
        'aapl_opt': '0x078ec919971c943c',
        'tsla_opt': '0x078fbd890eac977a',
        'goog_opt': '0x078fbbb7b70c976a',
        'amd_opt': '0x0790101cf70c98d4',
        'ba_opt': '0x07900e4afb2c98c4',
        'baba_opt': '0x0790101bc20c98d4',
        'nvda_opt': '0x079011ecda5c98ed',
        'shop_opt': '0x07900f335eac98cc',
        'googl_opt': '0x079012d5f9ec98f8',
        'fb_opt': '0x079011047ecc98e0',
        'msft_opt': '0x07900f32c5ec98cc',
        'nflx_opt': '0x07901012fe4c98d4',
        'amzn_opt': '0x07900e40ddcc98c4',
        'spy_opt': '0x079108e5ee1c9da1',
        'spxw_opt': '0x0791058803bc9d79',
    }
}

base_url = "https://selectapi.datascope.refinitiv.com/RestApi/v1/"
auth_req_url = base_url + "Authentication/RequestToken"
auth_valid_url = base_url + "Authentication/ValidateToken"
get_scheduled_url = base_url + "Extractions/Schedules"
get_report_status_url = base_url + "Extractions/ReportExtractions"
download_extracted_url = base_url + "Extractions/ExtractedFiles"
market_depth_report_url = base_url + "Extractions/TickHistoryMarketDepthReportTemplates"
intraday_summaries_report_url = base_url + "Extractions/TickHistoryIntradaySummariesReportTemplates"
historical_reference_report_url = base_url + "Extractions/HistoricalReferenceReportTemplates"
THTS_REPORT_URL = base_url + "Extractions/TickHistoryTimeAndSalesReportTemplates"

# Base headers for JSON requests.  Treated as read-only: each request works on
# its own copy so per-call headers never leak into later requests.
requestHeaders = {
    "Prefer": "respond-async",
    "Content-Type": "application/json",
}


def _auth_headers(token, extra=None):
    """Return a fresh header dict carrying *token*, optionally merged with *extra*."""
    headers = dict(requestHeaders)
    headers["Authorization"] = "token " + token
    if extra:
        headers.update(extra)
    return headers


def _abort(*details):
    """Print *details* and terminate the process with a non-zero exit status."""
    print(*details)
    sys.exit(1)


def _warn_if_failed(response, what):
    """Report a failed best-effort cleanup request without stopping the run."""
    if response.status_code >= 400:
        print(f"Warning: could not delete {what}:", response.status_code, response.text)


def _coerce_credentials(credData):
    """Turn a JSON-encoded credentials string into the object it describes.

    ``requests`` serialises a ``str`` passed via ``json=`` as a JSON string
    literal, so command-line credentials must be decoded first.  Anything that
    is not a string, or is not valid JSON, is passed through unchanged.
    """
    if isinstance(credData, str):
        try:
            return json.loads(credData)
        except ValueError:
            return credData
    return credData


def _parse_cli_date(text):
    """Parse a ``YYYY-MM-DD`` command-line argument into a ``datetime``."""
    return datetime.strptime(text, '%Y-%m-%d')


def daterange(start_date, end_date):
    '''Yield every weekday (Monday-Friday) from start_date to end_date inclusive.

    Yields nothing when end_date precedes start_date.
    '''
    for day_delta in range((end_date - start_date).days + 1):
        bdate = start_date + timedelta(day_delta)
        if 1 <= bdate.isoweekday() <= 5:
            yield bdate


def getAuthToken(credData):
    '''Request a session token; returns the "value" field of the response.

    credData may be the credentials object or a JSON string encoding it.
    Prints the status and body and exits when the response is not 200.
    '''
    # Certificate verification stays on: this request carries the credentials.
    received = requests.post(
        url=auth_req_url,
        json=_coerce_credentials(credData),
        headers=dict(requestHeaders),
    )
    if received.status_code != 200:
        _abort(received.status_code, received.text)
    return received.json()["value"]


def create_report_thts(token, name, start, end):
    '''Create a Time-and-Sales report template and return its ReportTemplateId.

    start and end are 'YYYY-MM-DD' strings; the query window runs from
    04:00:00.000 on start to 19:59:59.999 on end.
    Prints the response and exits when the status is not 201.
    '''
    odt = "#DataScope.Select.Api.Extractions.ReportTemplates.TickHistoryTimeAndSalesReportTemplate"
    req_body = {
        "@odata.type": odt,
        "ShowColumnHeaders": "true",
        "CompressionType": "GZip",
        "Name": name,
        "Headers": [],
        "Trailers": [],
        "ContentFields": [
            {"FieldName": "Trade - Open Interest"}
        ],
        "Condition": {
            "ReportDateRangeType": "Range",
            "QueryStartDate": start + 'T04:00:00.000Z',
            "QueryEndDate": end + 'T19:59:59.999Z',
            "DateRangeTimeZone": "Local Exchange Time Zone",
            "MessageTimeStampIn": "GmtUtc",
            "DisplaySourceRIC": "true",
            "ApplyCorrectionsAndCancellations": "false",
        },
    }
    received = requests.post(THTS_REPORT_URL, json=req_body, headers=_auth_headers(token))
    # Check the status before decoding: an error body is not guaranteed to be JSON.
    if received.status_code != 201:
        _abort(received.text, received.status_code)
    return received.json()["ReportTemplateId"]


def deleteReport(token, reportId):
    '''Delete the report template reportId (best effort; failures are only reported).'''
    url = base_url + "Extractions/ReportTemplates('" + reportId + "')"
    _warn_if_failed(requests.delete(url, headers=_auth_headers(token)), "report template")


def creteImmSchedule(token, name, listId, reportId):
    '''Create an immediate one-off schedule and return its ScheduleId.

    The schedule runs report template reportId against instrument list listId.
    Prints the response and exits when the status is not 201.
    '''
    requestBody = {
        "Name": name,
        "TimeZone": "UTC",
        "Recurrence": {
            "@odata.type": "#DataScope.Select.Api.Extractions.Schedules.SingleRecurrence",
            "IsImmediate": "true",
        },
        "Trigger": {
            "@odata.type": "#DataScope.Select.Api.Extractions.Schedules.ImmediateTrigger"
        },
        "ListId": listId,
        "ReportTemplateId": reportId,
    }
    r = requests.post(get_scheduled_url, json=requestBody, headers=_auth_headers(token))
    if r.status_code != 201:
        _abort(r.text, r.status_code)
    return r.json()["ScheduleId"]


def deleteSchedule(token, schedId):
    '''Delete the schedule schedId (best effort; failures are only reported).'''
    url = get_scheduled_url + "('" + schedId + "')"
    _warn_if_failed(requests.delete(url, headers=_auth_headers(token)), "schedule")


def getLatestData(token, scheduleId, poll_interval=30):
    '''Wait for the schedule's last extraction and return its ReportExtractionId.

    While the service answers 204 the request is repeated every poll_interval
    seconds.  Any final status other than 200 prints debug info and exits.
    '''
    headers = _auth_headers(token)
    url = get_scheduled_url + "('" + scheduleId + "')/LastExtraction"
    r = requests.get(url, headers=headers)
    while r.status_code == 204:
        print(f"Received 204. Waiting {poll_interval} secs")
        time.sleep(poll_interval)
        r = requests.get(url, headers=headers)
    if r.status_code != 200:
        _abort("Can't get report id. Here's some debug info: ", r.status_code, r.text)
    return r.json()["ReportExtractionId"]


def _split_report_files(entries):
    """Map the first two extraction file entries to {"file": id, "notes": id}.

    Whichever of the two has FileType "Full" in first position is the data
    file; otherwise the second entry is taken as the data file.
    """
    if len(entries) < 2:
        _abort("Expected a data file and a notes file, got: ", entries)
    first, second = entries[0], entries[1]
    if first["FileType"] == "Full":
        data, notes = first, second
    else:
        data, notes = second, first
    return {"file": data["ExtractedFileId"], "notes": notes["ExtractedFileId"]}


def getReportFiles(token, extractionId):
    '''Return {"file": <data file id>, "notes": <notes file id>} for an extraction.

    Prints debug info and exits when the listing cannot be retrieved or holds
    fewer than two files.
    '''
    url = get_report_status_url + "('" + extractionId + "')/Files"
    r = requests.get(url, headers=_auth_headers(token))
    if r.status_code != 200:
        _abort("Can't get extracted files. Here's some debug info: ", r.status_code, r.text)
    return _split_report_files(r.json()["value"])


def downloadReportFiles(token, fileId, outfile):
    '''Stream the extracted file fileId to the path outfile.

    The payload is written exactly as received (no content decoding), so a
    gzip response stays gzip on disk.  Exits without creating outfile when the
    server answers with an error status.
    '''
    headers = _auth_headers(token, {
        "Content-Type": "text/plain",
        "Accept-Encoding": "gzip",
        "X-Direct-Download": "true",
    })
    url = download_extracted_url + "('" + fileId + "')/$value"
    chunk_size = 1024
    # The context manager releases the streamed connection on every path.
    with requests.get(url, headers=headers, stream=True) as r:
        if r.status_code == 302:
            print(r)
        print(r.status_code, r.headers.get("Content-Type"))
        if r.status_code >= 400:
            _abort("Can't download extracted file. Here's some debug info: ",
                   r.status_code, r.text)
        r.raw.decode_content = False
        with open(outfile, 'wb') as fd:
            shutil.copyfileobj(r.raw, fd, chunk_size)


def multi_date_download(start_date, end_date, name, token, output_root='/REUTERS/CBOE/OI'):
    '''Extract and download one symbol's open-interest data for a date window.

    start_date and end_date are 'YYYY-MM-DD' strings; output files and the
    output directory are named after start_date.  Nothing is done when
    start_date falls on a weekend.  Files are written to
    <output_root>/<YYYYMMDD>/<YYYYMMDD>_<name>[_notes].csv.gz.

    Raises KeyError for a name missing from instrument_mapping, before any
    remote object has been created.
    '''
    # Drop the two separators of 'YYYY-MM-DD' to get 'YYYYMMDD'.
    dt_dwnld = start_date[0:4] + start_date[5:7] + start_date[8:]
    print(dt_dwnld)
    if datetime.strptime(dt_dwnld, '%Y%m%d').weekday() >= 5:  # 5 Sat, 6 Sun
        print("Weekend")
        return
    print("Weekday")

    # Resolve the instrument list first so a bad symbol leaves nothing behind.
    listId = instrument_mapping["9028621"][name]

    base_dir = os.path.join(output_root, dt_dwnld)
    os.makedirs(base_dir, exist_ok=True)
    data_path = os.path.join(base_dir, dt_dwnld + "_" + name + ".csv.gz")
    notes_path = os.path.join(base_dir, dt_dwnld + "_" + name + "_notes" + ".csv.gz")

    print(f"{name} {dt_dwnld} Start-Time: {datetime.now()}")
    reportTemplate = create_report_thts(
        token, 'thts_OI_' + name + '_' + dt_dwnld, start_date, end_date)
    schedId = None
    try:
        schedId = creteImmSchedule(token, dt_dwnld + "_" + name + "_OI", listId, reportTemplate)
        extractionId = getLatestData(token, schedId)
        extractedFiles = getReportFiles(token, extractionId)
        downloadReportFiles(token, extractedFiles["file"], data_path)
        downloadReportFiles(token, extractedFiles["notes"], notes_path)
    finally:
        # Always remove the temporary schedule and template, even when a step
        # above failed or exited.
        if schedId is not None:
            deleteSchedule(token, schedId)
        deleteReport(token, reportTemplate)
    print(f"{name} {dt_dwnld} End-Time: {datetime.now()}")


def main():
    '''Command-line entry point: download one symbol for each weekday in a range.'''
    print(f"Start-Time: {datetime.now()}")
    parser = ap.ArgumentParser(
        description="Download open-interest extracts for one symbol over a date range.")
    parser.add_argument("symbol", metavar="symbol",
                        choices=sorted(instrument_mapping["9028621"]),
                        help="key of instrument_mapping, e.g. spy_opt")
    parser.add_argument("start_date", type=_parse_cli_date, help="first day, YYYY-MM-DD")
    parser.add_argument("end_date", type=_parse_cli_date, help="last day, YYYY-MM-DD")
    parser.add_argument("credentials", help="credentials for the token request, as JSON")
    args = parser.parse_args()

    for dt_obj in daterange(args.start_date, args.end_date):
        # A fresh token is requested for every day that is processed.
        token = getAuthToken(args.credentials)
        day = dt_obj.strftime('%Y-%m-%d')
        multi_date_download(day, day, args.symbol, token)
    print(f"End-Time: {datetime.now()}")


if __name__ == "__main__":
    main()