I am trying to download depth data from Tick History Market Depth (legacy market depth) via the REST API, following this post.
As you already know, depth data is quite large, so I would like to save the files by date: instead of one giant file, I want one file per day containing the depth data for all the stocks in my list. I would also love to split by stock and then by date, but by date is fine for now. (I have put two rough sketches of what I am imagining below, one after the JSON and one after the script.)
How would I go about it? My code is below. I use Python 3.6 with PyCharm, by the way, and I am not really good with Python; I normally use SAS.
1. The JSON file that specifies the fields to download
{
  "ExtractionRequest": {
    "@odata.type": "#ThomsonReuters.Dss.Api.Extractions.ExtractionRequests.TickHistoryMarketDepthExtractionRequest",
    "ContentFieldNames": [
      "Ask Price",
      "Ask Size",
      "Bid Price",
      "Bid Size"
    ],
    "IdentifierList": {
      "@odata.type": "#ThomsonReuters.Dss.Api.Extractions.ExtractionRequests.InstrumentListIdentifierList",
      "InstrumentListId": "0x06698c5d00301db4"
    },
    "Condition": {
      "View": "NormalizedLL2",
      "NumberOfLevels": 5,
      "MessageTimeStampIn": "GmtUtc",
      "ReportDateRangeType": "Range",
      "QueryStartDate": "1996-01-01T00:00:00.000Z",
      "QueryEndDate": "2018-06-06T23:59:59.999Z",
      "DisplaySourceRIC": true
    }
  }
}
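One idea I had (I am not sure it is the right approach, so please correct me): load this JSON in Python and rewrite QueryStartDate/QueryEndDate one day at a time, so each extraction returns a single day that can go into its own file. Here is a rough, untested sketch of what I mean; daily_payloads is just a name I made up, and the date range in the commented usage is a placeholder:
# Untested sketch: narrow the request to one day per extraction,
# so each result maps naturally to one output file.
from datetime import date, timedelta
from json import load
from collections import OrderedDict

def daily_payloads(json_file, start, end):
    """Yield (day, payload) pairs with the date range narrowed to a single day."""
    with open(json_file, "r") as fh:
        payload = load(fh, object_pairs_hook=OrderedDict)
    day = start
    while day <= end:
        cond = payload["ExtractionRequest"]["Condition"]
        cond["QueryStartDate"] = day.isoformat() + "T00:00:00.000Z"
        cond["QueryEndDate"] = day.isoformat() + "T23:59:59.999Z"
        yield day, payload
        day += timedelta(days=1)

# for day, payload in daily_payloads("TickHistoricalRequest.json",
#                                    date(2018, 6, 4), date(2018, 6, 6)):
#     ExtractRaw(_token, payload)  # ExtractRaw would need to take a per-day output name
My worry with this approach is that it makes one extraction request per day, which could be slow over a long date range, so maybe splitting the big file afterwards (second sketch, after the script) is better?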
2. The code to run the request and retrieve the data:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
from json import dumps, loads, load
from requests import post
from requests import get
from time import sleep
from getpass import getpass
from getpass import GetPassWarning
from collections import OrderedDict
import os
import gzip
import pandas as pd

_outputFilePath = "./"
_outputFileName = "TestOutput"
_retryInterval = 30  # value in seconds used by the polling loop to check request status on the server
_jsonFileName = "TickHistoricalRequest.json"
def RequestNewToken(username="", password=""):
    _AuthenURL = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Authentication/RequestToken"
    _header = {}
    _header['Prefer'] = 'respond-async'
    _header['Content-Type'] = 'application/json; odata.metadata=minimal'
    _data = {'Credentials': {
        'Password': password,
        'Username': username
        }
    }
    print("Send Login request")
    resp = post(_AuthenURL, json=_data, headers=_header)
    if resp.status_code != 200:
        message = "Authentication Error Status Code: " + str(resp.status_code) + " Message:" + dumps(loads(resp.text), indent=4)
        raise Exception(str(message))
    return loads(resp.text)['value']
def ExtractRaw(token, json_payload):
    try:
        _extractRawURL = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Extractions/ExtractRaw"
        # Set up the request header
        _header = {}
        _header['Prefer'] = 'respond-async'
        _header['Content-Type'] = 'application/json; odata.metadata=minimal'
        _header['Accept-Charset'] = 'UTF-8'
        _header['Authorization'] = 'Token ' + token  # note the space after 'Token'
        # Post the HTTP request to the DSS server using the ExtractRaw URL
        resp = post(_extractRawURL, data=None, json=json_payload, headers=_header)
        # Print the status code returned in the HTTP response
        print("Status Code=" + str(resp.status_code))
        # Raise an exception with an error message if the returned status is not 202 (Accepted) or 200 (OK)
        if resp.status_code not in (200, 202):
            message = "Error: Status Code:" + str(resp.status_code) + " Message:" + resp.text
            raise Exception(message)
        # Get the location from the header; the URL must be https, so change it with a string replace
        _location = str.replace(resp.headers['Location'], "http://", "https://")
        print("Get Status from " + str(_location))
        _jobID = ""
        # Polling loop to check the request status every _retryInterval seconds
        while True:
            resp = get(_location, headers=_header)
            _pollstatus = int(resp.status_code)
            if _pollstatus == 200:
                break
            else:
                print("Status:" + str(resp.headers['Status']))
            sleep(_retryInterval)  # wait for _retryInterval seconds, then re-request the status to check whether it has completed
        # Get the JobId from the HTTP response
        json_resp = loads(resp.text)
        _jobID = json_resp.get('JobId')
        print("Status is completed, the JobID is " + str(_jobID) + "\n")
        # Check whether the response contains Notes. If notes exist, print them to the console.
        if len(json_resp.get('Notes', [])) > 0:
            print("Notes:\n======================================")
            for var in json_resp.get('Notes'):
                print(var)
            print("======================================\n")
        # The request is completed, so get the result by passing the JobId to the RawExtractionResults URL
        _getResultURL = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Extractions/RawExtractionResults('" + _jobID + "')/$value"
        print("Retrieve result from " + _getResultURL)
        resp = get(_getResultURL, headers=_header, stream=True)
        # Write the output to a file
        outputfilepath = str(_outputFilePath + _outputFileName + str(os.getpid()) + '.csv.gz')
        if resp.status_code == 200:
            with open(outputfilepath, 'wb') as f:
                f.write(resp.raw.read())
            print("Write output to " + outputfilepath + " completed\n\n")
            print("Below is sample data from " + outputfilepath)
            # Read the data back from the csv.gz and show output from DataFrame head() and tail()
            df = pd.read_csv(outputfilepath, compression='gzip')
            print(df.head())
            print("....")
            print(df.tail())
    except Exception as ex:
        print("Exception occurs:", ex)
    return
def main():
    try:
        # Request a new token
        print("Login to DSS Server")
        _DSSUsername = input('Enter DSS Username:')
        try:
            _DSSPassword = getpass(prompt='Enter DSS Password:')
            _token = RequestNewToken(_DSSUsername, _DSSPassword)
        except GetPassWarning as e:
            print(e)
        print("Token=" + _token + "\n")
        # Read the HTTP request body from the JSON file, so the request can be changed in the JSON file instead
        queryString = {}
        with open(_jsonFileName, "r") as filehandle:
            queryString = load(filehandle, object_pairs_hook=OrderedDict)
        # print(queryString)
        ExtractRaw(_token, queryString)
    except Exception as e:
        print(e)

print(__name__)
if __name__ == "__main__":
    main()
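Alternatively, if it is easier to keep one big request and split the downloaded file afterwards, this is the kind of post-processing step I had in mind. It is untested, and I am assuming the extraction CSV has a 'Date-Time' column (that is what my small test extraction shows; please correct me if yours differs); split_by_date is just a name I made up:
# Untested sketch: split the big csv.gz from ExtractRaw into one gzipped CSV per day.
import gzip
import os
import pandas as pd

def split_by_date(inputfile, outdir="./"):
    # Read in chunks so a multi-GB file does not need to fit in memory.
    for chunk in pd.read_csv(inputfile, compression='gzip', chunksize=500000):
        # 'Date-Time' values look like 2018-06-06T09:30:00.123456789Z,
        # so the first 10 characters are the date.
        chunk['date'] = chunk['Date-Time'].str[:10]
        for day, group in chunk.groupby('date'):
            outfile = os.path.join(outdir, "depth_" + day + ".csv.gz")
            # Write the header only when the file is first created
            write_header = not os.path.exists(outfile)
            # Append in text mode; gzip concatenates members, which
            # pd.read_csv can still read back as one file.
            with gzip.open(outfile, 'at') as f:
                group.drop('date', axis=1).to_csv(f, header=write_header, index=False)

# split_by_date(_outputFilePath + _outputFileName + str(os.getpid()) + '.csv.gz')
If that is roughly the right idea, I suppose splitting by stock and then by date would just mean grouping on the RIC column as well, but I wanted to check the by-date version first.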
Sorry for the long post!