# coding: utf-8

# In[2]:

#====================================================================================
#On Demand request demo code, using the following steps:
# - Authentication token request
# - On Demand extraction request
# - Extraction status polling request
# - Extraction notes retrieval
# - Data retrieval and save to disk (the data file is gzipped)
# Includes AWS download capability
#====================================================================================

#Set these parameters before running the code:
filePath = "/Users/u8007966/Downloads/"  #Location to save downloaded files
fileNameRoot = "Python_Test"             #Root of the name for the downloaded files
myUsername = "9008163"
myPassword = "Myscope123"
useAws = True
#Set the last parameter above to:
# False to download from TRTH servers
# True to download from Amazon Web Services cloud (recommended, it is faster)

#Imports:
import requests
import json
import shutil
import time
import urllib3
import gzip

#====================================================================================
#Step 1: token request

requestUrl = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Authentication/RequestToken"

requestHeaders = {
    "Prefer": "respond-async",
    "Content-Type": "application/json"
}

requestBody = {
    "Credentials": {
        "Username": myUsername,
        "Password": myPassword
    }
}

r1 = requests.post(requestUrl, json=requestBody, headers=requestHeaders)

if r1.status_code == 200:
    jsonResponse = json.loads(r1.text.encode('ascii', 'ignore'))
    token = jsonResponse["value"]
    print ('Authentication token (valid 24 hours):')
    print (token)
else:
    print ('Replace myUsername and myPassword with valid credentials, then repeat the request')


# In[7]:

#Step 2: send an on demand extraction request using the received token

requestUrl = 'https://hosted.datascopeapi.reuters.com/RestApi/v1/Extractions/ExtractRaw'

requestHeaders = {
    "Prefer": "respond-async",
    "Content-Type": "application/json",
    "Authorization": "token " + token
}

requestBody = {
    "ExtractionRequest": {
        "@odata.type": "#ThomsonReuters.Dss.Api.Extractions.ExtractionRequests.TickHistoryIntradaySummariesExtractionRequest",
        "ContentFieldNames": [
            "Open", "High", "Low", "Last", "Volume", "No. Trades",
            "Open Bid", "High Bid", "Low Bid", "Close Bid", "No. Bids",
            "Open Ask", "High Ask", "Low Ask", "Close Ask", "No. Asks"
        ],
        "IdentifierList": {
            "@odata.type": "#ThomsonReuters.Dss.Api.Extractions.ExtractionRequests.InstrumentIdentifierList",
            "InstrumentIdentifiers": [{
                "Identifier": "EHLLIEUR09",
                "IdentifierType": "Ric"
            }],
            "UseUserPreferencesForValidationOptions": "false"
        },
        "Condition": {
            "MessageTimeStampIn": "GmtUtc",
            "ReportDateRangeType": "Range",
            "QueryStartDate": "2018-09-16T00:00:00.000Z",
            "QueryEndDate": "2018-09-20T23:59:59.999Z",
            "SummaryInterval": "OneHour",
            "TimebarPersistence": "true",
            "DisplaySourceRIC": "true"
        }
    }
}

r2 = requests.post(requestUrl, json=requestBody, headers=requestHeaders)

#Display the HTTP status of the response.
#The initial response status (after a wait of approximately 30 seconds) is usually 202.
status_code = r2.status_code
print ("HTTP status of the response: " + str(status_code))
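#------------------------------------------------------------------------------------
#Optional: the token request of step 1 can be wrapped in a small helper, which is handy
#if you want to re-authenticate from other scripts. This is only a sketch reusing the
#same RequestToken endpoint as step 1; the function name get_auth_token is our own,
#not part of the API.
def get_auth_token(username, password):
    url = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Authentication/RequestToken"
    headers = {"Prefer": "respond-async", "Content-Type": "application/json"}
    body = {"Credentials": {"Username": username, "Password": password}}
    resp = requests.post(url, json=body, headers=headers)
    #Raise an exception for non-200 responses instead of printing a message:
    resp.raise_for_status()
    return resp.json()["value"]

#Example usage (commented out, the demo keeps the inline version of step 1):
#token = get_auth_token(myUsername, myPassword)
#------------------------------------------------------------------------------------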
# In[8]:

#Step 3: if required, poll the status of the request using the received location URL.
#Once the request has completed, retrieve the jobId and extraction notes.

#If status is 202, display the location URL we received, which we will use to poll the
#status of the extraction request:
if status_code == 202:
    requestUrl = r2.headers["location"]
    print ('Extraction is not complete, we shall poll the location URL:')
    print (str(requestUrl))

    requestHeaders = {
        "Prefer": "respond-async",
        "Content-Type": "application/json",
        "Authorization": "token " + token
    }

#As long as the status of the request is 202, the extraction is not finished;
#we must wait, and poll the status until it is no longer 202:
r3 = r2  #If the initial response was already 200, the results are in r2 itself
while (status_code == 202):
    print ('As we received a 202, we wait 30 seconds, then poll again (until we receive a 200)')
    time.sleep(30)
    r3 = requests.get(requestUrl, headers=requestHeaders)
    status_code = r3.status_code
    print ('HTTP status of the response: ' + str(status_code))

#When the status of the request is 200 the extraction is complete;
#we retrieve and display the jobId and the extraction notes
#(it is recommended to analyse their content):
if status_code == 200:
    r3Json = json.loads(r3.text.encode('ascii', 'ignore'))
    jobId = r3Json["JobId"]
    print ('\njobId: ' + jobId + '\n')
    notes = r3Json["Notes"]
    print ('Extraction notes:\n' + notes[0])

#If instead of a status 200 we receive a different status, there was an error:
if status_code != 200:
    print ('An error occurred. Try to run this cell again. If it fails, re-run the previous cell.\n')
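#------------------------------------------------------------------------------------
#Optional: the polling loop above waits indefinitely. For unattended runs you may prefer
#a small helper that gives up after a maximum number of attempts. This is only a sketch
#reusing the same location URL and headers as step 3; the function name poll_extraction
#and the maxAttempts and waitSeconds parameters are our own, not part of the API.
def poll_extraction(locationUrl, headers, maxAttempts=60, waitSeconds=30):
    for attempt in range(maxAttempts):
        resp = requests.get(locationUrl, headers=headers)
        if resp.status_code != 202:
            return resp  #200 (complete) or an error status: stop polling
        time.sleep(waitSeconds)
    raise TimeoutError('Extraction still not complete after ' + str(maxAttempts) + ' polls')

#Example usage (commented out, the demo keeps the inline loop above):
#r3 = poll_extraction(r2.headers["location"], requestHeaders)
#------------------------------------------------------------------------------------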
# In[9]:

#Step 4: get the extraction results, using the received jobId.
#Decompress the data and display it on screen.
#Skip this step if you asked for a large data set, and go directly to step 5 !
#We also save the data to disk; but note that if you use AWS it will be saved as a GZIP,
#otherwise it will be saved as a CSV !
#This discrepancy occurs because we allow automatic decompression to happen when retrieving
#from TRTH, so we end up saving the decompressed contents.

#IMPORTANT NOTE:
#The code in this step is only for demo, to display some data on screen.
#Avoid using this code in production, it will fail for large data sets !
#See step 5 for production code.

requestUrl = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Extractions/RawExtractionResults" + "('" + jobId + "')" + "/$value"

#AWS requires an additional header: X-Direct-Download
if useAws:
    requestHeaders = {
        "Prefer": "respond-async",
        "Content-Type": "text/plain",
        "Accept-Encoding": "gzip",
        "X-Direct-Download": "true",
        "Authorization": "token " + token
    }
else:
    requestHeaders = {
        "Prefer": "respond-async",
        "Content-Type": "text/plain",
        "Accept-Encoding": "gzip",
        "Authorization": "token " + token
    }

r4 = requests.get(requestUrl, headers=requestHeaders)

if useAws:
    print ('Content response headers (AWS server): type: ' + r4.headers["Content-Type"] + '\n')
    #AWS does not set header Content-Encoding="gzip", so the requests call does not decompress.
    #We therefore decompress using a separate call (to the gzip library):
    uncompressedData = gzip.decompress(r4.content).decode("utf-8")
    #We save the original compressed data (to save space):
    fileName = filePath + fileNameRoot + ".step4.csv.gz"
    print ('Saving compressed data to file: ' + fileName + ' ... please be patient')
else:
    print ('Content response headers (TRTH server): type: ' + r4.headers["Content-Type"] +
           ' - encoding: ' + r4.headers["Content-Encoding"] + '\n')
    #The requests call automatically decompresses the data, if header Content-Encoding="gzip".
    uncompressedData = r4.text
    #We save the uncompressed data (because it was automatically decompressed):
    fileName = filePath + fileNameRoot + ".step4.csv"
    print ('Saving uncompressed data to file: ' + fileName + ' ... please be patient')

#Save to file (the with statement closes the file for us):
with open(fileName, 'wb') as fd:
    for chunk in r4.iter_content(chunk_size=1024):
        fd.write(chunk)
print ('Finished saving data to file: ' + fileName + '\n')

#Display data:
print ('Decompressed data:\n' + uncompressedData)

#Note: variable uncompressedData stores all the data.
#This is not a good practice, as it can lead to issues with large data sets.
#We only use it here as a convenience for the demo, to keep the code very simple.


# In[10]:

#Step 5: get the extraction results, using the received jobId.
#We also save the compressed data to disk, as a GZIP.
#We only display a few lines of the data.

#IMPORTANT NOTE:
#This code is much better than that of step 4; it should not fail even with large data sets.
#If you need to manipulate the data, read and decompress the file, instead of decompressing
#data from the server on the fly.
#This is the recommended way to proceed, to avoid data loss issues.
#For more information, see the related document:
#  Advisory: avoid incomplete output - decompress then download

requestUrl = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Extractions/RawExtractionResults" + "('" + jobId + "')" + "/$value"

#AWS requires an additional header: X-Direct-Download
if useAws:
    requestHeaders = {
        "Prefer": "respond-async",
        "Content-Type": "text/plain",
        "Accept-Encoding": "gzip",
        "X-Direct-Download": "true",
        "Authorization": "token " + token
    }
else:
    requestHeaders = {
        "Prefer": "respond-async",
        "Content-Type": "text/plain",
        "Accept-Encoding": "gzip",
        "Authorization": "token " + token
    }

r5 = requests.get(requestUrl, headers=requestHeaders, stream=True)
#Ensure we do not automatically decompress the data on the fly:
r5.raw.decode_content = False

if useAws:
    print ('Content response headers (AWS server): type: ' + r5.headers["Content-Type"] + '\n')
    #AWS does not set header Content-Encoding="gzip".
else:
    print ('Content response headers (TRTH server): type: ' + r5.headers["Content-Type"] +
           ' - encoding: ' + r5.headers["Content-Encoding"] + '\n')

#The next 2 lines display some of the compressed data, but if you uncomment them the save to file fails:
#print ('20 bytes of compressed data:')
#print (r5.raw.read(20))

fileName = filePath + fileNameRoot + ".step5.csv.gz"
print ('Saving compressed data to file: ' + fileName + ' ... please be patient')

chunk_size = 1024
rr = r5.raw
with open(fileName, 'wb') as fd:
    shutil.copyfileobj(rr, fd, chunk_size)
print ('Finished saving compressed data to file: ' + fileName + '\n')

#Now let us read and decompress the file we just created.
#For the demo we limit the treatment to a few lines:
maxLines = 10
print ('Read data from file, and decompress at most ' + str(maxLines) + ' lines of it:')

uncompressedData = ""
count = 0
with gzip.open(fileName, 'rb') as fd:
    for line in fd:
        dataLine = line.decode("utf-8")
        #Do something with the data:
        print (dataLine)
        uncompressedData = uncompressedData + dataLine
        count += 1
        if count >= maxLines:
            break

#Note: variable uncompressedData stores all the data.
#This is not a good practice, as it can lead to issues with large data sets.
#We only use it here as a convenience for the next step of the demo, to keep the code very simple.
#In production one would handle the data line by line (as we do with the screen display).
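#------------------------------------------------------------------------------------
#Optional: as noted above, in production the saved GZIP would be processed line by line
#instead of being accumulated in memory. This is only a sketch of that pattern, reusing
#the file produced in step 5; the function name process_gzip_csv and its handle_line
#callback parameter are our own, not part of the API.
def process_gzip_csv(gzipFileName, handle_line):
    lineCount = 0
    with gzip.open(gzipFileName, 'rt', encoding='utf-8') as fd:
        for dataLine in fd:
            handle_line(dataLine)  #Apply the caller's treatment to one line at a time
            lineCount += 1
    return lineCount

#Example usage (commented out): count the lines without keeping them in memory.
#total = process_gzip_csv(filePath + fileNameRoot + ".step5.csv.gz", lambda line: None)
#print ('Total lines in the extraction: ' + str(total))
#------------------------------------------------------------------------------------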
# In[11]:

#Step 6 (cosmetic): format the response received in step 4 or 5 using a pandas DataFrame
from io import StringIO
import pandas as pd

timeSeries = pd.read_csv(StringIO(uncompressedData))
timeSeries


# In[ ]:
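#------------------------------------------------------------------------------------
#Optional: once the data is in a DataFrame you can work with it directly. This is only
#a sketch; it assumes the CSV column headers match the field names requested in step 2
#(e.g. "Open", "High", "Low", "Last"), which you should verify against your own output.
ohlcColumns = ["Open", "High", "Low", "Last"]
if all(col in timeSeries.columns for col in ohlcColumns):
    #Summary statistics of the hourly bars (count, mean, min, max, quartiles):
    print (timeSeries[ohlcColumns].describe())
else:
    print ('Expected OHLC columns not found; check the column names in timeSeries.columns')
#------------------------------------------------------------------------------------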