# Request an authentication token. The token is sent in the "Authorization"
# header of every later request in this script.
requestUrl = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Authentication/RequestToken"

requestHeaders = {
    "X-Direct-Download": "true",
    "Prefer": "respond-async",
    "Content-Type": "application/json",
}

# NOTE(review): `xxx` is referenced as a bare name for the user name while the
# password is the literal string "xxx" - both look like redacted placeholders.
# Confirm, and prefer loading real credentials from outside the source file.
requestBody = {
    "Credentials": {
        "Username": xxx,
        "Password": "xxx",
    }
}

r1 = requests.post(requestUrl, json=requestBody, headers=requestHeaders)

if r1.status_code == 200:
    # Non-ASCII characters are dropped from the response text before parsing.
    jsonResponse = json.loads(r1.text.encode('ascii', 'ignore'))
    token = jsonResponse["value"]
    print('Authentication token (valid 24 hours):')
    print(token)
else:
    print('Replace myUserName and myPassword with valid credentials, then repeat the request')
    # Every later request needs the token; stop here instead of continuing
    # with an undefined (or stale) `token`.
    raise RuntimeError('Authentication failed with HTTP status ' + str(r1.status_code))

# Step 2: send an on demand extraction request using the received token.
requestUrl = 'https://hosted.datascopeapi.reuters.com/RestApi/v1/Extractions/ExtractRaw'

requestHeaders = {
    "X-Direct-Download": "true",
    "Prefer": "respond-async",
    "Content-Type": "application/json",
    "Authorization": "token " + token,
}

requestBody = {
    "ExtractionRequest": {
        "@odata.type": "#ThomsonReuters.Dss.Api.Extractions.ExtractionRequests.TickHistoryRawExtractionRequest",
        "IdentifierList": {
            "@odata.type": "#ThomsonReuters.Dss.Api.Extractions.ExtractionRequests.InstrumentIdentifierList",
            "InstrumentIdentifiers": generateChainRICs(sd, nc, bt, dTable),
            "ValidationOptions": {
                "AllowHistoricalInstruments": "true",
            },
        },
        "Condition": {
            # To be linked to the instrument's time zone - a mapping table is needed.
            "MessageTimeStampIn": "LocalExchangeTime",
            # Disabled option kept for reference:
            # "ApplyCorrectionsAndCancellations": "false",
            "ReportDateRangeType": "Range",
            # Start/end are built from user-defined variables: a date string
            # plus a time suffix, e.g. "T18:00:00.000Z" / "T17:00:00.000Z".
            "QueryStartDate": sd + openString,
            "QueryEndDate": ed + closeString,
            "ExtractBy": "Ric",
            "SortBy": "SingleByRic",
            "DomainCode": "MarketPrice",
            "Fids": fi,  # e.g. "70"
            "DateRangeTimeZone": "UTC",
            "DisplaySourceRIC": "true",
        },
    }
}

r2 = requests.post(requestUrl, json=requestBody, headers=requestHeaders)

# Step 3: poll the status of the request using the received location URL.
# Once the request has completed, retrieve the jobId.
requestHeaders = {
    "X-Direct-Download": "true",
    "Prefer": "respond-async",
    "Content-Type": "application/json",
    "Authorization": "token " + token,
}

if r2.status_code == 202:
    # The extraction is still running: the "location" header is the URL to poll.
    requestUrl = r2.headers["location"]
    r3 = requests.get(requestUrl, headers=requestHeaders)
    while r3.status_code == 202:
        print('As we received a 202, we wait 900 seconds, then poll again (until we receive a 200)')
        time.sleep(900)
        r3 = requests.get(requestUrl, headers=requestHeaders)
else:
    # No polling is possible without a 202 (there is no "location" header to
    # read). Treat the POST response itself as the final response: a 200 is
    # parsed below, anything else is reported as an error.
    # NOTE(review): assumes an immediate 200 carries the same "JobId" payload
    # as a polled 200 - confirm against the API documentation.
    r3 = r2

if r3.status_code == 200:
    r3Json = json.loads(r3.text.encode('ascii', 'ignore'))
    jobId = r3Json["JobId"]
else:
    print('An error occured. Try to run this cell again. If it fails, re-run the previous cell.\n')
    # The results URL below is built from jobId; stop instead of continuing
    # with an undefined (or stale) value.
    raise RuntimeError('Extraction request failed with HTTP status ' + str(r3.status_code))

# Step 5: get the extraction results, using the received jobId.
# The response is requested as a stream and left GZIP-compressed (automatic
# decompression is switched off below).
# IMPORTANT NOTE:
# If you need to manipulate the data, save the compressed data to disk, then
# read and decompress the file, instead of decompressing data from the server
# on the fly. This is the recommended way to proceed, to avoid data loss issues.
# For more information, see the related document:
#     Advisory: avoid incomplete output - decompress then download
requestUrl = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Extractions/RawExtractionResults" + "('" + jobId + "')" + "/$value"

print('Job ID')
print(jobId)

# Headers shared by both download paths.
requestHeaders = {
    "Prefer": "respond-async",
    "Content-Type": "text/plain",
    "Accept-Encoding": "gzip",
    "Authorization": "token " + token,
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.143 Safari/537.36',
}
if useAws:
    # AWS requires an additional header: X-Direct-Download
    requestHeaders["X-Direct-Download"] = "true"
    requestHeaders["X-Client-Session-Id"] = "Direct AWS"
else:
    requestHeaders["X-Client-Session-Id"] = "From DSS"

r5 = requests.get(requestUrl, headers=requestHeaders, stream=True)
# NOTE(review): the purpose of this short pause is not evident from the code;
# it is kept as in the original.
time.sleep(3)

# Ensure we do not automatically decompress the data on the fly:
r5.raw.decode_content = False

# Diagnostic output only: fall back to 'unknown' instead of raising KeyError
# when the server omits a header.
if useAws:
    # AWS does not set header Content-Encoding="gzip".
    print('Content response headers (AWS server): type: ' + r5.headers.get("Content-Type", "unknown") + '\n')
else:
    print('Content response headers (TRTH server): type: ' + r5.headers.get("Content-Type", "unknown") + ' - encoding: ' + r5.headers.get("Content-Encoding", "unknown") + '\n')