import requests
import json
import shutil
import time


#Step 1: request an authentication token

class Credentials(object):
    requestUrl = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Authentication/RequestToken"
    requestHeaders = {
        "Prefer": "respond-async",
        "Content-Type": "application/json"
    }
    requestBody = {
        "Credentials": {
            "Username": "myUserName",
            "Password": "myPassword"
        }
    }

    def __init__(self, uname, pwd):
        self.requestBody['Credentials']['Username'] = uname
        self.requestBody['Credentials']['Password'] = pwd


usr = 'username'
pwd = 'password'
c = Credentials(usr, pwd)
filePath = "C:/Users/kauffn/Downloads/trth/"
fileNameRoot = "Python_Test"

r1 = requests.post(c.requestUrl, json=c.requestBody, headers=c.requestHeaders)

if r1.status_code == 200:
    jsonResponse = r1.json()
    token = jsonResponse["value"]
    print('Authentication token (valid 24 hours):')
    print(token)
else:
    print('Please replace myUserName and myPassword with valid credentials, then repeat the request')


# In[6]:

#Step 2: send an on demand extraction request using the received token
requestUrl = 'https://hosted.datascopeapi.reuters.com/RestApi/v1/StandardExtractions/Packages'
requestHeaders = {
    "Prefer": "respond-async",
    "Content-Type": "application/json",
    "charset": "utf-8",
    "Authorization": "token " + token
}
r2 = requests.post(requestUrl, headers=requestHeaders)

#Display the response status, and the location URL to use to get the status of the extraction request
#The initial response status (after a wait of approximately 30 seconds) will be 202
print(r2.status_code)
#print(r2.headers["location"])


# In[7]:

#Step 3: poll the status of the request using the received location URL, and get the jobId and extraction notes
#When the status of the request is 200 the extraction is complete; we display the jobId and the extraction notes
requestUrl = r2.headers["location"]
r3 = requests.get(requestUrl, headers=requestHeaders)

print('response status = ' + str(r3.status_code))
while r3.status_code == 202:
    print('As we received a 202, we wait 30 seconds, then poll again (until we receive a 200)')
    time.sleep(30)
    r3 = requests.get(requestUrl, headers=requestHeaders)
    print('response status = ' + str(r3.status_code))
if r3.status_code == 200:
    r3Json = r3.json()
    jobId = r3Json["JobId"]
    print('\njobId: ' + jobId + '\n')
    notes = r3Json["Notes"]
    print('Extraction notes:\n' + notes[0])
if r3.status_code != 200:
    print('There was an error. Try running this cell again.\nIf it fails, re-run the previous cell.\n')
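# In[ ]:

#Aside: the 202/location polling pattern of step 3 can be wrapped in a small reusable helper
#with an upper time bound, so the loop cannot spin forever if the extraction never completes.
#This is a minimal sketch, not part of the original tutorial: the function name
#poll_until_complete and the wait/timeout parameters are our own additions, and it assumes
#the same requests session-less calls and headers used in steps 2 and 3.

def poll_until_complete(location_url, headers, wait=30, timeout=600):
    """Poll an async DSS request until it stops returning 202, or give up after timeout seconds."""
    waited = 0
    response = requests.get(location_url, headers=headers)
    while response.status_code == 202 and waited < timeout:
        time.sleep(wait)
        waited += wait
        response = requests.get(location_url, headers=headers)
    return response

#Example usage, equivalent to the step 3 loop above:
#r3 = poll_until_complete(r2.headers["location"], requestHeaders)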
# In[9]:

#Step 5: get the extraction results, using the received jobId, and save the compressed data to disk
#This is a variant of step 4, where we save the data in compressed format on disk, without displaying it
requestUrl = "https://hosted.datascopeapi.reuters.com/RestApi/v1/Extractions/RawExtractionResults" + "('" + jobId + "')" + "/$value"
requestHeaders = {
    "Prefer": "respond-async",
    "Content-Type": "text/plain",
    "Accept-Encoding": "gzip",
    "Authorization": "token " + token
}
r5 = requests.get(requestUrl, headers=requestHeaders, stream=True)
#Ensure we do not automatically decompress the data on the fly:
r5.raw.decode_content = False

print('Response headers for content: type: ' + r5.headers["Content-Type"] +
      ' - encoding: ' + r5.headers["Content-Encoding"] + '\n')

#The next 2 lines would display some of the compressed data, but uncommenting them makes
#the save to file fail, because they consume part of the raw stream:
#print('100 bytes of compressed data:')
#print(r5.raw.read(100))

fileName = filePath + fileNameRoot + ".csv.gz"
chunk_size = 1024
rr = r5.raw
with open(fileName, 'wb') as fd:
    shutil.copyfileobj(rr, fd, chunk_size)
print('Saved compressed data to file: ' + fileName)
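# In[ ]:

#Aside: once the compressed file is on disk it can be inspected with the standard library,
#without re-downloading. This is a minimal sketch, not part of the original tutorial;
#it assumes fileName still points at the .csv.gz saved in step 5 and that the payload
#is CSV, as the file extension suggests.

import gzip
import csv

with gzip.open(fileName, mode='rt', newline='') as f:
    reader = csv.reader(f)
    for i, row in enumerate(reader):
        print(row)
        if i >= 4:  #only show the header and the first few data rows
            break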