# downloader.py
"""Download daily adjusted stock data from Alpha Vantage into one DataFrame.

Public entry points are ``downloadFromDict`` and ``downloadFromCSV``; both
build a table of ``Symbol``/``Name`` pairs and hand it to ``download``, which
fetches every symbol and outer-merges the per-stock frames on ``Date``.

Errors are reported by printing a ``> Error: ...`` line and raising
``SystemExit`` (the original code called ``exit()``).
"""

# Imports
import time
from datetime import date

import numpy as np
import pandas as pd
import requests

# Global variables
hideoutput = False

# Endpoint and response key used by getStockValues.
_API_URL = "https://www.alphavantage.co/query"
_TIME_SERIES_KEY = "Time Series (Daily)"
# Seconds to wait for the HTTP server before giving up on a request.
_REQUEST_TIMEOUT = 30
# Pause between requests when no premium key is used.
_FREE_TIER_SLEEP = 15
# Rough per-stock download time used only for the progress estimate.
_SECONDS_PER_DOWNLOAD = 5
# Column suffixes, assigned by position to the fields of each daily record.
# NOTE(review): the order is taken from the original rename list; confirm it
# still matches the field order the API returns.
_COLUMN_SUFFIXES = (
    "Open",
    "High",
    "Low",
    "CloseNonAdjusted",
    "Close",
    "Volume",
    "Dividend",
    "Coefficient",
)
_REQUIRED_COLUMNS = ("Symbol", "Name")


# Helper functions
def _fail(message):
    """Print *message* as an error and terminate via ``SystemExit``.

    A non-zero exit status is used so that calling scripts can detect the
    failure; the exception type is the same one ``exit()`` raised before.
    """
    outputError(message)
    raise SystemExit(1)


def getStockValues(symbol, name, apikey):
    """Fetch the full daily adjusted time series for one symbol.

    Args:
        symbol: Ticker symbol sent to the API.
        name: Prefix used for the resulting column names.
        apikey: Alpha Vantage API key.

    Returns:
        A DataFrame with a ``Date`` column (datetime) and the numeric columns
        ``<name>Open``, ``<name>High``, ``<name>Low``, ``<name>Close``,
        ``<name>Volume``, ``<name>Dividend`` and ``<name>Coefficient``.

    Raises:
        SystemExit: If the response carries an error message, a note, or no
            daily time series at all.
    """
    # Load data from alphavantage. Passing the query as ``params`` lets
    # requests URL-encode the values instead of concatenating raw strings.
    response = requests.get(
        url=_API_URL,
        params={
            "function": "TIME_SERIES_DAILY_ADJUSTED",
            "symbol": symbol,
            "outputsize": "full",
            "apikey": apikey,
        },
        timeout=_REQUEST_TIMEOUT,
    )
    payload = response.json()

    if "Error Message" in payload:
        _fail(payload["Error Message"] + " This is likely due to the fact that the stock symbol was not found.")
    if "Note" in payload:
        _fail(payload["Note"] + " This is likely due to the fact that you set premium key to True but did not supply a premium key.")
    if _TIME_SERIES_KEY not in payload:
        # Any other response shape would otherwise surface as a bare KeyError.
        _fail(
            "The response for " + str(symbol) + " did not contain '"
            + _TIME_SERIES_KEY + "'. Response keys: " + str(list(payload))
        )

    df = pd.DataFrame.from_dict(payload[_TIME_SERIES_KEY], orient="index")
    df.reset_index(level=0, inplace=True)

    # Rename columns and assign the right data types
    df.columns = ["Date"] + [f"{name}{suffix}" for suffix in _COLUMN_SUFFIXES]
    # pd.to_datetime replaces astype("datetime64"): the unit-less dtype string
    # is rejected by pandas 2.x.
    df["Date"] = pd.to_datetime(df["Date"])

    # Delete unnecessary columns
    df = df.drop(f"{name}CloseNonAdjusted", axis=1)

    for column in df.columns.drop("Date"):
        df[column] = pd.to_numeric(df[column])
    return df


def output(message):
    """Print a status message unless output is suppressed via ``hideoutput``."""
    if not hideoutput:
        print(message)


def outputError(message):
    """Print an error message; errors are shown even when output is hidden."""
    print("> Error: " + message)


def download(stocklist, apikey, premiumkey, suppressoutput, startdate, enddate):
    """Download every stock in *stocklist* and merge the results on ``Date``.

    Args:
        stocklist: DataFrame with one row per stock and the columns
            ``Symbol`` and ``Name``.
        apikey: Alpha Vantage API key; must be non-empty.
        premiumkey: If truthy, no pause is inserted between requests.
        suppressoutput: If truthy, status messages are not printed.
        startdate: Optional lower bound (inclusive) for ``Date``; any value
            accepted by ``np.datetime64``. Falsy means no lower bound.
        enddate: Optional upper bound (inclusive) for ``Date``; same rules.

    Returns:
        The outer-merged DataFrame of all stocks, filtered to the requested
        timeframe. For an empty *stocklist* an empty DataFrame with only a
        ``Date`` column is returned.

    Raises:
        SystemExit: If no API key is given, if *startdate* is after
            *enddate*, or if a download fails (see ``getStockValues``).
    """
    # Set the output
    global hideoutput
    hideoutput = suppressoutput

    sleeptime = 0 if premiumkey else _FREE_TIER_SLEEP

    # Check if the arguments are valid
    if not apikey:
        _fail("No apikey found.")
    # Parse the bounds once, up front, so an invalid date is reported before
    # any download starts; both bounds are optional.
    start = np.datetime64(startdate) if startdate else None
    end = np.datetime64(enddate) if enddate else None
    if start is not None and end is not None and start > end:
        _fail("The enddate has to be later than the startdate.")

    output("\nArguments passed")
    output("> Key: " + str(apikey))
    output("> Stocks: ")
    output(stocklist)

    # Download the stock data
    total = len(stocklist.index)
    output("\nDownloading new stock data")
    # The original computed len(stocklist * 5), which multiplies the frame's
    # values and leaves its length unchanged; the per-download time was lost.
    estimate = round(total * (sleeptime + _SECONDS_PER_DOWNLOAD) / 60)
    output("> This will take about " + str(estimate) + " minute(s)")

    # Loop through all stock symbols
    fulldata = None
    for position, (_, row) in enumerate(stocklist.iterrows(), start=1):
        stockname = str(row["Name"])

        # Download new stock data and merge it with existing data
        output("> Downloading: " + stockname + " (" + str(position) + " of " + str(total) + ")")
        stockdata = getStockValues(row["Symbol"], row["Name"], apikey)
        if fulldata is None:
            fulldata = stockdata
        else:
            fulldata = fulldata.merge(stockdata, on="Date", how="outer")
        output("> Merging: " + stockname)

        # No pause is needed after the last request.
        if position < total and sleeptime > 0:
            output("> Sleeping for " + str(sleeptime) + " seconds to not exceed the API limit.")
            time.sleep(sleeptime)

    if fulldata is None:
        # Nothing was requested; return an empty frame instead of crashing
        # on the date filter below.
        output("> No stocks were given, nothing to download.")
        return pd.DataFrame(columns=["Date"])

    # Filter for start and enddate
    output("> Filtering timeframe from " + str(startdate) + " to " + str(enddate))
    if start is not None:
        fulldata = fulldata[fulldata["Date"] >= start]
    if end is not None:
        fulldata = fulldata[fulldata["Date"] <= end]

    output("> All stocks were fetched.")
    return fulldata


# Public functions
def downloadFromDict(stockdict, apikey, premiumkey, suppressoutput, startdate, enddate):
    """Download the stocks described by a ``{Symbol: Name}`` dict.

    Args:
        stockdict: Mapping of ticker symbol to display name.
        apikey, premiumkey, suppressoutput, startdate, enddate: Passed
            through to ``download``.

    Returns:
        The merged DataFrame produced by ``download``.

    Raises:
        SystemExit: If *stockdict* cannot be turned into a table, or if
            ``download`` fails.
    """
    # Transform the dict
    try:
        stocklist = pd.DataFrame(list(stockdict.items()), columns=list(_REQUIRED_COLUMNS))
    except Exception:
        # Any conversion failure is reported the same way; unlike a bare
        # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        _fail("The dict was malformated. Please use the format {Symbol:Name,Symbol:Name}")
    return download(stocklist, apikey, premiumkey, suppressoutput, startdate, enddate)


def downloadFromCSV(stockcsv, apikey, premiumkey, suppressoutput, startdate, enddate):
    """Download the stocks listed in a CSV file with ``Symbol,Name`` columns.

    Args:
        stockcsv: Path or buffer accepted by ``pandas.read_csv``.
        apikey, premiumkey, suppressoutput, startdate, enddate: Passed
            through to ``download``.

    Returns:
        The merged DataFrame produced by ``download``.

    Raises:
        SystemExit: If the CSV cannot be read, lacks the ``Symbol`` or
            ``Name`` column, or if ``download`` fails.
    """
    csv_error = "The csv was malformated. Please use the format \nSymbol,Name\nSymbol,Name"

    # Transform the CSV
    try:
        stocklist = pd.read_csv(stockcsv)
    except Exception:
        # Any read failure is reported the same way; unlike a bare
        # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
        _fail(csv_error)

    # A readable CSV with the wrong header would otherwise fail later with a
    # KeyError inside the download loop.
    if any(column not in stocklist.columns for column in _REQUIRED_COLUMNS):
        _fail(csv_error)
    return download(stocklist, apikey, premiumkey, suppressoutput, startdate, enddate)