#!/usr/bin/env python
##################################################
# Name:        pyQ - Python Quote Grabber
# Author:      Rimon Barr
# Start date:  10 January 2002
# Purpose:     Retrieve stock quote data in Python
# License:     GPL 2.0
##################################################
# Activity log:
#
# 10/01/02 - Initial release
# 14/10/02 - Yahoo changed url format
# 31/10/02 - More convenient programmatic interface and local caching
# 21/09/04 - Updated by Alberto Santini to accommodate Yahoo changes
# 27/01/05 - Updated by Alberto Santini to accommodate Yahoo changes
# 11/01/07 - Updated by Ehud Ben-Reuven, Historical currency exchange tickers
#            (e.g. USDEUR=X) are retrieved from www.oanda.com

import ast
import datetime
import getopt
import os       # unused below; retained from the original import list
import re
import string   # unused below; retained from the original import list
import sys
import time
import traceback
import urllib   # retained from the original import list

try:  # Python 2 module layout
    import anydbm
    from urllib import urlencode, urlopen
except ImportError:  # Python 3 module layout
    import dbm as anydbm
    from urllib.parse import urlencode
    from urllib.request import urlopen

# Two-digit years below this value are read as 20yy, the rest as 19yy.
Y2KCUTOFF = 60
__version__ = "0.5"
# File name of the local quote cache opened by getCachedTicker().
CACHE = 'stocks.db'
# Non-zero: write progress/diagnostic messages to stderr.
DEBUG = 1


def showVersion():
    '''Print the program name, version and a one-line description.'''
    print('pyQ v' + __version__ + ', by Rimon Barr:')
    print('Python Yahoo Quote fetching utility')


def showUsage():
    '''Print the version banner followed by the command-line help text.'''
    print('')
    showVersion()
    # NOTE(review): the contact address in the last message is missing from
    # the source this file was restored from; fill it in if it is known.
    usage = [
        '',
        'Usage: pyQ [-i] [start_date [end_date]] ticker [ticker...]',
        ' pyQ -h | -v',
        '',
        ' -h, -?, --help display this help information',
        ' -v, --version display version',
        ' -i, --stdin tickers fed on stdin, one per line',
        '',
        ' date formats are yyyymmdd',
        ' if enddate is omitted, it is assume to be the same as startdate',
        ' if startdate is omitted, we use *current* stock tables',
        ' and otherwise, use historical stock tables.',
        ' (current stock tables will give previous close price before',
        ' market closing time.)',
        ' tickers are exactly what you would type at finance.yahoo.com',
        ' output format: "ticker, date (yyyymmdd), open, high, low, close, vol"',
        ' Its possible to retrieve currency exchange rates from the historical tables only',
        ' The yahoo ticker for an exchange rate is of the format USDEUR=X',
        ' the output format is "ticker, date, exchange"',
        '',
        'Send comments, suggestions and bug reports to .',
        '',
    ]
    for line in usage:
        print(line)


def usageError():
    '''Print a short command-line syntax error message.'''
    print('pyQ: command syntax error')
    print("Try `pyQ --help' for more information.")


def isInt(i):
    '''Return 1 if int(i) succeeds, 0 otherwise.'''
    try:
        int(i)
    except (TypeError, ValueError):
        return 0
    return 1


def splitLines(buf):
    '''Split buf on newlines, dropping empty lines and one trailing '\\r'
    from each remaining line.'''
    return [line[:-1] if line.endswith('\r') else line
            for line in buf.split('\n') if line]


def parseDate(d):
    '''convert yyyymmdd string to tuple (yyyy, mm, dd)'''
    return (d[:-4], d[-4:-2], d[-2:])


def yy2yyyy(yy):
    '''Expand a two-digit year to a four-digit year string using Y2KCUTOFF.

    NOTE(review): everything after the cutoff comparison was missing from
    the source this file was restored from.  The 2000/1900 split and the
    string return type are a reconstruction - confirm against a pristine
    copy.'''
    yy = int(yy) % 100
    if yy < Y2KCUTOFF:
        return str(2000 + yy)
    return str(1900 + yy)


# NOTE(review): MONTH2NUM, dd_mmm_yy2yyyymmdd and DAYSECS were missing from the
# source this file was restored from.  They are rebuilt from their names and
# call sites (getTicker calls dd_mmm_yy2yyyymmdd on the first CSV field) and
# must be checked against a pristine copy.
MONTH2NUM = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
    'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
}


def dd_mmm_yy2yyyymmdd(d):
    '''Convert a 'dd-Mmm-yy' date (e.g. '5-Jan-07') to 'yyyymmdd'.

    An already numeric 'yyyy-mm-dd' date is accepted as well.
    Raises ValueError when d matches neither layout.
    (Reconstructed function - see the NOTE(review) above.)'''
    parts = d.strip().split('-')
    if len(parts) != 3:
        raise ValueError('Illegal date format: %r' % (d,))
    first, second, third = parts
    if len(first) == 4 and first.isdigit() and second.isdigit():
        year, month, day = first, int(second), int(third)
    else:
        month = MONTH2NUM.get(second[:3].capitalize())
        if month is None:
            raise ValueError('Illegal date format: %r' % (d,))
        year, day = yy2yyyy(third), int(first)
    return '%s%02d%02d' % (year, month, day)


# Seconds in a day.  No longer used by allDates (see there) but kept as a
# module-level name.
DAYSECS = 60 * 60 * 24


def allDates(d1, d2):
    '''Return every date from d1 to d2 inclusive, in ascending order.

    d1, d2 = yyyymmdd strings
    Raises ValueError if d1 is later than d2 or either is not a valid date.'''
    if int(d1) > int(d2):
        raise ValueError('d1 must be smaller than d2')
    day = datetime.datetime.strptime(d1, '%Y%m%d').date()
    last = datetime.datetime.strptime(d2, '%Y%m%d').date()
    oneDay = datetime.timedelta(days=1)
    dates = []
    # Calendar arithmetic instead of adding 86400 seconds to a local
    # timestamp: the latter repeats or skips a day across a DST change.
    while day <= last:
        dates.append(day.strftime('%Y%m%d'))
        day = day + oneDay
    return dates


def _isNextDay(prev, cur):
    '''True if integer date cur (yyyymmdd) directly follows prev.'''
    if cur == prev + 1:
        return True
    # Month and year boundaries: compare as real calendar dates.
    try:
        a = datetime.datetime.strptime(str(prev), '%Y%m%d')
        b = datetime.datetime.strptime(str(cur), '%Y%m%d')
    except ValueError:
        return False
    return (b - a).days == 1


def aggDates(dates):
    '''Aggregate list of dates (yyyymmdd) in range pairs

    Returns a list of (low, high) string tuples, one per run of consecutive
    days, in ascending order.  An empty input gives an empty list.'''
    if not dates:
        return []
    days = sorted(int(d) for d in dates)
    aggs = []
    low = high = days[0]
    for day in days[1:]:
        if _isNextDay(high, day):
            high = day
        else:
            aggs.append((low, high))
            low = high = day
    aggs.append((low, high))
    return [(str(lo), str(hi)) for lo, hi in aggs]


def _fetch(url):
    '''Download url and return the body as text, closing the connection.'''
    f = urlopen(url)
    try:
        buf = f.read()
    finally:
        f.close()
    if not isinstance(buf, str):
        # Python 3 returns bytes; latin-1 maps every byte, so it cannot fail.
        buf = buf.decode('latin-1')
    return buf


def _yyyymmdd2usDate(d):
    '''yyyymmdd -> mm/dd/yy with the slashes already URL-escaped.'''
    return d[4:6] + '%2F' + d[6:8] + '%2F' + d[2:4]


def _usDate2yyyymmdd(d):
    '''mm/dd/yyyy -> yyyymmdd; raises ValueError on any other layout.'''
    if len(d) != 10 or d[2] != '/' or d[5] != '/':
        raise ValueError('Illegal date format')
    return d[6:10] + d[0:2] + d[3:5]


def _parseRateTable(lines, ticker):
    '''Extract the CSV rows between the <PRE> and </PRE> markers.

    NOTE(review): the marker strings were missing from the source this file
    was restored from.  The 5-character slice after the opening marker
    points to '<PRE>'; the match is case-insensitive because the exact
    spelling is unknown.'''
    rows = []
    inTable = False
    for line in lines:
        if line[:5].upper() == '<PRE>':
            inTable = True
            line = line[5:]
        elif line[:6].upper() == '</PRE>':
            inTable = False
        # Skip blank lines (e.g. a marker alone on its line).
        if inTable and line.strip():
            fields = line.split(',')
            fields[0] = _usDate2yyyymmdd(fields[0])
            rows.append([ticker] + fields)
    return rows


def getRate(d1, d2, ticker):
    '''Get historical exchange rates from www.oanda.com.

    d1, d2 = yyyymmdd starting and ending
    ticker = 8-character string of the form USDEUR=X
    Returns a list of [ticker, yyyymmdd, ...] rows.
    Raises ValueError for a malformed ticker or an unexpected date field.'''
    if not (len(ticker) == 8 and ticker.endswith('=X')):
        raise ValueError('Illegal FX rate ticker')
    cur1 = ticker[0:3]
    cur2 = ticker[3:6]
    url = 'http://www.oanda.com/convert/fxhistory'
    # Values are already URL-escaped, so the query is joined by hand rather
    # than passed through urlencode (which would escape them again).
    query = (
        ('lang', 'en'),
        ('date1', _yyyymmdd2usDate(d1)),
        ('date', _yyyymmdd2usDate(d2)),
        ('date_fmt', 'us'),
        ('exch', cur1),
        ('exch2', ''),
        ('expr', cur2),
        ('expr2', ''),
        ('margin_fixed', '0'),
        ('SUBMIT', 'Get+Table'),
        ('format', 'CSV'),
        ('redirected', '1'),
    )
    query = '&'.join(['%s=%s' % (var, val) for var, val in query])
    page = _fetch(url + '?' + query).splitlines()
    return _parseRateTable(page, ticker)


def getTicker(d1, d2, ticker):
    '''Get historical quotes for one ticker, bypassing the cache.

    d1, d2 = yyyymmdd starting and ending
    ticker = symbol string; tickers of the form USDEUR=X go to getRate()
    Returns a list of [ticker, yyyymmdd, ...] rows, empty when the reply
    is empty or reports no prices.'''
    if len(ticker) == 8 and ticker.endswith('=X'):
        return getRate(d1, d2, ticker)
    if DEBUG:
        # stderr, so the message does not end up inside the CSV on stdout.
        sys.stderr.write(
            'Quering Yahoo!... for %s (%s-%s)\n' % (ticker, d1, d2))
    d1 = parseDate(d1)
    d2 = parseDate(d2)
    url = 'http://ichart.finance.yahoo.com/table.csv'
    query = (
        ('a', '%02d' % (int(d1[1]) - 1)),   # month is sent zero-based
        ('b', d1[2]),
        ('c', d1[0]),
        ('d', '%02d' % (int(d2[1]) - 1)),
        ('e', d2[2]),
        ('f', d2[0]),
        ('s', ticker),
        ('y', '0'),
        ('g', 'd'),
        ('ignore', '.csv'),
    )
    query = '&'.join(['%s=%s' % (var, val) for var, val in query])
    lines = splitLines(_fetch(url + '?' + query))
    if not lines or re.match('no prices', lines[0], re.I):
        return []
    result = []
    for line in lines[1:]:   # the first line is skipped, as in the original
        fields = line.split(',')
        fields[0] = dd_mmm_yy2yyyymmdd(fields[0])
        result.append([ticker] + fields)
    return result


def _loadCached(raw):
    '''Decode a cache value: a list of fields, or an int failure count.

    ast.literal_eval replaces the original eval(): the cache only ever holds
    repr() of lists of strings and ints, and a literal parser cannot execute
    code planted in the cache file.'''
    if not isinstance(raw, str):
        raw = raw.decode('utf-8')   # Python 3 dbm returns bytes
    return ast.literal_eval(raw)


def getCachedTicker(d1, d2, ticker, forcefailed=0):
    '''Get tickers, hopefully from cache.

    d1, d2 = yyyymmdd starting and ending
    ticker = symbol string
    forcefailed = integer for cachebehaviour
      =0 : do not retry failed data points
      >0 : retry failed data points n times
      -1 : retry failed data points, reset retry count
      -2 : ignore cache entirely, refresh ALL data points

    Returns a list of [ticker, yyyymmdd, ...] rows for the dates that have
    data.  Dates that could not be fetched are stored in the cache as an
    integer retry count and left out of the result.'''
    dates = allDates(d1, d2)
    db = anydbm.open(CACHE, 'c')
    try:
        # get from cache
        data = {}
        for d in dates:
            try:
                data[(d, ticker)] = db[repr((d, ticker))]
            except KeyError:
                pass

        # forced failed: drop the entries that have to be fetched again
        if forcefailed:
            for key in list(data):   # copy: entries are deleted in the loop
                value = _loadCached(data[key])
                isFailure = isinstance(value, int)
                if forcefailed == -2 or (
                        isFailure
                        and (forcefailed == -1 or value < forcefailed)):
                    del data[key]

        # compute missing
        cached = set(d for d, _ in data)
        missing = [d for d in dates if d not in cached]
        for lo, hi in aggDates(missing):
            try:
                for row in getTicker(lo, hi, ticker) or []:
                    d, datum = row[1], row[2:]
                    data[(d, ticker)] = db[repr((d, ticker))] = repr(datum)
            except Exception:
                # Best effort: a failed range is recorded as failed below.
                if DEBUG:
                    sys.stderr.write(
                        'Fetch failed for %s (%s-%s)\n' % (ticker, lo, hi))

        # failed: store or update the retry count
        cached = set(d for d, _ in data)
        for d in missing:
            if d in cached:
                continue
            try:
                times = _loadCached(db[repr((d, ticker))])
            except Exception:
                times = 0
            if not isinstance(times, int):
                times = 0
            if forcefailed < 0:
                times = 1
            if times < forcefailed:
                times = times + 1
            data[(d, ticker)] = db[repr((d, ticker))] = repr(times)
    finally:
        db.close()

    # result: integer entries are failure counts, not data
    result = []
    for d in dates:
        datum = _loadCached(data[(d, ticker)])
        if not isinstance(datum, int):
            result.append([ticker, d] + datum)
    return result


def getTickers(d1, d2, tickers, forcefailed=0):
    '''Get tickers.

    d1, d2 = yyyymmdd starting and ending
    tickers = list of symbol strings
    forcefailed = integer for cachebehaviour
      =0 : do not retry failed data points
      >0 : retry failed data points n times
      -1 : retry failed data points, reset retry count
      -2 : ignore cache entirely, refresh ALL data points

    Returns the rows of getCachedTicker() for every ticker, concatenated in
    the order the tickers were given.'''
    result = []
    for t in tickers:
        result.extend(getCachedTicker(d1, d2, t, forcefailed))
    return result


def getTickersNowChunk(tickers):
    '''Get current quotes for one batch of tickers in a single request.

    tickers = list of symbol strings
    Returns one list per reply line: the lower-cased symbol, today's date
    (yyyymmdd, local time), then the remaining CSV fields.'''
    url = 'http://finance.yahoo.com/d/quotes.csv'
    query = {'s': ' '.join(tickers), 'f': 'sohgpv', 'e': '.csv'}
    lines = splitLines(_fetch(url + '?' + urlencode(query)))
    t = time.localtime()
    today = '%4d%02d%02d' % (t[0], t[1], t[2])
    result = []
    for line in lines:
        fields = line.split(',')
        fields[0] = fields[0][1:-1].lower()   # drop first and last character
        fields.insert(1, today)
        result.append(fields)
    return result


def getTickersNow(tickers):
    '''Get current quotes for tickers, 150 symbols per request.'''
    result = []
    while tickers:
        result.extend(getTickersNowChunk(tickers[:150]))
        tickers = tickers[150:]
    return result


def main():
    '''Command-line entry point: parse the arguments and print quotes as CSV.'''
    # parse options
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hv?i',
                                   ['help', 'version', 'stdin'])
    except getopt.GetoptError:
        usageError()
        return

    # process options
    stdin = 0
    for o, a in opts:
        if o in ('-h', '--help', '-?'):
            showUsage()
            return
        if o in ('-v', '--version'):
            showVersion()
            return
        if o in ('-i', '--stdin'):
            stdin = 1

    t = time.localtime()
    todayStr = '%4d%02d%02d' % (t[0], t[1], t[2])
    startdate = enddate = todayStr
    today = 1
    tickers = []
    for argpos, a in enumerate(args):
        if argpos == 0 and isInt(a):
            # a leading integer is the start date: use historical tables
            startdate = enddate = a
            today = 0
            continue
        if argpos == 1 and isInt(a):
            # an end date of '0' means today
            enddate = todayStr if a == '0' else a
            continue
        tickers.append(a)
    if stdin:
        tickers.extend(splitLines(sys.stdin.read()))
    if not tickers:
        showUsage()
        return

    if today:
        result = getTickersNow(tickers)
    else:
        result = getTickers(startdate, enddate, tickers)
    for row in result:
        print(','.join(row))


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        traceback.print_exc()
        print('Break!')