SVD congress 2012

1664 days ago by kcrisman

This is a worksheet from Marshall Hampton at the University of Minnesota, Duluth.

import csv
import urllib2
from contextlib import closing

# Get the data: fetch hou112kh.ord from voteview.com and store a local copy
# under DATA (not defined in this cell; presumably the notebook-provided
# data-directory prefix -- confirm).
#
# closing() and `with` release both the network handle and the local file
# even if the transfer fails part-way.  The file is written in binary mode
# ('wb') so the bytes on disk are exactly what was downloaded, matching the
# 'rb' mode used when the file is read back.
with closing(urllib2.urlopen('ftp://voteview.com/dtaord/hou112kh.ord')) as U:
    with open(DATA + 'hou112kh.ord', 'wb') as f:
        f.write(U.read())
       
# Read the downloaded file into a list.
# Each line of the file becomes one entry of vote_data: the list of
# comma-separated fields that csv.reader produced for that line.
with open(DATA + 'hou112kh.ord', 'rb') as csvfile:
    reader = csv.reader(csvfile, delimiter=',')
    vote_data = list(reader)
       
# Parse the data.
# Each record is sliced by fixed character positions into name, party code,
# state and the string of per-vote digit codes.  The result is
#     cdict[name] = [vote_vector, party, state]
# Records that share the same (stripped) name overwrite one another, so
# cdict keeps only the last record seen for each name.
cdict = {}
for q in vote_data:  # vote_data holds the rows read from the .ord file
    if not q:
        # csv.reader yields an empty list for a blank line; nothing to parse.
        continue
    qq = q[0]  # only the first comma-separated field is used
    name = qq[25:36].strip()
    party = qq[20:23]
    state = qq[12:19]
    # One integer code per character of the remaining text.
    vdata = vector([int(x) for x in qq[36:].strip()])
    cdict[name] = [vdata, party, state]
       
# Convert votes to numbers.
def modv(v):
    """Map raw vote codes to numeric positions.

    Codes 1, 2, 3 become 1.0, codes 4, 5, 6 become -1.0, and every other
    code becomes 0.0.  (Presumably these are the yea-type / nay-type /
    other codes of the voteview format -- confirm against its codebook.)

    INPUT:  v -- a sequence of integer vote codes.
    OUTPUT: a vector of the same length with entries 1.0, -1.0 or 0.0.
    """
    def score(s):
        if s in [1, 2, 3]:
            return 1.0
        elif s in [4, 5, 6]:
            return -1.0
        return 0.0

    return vector([score(s) for s in v])


# One row of vmat per member, in sorted-name order (sks fixes that order).
sks = sorted(cdict.keys())
vmat = [modv(cdict[q][0]) for q in sks]

# Take the sizes from the data rather than fixing them in the code, so the
# cell keeps working when a different .ord file is loaded.
nmembers = len(vmat)
nvotes = len(vmat[0]) if vmat else 0

# Build one column vector per vote (entries indexed by member) and keep only
# columns whose norm exceeds 1.0.  Entries are 0 or +-1, so this drops votes
# on which at most one member has a non-zero entry.
nzcols = []
for j in range(nvotes):
    v = vector([vmat[i][j] for i in range(nmembers)])
    if norm(v) > 1.0:
        nzcols.append(v)

# Subtract the mean vector (the average of the retained vote columns).
nzm = mean(nzcols)
nzcols = [col - nzm for col in nzcols]

# Compute the SVD.  smat has one row per retained vote and one column per
# member.
smat = matrix(RDF, nzcols)
u, s, v = smat.SVD()

# Matrix to project onto the first two singular vectors.
# SVD() returns u with the left singular vectors as its COLUMNS
# (smat == u*s*v.transpose()), so they must be taken with column();
# u[0] and u[1] would be rows of u, which are not singular vectors.
# The second vector is negated, as before, only to flip that axis.
uproj = matrix([u.column(0), -u.column(1)])
       
# Projected points: one 2-D coordinate per member.
# Each member's vote vector is a column of smat, i.e. a row of its
# transpose.  The transpose is taken once here instead of on every loop
# iteration, and the number of members comes from the matrix itself.
smat_t = smat.transpose()
prepts = [uproj * smat_t[i] for i in range(smat_t.nrows())]

# Party code -> plot colour; any other code is drawn in black.
# (Presumably '100' / '200' are the two major parties in the voteview
# party coding -- confirm against its codebook.)
party_colors = {'100': 'blue', '200': 'red'}

# Convert to Graphics objects (points) and color them.
pts = Graphics()
names = Graphics()
fsize = 7  # fontsize, you might want to adjust this
for i, k in enumerate(sks):
    coords = prepts[i]
    nc = [coords[0], coords[1] - 1]  # put the label one unit below its point
    color = party_colors.get(cdict[k][1], 'black')
    pts = pts + point(coords, rgbcolor=color)
    names = names + text(k, nc, rgbcolor=color, fontsize=fsize)
show(pts + names, axes=False)