R Tools for FEC Campaign Finance Disclosure Data

UPDATE 10/18/2011:

Thanks to some of the comments, I was able to pare this down using R’s read.fwf() function. Here’s the new version.


# makeData_campaignFinance_v1_1.R -- copyright 10.18.2011, christopher compeau (email: my last name aht gmail dot com)
# thanks to the commenters on swordofcrom.wordpress.com for their help with read.fwf()

# use as you please but please attribute credit to christopher compeau if you publish anything
# the use of the FEC campaign finance data is subject to the rules on the FEC website
# have fun my babies. bonus points if you get yourself on some congressional campaign's shit list.

# this uses the 2011-2012 detailed disclosure data files at http://www.fec.gov/finance/disclosure/ftpdet.shtml
# still to be done: write tools for amended individual contributions files and other stuff as yet undiscovered.

# overpunch tool
overpunch <- function(x) {
  # Decode signed-overpunch amount fields into signed numeric values.
  #
  # Only the last character of each value is examined:
  #   "[", "]", "{", "}"  -> trailing digit 0
  #   A-I and J-R         -> trailing digits 1-9
  # Values ending in "]", "}" or one of J-R are returned as negative.
  #
  # Args:
  #   x: vector of amount fields; coerced with as.character().
  # Returns:
  #   A numeric vector the same length as x. Elements that are still not
  #   numeric after the trailing character has been decoded come back as NA.
  amount <- as.character(x)

  # The sign has to be captured before the marker character is overwritten.
  negative <- grepl("\\]$", amount) | grepl("[JKLMNOPQR}]$", amount)

  # Trailing-character pattern -> the digit it stands for.
  trailingDigit <- c(
    "\\[$"   = "0",
    "\\]$"   = "0",
    "[{}]$"  = "0",
    "[AJ]$"  = "1",
    "[BK]$"  = "2",
    "[CL]$"  = "3",
    "[DM]$"  = "4",
    "[EN]$"  = "5",
    "[FO]$"  = "6",
    "[GP]$"  = "7",
    "[HQ]$"  = "8",
    "[IR]$"  = "9"
  )
  for (pattern in names(trailingDigit)) {
    amount <- sub(pattern, trailingDigit[[pattern]], amount)
  }

  # Leading zeroes are deliberately left alone: as.numeric() ignores them,
  # whereas stripping them reduced an all-zero field such as "0000000" to ""
  # and therefore to NA instead of 0.
  ifelse(negative, -1, 1) * as.numeric(amount)
}

# Committee Master File
# Pre-process the raw file so that read.fwf() can split it by column width:
#  * every tab becomes ONE space. read.fwf() uses "\t" as its internal field
#    separator, so tabs must go, but the replacement has to be a single
#    character or all later columns on that line shift to the right.
#  * bytes that are not valid ASCII become "?" (one per byte) instead of
#    turning the whole line into NA, which writeLines() would emit as "NA".
writeLines(
  iconv(
    gsub("\t", " ", readLines("~/Projects/campaign_finance/data/raw/committeeMaster_2011_2012.dta"), fixed = TRUE),
    from = "ASCII", to = "UTF-8", sub = "?"
  ),
  "~/Projects/campaign_finance/data/preprocessed/committeeMaster_2011_2012_UTF8.dta"
)
cmteeMasterNames <- c(
  "cmID", "cmNAME", "treasurer", "streetOne", "streetTwo", "cityTown", "state", "zip",
  "cmDESIG", "cmTYPE", "cmPARTY", "fileFreq", "groupCategory", "orgName", "candidateID"
)
# One width per name above; comment.char = "" keeps "#" in the data as data.
cmteeMaster <- read.fwf(
  "~/Projects/campaign_finance/data/preprocessed/committeeMaster_2011_2012_UTF8.dta",
  c(9, 90, 38, 34, 34, 18, 2, 5, 1, 1, 3, 1, 1, 38, 9),
  comment.char = "", strip.white = TRUE, col.names = cmteeMasterNames
)
  
# Candidate Master File
# Pre-process the raw file so that read.fwf() can split it by column width:
#  * every tab becomes ONE space. read.fwf() uses "\t" as its internal field
#    separator, so tabs must go, but the replacement has to be a single
#    character or all later columns on that line shift to the right.
#  * bytes that are not valid ASCII become "?" (one per byte) instead of
#    turning the whole line into NA, which writeLines() would emit as "NA".
writeLines(
  iconv(
    gsub("\t", " ", readLines("~/Projects/campaign_finance/data/raw/candidateMaster_2011_2012.dta"), fixed = TRUE),
    from = "ASCII", to = "UTF-8", sub = "?"
  ),
  "~/Projects/campaign_finance/data/preprocessed/candidateMaster_2011_2012_UTF8.dta"
)
candMasterNames <- c(
  "cndID", "cndName", "partyDesig1", "filler1", "partyDesig3", "seatStatus", "filler2",
  "candidateStatus", "streetOne", "streetTwo", "cityTown", "state", "zip",
  "principalCommID", "electionYear", "currentDistrict"
)
# One width per name above; comment.char = "" keeps "#" in the data as data.
candMaster <- read.fwf(
  file = "~/Projects/campaign_finance/data/preprocessed/candidateMaster_2011_2012_UTF8.dta",
  c(9, 38, 3, 3, 3, 1, 1, 1, 34, 34, 18, 2, 5, 9, 2, 2),
  comment.char = "", strip.white = TRUE, col.names = candMasterNames
)
  
# Individual Contributions
# Pre-process the raw file so that read.fwf() can split it by column width:
#  * every tab becomes ONE space. read.fwf() uses "\t" as its internal field
#    separator, so tabs must go, but the replacement has to be a single
#    character or all later columns on that line shift to the right.
#  * bytes that are not valid ASCII become "?" (one per byte) instead of
#    turning the whole line into NA, which writeLines() would emit as "NA".
writeLines(
  iconv(
    gsub("\t", " ", readLines("~/Projects/campaign_finance/data/raw/individualContributions_2011_2012.dta"), fixed = TRUE),
    from = "ASCII", to = "UTF-8", sub = "?"
  ),
  "~/Projects/campaign_finance/data/preprocessed/individualContributions_2011_2012_UTF8.dta"
)
individualNames <- c(
  "filerID", "amendIndicator", "reportType", "primaryGeneral", "microfilmLocation",
  "transactionType", "contributorName", "cityTown", "state", "zip", "occupation", "month",
  "transactionDay", "transactionCentury", "transactionYear", "amount", "otherID", "fecRecord"
)
# One width per name above; comment.char = "" keeps "#" in the data as data.
individual <- read.fwf(
  file = "~/Projects/campaign_finance/data/preprocessed/individualContributions_2011_2012_UTF8.dta",
  c(9, 1, 3, 1, 11, 3, 34, 18, 2, 5, 35, 2, 2, 2, 2, 7, 9, 7),
  comment.char = "", strip.white = TRUE, col.names = individualNames
)
# The amount column is stored in signed-overpunch form; decode it to numbers.
individual$amount <- overpunch(individual$amount)
  
# Contributions from Committees
# Pre-process the raw file so that read.fwf() can split it by column width:
#  * every tab becomes ONE space. read.fwf() uses "\t" as its internal field
#    separator, so tabs must go, but the replacement has to be a single
#    character or all later columns on that line shift to the right.
#  * bytes that are not valid ASCII become "?" (one per byte) instead of
#    turning the whole line into NA, which writeLines() would emit as "NA".
writeLines(
  iconv(
    gsub("\t", " ", readLines("~/Projects/campaign_finance/data/raw/candidatesFromCommittees_2011_2012.dta"), fixed = TRUE),
    from = "ASCII", to = "UTF-8", sub = "?"
  ),
  "~/Projects/campaign_finance/data/preprocessed/candidatesFromCommittees_2011_2012_UTF8.dta"
)
candFromCommitteesNames <- c(
  "filerID", "amendIndicator", "reportType", "primaryGeneral", "microfilmLocation",
  "transactionType", "transactionMonth", "transactionDay", "transactionCentury",
  "transactionYear", "amount", "otherID", "candidateID", "fecRecord"
)
# One width per name above; comment.char = "" keeps "#" in the data as data.
candFromCommittees <- read.fwf(
  file = "~/Projects/campaign_finance/data/preprocessed/candidatesFromCommittees_2011_2012_UTF8.dta",
  c(9, 1, 3, 1, 11, 3, 2, 2, 2, 2, 7, 9, 9, 7),
  comment.char = "", strip.white = TRUE, col.names = candFromCommitteesNames
)
# The amount column is stored in signed-overpunch form; decode it to numbers.
candFromCommittees$amount <- overpunch(candFromCommittees$amount)

# Transactions from one committee to another
# Pre-process the raw file so that read.fwf() can split it by column width:
#  * every tab becomes ONE space. read.fwf() uses "\t" as its internal field
#    separator, so tabs must go, but the replacement has to be a single
#    character or all later columns on that line shift to the right.
#  * bytes that are not valid ASCII become "?" (one per byte) instead of
#    turning the whole line into NA, which writeLines() would emit as "NA".
writeLines(
  iconv(
    gsub("\t", " ", readLines("~/Projects/campaign_finance/data/raw/committeeToCommittee_2011_2012.dta"), fixed = TRUE),
    from = "ASCII", to = "UTF-8", sub = "?"
  ),
  "~/Projects/campaign_finance/data/preprocessed/committeeToCommittee_2011_2012_UTF8.dta"
)
commToCommNames <- c(
  "filerID", "amendIndicator", "reportType", "primaryGeneral", "microfilmLocation",
  "transactionType", "contributorName", "cityTown", "state", "zip", "occupation", "month",
  "transactionDay", "transactionCentury", "transactionYear", "amount", "otherID", "fecRecord"
)
# One width per name above; comment.char = "" keeps "#" in the data as data.
commToComm <- read.fwf(
  file = "~/Projects/campaign_finance/data/preprocessed/committeeToCommittee_2011_2012_UTF8.dta",
  c(9, 1, 3, 1, 11, 3, 34, 18, 2, 5, 35, 2, 2, 2, 2, 7, 9, 7),
  comment.char = "", strip.white = TRUE, col.names = commToCommNames
)
# The amount column is stored in signed-overpunch form; decode it to numbers.
commToComm$amount <- overpunch(commToComm$amount)

ORIGINAL POST 10/17/2011:

For my first contribution to the blog, I wanted to make some kind of enlightening visualization of campaign finance disclosure data from the Federal Election Commission’s website. It looks like they’re working on some new, easy-to-use data dumps here, but I decided to try to use the more detailed data files here because I couldn’t really tell the difference between the two data pages, and as a rule I always go for the most granular, unaggregated data when I have a choice.

Anyway, the FEC dumps the data in some weird fixed-width COBOL format that kept me from using any of the read.delim functions to get the data into R, so I had to write a bunch of little parsing functions for each data file. I spent all day yesterday on these little helpers and I haven’t yet had the opportunity to do anything interesting with the data, so I decided that I would just post the code and work on some visualizations later this week.

So in summary, this code turns each of the FEC data dump files into an R data frame:

  • Committee Master File: cmteeMaster
  • Candidate Master File: candMaster
  • Individual Contributions: individuals
  • Contributions to Candidates from Committees: candFromCommittees
  • Transactions between Committees: commToComm
This data is DIRTY, and it still needs a lot of work… this code just gets it into data frames. More to come.

# makeData_campaignFinance_v1_0.R -- copyright 10.17.2011, christopher compeau (email: my last name aht gmail dot com)

# use as you please but please attribute credit to christopher compeau if you publish anything
# the use of the FEC campaign finance data is subject to the rules on the FEC website
# have fun my babies. bonus points if you get yourself on some congressional campaign's shit list.

# this uses the 2011-2012 detailed disclosure data files at http://www.fec.gov/finance/disclosure/ftpdet.shtml
# still to be done: write tools for amended individual contributions files and other stuff as yet undiscovered.  

# RAW DATA FILE PARSING TOOLS

trim.trailing <- function(x) {
  # Remove the run of whitespace (if any) at the end of each element of x.
  # Leading and interior whitespace is left untouched.
  sub(pattern = "\\s+$", replacement = "", x = x)
}

# committee master file
cmMaster <- function(line) {
  # Split one fixed-width committee master record into its 15 fields.
  #
  # Args:
  #   line: a single record (one character string, 284 characters wide).
  # Returns:
  #   An unnamed character vector of 15 fields, in the order listed below,
  #   each with trailing whitespace removed.
  widths <- c(
    cmID = 9, cmNAME = 90, treasurer = 38, streetOne = 34, streetTwo = 34,
    cityTown = 18, state = 2, zip = 5, cmDESIG = 1, cmTYPE = 1, cmPARTY = 3,
    fileFreq = 1, groupCategory = 1, orgName = 38, candidateID = 9
  )
  # Columns are contiguous, so each field ends at the running total of the
  # widths and starts right after the previous field's end.
  lastCol <- cumsum(widths)
  firstCol <- lastCol - widths + 1
  fields <- substring(line, firstCol, lastCol)
  # Same trailing-whitespace pattern as trim.trailing(), applied to all
  # fields in one vectorised call.
  sub("\\s+$", "", fields)
}


# candidate master file
candMaster <- function(line) {
  # Split one fixed-width candidate master record into fields.
  #
  # Args:
  #   line: a single record (one character string, 165 characters wide).
  # Returns:
  #   An unnamed character vector of 15 fields with trailing whitespace
  #   removed. The record layout has 16 columns, but partyDesig3
  #   (columns 54-56) is NOT part of the result, so that the output lines up
  #   with the 15 column names the caller passes to mkDataFrame().
  widths <- c(
    cndID = 9, cndName = 38, partyDesig1 = 3, filler1 = 3, partyDesig3 = 3,
    seatStatus = 1, filler2 = 1, candidateStatus = 1, streetOne = 34,
    streetTwo = 34, cityTown = 18, state = 2, zip = 5, principalCommID = 9,
    electionYear = 2, currentDistrict = 2
  )
  # Columns are contiguous, so each field ends at the running total of the
  # widths and starts right after the previous field's end.
  lastCol <- cumsum(widths)
  firstCol <- lastCol - widths + 1
  keep <- names(widths) != "partyDesig3"
  fields <- substring(line, firstCol[keep], lastCol[keep])
  # Same trailing-whitespace pattern as trim.trailing(), applied to all
  # fields in one vectorised call.
  sub("\\s+$", "", fields)
}

# individual candidate contributions, committee to committee transactions
indAndComContribution <- function(line) {
  # Split one fixed-width contribution / committee-to-committee record into
  # its 18 fields.
  #
  # Args:
  #   line: a single record (one character string, 153 characters wide).
  # Returns:
  #   An unnamed character vector of 18 fields, in the order listed below,
  #   each with trailing whitespace removed. The amount field is returned
  #   as raw text; it is not decoded here.
  widths <- c(
    filerID = 9, amendIndicator = 1, reportType = 3, primaryGeneral = 1,
    microfilmLocation = 11, transactionType = 3, contributorName = 34,
    cityTown = 18, state = 2, zip = 5, occupation = 35, month = 2,
    transactionDay = 2, transactionCentury = 2, transactionYear = 2,
    amount = 7, otherID = 9, fecRecord = 7
  )
  # Columns are contiguous, so each field ends at the running total of the
  # widths and starts right after the previous field's end.
  lastCol <- cumsum(widths)
  firstCol <- lastCol - widths + 1
  fields <- substring(line, firstCol, lastCol)
  # Same trailing-whitespace pattern as trim.trailing(), applied to all
  # fields in one vectorised call.
  sub("\\s+$", "", fields)
}

# contributions to candidate from committees
candComContibution <- function(line) {
  # Split one fixed-width committee-to-candidate contribution record into
  # its 14 fields.
  #
  # Args:
  #   line: a single record (one character string, 68 characters wide).
  # Returns:
  #   An unnamed character vector of 14 fields, in the order listed below,
  #   each with trailing whitespace removed. The amount field is returned
  #   as raw text; it is not decoded here.
  widths <- c(
    filerID = 9, amendIndicator = 1, reportType = 3, primaryGeneral = 1,
    microfilmLocation = 11, transactionType = 3, transactionMonth = 2,
    transactionDay = 2, transactionCentury = 2, transactionYear = 2,
    amount = 7, otherID = 9, candidateID = 9, fecRecord = 7
  )
  # Columns are contiguous, so each field ends at the running total of the
  # widths and starts right after the previous field's end.
  lastCol <- cumsum(widths)
  firstCol <- lastCol - widths + 1
  fields <- substring(line, firstCol, lastCol)
  # Same trailing-whitespace pattern as trim.trailing(), applied to all
  # fields in one vectorised call.
  sub("\\s+$", "", fields)
}


# overpunch tool
overpunch <- function(x) {
  # Convert signed-overpunch amount fields to signed numbers.
  #
  # The final character of each value carries both the last digit and the
  # sign:
  #   "[", "]", "{", "}"  -> last digit 0
  #   A-I and J-R         -> last digits 1-9
  # A value whose final character is "]", "}" or one of J-R is negative.
  #
  # Args:
  #   x: vector of amount fields; coerced with as.character().
  # Returns:
  #   A numeric vector as long as x. Anything that is still non-numeric once
  #   the final character has been decoded is returned as NA.
  amount <- as.character(x)

  # Work out the sign first, while the marker character is still present.
  negative <- grepl("\\]$", amount) | grepl("[JKLMNOPQR}]$", amount)

  # Pattern for the final character -> digit it encodes.
  trailingDigit <- c(
    "\\[$"   = "0",
    "\\]$"   = "0",
    "[{}]$"  = "0",
    "[AJ]$"  = "1",
    "[BK]$"  = "2",
    "[CL]$"  = "3",
    "[DM]$"  = "4",
    "[EN]$"  = "5",
    "[FO]$"  = "6",
    "[GP]$"  = "7",
    "[HQ]$"  = "8",
    "[IR]$"  = "9"
  )
  for (pattern in names(trailingDigit)) {
    amount <- sub(pattern, trailingDigit[[pattern]], amount)
  }

  # No leading-zero stripping: as.numeric() copes with leading zeroes, and
  # stripping them used to collapse an all-zero field ("0000000") to "",
  # which then parsed as NA rather than 0.
  ifelse(negative, -1, 1) * as.numeric(amount)
}

# function using parsing tools to make data frames
# 'expsn' is an unevaluated expression for each parsing tool
# some raw data records are not the length stated in data docs
mkDataFrame <- function(data, lineLength, columnNames, expsn) {
  # Build a data frame by running a per-record parser over every record that
  # has the expected length.
  #
  # Args:
  #   data:        character vector, one raw record per element.
  #   lineLength:  only records with exactly this many characters are kept.
  #   columnNames: column names for the result; the parser must return
  #                length(columnNames) values per record.
  #   expsn:       unevaluated expression, evaluated once per kept record in
  #                this function's frame. It may refer to the locals
  #                `properData` (the kept records) and `i` (the current
  #                index), e.g. expression(cmMaster(properData[i])).
  # Returns:
  #   A data frame with one row per kept record (zero rows if none qualify).
  #
  # NOTE: `properData` and `i` are part of the contract with `expsn`;
  # renaming them breaks every caller.

  # which() drops records whose length is NA (nchar(allowNA = TRUE) yields NA
  # for strings it cannot measure); a bare logical index would insert NA
  # records for them instead.
  properData <- data[which(nchar(data, allowNA = TRUE) == lineLength)]
  nRecords <- length(properData)
  finalMatrix <- matrix(nrow = nRecords, ncol = length(columnNames))
  # seq_len() makes the loop a no-op when nothing qualifies; 1:nRecords would
  # iterate over c(1, 0) and fail on the empty matrix.
  for (i in seq_len(nRecords)) {
    finalMatrix[i, ] <- eval(expsn)
  }
  finalDF <- as.data.frame(finalMatrix)
  names(finalDF) <- columnNames
  finalDF
}

# Now use parsing tools to read data into dataframes    
    
# Committee Master File
# readLines() returns one string per physical line. read.delim(sep = "\n")
# keeps its default quote = "\"", so a stray double quote inside a record
# would start a quoted field and swallow the lines that follow it.
cmteeMasterRaw <- readLines("~/Projects/campaign_finance/data/committeeMaster_2011_2012.dta")
cmteeMasterNames <- c(
  "cmID", "cmNAME", "treasurer", "streetOne", "streetTwo", "cityTown", "state", "zip",
  "cmDESIG", "cmTYPE", "cmPARTY", "fileFreq", "groupCategory", "orgName", "candidateID"
)
# Only records that are exactly 284 characters long are parsed.
cmteeMaster <- mkDataFrame(cmteeMasterRaw, 284, cmteeMasterNames, expression(cmMaster(properData[i])))
  
# Candidate Master File
# readLines() returns one string per physical line. read.delim(sep = "\n")
# keeps its default quote = "\"", so a stray double quote inside a record
# would start a quoted field and swallow the lines that follow it.
candMasterRaw <- readLines("~/Projects/campaign_finance/data/candidateMaster_2011_2012.dta")
candMasterNames <- c(
  "cndID", "cndName", "partyDesig1", "filler1", "seatStatus", "filler2", "candidateStatus",
  "streetOne", "streetTwo", "cityTown", "state", "zip", "principalCommID", "electionYear",
  "currentDistrict"
)
# Only records that are exactly 165 characters long are parsed.
# NOTE: this assignment replaces the parser function candMaster() with the
# resulting data frame, so this statement cannot be re-run without first
# re-defining the function.
candMaster <- mkDataFrame(candMasterRaw, 165, candMasterNames, expression(candMaster(properData[i])))
  
# Individual Contributions
# readLines() returns one string per physical line. read.delim(sep = "\n")
# keeps its default quote = "\"", so a stray double quote inside a record
# (contributor names, occupations) would start a quoted field and swallow
# the lines that follow it.
individualRaw <- readLines("~/Projects/campaign_finance/data/individualContributions_2011_2012.dta")
individualNames <- c(
  "filerID", "amendIndicator", "reportType", "primaryGeneral", "microfilmLocation",
  "transactionType", "contributorName", "cityTown", "state", "zip", "occupation", "month",
  "transactionDay", "transactionCentury", "transactionYear", "amount", "otherID", "fecRecord"
)
# Only records that are exactly 153 characters long are parsed.
individuals <- mkDataFrame(individualRaw, 153, individualNames, expression(indAndComContribution(properData[i])))
# The amount column is stored in signed-overpunch form; decode it to numbers.
individuals$amount <- overpunch(individuals$amount)
  
# Contributions from Committees
# readLines() returns one string per physical line. read.delim(sep = "\n")
# keeps its default quote = "\"", so a stray double quote inside a record
# would start a quoted field and swallow the lines that follow it.
candFromCommitteesRaw <- readLines("~/Projects/campaign_finance/data/candidatesFromCommittees_2011_2012.dta")
candFromCommitteesNames <- c(
  "filerID", "amendIndicator", "reportType", "primaryGeneral", "microfilmLocation",
  "transactionType", "transactionMonth", "transactionDay", "transactionCentury",
  "transactionYear", "amount", "otherID", "candidateID", "fecRecord"
)
# Only records that are exactly 68 characters long are parsed.
candFromCommittees <- mkDataFrame(candFromCommitteesRaw, 68, candFromCommitteesNames, expression(candComContibution(properData[i])))
# The amount column is stored in signed-overpunch form; decode it to numbers.
candFromCommittees$amount <- overpunch(candFromCommittees$amount)

# Transactions from one committee to another
# readLines() returns one string per physical line. read.delim(sep = "\n")
# keeps its default quote = "\"", so a stray double quote inside a record
# would start a quoted field and swallow the lines that follow it.
# NOTE(review): the file name below is spelled "comittee..." (one "m"), while
# the read.fwf version earlier in this file reads "committeeToCommittee_...".
# One of the two is probably a typo -- confirm against the file on disk.
commToCommRaw <- readLines("~/Projects/campaign_finance/data/comitteeToCommittee_2011_2012.dta")
commToCommNames <- c(
  "filerID", "amendIndicator", "reportType", "primaryGeneral", "microfilmLocation",
  "transactionType", "contributorName", "cityTown", "state", "zip", "occupation", "month",
  "transactionDay", "transactionCentury", "transactionYear", "amount", "otherID", "fecRecord"
)
# Only records that are exactly 153 characters long are parsed.
commToComm <- mkDataFrame(commToCommRaw, 153, commToCommNames, expression(indAndComContribution(properData[i])))
# The amount column is stored in signed-overpunch form; decode it to numbers.
commToComm$amount <- overpunch(commToComm$amount)