[R] Help avoiding setting column type two times
    Martin Møller Skarbiniks Pedersen 
    traxplayer at gmail.com
       
    Thu Nov 30 15:27:55 CET 2017
    
    
  
Hi,
  I think and hope this is a good place to ask for a code review for an R
beginner?
  I have made an R script which generates a dataset based on the 2009 Danish
referendum, and it does work.
  But I think the code could be better, and I would appreciate any comments on how the
code can be improved.
  At least I would like to know how I can avoid converting several of the
columns to factors at the end of the code.
Description of the code:
  It reads a lot of xml-files from ../raw/ and saves a data.frame with
information
from these xml-files.
  In the ../raw/ directory I have placed the xml-files which I got from
"Statistics Denmark".
  I have also put these xml-files on my website, and they can be downloaded
freely from http://20dage.dk/R/referendum-2009/raw.tar.gz
  The code is below but I have also put the code at this place:
http://20dage.dk/R/referendum-2009/convert_from_xml.R
Best Regards
Martin M. S. Pedersen
-------
library(xml2)
# Parse one result XML file into a data.frame with correctly typed columns.
#
# Arguments:
#   url  Path or URL of the XML file; passed unchanged to read_xml().
#
# Returns:
#   A data.frame whose Parti* / StemmerAntal columns hold one value per
#   ".//Parti" node found in the document. The remaining columns are single
#   values taken from the first matching node and are recycled by
#   data.frame() to the same number of rows.
#   Text columns are character; StemmerAntal, Stemmeberettigede,
#   IAltGyldigeStemmer, BlankeStemmer and AndreUgyldigeStemmer are integer;
#   DeltagelsePct is double.
#
# The columns are handed to data.frame() directly instead of going through
# cbind(): cbind() builds a character matrix first, which silently turns
# every numeric column into text and forces the caller to convert it back.
convert_one_file <- function(url) {
    doc <- read_xml(url)
    sted <- xml_find_first(doc, ".//Sted")
    parti <- xml_find_all(doc, ".//Parti")

    # Integer content of the first node matching `xpath`.
    first_integer <- function(xpath) {
        xml_integer(xml_find_first(doc, xpath))
    }

    data.frame(
        StedType = xml_attr(sted, "Type"),
        StedTekst = xml_text(sted),
        PartiId = xml_attr(parti, "Id"),
        PartiBogstav = xml_attr(parti, "Bogstav"),
        PartiNavn = xml_attr(parti, "Navn"),
        # xml_attr() always returns character, so convert the count here.
        StemmerAntal = as.integer(xml_attr(parti, "StemmerAntal")),
        Stemmeberettigede = first_integer(".//Stemmeberettigede"),
        DeltagelsePct = xml_double(xml_find_first(doc, ".//DeltagelsePct")),
        IAltGyldigeStemmer = first_integer(".//IAltGyldigeStemmer"),
        BlankeStemmer = first_integer(".//BlankeStemmer"),
        AndreUgyldigeStemmer = first_integer(".//AndreUgyldigeStemmer"),
        stringsAsFactors = FALSE
    )
}
# Read every "fintal_*" file below raw_path, combine the per-file tables into
# one data.frame, give each column its final type and save the result.
raw_path <- "../raw"
filenames <- dir(path = raw_path, pattern = "fintal_.*", full.names = TRUE)

# Single place that describes the final type of every column.
column_types <- c(StedType = "factor",
                  StedTekst = "character",
                  PartiId = "factor",
                  PartiBogstav = "factor",
                  PartiNavn = "factor",
                  StemmerAntal = "integer",
                  Stemmeberettigede = "integer",
                  DeltagelsePct = "numeric",
                  IAltGyldigeStemmer = "integer",
                  BlankeStemmer = "integer",
                  AndreUgyldigeStemmer = "integer")
converters <- list(factor = as.factor,
                   character = as.character,
                   integer = as.integer,
                   numeric = as.numeric)

# Parse each file on its own. A file that cannot be parsed, or that does not
# yield the expected columns, is reported and skipped (NULL) so that one bad
# file does not abort the whole run. lapply() over filenames also copes with
# an empty file list, unlike 1:length(filenames).
parsed <- lapply(filenames, function(filename) {
    tryCatch({
        one_file <- convert_one_file(filename)
        if (!identical(names(one_file), names(column_types))) {
            stop("unexpected columns: ",
                 paste(names(one_file), collapse = ", "), call. = FALSE)
        }
        one_file
    }, error = function(e) {
        cat(paste0(filename, " failed:\n", conditionMessage(e), "\n"))
        NULL
    })
})
parsed <- Filter(Negate(is.null), parsed)

# Bind all tables in one call instead of growing `result` inside a loop.
# With nothing to bind, fall back to a zero-row table with the same columns.
if (length(parsed) > 0) {
    result <- do.call(rbind, parsed)
} else {
    result <- as.data.frame(lapply(column_types, function(type) character()),
                            stringsAsFactors = FALSE)
}

# Factors are created once, after combining, so that their levels cover all
# files. The numeric conversions leave already-typed columns unchanged and
# repair them if convert_one_file() delivered text.
for (column in names(column_types)) {
    convert <- converters[[column_types[[column]]]]
    result[[column]] <- convert(result[[column]])
}

str(result)
save(result, file = "folkeafstemning2009.Rdata")
	[[alternative HTML version deleted]]
    
    
More information about the R-help
mailing list