[R] merging two dataframes
    dividend 
    stapnes at gmail.com
       
    Wed Oct 26 12:59:02 CEST 2011
    
    
  
Hello.
Now i tried to do what you told me.
I used the str(fuction), and data$date1 and data3$date1 where both listed
"character". I changed "name" to character but it did not work either.
I also changed all variables to character, with no positive result.
str(data)
'data.frame':   14446 obs. of  15 variables:
 $ id     : chr  "1" "1" "1" "1" ...
 $ compid : chr  "2514" "2514" "2514" "2514" ...
 $ secid  : chr  "15856" "15856" "15856" "15856" ...
 $ name   : chr  "A-pressen" "A-pressen" "A-pressen" "A-pressen" ...
 $ period : chr  "1" "2" "3" "4" ...
 $ date   : chr  "17.05.1980" "17.05.1981" "17.05.1982" "17.05.1983" ...
 $ enddate: chr  "17.05.1981" "17.05.1982" "17.05.1983" "17.05.1984" ...
 $ div    : chr  NA NA NA NA ...
 $ ndivs  : chr  NA NA NA NA ...
 $ posdiv : chr  NA NA NA NA ...
 $ ddiv2  : chr  NA NA NA NA ...
 $ ddiv3  : chr  NA NA NA NA ...
 $ ddiv4  : chr  NA NA NA NA ...
 $ ddiv5  : chr  NA NA NA NA ...
 $ ddiv6  : chr  NA NA NA NA ...
str(data3)
'data.frame':   812354 obs. of  9 variables:
 $ date                  : chr  "02.01.1996" "03.01.1996" "04.01.1996"
"05.01.1996" ...
 $ Securityid            : chr  "6001" "6001" "6001" "6001" ...
 $ Symbol                : chr  "AAV" "AAV" "AAV" "AAV" ...
 $ name                  : chr  "Adresseavisen" "Adresseavisen"
"Adresseavisen" "Adresseavisen" ...
 $ Securitytype          : chr  "Ordinary Shares" "Ordinary Shares"
"Ordinary Shares" "Ordinary Shares" ...
 $ Unadjusted            : chr  "200" "200" "200" "200" ...
 $ Event.adjusted        : chr  "200" "200" "200" "200" ...
 $ Div.and.Event.adjusted: chr  "109,7595375" "109,7595375" "109,7595375"
"109,7595375" ...
 $ Sharesissued          : chr  "1901646" "1901646" "1901646" "1901646" ...
Here is some suitable data for "data"
> dput(data[1:20,])
structure(list(id = c("1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"), 
    compid = c("2514", "2514", "2514", "2514", "2514", "2514", 
    "2514", "2514", "2514", "2514", "2514", "2514", "2514", "2514", 
    "2514", "2514", "2514", "2514", "2514", "2514"), secid = c("15856", 
    "15856", "15856", "15856", "15856", "15856", "15856", "15856", 
    "15856", "15856", "15856", "15856", "15856", "15856", "15856", 
    "15856", "15856", "15856", "15856", "15856"), name = c("A-pressen", 
    "A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", 
    "A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", 
    "A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", 
    "A-pressen", "A-pressen", "A-pressen", "A-pressen"), period = c("1", 
    "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", 
    "13", "14", "15", "16", "17", "18", "19", "20"), date = c("17.05.1980", 
    "17.05.1981", "17.05.1982", "17.05.1983", "17.05.1984", "17.05.1985", 
    "17.05.1986", "17.05.1987", "17.05.1988", "17.05.1989", "17.05.1990", 
    "17.05.1991", "17.05.1992", "17.05.1993", "17.05.1994", "17.05.1995", 
    "17.05.1996", "17.05.1997", "17.05.1998", "17.05.1999"), 
    enddate = c("17.05.1981", "17.05.1982", "17.05.1983", "17.05.1984", 
    "17.05.1985", "17.05.1986", "17.05.1987", "17.05.1988", "17.05.1989", 
    "17.05.1990", "17.05.1991", "17.05.1992", "17.05.1993", "17.05.1994", 
    "17.05.1995", "17.05.1996", "17.05.1997", "17.05.1998", "17.05.1999", 
    "17.05.2000"), div = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    "0", "0", "0", "0", "0", "5", "0", "1.1", "1.2", "1", "0"
    ), ndivs = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, "0", "0", 
    "0", "0", "0", "1", "0", "1", "1", "1", "0"), posdiv = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "1", 
    NA, "1", "1", "1", NA), ddiv2 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, "0", "0", "0", "0", "0", NA, "0", "1", NA, 
    NA), ddiv3 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    "0", "0", "0", "0", "0", "0", "0", "0", "-1"), ddiv4 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "0", "0", "0", 
    "0", "0", "0", "0", "0"), ddiv5 = c(NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, "0", "0", "0", "0", "0", "0", 
    "0"), ddiv6 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, "0", "0", "0", "0", "0", "0")), .Names = c("id", 
"compid", "secid", "name", "period", "date", "enddate", "div", 
"ndivs", "posdiv", "ddiv2", "ddiv3", "ddiv4", "ddiv5", "ddiv6"
), row.names = c(NA, 20L), class = "data.frame")
Here is some suitable data for "data3":
> dput(data3[1:20,])
structure(list(date = c("02.01.1996", "03.01.1996", "04.01.1996", 
"05.01.1996", "08.01.1996", "09.01.1996", "10.01.1996", "11.01.1996", 
"12.01.1996", "15.01.1996", "16.01.1996", "17.01.1996", "18.01.1996", 
"19.01.1996", "22.01.1996", "23.01.1996", "24.01.1996", "25.01.1996", 
"26.01.1996", "29.01.1996"), Securityid = c("6001", "6001", "6001", 
"6001", "6001", "6001", "6001", "6001", "6001", "6001", "6001", 
"6001", "6001", "6001", "6001", "6001", "6001", "6001", "6001", 
"6001"), Symbol = c("AAV", "AAV", "AAV", "AAV", "AAV", "AAV", 
"AAV", "AAV", "AAV", "AAV", "AAV", "AAV", "AAV", "AAV", "AAV", 
"AAV", "AAV", "AAV", "AAV", "AAV"), name = c("Adresseavisen", 
"Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen", 
"Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen", 
"Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen", 
"Adresseavisen", "Adresseavisen", "Adresseavisen", "Adresseavisen", 
"Adresseavisen", "Adresseavisen", "Adresseavisen"), Securitytype =
c("Ordinary Shares", 
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", 
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", 
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", 
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", 
"Ordinary Shares", "Ordinary Shares", "Ordinary Shares"), Unadjusted =
c("200", 
"200", "200", "200", "200", "200", "200", "200", "200", "200", 
"200", "200", "200", "200", "200", "200", "200", "200", "200", 
"200"), Event.adjusted = c("200", "200", "200", "200", "200", 
"200", "200", "200", "200", "200", "200", "200", "200", "200", 
"200", "200", "200", "200", "200", "200"), Div.and.Event.adjusted =
c("109,7595375", 
"109,7595375", "109,7595375", "109,7595375", "109,7595375", "109,7595375", 
"109,7595375", "109,7595375", "109,7595375", "109,7595375", "109,7595375", 
"109,7595375", "109,7595375", "109,7595375", "109,7595375", "109,7595375", 
"109,7595375", "109,7595375", "109,7595375", "109,7595375"), 
    Sharesissued = c("1901646", "1901646", "1901646", "1901646", 
    "1901646", "1901646", "1901646", "1901646", "1901646", "1901646", 
    "1901646", "1901646", "1901646", "1901646", "1901646", "1901646", 
    "1901646", "1901646", "1901646", "1901646")), .Names = c("date", 
"Securityid", "Symbol", "name", "Securitytype", "Unadjusted", 
"Event.adjusted", "Div.and.Event.adjusted", "Sharesissued"), row.names =
c(NA, 
20L), class = "data.frame")
When I run the function:
 
data4<-merge(data,data3, by=c("name","date1"), all=T)
> dput(data4[1:20,])
structure(list(name = c("A-pressen", "A-pressen", "A-pressen", 
"A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", 
"A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", 
"A-pressen", "A-pressen", "A-pressen", "A-pressen", "A-pressen", 
"A-pressen", "A-pressen"), date = c("01.02.1999", "01.02.2000", 
"01.02.2001", "01.02.2002", "01.03.1999", "01.03.2000", "01.03.2001", 
"01.03.2002", "01.04.2003", "01.06.1999", "01.06.2001", "01.07.1999", 
"01.07.2002", "01.07.2003", "01.08.2000", "01.08.2001", "01.08.2002", 
"01.08.2003", "01.09.1999", "01.09.2000"), id = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_), 
    compid = c(NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_
    ), secid = c(NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_), period = c(NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_), enddate = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), div = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), ndivs = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), posdiv = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), ddiv2 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), ddiv3 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), ddiv4 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), ddiv5 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), ddiv6 = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_), Securityid = c("15856", 
    "15856", "15856", "15856", "15856", "15856", "15856", "15856", 
    "15856", "15856", "15856", "15856", "15856", "15856", "15856", 
    "15856", "15856", "15856", "15856", "15856"), Symbol = c("APR", 
    "APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR", 
    "APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR", "APR", 
    "APR"), Securitytype = c("Ordinary Shares", "Ordinary Shares", 
    "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", 
    "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", 
    "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", 
    "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", 
    "Ordinary Shares", "Ordinary Shares", "Ordinary Shares", 
    "Ordinary Shares", "Ordinary Shares", "Ordinary Shares"), 
    Unadjusted = c("120", "140", "160", "105", "110", "190", 
    "160", "112", "115", "120", "150", "127", "106,5", "154", 
    "155", "160", "111", "155", "127", "205"), Event.adjusted = c("120", 
    "140", "160", "105", "110", "190", "160", "112", "115", "120", 
    "150", "127", "106,5", "154", "155", "160", "111", "155", 
    "127", "205"), Div.and.Event.adjusted = c("111,4092308", 
    "129,9774359", "148,545641", "100,1538462", "102,1251282", 
    "176,3979487", "148,545641", "106,8307692", "109,6923077", 
    "111,4092308", "143,0769231", "117,9081026", "101,5846154", 
    "154", "143,9035897", "152,6153846", "105,8769231", "155", 
    "117,9081026", "190,3241026"), Sharesissued = c("8839643", 
    "8839643", "8854307", "8866191", "8839643", "8839643", "8854307", 
    "8867791", "8885537", "8839643", "8866191", "8839643", "8885537", 
    "8903842", "8854307", "8866191", "8885537", "8903842", "8839643", 
    "8854307")), .Names = c("name", "date", "id", "compid", "secid", 
"period", "enddate", "div", "ndivs", "posdiv", "ddiv2", "ddiv3", 
"ddiv4", "ddiv5", "ddiv6", "Securityid", "Symbol", "Securitytype", 
"Unadjusted", "Event.adjusted", "Div.and.Event.adjusted", "Sharesissued"
), row.names = c(NA, 20L), class = "data.frame")
If I want to just keep the observations from "data" (14446 observations)
merged with "data3". Is it correct to use:
data4<-merge(data,data3, by=c("name","date1"), all=T)
or should I use:
data4<-merge(data,data3, by=c("name","date1"), all.x=F)? 
(Then I get ca 14000 obs, but "NA" in all variables from "data3".
In advance, thank you.
--
View this message in context: http://r.789695.n4.nabble.com/merging-two-dataframes-tp3932869p3940157.html
Sent from the R help mailing list archive at Nabble.com.
    
    
More information about the R-help
mailing list