[R] Can I index a dataframe with a reference from/to a second dataframe?
Thompson, David (MNR)
David.John.Thompson at ontario.ca
Fri Feb 8 21:17:24 CET 2008
Hello,
I am unable to figure out how to code a new column in a data frame based
on an existing column that matches a column in a reference data frame,
in a relational-db fashion. I would like this to maintain a minimum set
of reference tables that may be reused over several similar datasets.
Specifically, I have two data frames as listed below, 'Bos' and 'tree.'
For each case in 'Bos' I want to look up the matching 'spp' code in
'tree' and insert the associated 'type' code into a new 'type' column in
'Bos' as in:
# add type and keep factors from reference list
Bos$type <- tree[as.character(tree$spp)==as.character(Bos$spp),
'type']
Bos$keep <- tree[tree$spp==Bos$spp, 'keep']
And I know I have seen this before but, can't remember where. I have
filtered through many of the threads referencing
'as.numeric(levels(Bos$spp))[as.integer(Bos$spp)]' handling of factors,
any kind of sql reference I could think of, anything 'ODBC'-ish but, I
think this may be an indexing issue. I am trying to compare elements of
two different sized (list) objects (different type objects even?) and
not cycling through Bos$spp to find matches in tree$spp as expected, . .
. , has this an apply solution?
My data frames:
> dput(head(Bos, 30))
structure(list(oplt = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), rplt = c(3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), tree = c(32, 101, 102, 103, 104, 105,
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118,
119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129), spp =
structure(c(10L,
10L, 12L, 14L, 10L, 10L, 14L, 10L, 15L, 10L, 9L, 3L, 10L, 10L,
12L, 10L, 13L, 12L, 12L, 10L, 12L, 10L, 10L, 8L, 5L, 2L, 10L,
2L, 12L, 10L), .Label = c("AW", "BD", "BE", "BF", "BW", "BY",
"CB", "HE", "IW", "MH", "MR", "OR", "PO", "SW", "SA"), class =
"factor"),
dbh = c(12.1, 10.1, 63.3, 9, 7.1, 12.1, 13.9, 6.3, 6.1, 7.9,
5.1, 9.8, 7.1, 18.7, 44.2, 28.7, 19.8, 28, 46.6, 9, 61.6,
3.3, 9.1, 8.7, 5.8, 3.1, 11.1, 12.3, 28, 8.6), cc = structure(c(2L,
2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L,
2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L), .Label =
c("dom",
"sup"), class = "factor"), ba = c(114.990145103020,
80.1184666481737,
3147.0040469356, 63.6172512351933, 39.5919214168654,
114.990145103020,
151.746779150021, 31.1724531052447, 29.2246656600190,
49.0166993776348,
20.4282062299676, 75.429639612691, 39.5919214168654,
274.645883758454,
1534.38526793979, 646.924613208844, 307.907495978336,
615.7521601036,
1705.53923570736, 63.6172512351933, 2980.24045490142,
8.55298599939821,
65.0388219109427, 59.4467869875528, 26.4207942166902,
7.54767635024948,
96.7689077121996, 118.8228881404, 615.7521601036, 58.0880481648753
)), .Names = c("oplt", "rplt", "tree", "spp", "dbh", "cc",
"ba"), row.names = c("1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19",
"20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30"
), class = "data.frame")
> dput(tree)
structure(list(spp = structure(1:33, .Label = c("AB", "AS", "AW",
"BD", "BE", "BF", "BW", "BY", "CA", "CB", "CC", "CE", "DL", "DP",
"EA", "HE", "IW", "LC", "MH", "MM", "MR", "OR", "PO", "PR", "PV",
"PW", "RS", "SA", "SB", "SM", "SW", "VC", "VL"), class = "factor"),
spp.orig = structure(c(1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 12L,
10L, 13L, 11L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 31L, 29L, 30L, 32L, 33L, 34L
), .Label = c("AB", "AMEHUM", "AMESPP", "AW", "BD", "BE",
"BF", "BW", "BY", "CB", "CE", "CORALT", "CORCOR", "DIELON",
"DIRPAL", "EA", "HE", "IW", "LONCAN", "MH", "MM", "MR", "OR",
"PO", "PR", "PRUVIR", "PW", "RIBSPP", "SB", "SM", "SORAME",
"SW", "VIBACE", "VIBALN"), class = "factor"), OPL = structure(c(15L,
7L, 14L, 29L, 13L, 2L, 9L, 8L, 10L, 23L, 1L, 28L, 11L, 12L,
31L, 30L, 17L, 16L, 5L, 6L, 4L, 25L, 22L, 20L, 24L, 21L,
26L, 27L, 19L, 3L, 18L, 32L, 33L), .Label = c("HCORCAN",
"WABIBAL", "WACEPEN", "WACERUB", "WACESAS", "WACESPI", "WAMESPP",
"WBETALL", "WBETPAP", "WCORALT", "WDIELON", "WDIRPAL", "WFAGGRA",
"WFRAAME", "WFRANIG", "WLONCAN", "WOSTVIR", "WPICGLA", "WPICMAR",
"WPINRES", "WPINSTR", "WPOPTRE", "WPRUSER", "WPRUVIV", "WQUERUB",
"WRIBAME", "WSORAME", "WTHUOCC", "WTILAME", "WTSUCAN", "WULMAME",
"WVIBACE", "WVIBLAO"), class = "factor"), form = c(1.1, 1.2,
1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.2, 1.1, 2, 1.1, 1.2, 1.2,
1.1, 1.1, 1.1, 1.2, 1.1, 1.2, 1.1, 1.1, 1.1, 1.1, 1.2, 1.1,
1.2, 1.1, 1.1, 1.2, 1.1, 1.2, 1.2), Type = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("H", "W"), class = "factor"), keep =
structure(c(1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
1L, 1L), .Label = c("no", "yes"), class = "factor"), Sname =
structure(c(15L,
6L, 14L, 29L, 13L, 1L, 8L, 7L, 9L, 23L, 10L, 28L, 11L, 12L,
31L, 30L, 17L, 16L, 4L, 5L, 3L, 25L, 22L, 20L, 24L, 21L,
26L, 27L, 19L, 2L, 18L, 32L, 33L), .Label = c("Abies balsamea",
"Acer pensylvanicum", "Acer rubrum", "Acer saccharum", "Acer
spicatum",
"Amelanchier", "Betula alleghaniensis", "Betula papyrifera",
"Cornus alternifolia", "Cornus canadensis", "Diervilla lonicera",
"Dirca palustris", "Fagus grandifolia", "Fraxinus americana",
"Fraxinus nigra", "Lonicera canadensis", "Ostrya virginiana",
"Picea glauca", "Picea mariana", "Pinus resinosa", "Pinus strobus",
"Populus tremuloides", "Prunus serotina", "Prunus virginiana",
"Quercus rubra", "Ribes ", "Sorbus americana", "Thuja occidentalis",
"Tilia americana", "Tsuga canadensis", "Ulmus americana",
"Viburnum acerifolium", "Viburnum lantanoides"), class = "factor"),
Cname = structure(c(7L, 27L, 30L, 2L, 3L, 6L, 31L, 33L, 1L,
8L, 10L, 15L, 11L, 21L, 4L, 14L, 20L, 17L, 18L, 23L, 25L,
24L, 29L, 26L, 12L, 16L, 13L, 5L, 9L, 28L, 32L, 22L, 19L), .Label =
c("Alternate-leaved Dogwood",
"American Basswood", "American Beech", "American Elm", "American
Mountain-ash",
"Balsam Fir", "Black Ash", "Black Cherry", "Black Spruce",
"Bunchberry", "Bush Honeysuckle", "Choke Cherry", "Currant",
"Eastern Hemlock", "Eastern White Cedar", "Eastern White Pine",
"Fly Honeysuckle", "Hard Maple", "Hobblebush", "Ironwood",
"Leatherwood", "Maple-leaved Viburnum", "Mountain Maple",
"Northern Red Oak", "Red Maple", "Red Pine", "Serviceberry",
"Striped Maple", "Trembling Aspen", "White Ash", "White Birch",
"White Spruce", "Yellow Birch"), class = "factor")), .Names =
c("spp",
"spp.orig", "OPL", "form", "Type", "keep", "Sname", "Cname"), row.names
= c("1",
"3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14",
"15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
"26", "27", "28", "29", "30", "31", "32", "33", "34"), class =
"data.frame")
Thanks, DaveT.
*************************************
Silviculture Data Analyst
Ontario Forest Research Institute
Ontario Ministry of Natural Resources
david.john.thompson at ontario.ca
http://ofri.mnr.gov.on.ca
More information about the R-help
mailing list