[R] exces return by mktcap decile for each year
    jim holtman 
    jholtman at gmail.com
       
    Wed Jul 11 18:20:49 CEST 2007
    
    
  
here is one way of doing it using 'ave':
> dat <- read.table(textConnection("        mc         yr    ret
+  32902.233 01/01/1995  0.426
+  15793.691 01/01/1995  0.024
+   2375.868 01/01/1995  0.660
+  54586.558 01/01/1996  0.497
+  10674.900 01/01/1996  0.405
+    859.656 01/01/1996 -0.033
+    770.963 01/01/1995 -1.248
+    423.480 01/01/1995  0.654
+   2135.504 01/01/1995  0.394
+    696.599 01/01/1995 -0.482
+   5115.476 01/01/1995  0.352
+    821.347 01/01/1995  0.869
+  43329.695 01/01/1995  0.495
+   7975.151 01/01/1995  0.112
+    396.450 01/01/1995  0.956
+    843.870 01/01/1995  0.172
+   2727.037 01/01/1995 -0.358
+    114.584 01/01/1995 -1.015
+   1347.327 01/01/1995 -0.083
+   4592.049 01/01/1995 -0.251
+    674.305 01/01/1995 -0.327
+  39424.887 01/01/1996  0.198
+   4447.383 01/01/1996 -0.045
+   1608.540 01/01/1996 -0.109
+    217.151 01/01/1996  0.539
+   1813.320 01/01/1996  0.754
+    145.170 01/01/1996  0.249
+   3176.298 01/01/1996 -0.202
+  14379.686 01/01/1996  0.013
+   3009.059 01/01/1996 -0.328
+   1781.406 01/01/1996 -0.158
+   2576.215 01/01/1996  0.514
+   1236.317 01/01/1996  0.346
+   3003.735 01/01/1996  0.151
+   1544.003 01/01/1996  0.482
+   7588.657 01/01/1996  0.306
+   1516.625 01/01/1996  0.183
+   1596.098 01/01/1996  0.674
+   2792.192 01/01/1996  0.528
+   1276.702 01/01/1996  0.010
+    875.716 01/01/1996  0.189
+   4858.450 01/01/1995  0.250
+   2033.623 01/01/1995 -0.582
+   2164.125 01/01/1995  0.631"), header=TRUE)
> # quantiles by year (need as grouping in next statement
> dat$qByYr <- ave(dat$mc, dat$yr, FUN=function(x){
+     cut(x, quantile(x, prob=seq(0, 1, .1)), include.lowest=TRUE)
+ })
> # compute the mean for year/quantile
> dat$dec.mean <- ave(dat$ret, dat$yr, dat$qByYr, FUN=mean)
> # mean adjusted return
> dat$mean.adjusted <- dat$ret - dat$dec.mean
> dat
          mc         yr    ret qByYr   dec.mean mean.adjusted
1  32902.233 01/01/1995  0.426    10  0.4605000  -0.034500000
2  15793.691 01/01/1995  0.024     9  0.0680000  -0.044000000
3   2375.868 01/01/1995  0.660     6  0.6455000   0.014500000
4  54586.558 01/01/1996  0.497    10  0.2360000   0.261000000
5  10674.900 01/01/1996  0.405     9  0.3555000   0.049500000
6    859.656 01/01/1996 -0.033     1  0.2516667  -0.284666667
7    770.963 01/01/1995 -1.248     3 -0.1895000  -1.058500000
8    423.480 01/01/1995  0.654     1  0.1983333   0.455666667
9   2135.504 01/01/1995  0.394     5 -0.0940000   0.488000000
10   696.599 01/01/1995 -0.482     2 -0.4045000  -0.077500000
11  5115.476 01/01/1995  0.352     8  0.3010000   0.051000000
12   821.347 01/01/1995  0.869     3 -0.1895000   1.058500000
13 43329.695 01/01/1995  0.495    10  0.4605000   0.034500000
14  7975.151 01/01/1995  0.112     9  0.0680000   0.044000000
15   396.450 01/01/1995  0.956     1  0.1983333   0.757666667
16   843.870 01/01/1995  0.172     4  0.0445000   0.127500000
17  2727.037 01/01/1995 -0.358     7 -0.3045000  -0.053500000
18   114.584 01/01/1995 -1.015     1  0.1983333  -1.213333333
19  1347.327 01/01/1995 -0.083     4  0.0445000  -0.127500000
20  4592.049 01/01/1995 -0.251     7 -0.3045000   0.053500000
21   674.305 01/01/1995 -0.327     2 -0.4045000   0.077500000
22 39424.887 01/01/1996  0.198    10  0.2360000  -0.038000000
23  4447.383 01/01/1996 -0.045     8 -0.1235000   0.078500000
24  1608.540 01/01/1996 -0.109     5  0.1623333  -0.271333333
25   217.151 01/01/1996  0.539     1  0.2516667   0.287333333
26  1813.320 01/01/1996  0.754     5  0.1623333   0.591666667
27   145.170 01/01/1996  0.249     1  0.2516667  -0.002666667
28  3176.298 01/01/1996 -0.202     8 -0.1235000  -0.078500000
29 14379.686 01/01/1996  0.013    10  0.2360000  -0.223000000
30  3009.059 01/01/1996 -0.328     7 -0.0885000  -0.239500000
31  1781.406 01/01/1996 -0.158     5  0.1623333  -0.320333333
32  2576.215 01/01/1996  0.514     6  0.5210000  -0.007000000
33  1236.317 01/01/1996  0.346     2  0.2675000   0.078500000
34  3003.735 01/01/1996  0.151     7 -0.0885000   0.239500000
35  1544.003 01/01/1996  0.482     4  0.5780000  -0.096000000
36  7588.657 01/01/1996  0.306     9  0.3555000  -0.049500000
37  1516.625 01/01/1996  0.183     3  0.0965000   0.086500000
38  1596.098 01/01/1996  0.674     4  0.5780000   0.096000000
39  2792.192 01/01/1996  0.528     6  0.5210000   0.007000000
40  1276.702 01/01/1996  0.010     3  0.0965000  -0.086500000
41   875.716 01/01/1996  0.189     2  0.2675000  -0.078500000
42  4858.450 01/01/1995  0.250     8  0.3010000  -0.051000000
43  2033.623 01/01/1995 -0.582     5 -0.0940000  -0.488000000
44  2164.125 01/01/1995  0.631     6  0.6455000  -0.014500000
>
>
>
>
On 7/11/07, Frank Hansen <hansenfrank at yahoo.com> wrote:
> Hi Jim,
>
> Thanks for getting back on this. I did not see your
> email on the help list. I or you can post this
> solution
>
> You are right I mis-stated about mc. mc is real, it is
> yr that is a factor.
>
> Here is a solution, which works, but it is clunky. I
> thought there might be a better/more R-like less
> for-loop way to do this.
>
> dat <- read.table("test.data", header=TRUE)
>
> if( "new.data" %in% ls()) {
>  rm( new.data)
> }
> yrs <- as.character(unique( dat$yr))
> for (y in yrs) {
>  bool <- as.character(dat$yr) == y
>  tmp.dat <-  dat[ bool,]
>  breaks <- quantile(tmp.dat$mc,
> probs=seq(0,1,0.1),na.rm=TRUE)
>  breaks[1] <- breaks[1]*.9
> # breaks >0, else 1st value not in (a,b] interval
>  cuts <- cut(tmp.dat$mc, breaks)
>  means.by.dec <- by( tmp.dat$ret, cuts, mean)
>  for ( i in seq(1, dim( tmp.dat)[1])) {
>    tmp.dat[i,"dec.mean"] <- means.by.dec[ cuts[i]]
>  }
>  if(! "new.data" %in% ls()) {
>    new.data <- tmp.dat
>  }  else {
>    new.data <- rbind( new.data, tmp.dat)
>  }
> }
>
> Here is some test input data in the file test.data
> ----- test.data -----
>         mc         yr    ret
>  32902.233 01/01/1995  0.426
>  15793.691 01/01/1995  0.024
>   2375.868 01/01/1995  0.660
>  54586.558 01/01/1996  0.497
>  10674.900 01/01/1996  0.405
>    859.656 01/01/1996 -0.033
>    770.963 01/01/1995 -1.248
>    423.480 01/01/1995  0.654
>   2135.504 01/01/1995  0.394
>    696.599 01/01/1995 -0.482
>   5115.476 01/01/1995  0.352
>    821.347 01/01/1995  0.869
>  43329.695 01/01/1995  0.495
>   7975.151 01/01/1995  0.112
>    396.450 01/01/1995  0.956
>    843.870 01/01/1995  0.172
>   2727.037 01/01/1995 -0.358
>    114.584 01/01/1995 -1.015
>   1347.327 01/01/1995 -0.083
>   4592.049 01/01/1995 -0.251
>    674.305 01/01/1995 -0.327
>  39424.887 01/01/1996  0.198
>   4447.383 01/01/1996 -0.045
>   1608.540 01/01/1996 -0.109
>    217.151 01/01/1996  0.539
>   1813.320 01/01/1996  0.754
>    145.170 01/01/1996  0.249
>   3176.298 01/01/1996 -0.202
>  14379.686 01/01/1996  0.013
>   3009.059 01/01/1996 -0.328
>   1781.406 01/01/1996 -0.158
>   2576.215 01/01/1996  0.514
>   1236.317 01/01/1996  0.346
>   3003.735 01/01/1996  0.151
>   1544.003 01/01/1996  0.482
>   7588.657 01/01/1996  0.306
>   1516.625 01/01/1996  0.183
>   1596.098 01/01/1996  0.674
>   2792.192 01/01/1996  0.528
>   1276.702 01/01/1996  0.010
>    875.716 01/01/1996  0.189
>   4858.450 01/01/1995  0.250
>   2033.623 01/01/1995 -0.582
>   2164.125 01/01/1995  0.631
>
> Here is the output which looks ok
>
> > new.data
>          mc         yr    ret   dec.mean
> 1  32902.233 01/01/1995  0.426  0.4605000
> 2   4858.450 01/01/1995  0.250  0.3010000
> 3   2033.623 01/01/1995 -0.582 -0.0940000
> 4   2164.125 01/01/1995  0.631  0.6455000
> 5  15793.691 01/01/1995  0.024  0.0680000
> 6   2375.868 01/01/1995  0.660  0.6455000
> 7    770.963 01/01/1995 -1.248 -0.1895000
> 8    423.480 01/01/1995  0.654  0.1983333
> 9   2135.504 01/01/1995  0.394 -0.0940000
> 10   696.599 01/01/1995 -0.482 -0.4045000
> 11  5115.476 01/01/1995  0.352  0.3010000
> 12   821.347 01/01/1995  0.869 -0.1895000
> 13 43329.695 01/01/1995  0.495  0.4605000
> 14  7975.151 01/01/1995  0.112  0.0680000
> 15   396.450 01/01/1995  0.956  0.1983333
> 16   843.870 01/01/1995  0.172  0.0445000
> 17  2727.037 01/01/1995 -0.358 -0.3045000
> 18   114.584 01/01/1995 -1.015  0.1983333
> 19  1347.327 01/01/1995 -0.083  0.0445000
> 20  4592.049 01/01/1995 -0.251 -0.3045000
> 21   674.305 01/01/1995 -0.327 -0.4045000
> 22 39424.887 01/01/1996  0.198  0.2360000
> 23  4447.383 01/01/1996 -0.045 -0.1235000
> 24  1608.540 01/01/1996 -0.109  0.1623333
> 25   217.151 01/01/1996  0.539  0.2516667
> 26  1813.320 01/01/1996  0.754  0.1623333
> 27   145.170 01/01/1996  0.249  0.2516667
> 28  3176.298 01/01/1996 -0.202 -0.1235000
> 29 14379.686 01/01/1996  0.013  0.2360000
> 30  3009.059 01/01/1996 -0.328 -0.0885000
> 31  1781.406 01/01/1996 -0.158  0.1623333
> 32  2576.215 01/01/1996  0.514  0.5210000
> 33  1236.317 01/01/1996  0.346  0.2675000
> 34  3003.735 01/01/1996  0.151 -0.0885000
> 35  1544.003 01/01/1996  0.482  0.5780000
> 36  7588.657 01/01/1996  0.306  0.3555000
> 37  1516.625 01/01/1996  0.183  0.0965000
> 38 54586.558 01/01/1996  0.497  0.2360000
> 39 10674.900 01/01/1996  0.405  0.3555000
> 40   859.656 01/01/1996 -0.033  0.2516667
> 41  1596.098 01/01/1996  0.674  0.5780000
> 42  2792.192 01/01/1996  0.528  0.5210000
> 43  1276.702 01/01/1996  0.010  0.0965000
> 44   875.716 01/01/1996  0.189  0.2675000
> >
>
> notice that records 1 and 13 fall into the same mc
> decile for the year 1995, and their ret mean is .4605
> and so forth for the other mc deciles in both years.
>
> I'd be interested to know if there is a cleaner way to
> do this. Thanks.
>
> Frank
>
>
>
>
> ____________________________________________________________________________________
> TV dinner still cooling?
> Check out "Tonight's Picks" on Yahoo! TV.
> http://tv.yahoo.com/
>
-- 
Jim Holtman
Cincinnati, OH
+1 513 646 9390
What is the problem you are trying to solve?
    
    
More information about the R-help
mailing list