[R] Distance calculation

arun smartpink111 at yahoo.com
Tue Mar 26 06:53:34 CET 2013


Hi Elisa,

Based on the formula you gave, this is what I got:

# Build a pairwise "distance" matrix between the rows (stations) of rate.csv.
# Column layout, as shown by the dat1[c(1,4),] printout further down in this
# message: St., Peak1.t.-Peak4.t., Npeak1.t.-Npeak4.t., Peak1.v.-Peak4.v.,
# Npeak1.v.-Npeak4.v.  (sep="," is already the read.csv default.)
dat1<- read.csv("rate.csv",sep=",")

# res: one column per station i. For each i the data are re-stacked with row i
# first and the remaining rows after it, split into one-row data frames, and
# each of those rows x is paired with row i (x1 = row i on top of x).
# Consequently column i of res is ordered: i, then all other rows in their
# original order (the first entry is row i compared with itself).
# The dots are stripped from the column names so that e.g. Peak1.t. can be
# referred to as Peak1t inside with().
res<- do.call(cbind,lapply(seq_len(nrow(dat1)),function(i) do.call(rbind,lapply(split(rbind(dat1[i,],dat1[-i,]),1:nrow(rbind(dat1[i,],dat1[-i,]))), function(x) {x1<-rbind(dat1[i,],x);colnames(x1)<-gsub("[.]","",colnames(x1));
# indx (assigned inside this first condition and reused by every later branch)
# counts, for each of columns 2:5 (Peak1t..Peak4t per the printout), how many
# of the two rows hold 0. The case comments imply that a time of 0 means
# "this peak is absent" -- NOTE(review): confirm against the data.
# In every branch below the pair is sorted ascending on the time column(s) of
# the peak(s) that only one row has, so the row holding 0 there becomes row 1
# (assuming real peak times are positive -- TODO confirm) and the row with the
# extra peak(s) becomes row 2. Peaks both rows share are compared like-for-like;
# each extra peak of row 2 is compared with Peak1 of row 1, whose time is
# shifted by +12 (the 12 comes from the formula supplied by the poster;
# presumably a period length -- confirm). The same terms are then repeated for
# the Npeak columns.
 if({indx<- colSums(x1[,2:5]==0);indx[1]==0 & indx[2]==0 & indx[3]==1 & indx[4]==2}) #3 peaks 2 peak comparison
 {x2<- x1[order(x1$Peak3t,x1$Npeak3t),];
 with(x2,{abs((Peak1v[1]-Peak1v[2])*(Peak1t[1]-Peak1t[2]))+ abs((Peak2v[1]-Peak2v[2])*(Peak2t[1]-Peak2t[2]))+abs((Peak1v[1]-Peak3v[2])*((Peak1t[1]+12)-Peak3t[2]))+
abs((Npeak1v[1]-Npeak1v[2])*(Npeak1t[1]-Npeak1t[2]))+ abs((Npeak2v[1]-Npeak2v[2])*(Npeak2t[1]-Npeak2t[2]))+abs((Npeak1v[1]-Npeak3v[2])*((Npeak1t[1]+12)-Npeak3t[2]))})
}
# Peak3t and Peak4t are each 0 in exactly one row: Peak3 and Peak4 of row 2
# are both compared with the shifted Peak1 of row 1.
else if({indx[1]==0 & indx[2]==0 & indx[3]==1 & indx[4]==1}) #4 peaks 2 peak comparison. Peak3 Peak4 value compared with Peak1
{x4<- x1[order(x1$Peak3t,x1$Peak4t,x1$Npeak3t,x1$Npeak4t),];
with(x4,{abs((Peak1v[1]-Peak1v[2])*(Peak1t[1]-Peak1t[2]))+ abs((Peak2v[1]-Peak2v[2])*(Peak2t[1]-Peak2t[2]))+ abs((Peak1v[1]-Peak3v[2])*((Peak1t[1]+12)-Peak3t[2]))+abs((Peak1v[1]-Peak4v[2])*((Peak1t[1]+12)-Peak4t[2]))+ abs((Npeak1v[1]-Npeak1v[2])*(Npeak1t[1]-Npeak1t[2]))+ abs((Npeak2v[1]-Npeak2v[2])*(Npeak2t[1]-Npeak2t[2]))+ abs((Npeak1v[1]-Npeak3v[2])*((Npeak1t[1]+12)-Npeak3t[2]))+abs((Npeak1v[1]-Npeak4v[2])*((Npeak1t[1]+12)-Npeak4t[2])) })
}
# Peak2t is 0 in exactly one row, Peak3t/Peak4t are 0 in both: only Peak2 of
# row 2 is compared with the shifted Peak1 of row 1.
else if({indx[1]==0 & indx[2]==1 & indx[3]==2 & indx[4]==2}) #2 peaks 1 peak comparison
{x5<- x1[order(x1$Peak2t,x1$Npeak2t),];
with(x5,{abs((Peak1v[1]-Peak1v[2])*(Peak1t[1]-Peak1t[2]))+abs((Peak1v[1]-Peak2v[2])*((Peak1t[1]+12)-Peak2t[2]))+ abs((Npeak1v[1]-Npeak1v[2])*(Npeak1t[1]-Npeak1t[2]))+abs((Npeak1v[1]-Npeak2v[2])*((Npeak1t[1]+12)-Npeak2t[2])) })

}

# Peak2t and Peak3t are each 0 in exactly one row, Peak4t is 0 in both.
else if({indx[1]==0 & indx[2]==1 & indx[3]==1 & indx[4]==2 }) #3 peak 1 peak comparison
{x6<- x1[order(x1$Peak2t,x1$Peak3t,x1$Npeak2t,x1$Npeak3t),];
with(x6,{abs((Peak1v[1]-Peak1v[2])*(Peak1t[1]-Peak1t[2]))+ abs((Peak1v[1]-Peak2v[2])*((Peak1t[1]+12)-Peak2t[2]))+ abs((Peak1v[1]-Peak3v[2])*((Peak1t[1]+12)-Peak3t[2])) + abs((Npeak1v[1]-Npeak1v[2])*(Npeak1t[1]-Npeak1t[2]))+ abs((Npeak1v[1]-Npeak2v[2])*((Npeak1t[1]+12)-Npeak2t[2]))+ abs((Npeak1v[1]-Npeak3v[2])*((Npeak1t[1]+12)-Npeak3t[2])) })
}
# Only Peak4t is 0 in exactly one row: Peaks 1-3 are compared like-for-like,
# Peak4 of row 2 with the shifted Peak1 of row 1.
else if({indx[1]==0 & indx[2]==0 & indx[3]==0 & indx[4]==1}) # 4 peak 3 peak comparison
{x7<- x1[order(x1$Peak4t,x1$Npeak4t),];
with(x7,{abs((Peak1v[1]-Peak1v[2])*(Peak1t[1]-Peak1t[2]))+abs((Peak2v[1]-Peak2v[2])*(Peak2t[1]-Peak2t[2]))+abs((Peak3v[1]-Peak3v[2])*(Peak3t[1]-Peak3t[2]))+ abs((Peak1v[1]-Peak4v[2])*((Peak1t[1]+12)-Peak4t[2]))+ abs((Npeak1v[1]-Npeak1v[2])*(Npeak1t[1]-Npeak1t[2]))+abs((Npeak2v[1]-Npeak2v[2])*(Npeak2t[1]-Npeak2t[2]))+abs((Npeak3v[1]-Npeak3v[2])*(Npeak3t[1]-Npeak3t[2]))+ abs((Npeak1v[1]-Npeak4v[2])*((Npeak1t[1]+12)-Npeak4t[2])) })

}
# Peak2t, Peak3t and Peak4t are each 0 in exactly one row: all three are
# compared with the shifted Peak1 of row 1.
else if({indx[1]==0 & indx[2]==1 & indx[3]==1 & indx[4]==1}) #4 peak 1 peak comparison
{x8<- x1[order(x1$Peak2t,x1$Peak3t,x1$Peak4t,x1$Npeak2t,x1$Npeak3t,x1$Npeak4t),];
with(x8,{abs((Peak1v[1]-Peak1v[2])*(Peak1t[1]-Peak1t[2]))+ abs((Peak1v[1]-Peak2v[2])*((Peak1t[1]+12)-Peak2t[2]))+ abs((Peak1v[1]-Peak3v[2])*((Peak1t[1]+12)-Peak3t[2]))+abs((Peak1v[1]-Peak4v[2])*((Peak1t[1]+12)-Peak4t[2]))+ abs((Npeak1v[1]-Npeak1v[2])*(Npeak1t[1]-Npeak1t[2]))+ abs((Npeak1v[1]-Npeak2v[2])*((Npeak1t[1]+12)-Npeak2t[2]))+ abs((Npeak1v[1]-Npeak3v[2])*((Npeak1t[1]+12)-Npeak3t[2]))+abs((Npeak1v[1]-Npeak4v[2])*((Npeak1t[1]+12)-Npeak4t[2]))})

}
# Fallback for every zero pattern not matched above: no reordering and no +12
# shift; peak k of one row is compared directly with peak k of the other, for
# all four Peak and all four Npeak columns.
else ({ #cases where peaks are similar
with(x1,{abs((Peak1v[1]-Peak1v[2])*(Peak1t[1]-Peak1t[2]))+abs((Peak2v[1]-Peak2v[2])*(Peak2t[1]-Peak2t[2]))+abs((Peak3v[1]-Peak3v[2])*(Peak3t[1]-Peak3t[2]))+abs((Peak4v[1]-Peak4v[2])*(Peak4t[1]-Peak4t[2]))+ abs((Npeak1v[1]-Npeak1v[2])*(Npeak1t[1]-Npeak1t[2]))+abs((Npeak2v[1]-Npeak2v[2])*(Npeak2t[1]-Npeak2t[2]))+abs((Npeak3v[1]-Npeak3v[2])*(Npeak3t[1]-Npeak3t[2]))+abs((Npeak4v[1]-Npeak4v[2])*(Npeak4t[1]-Npeak4t[2])) })
})

}))))
# res2: put each column of res back into original row order. For column i the
# first i entries are taken, the leading self-comparison is dropped with
# tail(.., -1), a literal 0 is placed at position i, and the remaining entries
# follow -- so res2[j, i] is the value for the pair (i, j), with 0 on the diagonal.
res2<-do.call(cbind,lapply(seq_len(ncol(res)),function(i) c(c(tail(res[seq(1,i,1),i],-1),0),res[-c(1:i),i])))
# Label the rows 1..n, then show the top-left 5x5 corner of the matrix.
row.names(res2)<-1:nrow(res2)
res2[1:5,1:5]
#      [,1]     [,2]     [,3]     [,4]    [,5]
#1   0.0000   0.0000   0.0000 857.6834   0.000
#2   0.0000   0.0000   0.0000 611.1167   0.000
#3   0.0000   0.0000   0.0000 854.3765   0.000
#4 857.6834 611.1167 854.3765   0.0000 579.756
#5   0.0000   0.0000   0.0000 579.7560   0.000
dim(res2)
#[1] 124 124



I also validated each of the cases by calculating the values manually.
For example, for stations 1 and 4, your previous email indicated:

""
but the distance between 1 and 4 is not 379.1364, actually its 1495.01

""

Based on my calculation:

# Show the two rows being checked (stations 1 and 4).
dat1[c(1, 4), ]
#  St. Peak1.t. Peak2.t. Peak3.t. Peak4.t. Npeak1.t. Npeak2.t. Npeak3.t.
#1   1        5       10        0        0         7        13         0
#4   4        4        8       10        0         6         9        14
 # Npeak4.t. Peak1.v. Peak2.v. Peak3.v. Peak4.v. Npeak1.v. Npeak2.v. Npeak3.v.
#1         0 56.28785 17.43170  0.00000        0  49.66025 81.759761    0.0000
#4         0 27.91471 12.06137 11.52018        0  10.29134  5.062441   17.8527
 # Npeak4.v.
#1         0
#4         0
# Hand calculation of the same distance, using the numbers printed above.
# Each term is |value difference * time difference|; the terms are paired up
# position by position and added left to right. Term order:
# Peak1, Peak2, Peak1-vs-Peak3 (+12), Npeak1, Npeak2, Npeak1-vs-Npeak3 (+12).
local({
  value_gap <- c(
    56.28785 - 27.91471,
    17.43170 - 12.06137,
    56.28785 - 11.52018,
    49.66025 - 10.29134,
    81.759761 - 5.062441,
    49.66025 - 17.8527
  )
  time_gap <- c(
    5 - 4,
    10 - 8,
    (5 + 12) - 10,
    7 - 6,
    13 - 9,
    (7 + 12) - 14
  )
  Reduce(`+`, abs(value_gap * time_gap))
})
#[1] 857.6834 #which is the same value I got in the distance matrix.

If the formula is different, please let me know.
A.K.







________________________________
From: eliza botto <eliza_botto at hotmail.com>
To: "smartpink111 at yahoo.com" <smartpink111 at yahoo.com> 
Sent: Monday, March 25, 2013 3:50 AM
Subject: RE: Distance calculation



Dear Arun,
I have a slight problem with this code for the calculation of the distance matrix. The text file describes the problem, whereas the data is in the Excel file.
kindly help me out.. :(

Elisa 


> Date: Fri, 22 Mar 2013 06:46:18 -0700
> From: smartpink111 at yahoo.com
> Subject: Re: Distance calculation
> To: eliza_botto at hotmail.com
> CC: r-help at r-project.org
> 
> Hi Elisa,
> 
> I hope this is what you wanted.
> 
> 
> dat1<-read.csv("peaks.csv",sep=",")
> #Subset
> dat2<-dat1[1:5,]
> res1<-do.call(cbind,lapply(seq_len(nrow(dat2)),function(i) do.call(rbind,lapply(split(rbind(dat2[i,],dat2[-i,]),1:nrow(rbind(dat2[i,],dat2[-i,]))), function(x) {x1<-rbind(dat2[i,],x); abs((x1$Peak1.v.[1]-x1$Peak1.v.[2])*(x1$Peak1.t.[1]-x1$Peak1.t.[2]))+abs((x1$Peak2.v.[1]-x1$Peak2.v.[2])*(x1$Peak2.t.[1]-x1$Peak2.t.[2]))+abs((x1$Npeak1.v.[1]-x1$Npeak1.v.[2])*(x1$Npeak1.t.[1]-x1$Npeak1.t.[2]))+abs((x1$Npeak2.v.[1]-x1$Npeak2.v.[2])*(x1$Npeak2.t.[1]-x1$Npeak2.t.[2]))}))))
> res2<-do.call(cbind,lapply(seq_len(ncol(res1)),function(i) c(c(tail(res1[seq(1,i,1),i],-1),0),res1[-c(1:i),i])))
> row.names(res2)<-1:nrow(res2)
>  res2
> #      [,1]     [,2]     [,3]     [,4]     [,5]
> #1   0.0000   0.0000   0.0000 379.1364   0.0000
> #2   0.0000   0.0000   0.0000 312.8267   0.0000
> #3   0.0000   0.0000   0.0000 379.6576   0.0000
> #4 379.1364 312.8267 379.6576   0.0000 324.4063
> #5   0.0000   0.0000   0.0000 324.4063   0.0000
> 
> resWhole<-do.call(cbind,lapply(seq_len(nrow(dat1)),function(i) do.call(rbind,lapply(split(rbind(dat1[i,],dat1[-i,]),1:nrow(rbind(dat1[i,],dat1[-i,]))), function(x) {x1<-rbind(dat1[i,],x); abs((x1$Peak1.v.[1]-x1$Peak1.v.[2])*(x1$Peak1.t.[1]-x1$Peak1.t.[2]))+abs((x1$Peak2.v.[1]-x1$Peak2.v.[2])*(x1$Peak2.t.[1]-x1$Peak2.t.[2]))+abs((x1$Npeak1.v.[1]-x1$Npeak1.v.[2])*(x1$Npeak1.t.[1]-x1$Npeak1.t.[2]))+abs((x1$Npeak2.v.[1]-x1$Npeak2.v.[2])*(x1$Npeak2.t.[1]-x1$Npeak2.t.[2]))}))))
> res2Whole<-do.call(cbind,lapply(seq_len(ncol(resWhole)),function(i) c(c(tail(resWhole[seq(1,i,1),i],-1),0),resWhole[-c(1:i),i])))
> row.names(res2Whole)<-1:nrow(res2Whole)
> dim(res2Whole)
> #[1] 124 124
> res2Whole[1:5,1:5]
> #      [,1]     [,2]     [,3]     [,4]     [,5]
> #1   0.0000   0.0000   0.0000 379.1364   0.0000
> #2   0.0000   0.0000   0.0000 312.8267   0.0000
> #3   0.0000   0.0000   0.0000 379.6576   0.0000
> #4 379.1364 312.8267 379.6576   0.0000 324.4063
> #5   0.0000   0.0000   0.0000 324.4063   0.0000
> 
> A.K.
> 
> 
> 
> ________________________________
> From: eliza botto <eliza_botto at hotmail.com>
> To: "smartpink111 at yahoo.com" <smartpink111 at yahoo.com> 
> Sent: Friday, March 22, 2013 8:26 AM
> Subject: 
> 
> 
> 
> Dear Arun,
> I hope you are fine. 
>  The attached text file has my recent question, and the Excel file contains the data.
> 
> 
> thanks in advance
> 
> Elisa



More information about the R-help mailing list