#dataAnalysis.r
#source("dataAnalysis.r")
my.var<-function(x){
tmp<-x-mean(x);
tmp1<-sum(tmp^2)/length(x);
return(tmp1)
}
my.sigma=function(x){return(sqrt(my.var(x)))}
my.std=function(x){
return((x-mean(x))/my.sigma(x))
}
print.data=function(x){
cat("x=",x,"\n")
cat("mean(x)=",mean(x),"\n")
cat("x-mean(x)=",x-mean(x),"\n")
cat("(x-mean(x))^2=",(x-mean(x))^2,"\n")
cat("var(x)=",my.var(x),"\n")
cat("sigma(x)=",my.sigma(x),"\n")
cat("std(x)=",my.std(x),"\n")
}
x=seq(3,10,2)
print.data(x)
h=c(172,160,162,164,170,168,166)
w=c(60,50,52,58,62,56,54)
cat("heigh=",h,"\n")
cat("weigth=",w,"\n")
par(mfrow=c(3,1))
plot(h,w,xlab="heigh",ylab="weigth",main="scatter x,y")
z.h=my.std(h)
z.w=my.std(w)
cat("std(h)=(h-mean(h))/sigma(h)=",z.h,"\n")
cat("std(w)=(w-mean(w))/sigma(w)=",z.w,"\n")
plot(z.h,z.w,xlab="standardize heigh",ylab="standardize weigth",main="scatter z.h,z.w")
cat("r=sum(z.h*z.w)/n=",sum(z.h*z.w)/length(h),"\n")
my.relation=function(x,y){
Sxy=sum((x-mean(x))*(y-mean(y)));
Sxx=sum((x-mean(x))^2);
Syy=sum((y-mean(y))^2);
r=Sxy/sqrt(Sxx*Syy);
cat("Sxy=",Sxy,"\n");
cat("Sxx=",Sxx,"\n");
cat("Syy=",Syy,"\n");
cat("r=Sxy/sqrt((Sxx*Syy))=",r,"\n");
}
my.relation(h,w)
#regression
#y-mu.y=Sxy/Sxx(x-mu.x)
y=w
x=h
mu.y=mean(y)
mu.x=mean(x)
Sxy=sum((x-mean(x))*(y-mean(y)));
Sxx=sum((x-mean(x))^2);
x.min=min(x)
x.max=max(x)
xx=seq(x.min,x.max,1)
yy=Sxy/Sxx*(xx-mu.x)+mu.y
lm( y~x )
abline(lm( y~x ))