2018年6月18日 星期一

R program

#dataAnalysis.r
#source("dataAnalysis.r")
my.var<-function(x){
tmp<-x-mean(x);
tmp1<-sum(tmp^2)/length(x);
return(tmp1)
}

my.sigma=function(x){return(sqrt(my.var(x)))}

my.std=function(x){
return((x-mean(x))/my.sigma(x))
}

print.data=function(x){
cat("x=",x,"\n")
cat("mean(x)=",mean(x),"\n")
cat("x-mean(x)=",x-mean(x),"\n")
cat("(x-mean(x))^2=",(x-mean(x))^2,"\n")
cat("var(x)=",my.var(x),"\n")
cat("sigma(x)=",my.sigma(x),"\n")
cat("std(x)=",my.std(x),"\n")
}

x=seq(3,10,2)
print.data(x)

h=c(172,160,162,164,170,168,166)
w=c(60,50,52,58,62,56,54)

cat("heigh=",h,"\n")
cat("weigth=",w,"\n")
par(mfrow=c(3,1))

plot(h,w,xlab="heigh",ylab="weigth",main="scatter x,y")

z.h=my.std(h)
z.w=my.std(w)

cat("std(h)=(h-mean(h))/sigma(h)=",z.h,"\n")
cat("std(w)=(w-mean(w))/sigma(w)=",z.w,"\n")

plot(z.h,z.w,xlab="standardize heigh",ylab="standardize weigth",main="scatter z.h,z.w")

cat("r=sum(z.h*z.w)/n=",sum(z.h*z.w)/length(h),"\n")

my.relation=function(x,y){
Sxy=sum((x-mean(x))*(y-mean(y)));
Sxx=sum((x-mean(x))^2);
Syy=sum((y-mean(y))^2);
r=Sxy/sqrt(Sxx*Syy);
cat("Sxy=",Sxy,"\n");
cat("Sxx=",Sxx,"\n");
cat("Syy=",Syy,"\n");
cat("r=Sxy/sqrt((Sxx*Syy))=",r,"\n");
}

my.relation(h,w)
#regression
#y-mu.y=Sxy/Sxx(x-mu.x)
y=w
x=h
mu.y=mean(y)
mu.x=mean(x)
Sxy=sum((x-mean(x))*(y-mean(y)));
Sxx=sum((x-mean(x))^2);
x.min=min(x)
x.max=max(x)
xx=seq(x.min,x.max,1)
yy=Sxy/Sxx*(xx-mu.x)+mu.y

 lm( y~x )
abline(lm( y~x ))

2018年6月12日 星期二

R for mean, median, mode, var, sd

> x=seq(3,10,2)
> x

[1] 3 5 7 9

> mean(x)

[1] 6

> mean(x+2)

[1] 8

> mean(3*x)

[1] 18

> mean(3*x+2)

[1] 20

> x-mean(x)

[1] -3 -1 1 3

> (x-mean(x))^2

[1] 9 1 1 9

> sum((x-mean(x))^2)/length(x)

[1] 5

> ((x+2)-mean(x+2))^2

[1] 9 1 1 9

> sum(((x+2)-mean(x+2))^2)/length(x)

[1] 5

> (3*x-mean(3*x))^2

[1] 81 9 9 81

> sum(((3*x)-mean(3*x))^2)/length(x)

[1] 45

> sum(((3*x+2)-mean(3*x+2))^2)/length(x)

[1] 45

> x=c(1,2,3,3,3,4,5,6,8)
> x

[1] 1 2 3 3 3 4 5 6 8

> median(x)

[1] 3

getmode <- function(v) {
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
> getmode(x)

[1] 3

> x=c(1,4,2,rep(3,3),4,5,6,8)
> x

[1] 1 4 2 3 3 3 4 5 6 8

> sort(x)

[1] 1 2 3 3 3 4 4 5 6 8

> length(x)

[1] 10

> x_sort=sort(x)
> x_sort

[1] 1 2 3 3 3 4 4 5 6 8

> sum(x_sort[5]:x_sort[6])/2

[1] 3.5

> median(x)

[1] 3.5

> getmode(x)

[1] 3