數據分析(.pdf)
https://drive.google.com/file/d/1MkHQE4npyMjiCbvtzAlWw22mLJqU0xsi/view?usp=sharing
(.odt )
https://drive.google.com/file/d/1FRMMlbju6ffDR7j5LyUPCBSMfsRyxzkK/view?usp=sharing
2018年12月20日 星期四
2018年6月18日 星期一
R program
#dataAnalysis.r
#source("dataAnalysis.r")
# Population variance of a numeric vector.
#
# Computes the mean of the squared deviations from mean(x), dividing by
# length(x) (i.e. by n, not by n - 1 as stats::var() does).
#
# x: numeric vector.
# Returns a single number.
my.var <- function(x) {
  n <- length(x)
  centered <- x - mean(x)
  sum(centered^2) / n
}
# Population standard deviation: the square root of my.var(x).
#
# x: numeric vector.
# Returns a single number.
my.sigma <- function(x) {
  sqrt(my.var(x))
}
# Standardize a numeric vector: subtract mean(x) from every element and divide
# by the population standard deviation my.sigma(x).
#
# x: numeric vector.
# Returns a vector of the same length as x.
my.std <- function(x) {
  centered <- x - mean(x)
  centered / my.sigma(x)
}
# Print a step-by-step breakdown of the basic statistics of x: the data, its
# mean, the deviations, the squared deviations, the population variance, the
# population standard deviation and the standardized values.
#
# x: numeric vector.
# Output goes to the console via cat(); nothing useful is returned.
#
# NOTE(review): the name has the form of an S3 print method for a class called
# "data"; it is called directly in this script.
print.data <- function(x) {
  mu <- mean(x)
  deviation <- x - mu
  cat("x=", x, "\n")
  cat("mean(x)=", mu, "\n")
  cat("x-mean(x)=", deviation, "\n")
  cat("(x-mean(x))^2=", deviation^2, "\n")
  cat("var(x)=", my.var(x), "\n")
  cat("sigma(x)=", my.sigma(x), "\n")
  cat("std(x)=", my.std(x), "\n")
}
# Demo 1: print the statistics breakdown for the sample 3, 5, 7, 9.
x <- seq(from = 3, to = 10, by = 2)
print.data(x)

# Demo 2: paired samples h and w (labelled height and weight in the output).
h <- c(172, 160, 162, 164, 170, 168, 166)
w <- c(60, 50, 52, 58, 62, 56, 54)
cat("heigh=", h, "\n")
cat("weigth=", w, "\n")

# Lay out three plot panels stacked vertically, then draw the raw scatter.
par(mfrow = c(3, 1))
plot(h, w, xlab = "heigh", ylab = "weigth", main = "scatter x,y")

# Standardize both variables and draw the scatter of the standardized values.
z.h <- my.std(h)
z.w <- my.std(w)
cat("std(h)=(h-mean(h))/sigma(h)=", z.h, "\n")
cat("std(w)=(w-mean(w))/sigma(w)=", z.w, "\n")
plot(
  z.h, z.w,
  xlab = "standardize heigh",
  ylab = "standardize weigth",
  main = "scatter z.h,z.w"
)

# Correlation coefficient as the mean product of the standardized values.
cat("r=sum(z.h*z.w)/n=", sum(z.h * z.w) / length(h), "\n")
# Pearson correlation coefficient of two paired numeric vectors.
#
# Computes the sums of products/squares of the deviations from the means
#   Sxy = sum((x - mean(x)) * (y - mean(y)))
#   Sxx = sum((x - mean(x))^2)
#   Syy = sum((y - mean(y))^2)
# prints them together with r = Sxy / sqrt(Sxx * Syy), and returns r.
#
# x, y: numeric vectors of the same length.
# Returns r invisibly, so a bare call prints only the cat() lines while callers
# can still capture the value (previously the computed r was discarded).
# Stops with an error when x and y differ in length, because R would otherwise
# recycle the shorter vector and report a meaningless coefficient.
my.relation <- function(x, y) {
  if (length(x) != length(y)) {
    stop("x and y must have the same length", call. = FALSE)
  }
  dx <- x - mean(x)
  dy <- y - mean(y)
  Sxy <- sum(dx * dy)
  Sxx <- sum(dx^2)
  Syy <- sum(dy^2)
  r <- Sxy / sqrt(Sxx * Syy)
  cat("Sxy=", Sxy, "\n")
  cat("Sxx=", Sxx, "\n")
  cat("Syy=", Syy, "\n")
  cat("r=Sxy/sqrt((Sxx*Syy))=", r, "\n")
  invisible(r)
}
# Print Sxy, Sxx, Syy and the correlation coefficient of h and w.
my.relation(h, w)

# Regression of y (= w) on x (= h).
# Least-squares line: y - mu.y = (Sxy / Sxx) * (x - mu.x)
y <- w
x <- h
mu.y <- mean(y)
mu.x <- mean(x)
Sxy <- sum((x - mu.x) * (y - mu.y))
Sxx <- sum((x - mu.x)^2)

# Evaluate the hand-computed line on a unit-step grid spanning the data.
x.min <- min(x)
x.max <- max(x)
xx <- seq(x.min, x.max, 1)
yy <- Sxy / Sxx * (xx - mu.x) + mu.y

# The most recent plot is the standardized scatter (z.h, z.w), so a line fitted
# on the raw scale must not be added to it. Draw a fresh raw-scale scatter and
# put both the hand-computed line and the lm() fit on that plot.
plot(x, y, xlab = "x", ylab = "y", main = "regression line")
lines(xx, yy)

# print() explicitly: a bare top-level expression is not auto-printed when the
# file is run through source().
fit <- lm(y ~ x)
print(fit)
abline(fit, lty = 2)
#source("dataAnalysis.r")
# Population variance of a numeric vector.
#
# Computes the mean of the squared deviations from mean(x), dividing by
# length(x) (i.e. by n, not by n - 1 as stats::var() does).
#
# x: numeric vector.
# Returns a single number.
my.var <- function(x) {
  n <- length(x)
  centered <- x - mean(x)
  sum(centered^2) / n
}
# Population standard deviation: the square root of my.var(x).
#
# x: numeric vector.
# Returns a single number.
my.sigma <- function(x) {
  sqrt(my.var(x))
}
# Standardize a numeric vector: subtract mean(x) from every element and divide
# by the population standard deviation my.sigma(x).
#
# x: numeric vector.
# Returns a vector of the same length as x.
my.std <- function(x) {
  centered <- x - mean(x)
  centered / my.sigma(x)
}
# Print a step-by-step breakdown of the basic statistics of x: the data, its
# mean, the deviations, the squared deviations, the population variance, the
# population standard deviation and the standardized values.
#
# x: numeric vector.
# Output goes to the console via cat(); nothing useful is returned.
#
# NOTE(review): the name has the form of an S3 print method for a class called
# "data"; it is called directly in this script.
print.data <- function(x) {
  mu <- mean(x)
  deviation <- x - mu
  cat("x=", x, "\n")
  cat("mean(x)=", mu, "\n")
  cat("x-mean(x)=", deviation, "\n")
  cat("(x-mean(x))^2=", deviation^2, "\n")
  cat("var(x)=", my.var(x), "\n")
  cat("sigma(x)=", my.sigma(x), "\n")
  cat("std(x)=", my.std(x), "\n")
}
# Demo 1: print the statistics breakdown for the sample 3, 5, 7, 9.
x <- seq(from = 3, to = 10, by = 2)
print.data(x)

# Demo 2: paired samples h and w (labelled height and weight in the output).
h <- c(172, 160, 162, 164, 170, 168, 166)
w <- c(60, 50, 52, 58, 62, 56, 54)
cat("heigh=", h, "\n")
cat("weigth=", w, "\n")

# Lay out three plot panels stacked vertically, then draw the raw scatter.
par(mfrow = c(3, 1))
plot(h, w, xlab = "heigh", ylab = "weigth", main = "scatter x,y")

# Standardize both variables and draw the scatter of the standardized values.
z.h <- my.std(h)
z.w <- my.std(w)
cat("std(h)=(h-mean(h))/sigma(h)=", z.h, "\n")
cat("std(w)=(w-mean(w))/sigma(w)=", z.w, "\n")
plot(
  z.h, z.w,
  xlab = "standardize heigh",
  ylab = "standardize weigth",
  main = "scatter z.h,z.w"
)

# Correlation coefficient as the mean product of the standardized values.
cat("r=sum(z.h*z.w)/n=", sum(z.h * z.w) / length(h), "\n")
# Pearson correlation coefficient of two paired numeric vectors.
#
# Computes the sums of products/squares of the deviations from the means
#   Sxy = sum((x - mean(x)) * (y - mean(y)))
#   Sxx = sum((x - mean(x))^2)
#   Syy = sum((y - mean(y))^2)
# prints them together with r = Sxy / sqrt(Sxx * Syy), and returns r.
#
# x, y: numeric vectors of the same length.
# Returns r invisibly, so a bare call prints only the cat() lines while callers
# can still capture the value (previously the computed r was discarded).
# Stops with an error when x and y differ in length, because R would otherwise
# recycle the shorter vector and report a meaningless coefficient.
my.relation <- function(x, y) {
  if (length(x) != length(y)) {
    stop("x and y must have the same length", call. = FALSE)
  }
  dx <- x - mean(x)
  dy <- y - mean(y)
  Sxy <- sum(dx * dy)
  Sxx <- sum(dx^2)
  Syy <- sum(dy^2)
  r <- Sxy / sqrt(Sxx * Syy)
  cat("Sxy=", Sxy, "\n")
  cat("Sxx=", Sxx, "\n")
  cat("Syy=", Syy, "\n")
  cat("r=Sxy/sqrt((Sxx*Syy))=", r, "\n")
  invisible(r)
}
# Print Sxy, Sxx, Syy and the correlation coefficient of h and w.
my.relation(h, w)

# Regression of y (= w) on x (= h).
# Least-squares line: y - mu.y = (Sxy / Sxx) * (x - mu.x)
y <- w
x <- h
mu.y <- mean(y)
mu.x <- mean(x)
Sxy <- sum((x - mu.x) * (y - mu.y))
Sxx <- sum((x - mu.x)^2)

# Evaluate the hand-computed line on a unit-step grid spanning the data.
x.min <- min(x)
x.max <- max(x)
xx <- seq(x.min, x.max, 1)
yy <- Sxy / Sxx * (xx - mu.x) + mu.y

# The most recent plot is the standardized scatter (z.h, z.w), so a line fitted
# on the raw scale must not be added to it. Draw a fresh raw-scale scatter and
# put both the hand-computed line and the lm() fit on that plot.
plot(x, y, xlab = "x", ylab = "y", main = "regression line")
lines(xx, yy)

# print() explicitly: a bare top-level expression is not auto-printed when the
# file is run through source().
fit <- lm(y ~ x)
print(fit)
abline(fit, lty = 2)
2018年5月23日 星期三
數據分析
Excel 函數 | 中文意義 | 語法 |
---|---|---|
sum | 加總 | =sum(B2:B11) 從B2到B11做加總 |
count | 計數 | =count(range) |
average | 平均數 | =average(range) |
varp | 母體變異數 | |
standardize | 標準化 | =standardize(x,mean,stdevp) |
median | 中位數 | |
mode | 眾數 | |
sqrt | 開根號 | |
rank | 排名 | |
round | 四捨五入 | =round(range,小數點第幾位) |
trunc | 小數捨去 | |
IF | 如果 | =IF(condition, true, false) |
min | 最小值 | |
max | 最大值 | |
slope | 斜率 | |
power | 次方 | |
pi | 圓周率 | |
exp | 指數 | |
fact | 階乘 | |
permut | 排列 | |
combin | 組合 | |
log | 對數 | |
ln | 自然對數 | |
sumif | 條件加總 | =sumif(range,criteria,sum_range) |
期望值
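$\langle X\rangle = E(X) = \sum_i x_i\,p(x_i) = \frac{1}{n}\sum_i x_i$ (最後一個等號在每個 $x_i$ 機率相同、$p(x_i)=1/n$ 時成立)
$\langle aX+b\rangle = E(aX+b) = a\,E(X)+b = a\langle X\rangle+b$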
加權總分=∑ wi xi
加權平均=∑ wi xi ⁄ ∑ wi
變異數
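$\mathrm{var}(X)=\frac{1}{n}\sum_i (x_i-\mu)^2=\frac{1}{n}\sum_i x_i^2-\mu^2$
$\mathrm{var}(aX+b)=a^2\,\mathrm{var}(X)$
$\mathrm{var}(X)=\langle (X-\langle X\rangle)^2\rangle=\langle X^2-2X\langle X\rangle+\langle X\rangle^2\rangle=\langle X^2\rangle-\langle X\rangle^2$
$\mathrm{var}(aX+b)=\langle (aX+b-\langle aX+b\rangle)^2\rangle=\langle (aX-a\langle X\rangle)^2\rangle=a^2\langle (X-\langle X\rangle)^2\rangle=a^2\,\mathrm{var}(X)$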
標準差=√ 變異數
σ(X)=√var(X)
σ (aX+b)=|a|σ(X)
Lab
X={1,2,3,4,5}
X+2={3,4,5,6,7}
3X={3,6,9,12,15}
3X+2={5,8,11,14,17}
solve the list
<X>,<X+2>,<3X>,<3X+2>
var(X),var(X+2),var(3X),var(3X+2)
σ(X),σ(X+2),σ(3X),σ(3X+2)
標準化
X'=(X-μ)/σ
正規化
[min,max]--->[0,1]
(max-x)/(max-min)=(1-x')/(1-0)
x in [min,max]---> x'=(x-min)/(max-min) in [0,1]
相關係數r
正相關r> 0,負相關r<0,零相關r=0
$S_{xy}=\sum_i (x_i-\mu_x)(y_i-\mu_y)$
$S_{xx}=\sum_i (x_i-\mu_x)^2$
$S_{yy}=\sum_i (y_i-\mu_y)^2$
$r=S_{xy}/\sqrt{S_{xx}\,S_{yy}}$
迴歸線
$y-\mu_y=\dfrac{S_{xy}}{S_{xx}}\,(x-\mu_x)$
斜率=slope=Sxy/Sxx
訂閱:
文章 (Atom)