source: https://github.com/derekfranks/practice_assignment
To begin, download this file and unzip it into your R working directory:
http://s3.amazonaws.com/practice_assignment/diet_data.zip
> setwd("D:/temp/r_temp")
> list.files("diet_data")
[1] "Andy.csv" "David.csv" "John.csv"
[4] "Mike.csv" "Steve.csv"
> andy<-read.csv("diet_data/Andy.csv")
> head(andy)
Patient.Name Age Weight Day
1 Andy 30 140 1
2 Andy 30 140 2
3 Andy 30 140 3
4 Andy 30 139 4
5 Andy 30 138 5
6 Andy 30 138 6
> length(andy$Day)
[1] 30
> nrow(andy)
[1] 30
> ncol(andy)
[1] 4
> length(andy)
[1] 4
> dim(andy)
[1] 30 4
> str(andy)
'data.frame': 30 obs. of 4 variables:
$ Patient.Name: Factor w/ 1 level "Andy": 1 1 1 1 1 1 1 1 1 1 ...
$ Age : int 30 30 30 30 30 30 30 30 30 30 ...
$ Weight : int 140 140 140 139 138 138 138 138 138 138 ...
$ Day : int 1 2 3 4 5 6 7 8 9 10 ...
> summary(andy)
Patient.Name Age Weight Day
Andy:30 Min. :30 Min. :135.0 Min. : 1.00
1st Qu.:30 1st Qu.:137.0 1st Qu.: 8.25
Median :30 Median :137.5 Median :15.50
Mean :30 Mean :137.3 Mean :15.50
3rd Qu.:30 3rd Qu.:138.0 3rd Qu.:22.75
Max. :30 Max. :140.0 Max. :30.00
> names(andy)
[1] "Patient.Name" "Age" "Weight" "Day"
> andy[1,"Weight"]
[1] 140
> andy[1,"weight"]
NULL
> andy[30,"Weight"]
[1] 135
> andy[which(andy$day)==30,"Weight"]
Error in which(andy$day) : argument to 'which' is not logical
> andy[which(andy$Day)==30,"Weight"]
Error in which(andy$Day) : argument to 'which' is not logical
> andy[which(andy$Day==30),"Weight"]
[1] 135
> andy[which(andy[,Day]==30),"Weight"]
Error in `[.data.frame`(andy, , Day) : object 'Day' not found
> andy[which(andy[,"Day"]==30),"Weight"]
[1] 135
> subset(andy$Weight, andy$Day==30)
[1] 135
> andy_start<-subset[andy$Weight,andy$Day==1]
Error in subset[andy$Weight, andy$Day == 1] :
object of type 'closure' is not subsettable
> andy_start<-subset(andy$Weight,andy$Day==1)
> andy_start
[1] 140
> andy_start<-andy[1,"Weight"]
> andy_end<-andy[30,"Weight"]
> andy_loss<-andy_start - andy_end
> andy_loss
[1] 5
> files<-list.files("diet_data")
> files
[1] "Andy.csv" "David.csv" "John.csv" "Mike.csv" "Steve.csv"
> files[1]
[1] "Andy.csv"
> files[-1]
[1] "David.csv" "John.csv" "Mike.csv" "Steve.csv"
> files[2]
[1] "David.csv"
> files[3:5]
[1] "John.csv" "Mike.csv" "Steve.csv"
> files[c(1,3)]
[1] "Andy.csv" "John.csv"
> head(read.csv(files[3])) Error in file(file, "rt") : cannot open the connection In addition: Warning message: In file(file, "rt") : cannot open file 'John.csv': No such file or directory > files_full <- list.files("diet_data",full.names=TRUE) > files_full [1] "diet_data/Andy.csv" "diet_data/David.csv" "diet_data/John.csv" [4] "diet_data/Mike.csv" "diet_data/Steve.csv" > head(read.csv(files_full[3])) Patient.Name Age Weight Day 1 John 22 175 1 2 John 22 175 2 3 John 22 175 3 4 John 22 175 4 5 John 22 175 5 6 John 22 175 6 > andy_david<-rbind(andy,david) Error in rbind(andy, david) : object 'david' not found > david<-read.csv("diet_data/David.csv") > head(david) Patient.Name Age Weight Day 1 David 35 210 1 2 David 35 209 2 3 David 35 209 3 4 David 35 209 4 5 David 35 209 5 6 David 35 209 6 > andy_david<-rbind(andy,david) > head(andy_david) Patient.Name Age Weight Day 1 Andy 30 140 1 2 Andy 30 140 2 3 Andy 30 140 3 4 Andy 30 139 4 5 Andy 30 138 5 6 Andy 30 138 6 > tail(andy_david) Patient.Name Age Weight Day 55 David 35 203 25 56 David 35 203 26 57 David 35 202 27 58 David 35 202 28 59 David 35 202 29 60 David 35 201 30 > andy_david<-rbind(andy,read.csv(files_full[2])) > head(andy_david) Patient.Name Age Weight Day 1 Andy 30 140 1 2 Andy 30 140 2 3 Andy 30 140 3 4 Andy 30 139 4 5 Andy 30 138 5 6 Andy 30 138 6 > tail(andy_david) Patient.Name Age Weight Day 55 David 35 203 25 56 David 35 203 26 57 David 35 202 27 58 David 35 202 28 59 David 35 202 29 60 David 35 201 30
> day_25<-subset(andy_david[25,]) > day_25 Patient.Name Age Weight Day 25 Andy 30 135 25 > day_26<-subset(andy_david[which(andy_david$Day==26),]) > day_26 Patient.Name Age Weight Day 26 Andy 30 135 26 56 David 35 203 26 > day25<-andy_david[which(andy_david$Day == 25),] > day25 Patient.Name Age Weight Day 25 Andy 30 135 25 55 David 35 203 25
> for(i in 1:5){print [i]} Error in print[i] : object of type 'closure' is not subsettable > for(i in 1:5){print (i} Error: unexpected '}' in "for(i in 1:5){print (i}" > for(i in 1:5){print (i)} [1] 1 [1] 2 [1] 3 [1] 4 [1] 5 > for(i in 1:5){ + dat <- rbind(dat,read.csv(files_full[i])) + } Error in rbind(dat, read.csv(files_full[i])) : object 'dat' not found > dat<-data.frame() > for(i in 1:5){ + dat <- rbind(dat,read.csv(files_full[i])) + } > str(dat) 'data.frame': 150 obs. of 4 variables: $ Patient.Name: Factor w/ 5 levels "Andy","David",..: 1 1 1 1 1 1 1 1 1 1 ... $ Age : int 30 30 30 30 30 30 30 30 30 30 ... $ Weight : int 140 140 140 139 138 138 138 138 138 138 ... $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
> for(i in 1:5 ){ + dat2<-data.frame() + dat2<-rbind(dat2,read.csv(files_full[i])) + } > str(dat2) 'data.frame': 30 obs. of 4 variables: $ Patient.Name: Factor w/ 1 level "Steve": 1 1 1 1 1 1 1 1 1 1 ... $ Age : int 55 55 55 55 55 55 55 55 55 55 ... $ Weight : int 225 225 225 224 224 224 223 223 223 223 ... $ Day : int 1 2 3 4 5 6 7 8 9 10 ... > list.files("diet_data") [1] "Andy.csv" "David.csv" "John.csv" "Mike.csv" "Steve.csv" > head(dat2) Patient.Name Age Weight Day 1 Steve 55 225 1 2 Steve 55 225 2 3 Steve 55 225 3 4 Steve 55 224 4 5 Steve 55 224 5 6 Steve 55 224 6
Because we put dat2<-data.frame() inside the loop, dat2 is reset to an empty data frame at the start of every pass, which discards the rows bound in the previous passes. So we end up with only the data from the last file in our list (Steve.csv).
The Weight column contains missing values (NA), so median() returns NA unless we pass na.rm=TRUE to drop them first:
> median(dat$Weight) [1] NA > nrow(dat) [1] 150 > ncol(dat) [1] 4 > dim(dat) [1] 150 4 > median(dat$Weight,na.rm=TRUE) [1] 190 > dat_30<-dat[which(dat$Day==30),] > dat_30 Patient.Name Age Weight Day 30 Andy 30 135 30 60 David 35 201 30 90 John 22 177 30 120 Mike 40 192 30 150 Steve 55 214 30 > median(dat_30$Weight) [1] 192
# Median weight on a given day, pooled over every file in a directory.
#
# Arguments:
#   directory  Path to a folder of CSV files; each file is read with
#              read.csv() and must provide "Day" and "Weight" columns.
#   day        Day value to select; rows whose Day equals it are kept.
#
# Returns the median of the selected Weight values, ignoring NAs.
# Stops with an error if the directory contains no files.
weightmedian <- function(directory, day) {
  # Full paths, so the files can be read without changing the working dir.
  files_list <- list.files(directory, full.names = TRUE)
  if (length(files_list) == 0) {
    stop("no files found in directory: ", directory, call. = FALSE)
  }

  # Read every listed file (not just the first five) and stack the rows
  # in one step instead of growing a data frame inside a loop.
  dat <- do.call(rbind, lapply(files_list, read.csv))

  # Keep only the rows that match the requested day.
  dat_subset <- dat[which(dat[, "Day"] == day), ]

  # Missing weights are dropped before taking the median.
  median(dat_subset[, "Weight"], na.rm = TRUE)
}

# Example session with the five files in diet_data:
# > weightmedian(directory = "diet_data", day = 20)
# [1] 197.5
# > weightmedian("diet_data", 4)
# [1] 188
'Python, R 분석과 프로그래밍' 카테고리의 다른 글
dim, str, plot (0) | 2015.10.29 |
---|---|
도수분포표와 히스토그램 (0) | 2015.08.12 |
[데이터 처리 & 분석 실무] 데이터 타입 - 데이터 프레임, 판별, 변환 (0) | 2015.02.12 |
[데이터 처리 & 분석 실무] 데이터 타입 - 스칼라, 벡터, 리스트, 행렬, 배열 (0) | 2015.02.12 |
[텍스트 마이닝 연습] 2014 대통령 신년사 분석 (0) | 2015.01.14 |