데이터마이너를 꿈꾸며 :: R Practice

R Practice - diet_data

Python, R 분석과 프로그래밍 2015. 1. 19. 12:38

source: https://github.com/derekfranks/practice_assignment

To begin, download this file and unzip it into your R working directory.
http://s3.amazonaws.com/practice_assignment/diet_data.zip

> setwd("D:/temp/r_temp")

> list.files("diet_data")

[1] "Andy.csv" "David.csv" "John.csv"

[4] "Mike.csv" "Steve.csv"

> andy<-read.csv("diet_data/Andy.csv")

> head(andy)

Patient.Name Age Weight Day

1 Andy 30 140 1

2 Andy 30 140 2

3 Andy 30 140 3

4 Andy 30 139 4

5 Andy 30 138 5

6 Andy 30 138 6

> length(andy$Day)

[1] 30

> nrow(andy)

[1] 30

> ncol(andy)

[1] 4

> length(andy)

[1] 4

> dim(andy)

[1] 30 4

> str(andy)

'data.frame': 30 obs. of 4 variables:

$ Patient.Name: Factor w/ 1 level "Andy": 1 1 1 1 1 1 1 1 1 1 ...

$ Age : int 30 30 30 30 30 30 30 30 30 30 ...

$ Weight : int 140 140 140 139 138 138 138 138 138 138 ...

$ Day : int 1 2 3 4 5 6 7 8 9 10 ...

> summary(andy)

Patient.Name Age Weight Day

Andy:30 Min. :30 Min. :135.0 Min. : 1.00

1st Qu.:30 1st Qu.:137.0 1st Qu.: 8.25

Median :30 Median :137.5 Median :15.50

Mean :30 Mean :137.3 Mean :15.50

3rd Qu.:30 3rd Qu.:138.0 3rd Qu.:22.75

Max. :30 Max. :140.0 Max. :30.00

> names(andy)

[1] "Patient.Name" "Age" "Weight" "Day"

> andy[1,"Weight"]

[1] 140

> andy[1,"weight"]

NULL

> andy[30,"Weight"]

[1] 135

> andy[which(andy$day)==30,"Weight"]

Error in which(andy$day) : argument to 'which' is not logical

> andy[which(andy$Day)==30,"Weight"]

Error in which(andy$Day) : argument to 'which' is not logical

> andy[which(andy$Day==30),"Weight"]

[1] 135

> andy[which(andy[,Day]==30),"Weight"]

Error in `[.data.frame`(andy, , Day) : object 'Day' not found

> andy[which(andy[,"Day"]==30),"Weight"]

[1] 135

> subset(andy$Weight, andy$Day==30)

[1] 135

> andy_start<-subset[andy$Weight,andy$Day==1]

Error in subset[andy$Weight, andy$Day == 1] :

object of type 'closure' is not subsettable

> andy_start<-subset(andy$Weight,andy$Day==1)

> andy_start

[1] 140

> andy_start<-andy[1,"Weight"]

> andy_end<-andy[30,"Weight"]

> andy_loss<-andy_start - andy_end

> andy_loss

[1] 5

> files<-list.files("diet_data")

> files

[1] "Andy.csv" "David.csv" "John.csv" "Mike.csv" "Steve.csv"

> files[1]

[1] "Andy.csv"

> files[-1]

[1] "David.csv" "John.csv" "Mike.csv" "Steve.csv"

> files[2]

[1] "David.csv"

> files[3:5]

[1] "John.csv" "Mike.csv" "Steve.csv"

> files[c(1,3)]

[1] "Andy.csv" "John.csv"

> head(read.csv(files[3]))

Error in file(file, "rt") : cannot open the connection

In addition: Warning message:

In file(file, "rt") :

cannot open file 'John.csv': No such file or directory

> files_full <- list.files("diet_data",full.names=TRUE)

> files_full

[1] "diet_data/Andy.csv" "diet_data/David.csv" "diet_data/John.csv"

[4] "diet_data/Mike.csv" "diet_data/Steve.csv"

> head(read.csv(files_full[3]))

Patient.Name Age Weight Day

1 John 22 175 1

2 John 22 175 2

3 John 22 175 3

4 John 22 175 4

5 John 22 175 5

6 John 22 175 6

> andy_david<-rbind(andy,david)

Error in rbind(andy, david) : object 'david' not found

> david<-read.csv("diet_data/David.csv")

> head(david)

Patient.Name Age Weight Day

1 David 35 210 1

2 David 35 209 2

3 David 35 209 3

4 David 35 209 4

5 David 35 209 5

6 David 35 209 6

> andy_david<-rbind(andy,david)

> head(andy_david)

Patient.Name Age Weight Day

1 Andy 30 140 1

2 Andy 30 140 2

3 Andy 30 140 3

4 Andy 30 139 4

5 Andy 30 138 5

6 Andy 30 138 6

> tail(andy_david)

Patient.Name Age Weight Day

55 David 35 203 25

56 David 35 203 26

57 David 35 202 27

58 David 35 202 28

59 David 35 202 29

60 David 35 201 30

> andy_david<-rbind(andy,read.csv(files_full[2]))

> head(andy_david)

Patient.Name Age Weight Day

1 Andy 30 140 1

2 Andy 30 140 2

3 Andy 30 140 3

4 Andy 30 139 4

5 Andy 30 138 5

6 Andy 30 138 6

> tail(andy_david)

Patient.Name Age Weight Day

55 David 35 203 25

56 David 35 203 26

57 David 35 202 27

58 David 35 202 28

59 David 35 202 29

60 David 35 201 30

> day_25<-subset(andy_david[25,])

> day_25

Patient.Name Age Weight Day

25 Andy 30 135 25

> day_26<-subset(andy_david[which(andy_david$Day==26),])

> day_26

Patient.Name Age Weight Day

26 Andy 30 135 26

56 David 35 203 26

> day25<-andy_david[which(andy_david$Day == 25),]

> day25

Patient.Name Age Weight Day

25 Andy 30 135 25

55 David 35 203 25

> for(i in 1:5){print [i]}

Error in print[i] : object of type 'closure' is not subsettable

> for(i in 1:5){print (i}

Error: unexpected '}' in "for(i in 1:5){print (i}"

> for(i in 1:5){print (i)}

[1] 1

[1] 2

[1] 3

[1] 4

[1] 5

> for(i in 1:5){

+ dat <- rbind(dat,read.csv(files_full[i]))

+ }

Error in rbind(dat, read.csv(files_full[i])) : object 'dat' not found

> dat<-data.frame()

> for(i in 1:5){

+ dat <- rbind(dat,read.csv(files_full[i]))

+ }

> str(dat)

'data.frame': 150 obs. of 4 variables:

$ Patient.Name: Factor w/ 5 levels "Andy","David",..: 1 1 1 1 1 1 1 1 1 1 ...

$ Age : int 30 30 30 30 30 30 30 30 30 30 ...

$ Weight : int 140 140 140 139 138 138 138 138 138 138 ...

$ Day : int 1 2 3 4 5 6 7 8 9 10 ...

> for(i in 1:5 ){

+ dat2<-data.frame()

+ dat2<-rbind(dat2,read.csv(files_full[i]))

+ }

> str(dat2)

'data.frame': 30 obs. of 4 variables:

$ Patient.Name: Factor w/ 1 level "Steve": 1 1 1 1 1 1 1 1 1 1 ...

$ Age : int 55 55 55 55 55 55 55 55 55 55 ...

$ Weight : int 225 225 225 224 224 224 223 223 223 223 ...

$ Day : int 1 2 3 4 5 6 7 8 9 10 ...

> list.files("diet_data")

[1] "Andy.csv" "David.csv" "John.csv" "Mike.csv" "Steve.csv"

> head(dat2)

Patient.Name Age Weight Day

1 Steve 55 225 1

2 Steve 55 225 2

3 Steve 55 225 3

4 Steve 55 224 4

5 Steve 55 224 5

6 Steve 55 224 6

Because we put dat2<-data.frame() inside the loop, dat2 is reset to an empty data frame at the start of every pass, so each iteration discards what the previous one read. We therefore end up with only the data from the last file in our list (Steve.csv).

> median(dat$Weight)

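[1] NA

The result is NA because the Weight column contains missing values; median() returns NA whenever its input has an NA unless na.rm=TRUE is given.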

> nrow(dat)

[1] 150

> ncol(dat)

[1] 4

> dim(dat)

[1] 150 4

> median(dat$Weight,na.rm=TRUE)

[1] 190

> dat_30<-dat[which(dat$Day==30),]

> dat_30

Patient.Name Age Weight Day

30 Andy 30 135 30

60 David 35 201 30

90 John 22 177 30

120 Mike 40 192 30

150 Steve 55 214 30

> median(dat_30$Weight)

[1] 192

> weightmedian<-function(directory,day){

+ files_list<-list.files(directory,full.names=TRUE) #Create a list of files

+ dat<-data.frame() #creates an empty data frame

+ for(i in 1:5) { #loops through the files, rbinding them together

+ dat<-rbind(dat,read.csv(files_list[i]))

+ }

+ dat_subset<-dat[which(dat[,"Day"]==day),] #subsets the rows that match the 'day' arguments

+ median(dat_subset[,"Weight"],na.rm=TRUE) #identifies the median weight

+ }

> weightmedian(directory="diet_data",day=20)

[1] 197.5

> weightmedian("diet_data",4)

[1] 188

'Python, R 분석과 프로그래밍' 카테고리의 다른 글

dim, str, plot (0)	2015.10.29
도수분포표와 히스토그램 (0)	2015.08.12
[데이터 처리 & 분석 실무] 데이터 타입 - 데이터 프레임, 판별, 변환 (0)	2015.02.12
[데이터 처리 & 분석 실무] 데이터 타입 - 스칼라, 벡터, 리스트, 행렬, 배열 (0)	2015.02.12
[텍스트 마이닝 연습] 2014 대통령 신년사 분석 (0)	2015.01.14

Posted by 마르띤

데이터마이너를 꿈꾸며

R Practice - diet_data

'Python, R 분석과 프로그래밍' 카테고리의 다른 글

링크

카테고리

최근에 올라온 글

최근에 받은 트랙백

글 보관함

티스토리툴바