'코세라'에 해당되는 글 1건

  1. 2015.01.19 R Practice - diet_data
반응형

source: https://github.com/derekfranks/practice_assignment

To begin, download this file and unzip it into your R working directory.
http://s3.amazonaws.com/practice_assignment/diet_data.zip

 

> setwd("D:/temp/r_temp")

> list.files("diet_data")

[1] "Andy.csv"  "David.csv" "John.csv" 

[4] "Mike.csv"  "Steve.csv"



> andy<-read.csv("diet_data/Andy.csv")

> head(andy)

  Patient.Name Age Weight Day

1         Andy  30    140   1

2         Andy  30    140   2

3         Andy  30    140   3

4         Andy  30    139   4

5         Andy  30    138   5

6         Andy  30    138   6

 

> length(andy$Day)

[1] 30

> nrow(andy)

[1] 30

> ncol(andy)

[1] 4

> length(andy)

[1] 4

> dim(andy)

[1] 30  4

> str(andy)

'data.frame':  30 obs. of  4 variables:

 $ Patient.Name: Factor w/ 1 level "Andy": 1 1 1 1 1 1 1 1 1 1 ...

 $ Age         : int  30 30 30 30 30 30 30 30 30 30 ...

 $ Weight      : int  140 140 140 139 138 138 138 138 138 138 ...

 $ Day         : int  1 2 3 4 5 6 7 8 9 10 ...

> summary(andy)

 Patient.Name      Age         Weight           Day       

 Andy:30      Min.   :30   Min.   :135.0   Min.   : 1.00  

              1st Qu.:30   1st Qu.:137.0   1st Qu.: 8.25  

              Median :30   Median :137.5   Median :15.50  

              Mean   :30   Mean   :137.3   Mean   :15.50  

              3rd Qu.:30   3rd Qu.:138.0   3rd Qu.:22.75  

              Max.   :30   Max.   :140.0   Max.   :30.00  

> names(andy)

[1] "Patient.Name" "Age"          "Weight"       "Day" 

 

> andy[1,"Weight"]

[1] 140

> andy[1,"weight"]

NULL

> andy[30,"Weight"]

[1] 135

> andy[which(andy$day)==30,"Weight"]

Error in which(andy$day) : argument to 'which' is not logical

> andy[which(andy$Day)==30,"Weight"]

Error in which(andy$Day) : argument to 'which' is not logical

> andy[which(andy$Day==30),"Weight"]

[1] 135

> andy[which(andy[,Day]==30),"Weight"]

Error in `[.data.frame`(andy, , Day) : object 'Day' not found

> andy[which(andy[,"Day"]==30),"Weight"]

[1] 135

> subset(andy$Weight, andy$Day==30)

[1] 135

 

> andy_start<-subset[andy$Weight,andy$Day==1]

Error in subset[andy$Weight, andy$Day == 1] : 

  object of type 'closure' is not subsettable

> andy_start<-subset(andy$Weight,andy$Day==1)

> andy_start

[1] 140

> andy_start<-andy[1,"Weight"]

> andy_end<-andy[30,"Weight"]

> andy_loss<-andy_start - andy_end

> andy_loss

[1] 5

 

> files<-list.files("diet_data")

> files

[1] "Andy.csv"  "David.csv" "John.csv"  "Mike.csv"  "Steve.csv"

> files[1]

[1] "Andy.csv"

> files[-1]

[1] "David.csv" "John.csv"  "Mike.csv"  "Steve.csv"

> files[2]

[1] "David.csv"

> files[3:5]

[1] "John.csv"  "Mike.csv"  "Steve.csv"

> files[c(1,3)]

[1] "Andy.csv" "John.csv"


> head(read.csv(files[3]))

Error in file(file, "rt") : cannot open the connection

In addition: Warning message:

In file(file, "rt") :

  cannot open file 'John.csv': No such file or directory

> files_full <- list.files("diet_data",full.names=TRUE)

> files_full

[1] "diet_data/Andy.csv"  "diet_data/David.csv" "diet_data/John.csv"

[4] "diet_data/Mike.csv"  "diet_data/Steve.csv"

> head(read.csv(files_full[3]))

  Patient.Name Age Weight Day

1         John  22    175   1

2         John  22    175   2

3         John  22    175   3

4         John  22    175   4

5         John  22    175   5

6         John  22    175   6


> andy_david<-rbind(andy,david)

Error in rbind(andy, david) : object 'david' not found

> david<-read.csv("diet_data/David.csv")

> head(david)

  Patient.Name Age Weight Day

1        David  35    210   1

2        David  35    209   2

3        David  35    209   3

4        David  35    209   4

5        David  35    209   5

6        David  35    209   6

> andy_david<-rbind(andy,david)

> head(andy_david)

  Patient.Name Age Weight Day

1         Andy  30    140   1

2         Andy  30    140   2

3         Andy  30    140   3

4         Andy  30    139   4

5         Andy  30    138   5

6         Andy  30    138   6

> tail(andy_david)

   Patient.Name Age Weight Day

55        David  35    203  25

56        David  35    203  26

57        David  35    202  27

58        David  35    202  28

59        David  35    202  29

60        David  35    201  30


> andy_david<-rbind(andy,read.csv(files_full[2]))

> head(andy_david)

  Patient.Name Age Weight Day

1         Andy  30    140   1

2         Andy  30    140   2

3         Andy  30    140   3

4         Andy  30    139   4

5         Andy  30    138   5

6         Andy  30    138   6

> tail(andy_david)

   Patient.Name Age Weight Day

55        David  35    203  25

56        David  35    203  26

57        David  35    202  27

58        David  35    202  28

59        David  35    202  29

60        David  35    201  30

 

> day_25<-subset(andy_david[25,])

> day_25

   Patient.Name Age Weight Day

25         Andy  30    135  25

> day_26<-subset(andy_david[which(andy_david$Day==26),])

> day_26

   Patient.Name Age Weight Day

26         Andy  30    135  26

56        David  35    203  26

> day25<-andy_david[which(andy_david$Day == 25),]

> day25

   Patient.Name Age Weight Day

25         Andy  30    135  25

55        David  35    203  25

 

> for(i in 1:5){print [i]}

Error in print[i] : object of type 'closure' is not subsettable

> for(i in 1:5){print (i}

Error: unexpected '}' in "for(i in 1:5){print (i}"

> for(i in 1:5){print (i)}

[1] 1

[1] 2

[1] 3

[1] 4

[1] 5

> for(i in 1:5){

+        dat <- rbind(dat,read.csv(files_full[i]))

+ }

Error in rbind(dat, read.csv(files_full[i])) : object 'dat' not found

> dat<-data.frame()

> for(i in 1:5){

+        dat <- rbind(dat,read.csv(files_full[i]))

+ }

> str(dat)

'data.frame':    150 obs. of  4 variables:

 $ Patient.Name: Factor w/ 5 levels "Andy","David",..: 1 1 1 1 1 1 1 1 1 1 ...

 $ Age         : int  30 30 30 30 30 30 30 30 30 30 ...

 $ Weight      : int  140 140 140 139 138 138 138 138 138 138 ...

 $ Day         : int  1 2 3 4 5 6 7 8 9 10 ...

 

> for(i in 1:5 ){

+        dat2<-data.frame()

+        dat2<-rbind(dat2,read.csv(files_full[i])) 

+ }

> str(dat2)

'data.frame':    30 obs. of  4 variables:

 $ Patient.Name: Factor w/ 1 level "Steve": 1 1 1 1 1 1 1 1 1 1 ...

 $ Age         : int  55 55 55 55 55 55 55 55 55 55 ...

 $ Weight      : int  225 225 225 224 224 224 223 223 223 223 ...

 $ Day         : int  1 2 3 4 5 6 7 8 9 10 ...

> list.files("diet_data")

[1] "Andy.csv"  "David.csv" "John.csv"  "Mike.csv"  "Steve.csv"

> head(dat2)

  Patient.Name Age Weight Day

1        Steve  55    225   1

2        Steve  55    225   2

3        Steve  55    225   3

4        Steve  55    224   4

5        Steve  55    224   5

6        Steve  55    224   6

 

Because we put dat2<-data.frame() inside the loop, dat2 is reset to an empty data frame on every pass of the loop, so we only end up with the data from the last file in our list (Steve.csv).

 

> median(dat$Weight)

[1] NA

> nrow(dat)

[1] 150

> ncol(dat)

[1] 4

> dim(dat)

[1] 150   4

> median(dat$Weight,na.rm=TRUE)

[1] 190

> dat_30<-dat[which(dat$Day==30),]

> dat_30

    Patient.Name Age Weight Day

30          Andy  30    135  30

60         David  35    201  30

90          John  22    177  30

120         Mike  40    192  30

150        Steve  55    214  30

> median(dat_30$Weight)

[1] 192

 

# weightmedian: median of the "Weight" column over all files in a directory,
# restricted to the rows whose "Day" column equals `day`.
#
# Arguments:
#   directory  path to a folder of CSV files; every file returned by
#              list.files() is read with read.csv(), and each file must
#              provide "Day" and "Weight" columns.
#   day        the day value to select rows by.
#
# Returns the median weight for that day with missing weights removed
# (NA if no row matches `day`). Stops with an error if the directory
# contains no files.
weightmedian <- function(directory, day) {
  files_list <- list.files(directory, full.names = TRUE) # full paths, so read.csv can open them

  # list.files() returns character(0) for an empty or misspelled directory;
  # fail with a clear message instead of an obscure read.csv error.
  if (length(files_list) == 0) {
    stop("no files found in directory: ", directory, call. = FALSE)
  }

  # Read every file (not a hard-coded first five) and stack the rows once,
  # rather than growing a data frame with rbind() inside a loop.
  dat <- do.call(rbind, lapply(files_list, read.csv))

  dat_subset <- dat[which(dat[, "Day"] == day), ] # rows that match the 'day' argument
  median(dat_subset[, "Weight"], na.rm = TRUE) # median weight, ignoring missing values
}

> weightmedian(directory="diet_data",day=20)

[1] 197.5

> weightmedian("diet_data",4)

[1] 188

 

 

반응형
Posted by 마르띤
,