'그래프 순서 변경'에 해당되는 글 1건

  1. 2017.07.11 Lesson2: R Basic
반응형


# 원하는 데이터만 발라서 보기

> data(mtcars)

> mean(mtcars$mpg)

[1] 20.09062

> subset(mtcars, mtcars$mpg >= 30 | mtcars$hp < 60)

                mpg cyl disp  hp drat    wt  qsec vs am gear carb

Fiat 128       32.4   4 78.7  66 4.08 2.200 19.47  1  1    4    1

Honda Civic    30.4   4 75.7  52 4.93 1.615 18.52  1  1    4    2

Toyota Corolla 33.9   4 71.1  65 4.22 1.835 19.90  1  1    4    1

Lotus Europa   30.4   4 95.1 113 3.77 1.513 16.90  1  1    5    2

> mtcars[mtcars$mpg >= 30 | mtcars$hp < 60, ] #column 전체를 보기 위해서는 콤마 필수

                mpg cyl disp  hp drat    wt  qsec vs am gear carb

Fiat 128       32.4   4 78.7  66 4.08 2.200 19.47  1  1    4    1

Honda Civic    30.4   4 75.7  52 4.93 1.615 18.52  1  1    4    2

Toyota Corolla 33.9   4 71.1  65 4.22 1.835 19.90  1  1    4    1

Lotus Europa   30.4   4 95.1 113 3.77 1.513 16.90  1  1    5    2




#reddit data 보기, 범주형 변수 그래프 그리기

> reddit <- read.csv('reddit.csv')

> library(ggplot2)

> qplot(data = reddit, x = age.range)





#오더 순서 정하기 setting levels of ordered factors solution


> reddit$age.range = ordered(reddit$age.range,levels=c("Under 18","18-24", "25-34","35-44","45-54","55-64","65 or Above"))

> qplot(data = reddit, x = age.range)



#alternative solution

> reddit$age.range = factor(reddit$age.range, levels=c("Under 18","18-24", "25-34","35-44","45-54","55-64","65 or Above"),ordered=T)

> qplot(data = reddit, x = age.range)




# practice

> nlevels(reddit$income.range)

[1] 8

> levels(reddit$income.range)

[1] "$100,000 - $149,999" "$150,000 or more"    "$20,000 - $29,999"   "$30,000 - $39,999"  

[5] "$40,000 - $49,999"   "$50,000 - $69,999"   "$70,000 - $99,999"   "Under $20,000"      

> qplot(data = reddit, x = income.range)



#아래와 같은 방법도 가능

> reddit$income.range = ordered(reddit$income.range, levels=c("Under $20,000" , "$20,000 - $29,999"   , "$30,000 - $39,999", "$40,000 - $49,999","$50,000 - $69,999" , "$70,000 - $99,999", "$100,000 - $149,999" , "$150,000 or more"))

> qplot(data = reddit, x = income.range)




#다른 예제

> tShirts <- factor(c('medium', 'small', 'large', 'medium', 'large', 'large'), levels = c('medium','small','large'))

> tShirts

[1] medium small  large  medium large  large 

Levels: medium small large

> qplot(x = tShirts)



> tShirts <- ordered(tShirts, levels = c('small', 'medium', 'large'))

> tShirts

[1] medium small  large  medium large  large 

Levels: small < medium < large

> qplot(x = tShirts)








참고

https://cn.udacity.com/course/data-analysis-with-r--ud651

https://cn.udacity.com/course/data-wrangling-with-mongodb--ud032

http://vita.had.co.nz/papers/tidy-data.pdf

http://courses.had.co.nz.s3-website-us-east-1.amazonaws.com/12-rice-bdsi/slides/07-tidy-data.pdf

http://www.computerworld.com/article/2497143/business-intelligence/business-intelligence-beginner-s-guide-to-r-introduction.html

http://www.statmethods.net/index.html

https://www.r-bloggers.com/

http://www.cookbook-r.com/

http://blog.revolutionanalytics.com/2013/08/foodborne-chicago.html

http://blog.yhat.com/posts/roc-curves.html

https://github.com/corynissen/foodborne_classifier

반응형
Posted by 마르띤
,