Get it from http://cs.wellesley.edu/~qtw/code/r_cat.r
> BFCases <- read.table(file="BirdFluCases.txt", header = TRUE,sep="\t")
> names(BFCases)
[1] "Year" "Azerbaijan" "Bangladesh" "Cambodia" "China"
[6] "Djibouti" "Egypt" "Indonesia." "Iraq" "LaoPDR"
[11] "Myanmar" "Nigeria" "Pakistan" "Thailand" "Turkey"
[16] "VietNam"
> str(BFCases)
'data.frame': 6 obs. of 16 variables:
$ Year : int 2003 2004 2005 2006 2007 2008
$ Azerbaijan: int 0 0 0 8 0 0
$ Bangladesh: int 0 0 0 0 0 1
$ Cambodia : int 0 0 4 2 1 0
$ China : int 1 0 8 13 5 3
$ Djibouti : int 0 0 0 1 0 0
$ Egypt : int 0 0 0 18 25 7
$ Indonesia.: int 0 0 20 55 42 18
$ Iraq : int 0 0 0 3 0 0
$ LaoPDR : int 0 0 0 0 2 0
$ Myanmar : int 0 0 0 0 1 0
$ Nigeria : int 0 0 0 0 1 0
$ Pakistan : int 0 0 0 0 3 0
$ Thailand : int 0 17 5 3 0 0
$ Turkey : int 0 0 0 12 0 0
$ VietNam : int 3 29 61 0 8 5
> BFDeaths <- read.table(file="BirdFluDeaths.txt", header = TRUE, sep="\t")
> names(BFDeaths)
[1] "Year" "Azerbaijan" "Bangladesh" "Cambodia" "China"
[6] "Djibouti" "Egypt" "Indonesia." "Iraq" "LaoPDR"
[11] "Myanmar" "Nigeria" "Pakistan" "Thailand" "Turkey"
[16] "VietNam"
> str(BFDeaths)
'data.frame': 6 obs. of 16 variables:
$ Year : int 2003 2004 2005 2006 2007 2008
$ Azerbaijan: int 0 0 0 5 0 0
$ Bangladesh: int 0 0 0 0 0 0
$ Cambodia : int 0 0 4 2 1 0
$ China : int 1 0 5 8 3 3
$ Djibouti : int 0 0 0 0 0 0
$ Egypt : int 0 0 0 10 9 3
$ Indonesia.: int 0 0 13 45 37 15
$ Iraq : int 0 0 0 2 0 0
$ LaoPDR : int 0 0 0 0 2 0
$ Myanmar : int 0 0 0 0 0 0
$ Nigeria : int 0 0 0 0 1 0
$ Pakistan : int 0 0 0 0 1 0
$ Thailand : int 0 12 2 3 0 0
$ Turkey : int 0 0 0 4 0 0
$ VietNam : int 3 20 19 0 5 5
>
> Cases <- rowSums(BFCases[,2:16])
> names(Cases) <- BFCases[,1]
> Cases
2003 2004 2005 2006 2007 2008
4 46 98 115 88 34
> #Let's also compare deaths by year
> Deaths <- rowSums(BFDeaths[,2:16])
> names(Deaths) <- BFDeaths[,1]
> Deaths
2003 2004 2005 2006 2007 2008
4 32 43 79 59 26
> #We want to plot Cases and Deaths in a bar chart
> Counts <- cbind(Cases, Deaths)
> Counts
Cases Deaths
2003 4 4
2004 46 32
2005 98 43
2006 115 79
2007 88 59
2008 34 26
> Cases
2003 2004 2005 2006 2007 2008
4 46 98 115 88 34
> barplot(Cases , main = "Bird flu cases")
> barplot(Counts)
> t(Counts)
2003 2004 2005 2006 2007 2008
Cases 4 46 98 115 88 34
Deaths 4 32 43 79 59 26
> barplot(t(Counts), col = gray(c(0.5,1)))
> barplot(t(Counts), beside = TRUE)
par(mfrow = c(2,2), mar = c(3, 3, 2, 1))
barplot(Cases , main = "Bird flu cases")
barplot(Counts)
barplot(t(Counts), col = gray(c(0.5,1)))
barplot(t(Counts), beside = TRUE)
> Counts
Cases Deaths
2003 4 4
2004 46 32
2005 98 43
2006 115 79
2007 88 59
2008 34 26
> BFProp<-cbind(Survivors=Cases-Deaths,Deaths)
> BFProp
Survivors Deaths
2003 0 4
2004 14 32
2005 55 43
2006 36 79
2007 29 59
2008 8 26
mosaicplot(BFProp,col=c('green','red'),main="Bird flu survival rates by year")
las
parameter to get the labels right. Download the solution code from http://cs.wellesley.edu/~qtw/code/r_cat_ex1_sol.r
> #here's a sorted list of countries
> names(sort(CCases,decreasing=T))
[1] "Indonesia." "VietNam" "Egypt" "China" "Thailand"
[6] "Turkey" "Azerbaijan" "Cambodia" "Iraq" "Pakistan"
[11] "LaoPDR" "Bangladesh" "Djibouti" "Myanmar" "Nigeria"
> #we can then reorder the matrix rows by the sorted names
> BFCountryProp<-BFCountryProp[names(sort(CCases,decreasing=T)),]
>
> BFCountryProp
Survivors CDeaths
Indonesia. 25 110
VietNam 54 52
Egypt 28 22
China 10 20
Thailand 8 17
Turkey 8 4
Azerbaijan 3 5
Cambodia 0 7
Iraq 1 2
Pakistan 2 1
LaoPDR 0 2
Bangladesh 1 0
Djibouti 1 0
Myanmar 1 0
Nigeria 0 1
> rs<-read.table('regSuperCensusMod.csv',header=T,sep=',')
> rsi<-rs[rs$IndOrg=="IND",]
> head(rsi)
> head(rsi)
Candidate RegularSuper CNumber CName CAmount Date
20 barack obama Regular C00431445 Obama for America 35800 20110525
26 barack obama Regular C00431445 Obama for America 25800 20110510
39 barack obama Regular C00431445 Obama for America 5000 20110404
40 barack obama Regular C00431445 Obama for America 5000 20110628
41 barack obama Regular C00401224 ActBlue 5000 20110420
43 barack obama Regular C00431445 Obama for America 5000 20110421
State ZIP IndOrg PopSt FrWhite FrBlack USADiversity
20 AR 72207 IND 2915918 0.745381 0.154289 0.417706
26 CA 94115 IND 37253956 0.401468 0.061713 0.728676
39 ME 4110 IND 1328361 0.944244 0.011824 0.110012
40 CA 94708 IND 37253956 0.401468 0.061713 0.728676
41 MA 2111 IND 6547629 0.761314 0.066344 0.405299
43 CA 94904 IND 37253956 0.401468 0.061713 0.728676
Contingency tables count the number of observations for every combination of values of the categorical variables:
> ct<-table(rsi[,c('Candidate','RegularSuper')])
> ct
RegularSuper
Candidate Regular Super
barack obama 28864 41
mitt romney 10470 159
newt gingrich 1266 18
barplot(ct,beside=T,col=rainbow(3),main='# of contributions by type and candidate')
legend("topright",rownames(ct),fill=rainbow(3))
mosaicplot(ct,main='# of contributions by type and candidate')
mosaicplot(~Candidate+RegularSuper,data=rsi,
main='# of contributions by type and candidate')
Let's say we've got a numeric variable that we'd like to apply a function to, grouped by a categorical variable (e.g., the total amount of campaign contributions rather than the number of contributions):
> ct.cont<-aggregate(CAmount~Candidate+RegularSuper,data=rsi,sum)
> ct.cont
Candidate RegularSuper CAmount
1 barack obama Regular 23111899
2 mitt romney Regular 17243855
3 newt gingrich Regular 1101165
4 barack obama Super 445200
5 mitt romney Super 12167953
6 newt gingrich Super 2080250
> cta<-array(data=ct.cont$CAmount,
+ dim=c(length(levels(ct.cont$Candidate)),
+ length(levels(ct.cont$RegularSuper))),
+ dimnames=list(levels(ct.cont$Candidate),
+ levels(ct.cont$RegularSuper)))
>
> cta
Regular Super
barack obama 23111899 445200
mitt romney 17243855 12167953
newt gingrich 1101165 2080250
> r.state<-table(rsi[rsi$RegularSuper=='Regular',c('State','Candidate')])
> head(r.state)
Candidate
State barack obama mitt romney newt gingrich
99 0 35 0
AK 75 10 5
AL 101 52 13
AR 76 13 6
AS 2 0 0
AZ 321 203 19
mosaicplot(r.state,main='# of contributions by type and state')
> stateCont<-table(rsi[rsi$RegularSuper=='Regular','State'])
> stateCont
99 AK AL AR AS AZ CA CO CT DC DE FL GA GU HI IA ID IL IN KS
35 90 166 95 2 543 7545 726 1092 994 94 2302 1023 4 165 187 205 2138 277 120
KY LA MA MD ME MI MN MO MS MT NC ND NE NH NJ NM NV NY OH OK
143 176 2429 1329 181 1023 375 572 70 101 579 20 70 219 1021 187 246 4497 541 145
OR PA PR RI SC SD TN TX UT VA VI VT WA WI WV WY ZZ
407 1244 101 88 166 27 371 2439 928 1299 21 285 929 350 54 66 28
> r.state.srt<-r.state[names(sort(stateCont,decreasing=T)),]
> r.state.srt
Candidate
State barack obama mitt romney newt gingrich
CA 5995 1453 97
NY 3295 1162 40
TX 1488 837 114
MA 1629 790 10
FL 1221 939 142
IL 1881 231 26
mosaicplot(r.state.srt[1:20,],col=rainbow(3),
main='# of contributions (top 20 states sorted by # contributions)')
Table of Contents | t |
---|---|
Exposé | ESC |
Full screen slides | e |
Presenter View | p |
Source Files | s |
Slide Numbers | n |
Toggle screen blanking | b |
Show/hide slide context | c |
Notes | 2 |
Help | h |