Comparison of the number of proteins per orthogroup

Load relevant libraries

library(openxlsx)
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
library(readxl)

Input the data

# Note: This data includes the R. undulata counts, which turned out to be outliers. The same tests are repeated below without the R. undulata counts. 

# Read the three count tables (one workbook per orthogroup); these versions
# still contain the R. undulata counts.
count190 <- readxl::read_excel(path = "190_NHvH.xlsx")
count279 <- readxl::read_excel(path = "279_NHvH.xlsx")
count266 <- readxl::read_excel(path = "266_NHvH.xlsx")

Ensure the values are read as numeric, or forced to NA as necessary

# Coerce the NH and H columns of every table to numeric. Entries that cannot be
# parsed as numbers become NA, which is what the recorded coercion warnings
# on the NH columns report.
count190[["OG0000190_NH"]] <- as.numeric(count190[["OG0000190_NH"]])
## Warning: NAs introduced by coercion
count190[["OG0000190_H"]] <- as.numeric(count190[["OG0000190_H"]])

count279[["OG0000279_NH"]] <- as.numeric(count279[["OG0000279_NH"]])
## Warning: NAs introduced by coercion
count279[["OG0000279_H"]] <- as.numeric(count279[["OG0000279_H"]])

count266[["OG0000266_NH"]] <- as.numeric(count266[["OG0000266_NH"]])
## Warning: NAs introduced by coercion
count266[["OG0000266_H"]] <- as.numeric(count266[["OG0000266_H"]])

Test for normality using the Shapiro-Wilk test, i.e. shapiro.test(data$column1)

Shapiro test for OG0000190_NH

# Shapiro-Wilk normality test on the OG0000190_NH column of count190
# (data set that includes the R. undulata counts).
shapiro.test(count190$OG0000190_NH)
## 
##  Shapiro-Wilk normality test
## 
## data:  count190$OG0000190_NH
## W = 0.69449, p-value = 0.0007555

Shapiro test for OG0000190_H

# Shapiro-Wilk normality test on the OG0000190_H column of count190
# (data set that includes the R. undulata counts).
shapiro.test(count190$OG0000190_H)
## 
##  Shapiro-Wilk normality test
## 
## data:  count190$OG0000190_H
## W = 0.53612, p-value = 3.304e-05

Perform Wilcoxon Test due to non-normality

# Wilcoxon rank-sum test comparing the NH and H counts of OG0000190. The result
# is stored and then printed. Because the samples contain ties, R reports that
# no exact p-value can be computed and uses the continuity-corrected
# approximation shown in the output.
wilcox_test_result <- wilcox.test(count190$OG0000190_NH, count190$OG0000190_H)
## Warning in wilcox.test.default(count190$OG0000190_NH, count190$OG0000190_H):
## cannot compute exact p-value with ties
wilcox_test_result
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  count190$OG0000190_NH and count190$OG0000190_H
## W = 34.5, p-value = 0.09055
## alternative hypothesis: true location shift is not equal to 0

Shapiro test for OG0000279_NH

# Shapiro-Wilk normality test on the OG0000279_NH column of count279
# (data set that includes the R. undulata counts).
shapiro.test(count279$OG0000279_NH)
## 
##  Shapiro-Wilk normality test
## 
## data:  count279$OG0000279_NH
## W = 0.84119, p-value = 0.0456

Shapiro test for OG0000279_H

# Shapiro-Wilk normality test on the OG0000279_H column of count279
# (data set that includes the R. undulata counts).
shapiro.test(count279$OG0000279_H)
## 
##  Shapiro-Wilk normality test
## 
## data:  count279$OG0000279_H
## W = 0.80112, p-value = 0.009647

Perform Wilcoxon due to non-normality

# Wilcoxon rank-sum test comparing the NH and H counts of OG0000279. This
# overwrites the wilcox_test_result from the previous orthogroup, then prints
# it. Ties in the data prevent an exact p-value, so the continuity-corrected
# approximation is reported.
wilcox_test_result <- wilcox.test(count279$OG0000279_NH, count279$OG0000279_H)
## Warning in wilcox.test.default(count279$OG0000279_NH, count279$OG0000279_H):
## cannot compute exact p-value with ties
wilcox_test_result
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  count279$OG0000279_NH and count279$OG0000279_H
## W = 21, p-value = 0.009211
## alternative hypothesis: true location shift is not equal to 0

Shapiro test for OG0000266_NH

# Shapiro-Wilk normality test on the OG0000266_NH column of count266
# (data set that includes the R. undulata counts).
shapiro.test(count266$OG0000266_NH)
## 
##  Shapiro-Wilk normality test
## 
## data:  count266$OG0000266_NH
## W = 0.454, p-value = 1.055e-06

Shapiro test for OG0000266_H

# Shapiro-Wilk normality test on the OG0000266_H column of count266
# (data set that includes the R. undulata counts).
shapiro.test(count266$OG0000266_H)
## 
##  Shapiro-Wilk normality test
## 
## data:  count266$OG0000266_H
## W = 0.8505, p-value = 0.03724

Perform Wilcoxon due to non-normality

# Wilcoxon rank-sum test comparing the NH and H counts of OG0000266.
# Fix: the second sample used to be count279$OG0000279_H (copied over from the
# OG0000279 section), so the OG0000266 NH counts were being compared against a
# different orthogroup. Both samples now come from count266.
# NOTE(review): the output recorded below was generated by the old call and
# still names count279$OG0000279_H; re-knit the document to refresh the
# warning text, W and the p-value before citing this result.
wilcox_test_result <- wilcox.test(count266$OG0000266_NH, count266$OG0000266_H)
## Warning in wilcox.test.default(count266$OG0000266_NH, count279$OG0000279_H):
## cannot compute exact p-value with ties
wilcox_test_result
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  count266$OG0000266_NH and count279$OG0000279_H
## W = 11, p-value = 0.0009868
## alternative hypothesis: true location shift is not equal to 0

Redo the above analyses without the R. undulata counts

# Note: This data excludes the R. undulata counts, which are all outliers. This is a repeat of the analyses done above including the R. undulata counts, which showed statistically supported differences between fire-associated and non-associated species with regard to their numbers of OG0000279 (GST) and OG0000266 (aromatic compound dioxygenases). These analyses were performed to determine whether the R. undulata outliers influence the differences between the fire-associated and non-associated species.

# Read the count tables again, this time from the workbooks in which the
# R. undulata counts have been removed.
count190_removed <- readxl::read_excel(path = "190_NHvH_removed.xlsx")
count279_removed <- readxl::read_excel(path = "279_NHvH_removed.xlsx")
count266_removed <- readxl::read_excel(path = "266_NHvH_removed.xlsx")

Ensure the values are read as numeric, or forced to NA as necessary

# Coerce the NH and H columns of every "removed" table to numeric. Entries that
# cannot be parsed as numbers become NA, which is what the recorded coercion
# warnings on the NH columns report.
count190_removed[["OG0000190_NH"]] <- as.numeric(count190_removed[["OG0000190_NH"]])
## Warning: NAs introduced by coercion
count190_removed[["OG0000190_H"]] <- as.numeric(count190_removed[["OG0000190_H"]])

count279_removed[["OG0000279_NH"]] <- as.numeric(count279_removed[["OG0000279_NH"]])
## Warning: NAs introduced by coercion
count279_removed[["OG0000279_H"]] <- as.numeric(count279_removed[["OG0000279_H"]])

count266_removed[["OG0000266_NH"]] <- as.numeric(count266_removed[["OG0000266_NH"]])
## Warning: NAs introduced by coercion
count266_removed[["OG0000266_H"]] <- as.numeric(count266_removed[["OG0000266_H"]])

Test for Normality using the Shapiro test

Shapiro test for OG0000190_NH (R. undulata removed)

# Shapiro-Wilk normality test on the OG0000190_NH column of count190_removed
# (data set without the R. undulata counts).
shapiro.test(count190_removed$OG0000190_NH)
## 
##  Shapiro-Wilk normality test
## 
## data:  count190_removed$OG0000190_NH
## W = 0.69449, p-value = 0.0007555

Shapiro test for OG0000190_H (R. undulata removed)

# Shapiro-Wilk normality test on the OG0000190_H column of count190_removed
# (data set without the R. undulata counts).
shapiro.test(count190_removed$OG0000190_H)
## 
##  Shapiro-Wilk normality test
## 
## data:  count190_removed$OG0000190_H
## W = 0.84009, p-value = 0.03167

Perform Wilcoxon Test due to non-normality

# Wilcoxon rank-sum test comparing the NH and H counts of OG0000190 with the
# R. undulata counts removed. The result overwrites wilcox_test_result and is
# then printed. Ties prevent an exact p-value, so the continuity-corrected
# approximation is reported.
wilcox_test_result <- wilcox.test(count190_removed$OG0000190_NH, count190_removed$OG0000190_H)
## Warning in wilcox.test.default(count190_removed$OG0000190_NH,
## count190_removed$OG0000190_H): cannot compute exact p-value with ties
wilcox_test_result
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  count190_removed$OG0000190_NH and count190_removed$OG0000190_H
## W = 34.5, p-value = 0.1465
## alternative hypothesis: true location shift is not equal to 0

Shapiro test for OG0000279_NH (R. undulata removed)

# Shapiro-Wilk normality test on the OG0000279_NH column of count279_removed
# (data set without the R. undulata counts).
shapiro.test(count279_removed$OG0000279_NH)
## 
##  Shapiro-Wilk normality test
## 
## data:  count279_removed$OG0000279_NH
## W = 0.84119, p-value = 0.0456

Shapiro test for OG0000279_H (R. undulata removed)

# Shapiro-Wilk normality test on the OG0000279_H column of count279_removed
# (data set without the R. undulata counts).
shapiro.test(count279_removed$OG0000279_H)
## 
##  Shapiro-Wilk normality test
## 
## data:  count279_removed$OG0000279_H
## W = 0.84865, p-value = 0.04098

Perform Wilcoxon due to non-normality

# Wilcoxon rank-sum test comparing the NH and H counts of OG0000279 with the
# R. undulata counts removed. The result overwrites wilcox_test_result and is
# then printed. Ties prevent an exact p-value, so the continuity-corrected
# approximation is reported.
wilcox_test_result <- wilcox.test(count279_removed$OG0000279_NH, count279_removed$OG0000279_H)
## Warning in wilcox.test.default(count279_removed$OG0000279_NH,
## count279_removed$OG0000279_H): cannot compute exact p-value with ties
wilcox_test_result
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  count279_removed$OG0000279_NH and count279_removed$OG0000279_H
## W = 21, p-value = 0.01512
## alternative hypothesis: true location shift is not equal to 0

Shapiro test for OG0000266_NH (R. undulata removed)

# Shapiro-Wilk normality test on the OG0000266_NH column of count266_removed
# (data set without the R. undulata counts).
shapiro.test(count266_removed$OG0000266_NH)
## 
##  Shapiro-Wilk normality test
## 
## data:  count266_removed$OG0000266_NH
## W = 0.454, p-value = 1.055e-06

Shapiro test for OG0000266_H (R. undulata removed)

# Shapiro-Wilk normality test on the OG0000266_H column of count266_removed
# (data set without the R. undulata counts).
shapiro.test(count266_removed$OG0000266_H)
## 
##  Shapiro-Wilk normality test
## 
## data:  count266_removed$OG0000266_H
## W = 0.81958, p-value = 0.01703

Perform Wilcoxon Test due to non-normality

# Wilcoxon rank-sum test comparing the NH and H counts of OG0000266 with the
# R. undulata counts removed. The result overwrites wilcox_test_result and is
# then printed. Ties prevent an exact p-value, so the continuity-corrected
# approximation is reported.
wilcox_test_result <- wilcox.test(count266_removed$OG0000266_NH, count266_removed$OG0000266_H)
## Warning in wilcox.test.default(count266_removed$OG0000266_NH,
## count266_removed$OG0000266_H): cannot compute exact p-value with ties
wilcox_test_result
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  count266_removed$OG0000266_NH and count266_removed$OG0000266_H
## W = 11, p-value = 0.001406
## alternative hypothesis: true location shift is not equal to 0