library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pheatmap)
# Load the PFAM domain count table: one row per PFAM accession, one numeric
# column per taxon code
pfam <- read_csv(file = "PFAMDomains.csv")
## Rows: 29 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): PFAM
## dbl (22): ANIG, CVEN2, GCAR, KPFE, MCON, MIMP, MSEX, MSNY, OOLI, PDOM, PECH,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the imported table as a quick sanity check
print(pfam)
## # A tibble: 29 × 23
## PFAM ANIG CVEN2 GCAR KPFE MCON MIMP MSEX MSNY OOLI PDOM PECH POMP
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 PF00… 4 5 2 2 6 5 5 5 3 4 2 4
## 2 PF00… 8 7 6 2 8 8 8 8 7 5 7 8
## 3 PF00… 9 13 12 3 10 10 10 9 8 8 5 9
## 4 PF00… 9 9 9 2 9 9 9 9 10 8 9 9
## 5 PF00… 1 1 1 0 1 1 1 1 1 1 1 1
## 6 PF00… 24 25 22 8 25 27 27 23 26 20 23 24
## 7 PF01… 5 5 4 2 5 5 5 5 6 3 5 5
## 8 PF02… 1 1 1 0 1 0 1 0 1 1 1 1
## 9 PF02… 18 19 19 6 19 21 20 18 23 14 17 19
## 10 PF02… 6 9 9 4 11 11 10 10 11 6 5 8
## # ℹ 19 more rows
## # ℹ 10 more variables: RUND <dbl>, SBRU <dbl>, TBOR2 <dbl>, TBOU <dbl>,
## # TBRU <dbl>, TCLA <dbl>, THYB <dbl>, TMEL <dbl>, TNIV <dbl>, WMIK <dbl>
# Turn the tibble into a numeric matrix whose row names are the PFAM accessions
pfam_matrix <- as.matrix(column_to_rownames(pfam, var = "PFAM"))
# Flip the matrix so that rows are taxa and columns are PFAM domains
pfam_matrix_t <- t(pfam_matrix)
print(pfam_matrix_t)
## PF00011 PF00012 PF00043 PF00118 PF00183 PF00226 PF01556 PF02179 PF02518
## ANIG 4 8 9 9 1 24 5 1 18
## CVEN2 5 7 13 9 1 25 5 1 19
## GCAR 2 6 12 9 1 22 4 1 19
## KPFE 2 2 3 2 0 8 2 0 6
## MCON 6 8 10 9 1 25 5 1 19
## MIMP 5 8 10 9 1 27 5 0 21
## MSEX 5 8 10 9 1 27 5 1 20
## MSNY 5 8 9 9 1 23 5 0 18
## OOLI 3 7 8 10 1 26 6 1 23
## PDOM 4 5 8 8 1 20 3 1 14
## PECH 2 7 5 9 1 23 5 1 17
## POMP 4 8 9 9 1 24 5 1 19
## RUND 18 7 12 9 1 26 5 1 20
## SBRU 4 8 9 10 1 27 4 2 18
## TBOR2 6 7 7 9 1 26 5 1 16
## TBOU 3 10 8 9 1 25 6 1 14
## TBRU 3 8 8 9 1 26 5 1 16
## TCLA 3 10 5 9 1 26 6 1 18
## THYB 3 8 8 9 1 22 5 1 19
## TMEL 3 7 8 9 1 27 5 1 16
## TNIV 3 10 5 9 1 24 5 1 16
## WMIK 4 8 10 9 1 23 5 1 20
## PF02798 PF03234 PF03656 PF04119 PF04969 PF07961 PF08327 PF08564 PF08565
## ANIG 6 1 2 1 4 1 1 1 1
## CVEN2 9 1 1 2 4 1 1 5 3
## GCAR 9 1 2 5 4 0 1 1 1
## KPFE 4 0 0 1 3 0 1 0 0
## MCON 11 1 1 4 4 0 1 1 1
## MIMP 11 1 1 4 4 0 1 1 1
## MSEX 10 1 1 4 4 0 1 1 1
## MSNY 10 0 1 4 3 0 1 1 1
## OOLI 11 1 1 0 3 1 1 1 1
## PDOM 6 1 2 8 4 1 1 1 1
## PECH 5 1 1 2 4 1 1 1 1
## POMP 8 1 2 8 4 1 1 1 1
## RUND 15 1 1 2 4 1 1 1 1
## SBRU 6 1 3 2 4 0 1 1 1
## TBOR2 5 1 1 2 4 1 1 1 1
## TBOU 7 1 1 2 3 1 1 1 1
## TBRU 6 1 1 2 4 1 1 1 1
## TCLA 4 1 1 2 4 1 1 1 1
## THYB 7 1 2 1 4 0 1 1 1
## TMEL 6 1 1 2 4 1 1 1 1
## TNIV 5 1 1 2 4 1 1 1 1
## WMIK 7 1 2 2 4 0 1 1 1
## PF08609 PF09229 PF10280 PF10294 PF11701 PF13589 PF16546 PF16782 PF17886
## ANIG 1 1 0 7 1 5 1 1 0
## CVEN2 1 1 1 8 1 5 1 1 0
## GCAR 1 1 1 4 1 5 1 0 0
## KPFE 0 1 1 3 0 1 1 1 0
## MCON 1 1 1 7 1 5 1 1 0
## MIMP 1 1 1 8 2 5 1 1 0
## MSEX 1 1 1 7 1 5 1 1 0
## MSNY 1 1 1 8 1 4 1 1 0
## OOLI 1 1 1 6 1 3 1 1 1
## PDOM 1 1 0 5 1 5 0 0 0
## PECH 1 1 1 5 1 5 1 1 0
## POMP 1 1 1 6 1 7 1 0 0
## RUND 1 1 1 7 1 5 1 1 0
## SBRU 1 1 1 7 1 5 1 1 0
## TBOR2 1 1 1 7 1 5 1 1 1
## TBOU 1 2 1 6 1 4 1 1 1
## TBRU 1 1 1 8 1 5 1 1 0
## TCLA 2 1 1 8 1 4 1 1 1
## THYB 1 1 1 6 1 4 1 1 1
## TMEL 1 1 1 8 1 5 1 1 0
## TNIV 1 1 1 7 1 4 1 1 2
## WMIK 1 1 1 6 1 5 1 1 0
## PF18391 PF18972
## ANIG 0 1
## CVEN2 1 1
## GCAR 0 1
## KPFE 0 1
## MCON 1 1
## MIMP 1 1
## MSEX 1 1
## MSNY 1 1
## OOLI 1 0
## PDOM 0 1
## PECH 0 1
## POMP 0 1
## RUND 1 1
## SBRU 0 1
## TBOR2 1 1
## TBOU 1 1
## TBRU 1 1
## TCLA 1 1
## THYB 0 1
## TMEL 1 1
## TNIV 1 1
## WMIK 0 1
# Min-max normalize a numeric vector to the range [0, 1].
#
# Arguments:
#   x      Numeric vector (e.g. one matrix column handed over by `apply()`).
#   na.rm  If TRUE (default), missing values are ignored when computing the
#          range and stay NA in the result. If FALSE, any NA makes the whole
#          result NA.
#
# Returns a double vector of the same length as `x`, with the names of `x`
# preserved. Names must survive because `apply()` derives the row names of its
# result from them.
#
# A constant vector (max == min) is mapped to all zeros instead of NaN; the
# naive formula would compute 0 / 0 for such input.
normalize <- function(x, na.rm = TRUE) {
  # Nothing to scale: return the input as double and avoid the min()/max()
  # warnings that empty input would trigger.
  if (length(x) == 0L || all(is.na(x))) {
    return(x * 1)
  }
  rng <- range(x, na.rm = na.rm)
  span <- rng[2] - rng[1]
  # Zero range: divide by 1 so that every (x - min) stays 0 rather than NaN.
  if (isTRUE(span == 0)) {
    span <- 1
  }
  (x - rng[1]) / span
}
# Rescale every PFAM domain (column) independently across all taxa (rows)
normalized_pfam <- apply(X = pfam_matrix_t, MARGIN = 2, FUN = normalize)
# Row order that follows the phylogeny
desired_order <- c(
  "MCON", "MIMP", "MSEX", "MSNY", "TMEL", "TBRU", "TBOR2", "CVEN2",
  "RUND", "THYB", "WMIK", "PDOM", "POMP", "SBRU", "GCAR", "ANIG",
  "TCLA", "TBOU", "TNIV", "KPFE", "PECH", "OOLI"
)
# Fail early with a readable message if a code in the order vector has no
# matching row; plain indexing would only report "subscript out of bounds".
missing_taxa <- setdiff(desired_order, rownames(normalized_pfam))
if (length(missing_taxa) > 0) {
  stop(
    "Taxa missing from the PFAM matrix: ",
    paste(missing_taxa, collapse = ", "),
    call. = FALSE
  )
}
# Reorder the normalized values used for the heatmap colours
normalized_pfam <- normalized_pfam[desired_order, , drop = FALSE]
# Format the raw counts as integer strings, in the SAME row order as
# `normalized_pfam`, so the labels line up cell-by-cell if they are ever passed
# to `display_numbers` in pheatmap()
formatted_numbers <- formatC(
  pfam_matrix_t[desired_order, , drop = FALSE],
  format = "d",
  big.mark = ""
)
# Draw the heatmap without clustering so the rows keep the order already set
# on `normalized_pfam`; the palette interpolates 50 steps from yellow through
# orange to red
pX <- pheatmap(
  mat = normalized_pfam,
  color = colorRampPalette(c("#EAD21A", "#F1A16D", "#DD5A56"))(50),
  border_color = "white",
  display_numbers = FALSE,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  # Row labels are taken from the matrix that is actually plotted
  labels_row = rownames(normalized_pfam)
)
# Export the heatmap as a 300 mm x 100 mm PDF for import into Affinity, where
# the final editing is done
ggsave(
  filename = "Heatmap.pdf",
  plot = pX,
  units = "mm",
  width = 300,
  height = 100,
  dpi = 300
)