Generating a heatmap from count data

Load relevant libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(pheatmap)

Load relevant data

# Import the PFAM domain table from the working directory as a tibble
pfam <- read_csv(file = "PFAMDomains.csv")
## Rows: 29 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): PFAM
## dbl (22): ANIG, CVEN2, GCAR, KPFE, MCON, MIMP, MSEX, MSNY, OOLI, PDOM, PECH,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the tibble to preview the imported table
pfam
## # A tibble: 29 × 23
##    PFAM   ANIG CVEN2  GCAR  KPFE  MCON  MIMP  MSEX  MSNY  OOLI  PDOM  PECH  POMP
##    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 PF00…     4     5     2     2     6     5     5     5     3     4     2     4
##  2 PF00…     8     7     6     2     8     8     8     8     7     5     7     8
##  3 PF00…     9    13    12     3    10    10    10     9     8     8     5     9
##  4 PF00…     9     9     9     2     9     9     9     9    10     8     9     9
##  5 PF00…     1     1     1     0     1     1     1     1     1     1     1     1
##  6 PF00…    24    25    22     8    25    27    27    23    26    20    23    24
##  7 PF01…     5     5     4     2     5     5     5     5     6     3     5     5
##  8 PF02…     1     1     1     0     1     0     1     0     1     1     1     1
##  9 PF02…    18    19    19     6    19    21    20    18    23    14    17    19
## 10 PF02…     6     9     9     4    11    11    10    10    11     6     5     8
## # ℹ 19 more rows
## # ℹ 10 more variables: RUND <dbl>, SBRU <dbl>, TBOR2 <dbl>, TBOU <dbl>,
## #   TBRU <dbl>, TCLA <dbl>, THYB <dbl>, TMEL <dbl>, TNIV <dbl>, WMIK <dbl>

Convert data into a usable format

# Move the PFAM identifiers out of the data and into the row names, then
# coerce the remaining columns to a matrix
pfam_matrix <- as.matrix(column_to_rownames(pfam, var = "PFAM"))

# Flip the orientation: the former columns become rows and each PFAM
# identifier becomes a column
pfam_matrix_t <- t(pfam_matrix)
pfam_matrix_t
##       PF00011 PF00012 PF00043 PF00118 PF00183 PF00226 PF01556 PF02179 PF02518
## ANIG        4       8       9       9       1      24       5       1      18
## CVEN2       5       7      13       9       1      25       5       1      19
## GCAR        2       6      12       9       1      22       4       1      19
## KPFE        2       2       3       2       0       8       2       0       6
## MCON        6       8      10       9       1      25       5       1      19
## MIMP        5       8      10       9       1      27       5       0      21
## MSEX        5       8      10       9       1      27       5       1      20
## MSNY        5       8       9       9       1      23       5       0      18
## OOLI        3       7       8      10       1      26       6       1      23
## PDOM        4       5       8       8       1      20       3       1      14
## PECH        2       7       5       9       1      23       5       1      17
## POMP        4       8       9       9       1      24       5       1      19
## RUND       18       7      12       9       1      26       5       1      20
## SBRU        4       8       9      10       1      27       4       2      18
## TBOR2       6       7       7       9       1      26       5       1      16
## TBOU        3      10       8       9       1      25       6       1      14
## TBRU        3       8       8       9       1      26       5       1      16
## TCLA        3      10       5       9       1      26       6       1      18
## THYB        3       8       8       9       1      22       5       1      19
## TMEL        3       7       8       9       1      27       5       1      16
## TNIV        3      10       5       9       1      24       5       1      16
## WMIK        4       8      10       9       1      23       5       1      20
##       PF02798 PF03234 PF03656 PF04119 PF04969 PF07961 PF08327 PF08564 PF08565
## ANIG        6       1       2       1       4       1       1       1       1
## CVEN2       9       1       1       2       4       1       1       5       3
## GCAR        9       1       2       5       4       0       1       1       1
## KPFE        4       0       0       1       3       0       1       0       0
## MCON       11       1       1       4       4       0       1       1       1
## MIMP       11       1       1       4       4       0       1       1       1
## MSEX       10       1       1       4       4       0       1       1       1
## MSNY       10       0       1       4       3       0       1       1       1
## OOLI       11       1       1       0       3       1       1       1       1
## PDOM        6       1       2       8       4       1       1       1       1
## PECH        5       1       1       2       4       1       1       1       1
## POMP        8       1       2       8       4       1       1       1       1
## RUND       15       1       1       2       4       1       1       1       1
## SBRU        6       1       3       2       4       0       1       1       1
## TBOR2       5       1       1       2       4       1       1       1       1
## TBOU        7       1       1       2       3       1       1       1       1
## TBRU        6       1       1       2       4       1       1       1       1
## TCLA        4       1       1       2       4       1       1       1       1
## THYB        7       1       2       1       4       0       1       1       1
## TMEL        6       1       1       2       4       1       1       1       1
## TNIV        5       1       1       2       4       1       1       1       1
## WMIK        7       1       2       2       4       0       1       1       1
##       PF08609 PF09229 PF10280 PF10294 PF11701 PF13589 PF16546 PF16782 PF17886
## ANIG        1       1       0       7       1       5       1       1       0
## CVEN2       1       1       1       8       1       5       1       1       0
## GCAR        1       1       1       4       1       5       1       0       0
## KPFE        0       1       1       3       0       1       1       1       0
## MCON        1       1       1       7       1       5       1       1       0
## MIMP        1       1       1       8       2       5       1       1       0
## MSEX        1       1       1       7       1       5       1       1       0
## MSNY        1       1       1       8       1       4       1       1       0
## OOLI        1       1       1       6       1       3       1       1       1
## PDOM        1       1       0       5       1       5       0       0       0
## PECH        1       1       1       5       1       5       1       1       0
## POMP        1       1       1       6       1       7       1       0       0
## RUND        1       1       1       7       1       5       1       1       0
## SBRU        1       1       1       7       1       5       1       1       0
## TBOR2       1       1       1       7       1       5       1       1       1
## TBOU        1       2       1       6       1       4       1       1       1
## TBRU        1       1       1       8       1       5       1       1       0
## TCLA        2       1       1       8       1       4       1       1       1
## THYB        1       1       1       6       1       4       1       1       1
## TMEL        1       1       1       8       1       5       1       1       0
## TNIV        1       1       1       7       1       4       1       1       2
## WMIK        1       1       1       6       1       5       1       1       0
##       PF18391 PF18972
## ANIG        0       1
## CVEN2       1       1
## GCAR        0       1
## KPFE        0       1
## MCON        1       1
## MIMP        1       1
## MSEX        1       1
## MSNY        1       1
## OOLI        1       0
## PDOM        0       1
## PECH        0       1
## POMP        0       1
## RUND        1       1
## SBRU        0       1
## TBOR2       1       1
## TBOU        1       1
## TBRU        1       1
## TCLA        1       1
## THYB        0       1
## TMEL        1       1
## TNIV        1       1
## WMIK        0       1

Normalize each column so that like is compared with like

# Min-max scale a numeric vector to the range [0, 1].
#
# x: numeric vector (here, one column of the count matrix).
# Returns a vector of the same length as x (names are kept) in which the
# smallest value maps to 0 and the largest to 1.
#
# A constant vector has max(x) == min(x), so the plain formula would compute
# 0 / 0 and fill the result with NaN. Such vectors are returned as all zeros
# instead.
normalize <- function(x) {
  lowest <- min(x)
  value_range <- max(x) - lowest
  # isTRUE() keeps NA propagation unchanged: if x contains NA the range is NA,
  # the guard is skipped, and the result is NA throughout.
  if (isTRUE(value_range == 0)) {
    return(x - lowest)
  }
  (x - lowest) / value_range
}

# Rescale every column (MARGIN = 2) of the transposed matrix independently
normalized_pfam <- apply(X = pfam_matrix_t, MARGIN = 2, FUN = normalize)

# Render the raw counts as integer strings without a thousands separator
formatted_numbers <- formatC(x = pfam_matrix_t, format = "d", big.mark = "")

Generate heatmap with necessary formatting

# Reorder the rows according to the order of phylogeny
desired_order <- c(
  "MCON", "MIMP", "MSEX", "MSNY", "TMEL", "TBRU", "TBOR2", "CVEN2",
  "RUND", "THYB", "WMIK", "PDOM", "POMP", "SBRU", "GCAR", "ANIG",
  "TCLA", "TBOU", "TNIV", "KPFE", "PECH", "OOLI"
)

# Fail early if the ordering and the matrix disagree. Indexing by name would
# otherwise silently drop any row missing from desired_order, or stop with an
# unhelpful "subscript out of bounds" for a name missing from the matrix.
stopifnot(
  anyDuplicated(desired_order) == 0L,
  setequal(desired_order, rownames(normalized_pfam))
)

# drop = FALSE keeps the result a matrix whatever the number of rows
normalized_pfam <- normalized_pfam[desired_order, , drop = FALSE]

# Generate the heatmap
# Clustering is switched off for both axes so the rows stay in the order set
# above and the columns stay in their original order.
pX <- pheatmap(normalized_pfam,
         cluster_rows = FALSE, 
         cluster_cols = FALSE, 
         display_numbers = FALSE,
         color = colorRampPalette(c("#EAD21A", "#F1A16D", "#DD5A56"))(50),
         labels_row = rownames(normalized_pfam),  # Update row labels to reflect the new matrix
         border_color = "white")

# Save the heatmap as a PDF for import into, and final editing in, Affinity
ggsave("Heatmap.pdf", plot = pX, width = 300, height = 100, units = "mm", dpi = 300)