EC1: Comparing PCA and tSNE clustering methods with gganimate

What’s visualized?
- A GIF visualizing k-means clusters (computed on the log-transformed gene expression data) in reduced-dimensional space using linear vs. non-linear methods (PCA and tSNE), as well as in the original physical space.
By applying animation between these three panels, you can observe how differently the two methods arrange the clusters in the reduced-dimensional space. It is interesting to note that, in this particular case, PCA seems to separate the clusters more clearly than tSNE.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
library(ggplot2)
library(stats) # for kmeans
library(reshape2) # for melt
library(Rtsne) # for tSNE
library(gganimate) # for animation
# Compare how k-means clusters of the pikachu dataset look in physical space,
# PCA space, and tSNE space, and animate between the three views.

# Fix the RNG seed so the tSNE embedding and the k-means result are
# reproducible from run to run.
set.seed(42)

# Load data ----
# Change this to your local directory where the data is stored
dir <- "./genomic-data-visualization-2025/"
pikachu_file <- "data/pikachu.csv.gz"
data_P <- read.csv(paste0(dir, pikachu_file))

# Pikachu data: columns 7 onward are used as the gene expression matrix
gexp_P <- data_P[, 7:ncol(data_P)]
# log transform the gexp data (+1 pseudocount so zero counts stay finite)
log_gexp_P <- log2(gexp_P + 1)
# columns 5-6 hold the coordinates (read below as aligned_x / aligned_y)
pos_P <- data_P[, 5:6]

# Dimensionality reduction ----
# PCA (argument spelled `scale.` in full instead of relying on partial
# matching of `scale`)
pca_P <- prcomp(log_gexp_P, scale. = TRUE)

# tSNE
# NOTE: this call must run; everything below reads emb_P$Y. It was previously
# commented out, which made the script fail with "object 'emb_P' not found".
emb_P <- Rtsne(
  log_gexp_P,
  dims = 3,
  pca = TRUE,
  perplexity = 30,
  verbose = FALSE
)
# rename columns of the tSNE embedding
colnames(emb_P$Y) <- c("tSNE1", "tSNE2", "tSNE3")

# k-means clustering ----
# Cluster once on the log-transformed expression data; the same cluster labels
# are then shown in every coordinate system.
k <- 7
kmeans_orig_P <- kmeans(log_gexp_P, centers = k)
clusters_P <- as.factor(kmeans_orig_P$cluster)

# Assemble plotting data ----
# One data frame per view, all sharing the column names x / y / cluster so
# they can be stacked. `order` identifies the view:
#   1 = physical position, 2 = PC1 vs PC2, 3 = tSNE1 vs tSNE2
df_1 <- data.frame(
  x = pos_P$aligned_x,
  y = pos_P$aligned_y,
  cluster = clusters_P
)
df_2 <- data.frame(
  x = pca_P$x[, 1],
  y = pca_P$x[, 2],
  cluster = clusters_P
)
df_3 <- data.frame(
  x = emb_P$Y[, 1],
  y = emb_P$Y[, 2],
  cluster = clusters_P
)
df <- rbind(
  cbind(df_1, order = 1),
  cbind(df_2, order = 2),
  cbind(df_3, order = 3)
)

# Animate ----
# For a static side-by-side view instead, add: + facet_wrap(~order)
p <- ggplot(df, aes(x = x, y = y, color = cluster)) +
  geom_point(size = .75)

# transition_states() steps through the three views; view_follow() lets the
# axis ranges track the data, since the three coordinate systems differ.
anim <- p +
  transition_states(order) +
  view_follow() +
  enter_fade() +
  exit_fade() +
  labs(title = 'PCA vs tSNE') +
  theme_minimal()

gif <- animate(anim, height = 800, width = 800)
gif