## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Load and download (if necessary) required packages ----
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Sequence analysis of the ALC Fellowship state sequences with TraMineR and
# ggseqplot: package setup, data import, sequence objects and descriptive
# plots.
# Tutorial: https://maraab23.github.io/ggseqplot/articles/ggseqplot.html

## Save package names as a vector of strings
pkgs <- c(
  "colorspace", "glue", "ggseqplot", "ggtext", "ggplot2", "ggthemes",
  "hrbrthemes", "haven", "Hmisc", "kableExtra", "knitr", "patchwork",
  "pdftools", "RColorBrewer", "reshape2", "sjPlot", "tidyverse", "TraMineR",
  "tibble", "TraMineRextras"
)

## Install uninstalled packages
# Compare against the package names only (the row names of the matrix), not
# against every cell of the installed.packages() matrix.
missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
invisible(
  lapply(missing_pkgs, install.packages, repos = getOption("repos")["CRAN"])
)

## Load all packages to library and adjust options
invisible(lapply(pkgs, library, character.only = TRUE))

# load and install packages
# pacman is not part of `pkgs`, so make sure it is available before using it.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman", repos = getOption("repos")["CRAN"])
}
pacman::p_load(
  TraMineR, TraMineRextras, cluster, RColorBrewer, devtools, haven,
  tidyverse, reshape2, WeightedCluster, nnet
)

## Don't forget to load ggseqplot
library(ggseqplot)

## Data import ----
#alcdata <- read.csv(file ="C:/Users/damil/Downloads/ALC-Pre-Fellowship-2023-B.csv", header = TRUE)
#write.csv(data,"C:/Users/damil/OneDrive/Documents/2021-UKVisa-Application/sample_data.csv", row.names = FALSE)
#alcdata <- read.csv(file ="/Users/k1453370/Downloads/ALC-Pre-Fellowship-2023-B.csv", header = TRUE)
alcdata <- read.csv(
  file = "/Users/k1453370/Documents/MGR-2024/ALC-Fellowship-Feb-2022-Final.csv",
  header = TRUE
)
#seqstatl(alcdata[, 33:89])

## Sequence objects ----
# State codes and their long labels for the full observation window.
alcdata.alphabet <- c(
  "fED", "fJ", "Flw", "fM", "fSE", "fSM", "pJ", "pM", "pSM", "U"
)
alcdata.lab <- c(
  "Further Education", "Full-time Junior position", "ALC Fellowship",
  "Full-time Mid-level position", "Full-time Self Employed",
  "Full-time Senior Management position", "Part-time Junior position",
  "Part-time Mid-level position", "Part-time Senior Management position",
  "Unemployed"
)
# Columns 38 to 156 of `alcdata` hold the state sequences.
alcdata.seq <- seqdef(alcdata, 38:156, labels = alcdata.lab)

# Pre-Fellowship (5 years)
alcdata.alphabetflwA <- c(
  "fED", "Flw", "fM", "fSE", "fSM", "pJ", "pM", "pSM", "U"
)
alcdata.labflwA <- c(
  "Further Education", "ALC Fellowship", "Full-time Mid-level position",
  "Full-time Self Employed", "Full-time Senior Management position",
  "Part-time Junior position", "Part-time Mid-level position",
  "Part-time Senior Management position", "Unemployed"
)
# NOTE(review): `alcdataflwFinal` is not created anywhere in this script; it
# must exist in the session before this point. Its column ranges (38:96 and
# 94:156) fall inside the 38:156 range used for `alcdata`, so it presumably
# shares that layout -- confirm, or replace it with the intended data frame.
alcdataflw.seqA <- seqdef(alcdataflwFinal, 38:96, labels = alcdata.labflwA)

# Post-Fellowship (5 years)
alcdata.alphabetflwB <- c(
  "fED", "fJ", "Flw", "fM", "fSE", "fSM", "pJ", "pM", "pSM", "U"
)
alcdata.labflwB <- c(
  "Further Education", "Full-time Junior position", "ALC Fellowship",
  "Full-time Mid-level position", "Full-time Self Employed",
  "Full-time Senior Management position", "Part-time Junior position",
  "Part-time Mid-level position", "Part-time Senior Management position",
  "Unemployed"
)
alcdataflw.seqB <- seqdef(alcdataflwFinal, 94:156, labels = alcdata.labflwB)

#alcdata.alphabetB <- c("fJ","fM","fSE", "pJ","pM","U")
#alcdata.labyB<- c("Further Education", "Full-time Junior position", "Full-time Mid-level position","Full-time Senior Management position","Part-time Junior position","Part-time Mid-level position", "Part-time Senior Management position","Unemployed")
#alcdata.alphabetyr3 <- c("fED","fJ","fM","fSM", "pJ", "pM","pSM", "U")
#alcdata.labyr3<- c("Unemployed","Full-time Self Employed", "Part-time Junior position", "Part-time Mid-level position", "Full-time Junior position", "Full-time Mid-level position","Full-time Senior Management position")
#alcdata.seqyr2 <- seqdef(alcdata,46:57, labels=alcdata.labyr1)
#alcdata.seqyr3 <- seqdef(alcdata,58:69, labels=alcdata.labyr3)

## Descriptive plots ----
## also can plot frequencies using seqfplot
seqfplot(alcdataflw.seqA, border = NA, with.legend = "right", legend.prop = 0.4)
seqfplot(alcdataflw.seqA, with.legend = "right", legend.prop = 0.4)

# Plot the first 10 sequences
seqiplot(alcdata.seq, border = TRUE)
# Plot the first 10 sequences (Pre-ALC)
seqiplot(alcdataflw.seqA, border = TRUE)
# Plot the first 10 sequences (Post-ALC)
seqiplot(alcdataflw.seqB, border = TRUE)

#par(mfrow = c(2, 2))
par(mfrow = c(1, 2))
# Plot the 10 most frequent sequences.
seqfplot(alcdata.seq, with.legend = "right", legend.prop = 0.4)
# Plot the 10 most frequent sequences (Post-ALC)
seqfplot(alcdataflw.seqB, with.legend = "right", legend.prop = 0.4)

# Cross-sectional state distributions.
seqstatd(alcdata.seq)
# NOTE(review): `alcdata.seqyr1`, `alcdata.seqyr2` and `alcdata.seqyr3` are
# never created in this script (the seqdef() calls for yr2/yr3 are commented
# out above and there is none for yr1), so every call that uses them is
# disabled here. Re-enable once those objects are defined.
#seqstatd(alcdata.seqyr1)
#seqstatd(alcdata.seqyr3)

dplot <- ggseqdplot(alcdata.seq)
#dplot1 <- ggseqdplot(alcdata.seqyr1)
#dplot1$data

# Example 1: State distribution plot
# ggseqplot::ggseqdplot
ggseqdplot(alcdata.seq, border = TRUE)
#ggseqdplot(alcdata.seqyr1)
#ggseqdplot(alcdata.seqyr2)

# NOTE(review): `month.abb` supplies only 12 labels while the sequence spans
# columns 38:156 -- check that the x-axis labels are what is wanted here.
ggseqdplot(alcdata.seq) +
  scale_fill_discrete_sequential("heat") +
  scale_x_discrete(labels = month.abb) +
  labs(
    title = "State distribution plot (Pre- and Post-ALC Fellowship)",
    x = "Month"
  ) +
  guides(fill = guide_legend(title = "Legend")) +
  theme_ipsum(base_family = "") + # ensures that this works on different OS
  theme(
    plot.title = element_text(size = 30, margin = margin(0, 0, 20, 0)),
    plot.title.position = "plot"
  )

# Transition rate plot by Sex - Added February 15, 2024
# NOTE(review): the original referenced `alcdata.seqA`, which is never created
# in this script; assumed to mean the pre-Fellowship object `alcdataflw.seqA`.
# The grouping vector comes from `alcdata`, so both must have the same rows in
# the same order -- confirm.
ggseqtrplot(alcdataflw.seqA, group = alcdata$Sex)

## Colour palette ----
# One colour per state label, interpolated from a 9-colour Brewer palette.
colourCount <- length(alcdata.lab)
#getPalette = colorRampPalette(brewer.pal(9, "Spectral"))
getPalette <- colorRampPalette(brewer.pal(9, "Set3"))

## let's see how our colours look like
# Lay the swatches out on a roughly square grid of labelled rectangles.
axisLimit <- sqrt(colourCount) + 1
colours <- data.frame(
  x1 = rep(seq(1, axisLimit, 1), length.out = colourCount),
  x2 = rep(seq(2, axisLimit + 1, 1), length.out = colourCount),
  y1 = rep(1:axisLimit, each = axisLimit, length.out = colourCount),
  y2 = rep(2:(axisLimit + 1), each = axisLimit, length.out = colourCount),
  t = letters[1:colourCount],
  r = alcdata.lab
)

ggplot() +
  scale_x_continuous(name = "x") +
  scale_y_continuous(name = "y") +
  geom_rect(
    data = colours,
    mapping = aes(xmin = x1, xmax = x2, ymin = y1, ymax = y2, fill = t),
    color = "black", alpha = 0.5
  ) +
  geom_text(
    data = colours,
    aes(x = x1 + (x2 - x1) / 2, y = y1 + (y2 - y1) / 2, label = r),
    size = 4
  ) +
  scale_fill_manual(values = getPalette(colourCount)) +
  theme(legend.position = "none")

## Sequence object with explicit alphabet, short codes and palette ----
alcdata.scodes <- c(
  "fED", "fJ", "Flw", "fM", "fSE", "fSM", "pJ", "pM", "pSM", "U"
)
alcdata.seq <- seqdef(
  alcdata, 38:156,
  alphabet = alcdata.alphabet,
  states = alcdata.scodes,
  labels = alcdata.lab,
  cpal = getPalette(colourCount),
  xtstep = 10
)

seqiplot(alcdataflw.seqA, with.legend = FALSE, border = NA)
seqlegend(alcdataflw.seqA)
# NOTE(review): the original referenced `alcdata.seqB`, which is never created
# in this script; assumed to mean the post-Fellowship object
# `alcdataflw.seqB` (mirrors the pre-Fellowship pair just above) -- confirm.
seqiplot(alcdataflw.seqB, border = TRUE)
seqlegend(alcdataflw.seqB)

# Plot all the sequences in the data set, sorted by states from start.
#seqIplot(alcdata.seq, sortv = "from.start", with.legend = "right", border = T)
# NOTE(review): `withborder` does not appear to be a documented seqplot()
# argument (`border` is); left unchanged -- confirm what was intended.
seqIplot(
  alcdata.seq,
  sortv = "from.start", with.legend = "right", withborder = TRUE
)
seqlegend(alcdata.seq)

## Plots by Groups
# `with.legend` replaces the deprecated `withlegend` spelling.
seqIplot(
  alcdata.seq,
  group = alcdata$Sex, with.legend = TRUE, border = TRUE, xtstep = 10,
  sortv = "from.start"
)

## Default plot
# Plot the first 10 sequences
seqiplot(alcdata.seq, with.legend = "right", withborder = TRUE)

# Plot the 10 most frequent sequences.
# NOTE(review): originally `alcdata.seqB` (undefined); see the note above.
seqfplot(alcdataflw.seqB, with.legend = TRUE, xtstep = 8)
seqlegend(alcdata.seq)

# Compute and plot the state distributions by time points (called with
# border = TRUE). The display of the legend is deactivated since we include
# several plots in the same figure.
seqdplot(alcdata.seq, with.legend = FALSE, border = TRUE)

# Compute and plot the transversal entropy index (sequence of entropies of the
# transversal state distributions).
seqHtplot(alcdata.seq)

# Plot the sequence of modal states of the transversal state distributions.
seqmsplot(alcdata.seq, with.legend = FALSE, border = TRUE)

# Plot the mean time spent in each state of the alphabet.
## Mean time spent in each state ----
#par(mfrow = c(2, 2))
par(mfrow = c(1, 2))
seqmtplot(alcdata.seq, with.legend = FALSE)
seqlegend(alcdata.seq)

## default plot with standard errors
ggseqmtplot(alcdata.seq, no.n = TRUE, error.bar = "SE")

## flipped version
ggseqmtplot(alcdata.seq, no.n = TRUE, error.bar = "SE") +
  coord_flip() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "top"
  )

# Plot the mean time spent in each state of the alphabet by sex
seqmtplot(
  alcdata.seq,
  with.legend = FALSE, group = alcdata$Sex, ylim = c(0, 30)
)

# Plot the mean time spent in each state of the alphabet by Completion of a
# Higher Degree before the ALC Fellowship
seqmtplot(
  alcdata.seq,
  with.legend = FALSE,
  group = alcdata$Q4.Did.you.complete.a.higher.degree..MA.MSc.or.equivalent..before.or.during.your.ALC.Fellowship.,
  ylim = c(0, 30)
)

# Plot the mean time spent in each state of the alphabet by Fellowship type
seqmtplot(
  alcdata.seq,
  with.legend = FALSE, group = alcdata$Fellowship.type, ylim = c(0, 30)
)

# Plot the mean time spent in each state of the alphabet by whether the fellow
# had children during the ALC Fellowship (column Q7)
seqmtplot(
  alcdata.seq,
  with.legend = FALSE,
  group = alcdata$Q7.Did.you.have.any.children..biological.adopted.stepchildren..during.your.ALC.Fellowship.,
  ylim = c(0, 30)
)

## transitions from state to state (in probabilities) ----
alcdataflw <- read.csv(
  file = "C:/Users/damil/Downloads/ALC-Pre-Fellowship-2023-B.csv",
  header = TRUE
)
alcdata.alphabetflw <- c("fJ", "fM", "fSE", "fSM", "pJ", "pM", "U")
alcdata.labflw <- c(
  "Full-time Junior position", "Full-time Mid-level position",
  "Full-time Self Employed", "Full-time Senior Management position",
  "Part-time Junior position", "Part-time Mid-level position", "Unemployed"
)

# Build the sequence object before its first use; the original only created it
# after the seqtrate() call below had already referenced it.
# NOTE(review): no `alphabet` is passed, so the seven labels must match the
# number of distinct states found in columns 34:93 of `alcdata` -- confirm.
alcdataflw.seq <- seqdef(alcdata, 34:93, labels = alcdata.labflw)

## Computing transition rates between states "A" and "B" only
# NOTE(review): "Flw" is not listed in `alcdata.alphabetflw`; check that it is
# a state of `alcdataflw.seq`.
seqtrate(alcdataflw.seq, sel.states = c("U", "Flw"))

# https://rpubs.com/Kolpashnikova/sequenceAnalysis
## transitions from state to state (in probabilities)
trate <- seqtrate(alcdataflw.seqB)
round(trate, 2)

## heatmap of the transitions matrix
# melt() reshapes the matrix to long format (columns Var1, Var2, value).
heatTrate <- melt(trate)
ggplot(heatTrate, aes(Var2, Var1)) +
  geom_tile(aes(fill = value)) +
  geom_text(aes(label = round(value, 2))) +
  scale_fill_continuous(high = "#22598f", low = "#56B1F7", name = "Transitions")

## Colour palette for the reduced label set ----
colourCountflw <- length(alcdata.labflw)
getPalette <- colorRampPalette(brewer.pal(9, "Spectral"))

## let's see how our colours look like
# Size the swatch grid from the number of labels actually drawn here
# (`colourCountflw`); the original used `colourCount` from the full label set.
axisLimit <- sqrt(colourCountflw) + 1
colours <- data.frame(
  x1 = rep(seq(1, axisLimit, 1), length.out = colourCountflw),
  x2 = rep(seq(2, axisLimit + 1, 1), length.out = colourCountflw),
  y1 = rep(1:axisLimit, each = axisLimit, length.out = colourCountflw),
  y2 = rep(2:(axisLimit + 1), each = axisLimit, length.out = colourCountflw),
  t = letters[1:colourCountflw],
  r = alcdata.labflw
)

ggplot() +
  scale_x_continuous(name = "x") +
  scale_y_continuous(name = "y") +
  geom_rect(
    data = colours,
    mapping = aes(xmin = x1, xmax = x2, ymin = y1, ymax = y2, fill = t),
    color = "black", alpha = 0.5
  ) +
  geom_text(
    data = colours,
    aes(x = x1 + (x2 - x1) / 2, y = y1 + (y2 - y1) / 2, label = r),
    size = 4
  ) +
  scale_fill_manual(values = getPalette(colourCountflw)) +
  theme(legend.position = "none")

# Transition rates for the sequence object built from columns 34:93.
trate <- seqtrate(alcdataflw.seq)
## [>] computing transition probabilities for states UE/FD/SD/JFE/JPE/MFE/MPE/SMFE/SMPE/SE/LA/VL/FL ...
# Print the most recently computed transition-rate matrix, rounded.
round(trate, 2)

## heatmap of the transitions matrix
# melt() reshapes the matrix to long format (columns Var1, Var2, value).
heatTrate <- melt(trate)
ggplot(heatTrate, aes(Var2, Var1)) +
  geom_tile(aes(fill = value)) +
  geom_text(aes(label = round(value, 2))) +
  scale_fill_continuous(high = "#132B43", low = "#56B1F7", name = "Transitions")

par(mfrow = c(1, 1))
ggseqtrplot(alcdataflw.seq, dss = FALSE, group = alcdata$Fellowship.type)

# Transition rate plot
# to obtain the transition rates between the states of the alphabet.
# TraMineR::seqtrate stores these rates in a square matrix which internally is
# reshaped into a long format with one row for every combination of states
# (i.e., the squared size of the sequence alphabet)
par(mfrow = c(1, 1))
# Group by the covariate column of the data frame; the original read `Sex`
# from the sequence object `alcdata.seq`, whereas every other grouped call in
# this script takes it from `alcdata`.
ggseqtrplot(alcdataflw.seq, group = alcdata$Sex)

# NOTE(review): the title says "by Sex" but no `group` is passed to this call
# -- confirm which was intended.
p1 <- ggseqtrplot(alcdataflw.seq, dss = FALSE, x_n.dodge = 2, labsize = 3) +
  ggtitle("STS Sequences by Sex") +
  theme(plot.margin = unit(c(5, 10, 5, 5), "points"))
#> [>] computing transition probabilities for states 0/1/2/3/4/5/6/7 ...

p2 <- ggseqtrplot(alcdataflw.seq, x_n.dodge = 2, labsize = 3) +
  ggtitle("DSS Sequences") +
  theme(plot.margin = unit(c(5, 5, 5, 10), "points"))
#> [>] computing transition probabilities for states 0/1/2/3/4/5/6/7 ...

# Side-by-side composition; `&` applies the theme to every panel.
p1 + p2 &
  theme(plot.title = element_text(size = 20, hjust = 0.5))

p1 &
  theme(plot.title = element_text(size = 20, hjust = 0.5))

## Within-sequence entropy regressed on covariates ----
# `alcdataflw.seq` is built from `alcdata`, so the covariates must come from
# that same data frame to stay row-aligned with the entropies; the original
# passed `alcdataflw`, which is read from a different file.
entropies <- seqient(alcdataflw.seq)
lm.ent <- lm(entropies ~ Sex, data = alcdata)
# This second fit overwrites `lm.ent`; the entropies are computed once and
# reused rather than recomputed.
lm.ent <- lm(
  entropies ~ Sex +
    Q4.Did.you.complete.a.higher.degree..MA.MSc.or.equivalent..before.or.during.your.ALC.Fellowship.,
  data = alcdata
)

## Coarser time granularity ----
# Aggregate every 4 positions into one, keeping the most frequent state.
gentime4_seq <- seqgranularity(alcdata.seq, tspan = 4, method = "mostfreq")
seqdplot(gentime4_seq, border = NA, with.legend = "right", legend.prop = 0.4)
seqplot(gentime4_seq, type = "ms", with.legend = "right", legend.prop = 0.4)