The forest plot of hazard ratio shows that clonal hematopoietic mutations of some genes are related to treatment
Introduction
Show that different factors (age, smoking, treatment) have an impact on clonal hematopoiesis
Code explanation
Load the required tool function
library(tidyverse)
library(ggplot2)
signif.num <- function(x, ns = FALSE) {
if (ns) {
symbols = c("***", "**", "*", "ns")
} else {
symbols = c("***", "**", "*", "")
}
symnum(unlist(x), corr = FALSE, na = FALSE, legend = FALSE,
cutpoints = c(0, 0.001, 0.01, 0.05, 1),
symbols = symbols)
}
plot_forest <- function(class_results, x = "class", y = "estimate", ymin = "conf.low", ymax = "conf.high",
label = "p.label", limits = NULL, breaks = waiver(), title = "", col = NULL, fill = NULL,
dodge_width = 0.8, outer_limit_arrows = FALSE, ps=3, eb_w=0.4, eb_s=0.4, or_s=4, OR=T, yinter = 1,
nudge = 0, base_plot = geom_blank(), bar_col = 'black') {
# suppressWarnings(ggthemr::ggthemr("fresh"))
#' Forest plot of the coefficients in 'class_results'
output_plot <- ggplot(class_results, aes_string(x = x, y = y, ymin = ymin, ymax = ymax, col = col, fill = fill)) +
base_plot +
geom_hline(yintercept = yinter, color = "gray", linetype = "solid") +
geom_errorbar(position = position_dodge(width = dodge_width), width = eb_w, size=eb_s) +
geom_point(position = position_dodge(width = dodge_width), size=ps) +
geom_text(aes_string(label = label, vjust = nudge), position = position_dodge(width = dodge_width), color = 'black', size = or_s, alpha = .9) +
coord_flip() +
ggtitle(title)
if (OR==T) {
output_plot <- output_plot +
scale_y_log10(limits = limits, breaks = breaks)
}
if (outer_limit_arrows) {
stopifnot(length(limits) > 0)
# Check for errorbar values outside
class_results[, "ymin_out"] <- ifelse(class_results[, ymin] < limits[1], limits[1], NA)
class_results[, "linestyle_min"] <- ifelse(class_results[, ymin] < limits[1], "b", NA)
class_results[, "ymax_out"] <- ifelse(class_results[, ymax] > limits[2], limits[2], NA)
class_results[, "linestyle_max"] <- ifelse(class_results[, ymax] > limits[2], "b", NA)
output_plot <- output_plot + geom_linerange(data = class_results, aes_string(x = x, ymin = "ymin_out", ymax = ymax), linetype = 3, position = position_dodge(width = dodge_width))
output_plot <- output_plot + geom_linerange(data = class_results, aes_string(x = x, ymin = ymin, ymax = "ymax_out"), linetype = 3, position = position_dodge(width = dodge_width))
output_plot <- output_plot + geom_linerange(data = class_results, aes_string(x = x, ymin = "ymin_out", ymax = "ymax_out"), linetype = 3, position = position_dodge(width = dodge_width))
#output_plot <- output_plot + geom_segment(data = class_results, aes_string(x = x, xend = x, y = ymax, yend = paste0("ymin_out")),
# arrow = arrow(length = unit(.2, "cm")), position = position_dodge(width = dodge_width), lineend = "round")
#output_plot <- output_plot + geom_segment(data = class_results, aes_string(x = x, xend = x, y = ymin, yend = paste0("ymax_out")),
# arrow = arrow(length = unit(.2, "cm")), position = position_dodge(width = dodge_width), lineend = "round")
}
return(output_plot)
}
Data reading
M_wide_all = suppressWarnings(data.table::fread('M_wide_all.txt', sep = '\t', header = T)) %>% as.data.frame()
M_wide_all = M_wide_all %>%
mutate(race_b = as.integer(race == "White")
) %>%
mutate(age_scaled = as.vector(scale(age)),
age_d=age/10,
) %>%
mutate(mutnum_all_r = case_when(
mutnum_all ==0 ~ 0,
mutnum_all == 1 ~ 1,
mutnum_all >= 2 ~ 2)
) %>%
mutate(smoke_bin=case_when(
smoke==0 ~ 0,
smoke==1 ~ 1,
smoke==2 ~ 1)
) %>%
mutate(
Gender = relevel(factor(Gender), ref = 'Male'),
race = relevel(factor(race), "White"),
smoke = relevel(factor(smoke), "0"),
smoke_bin = relevel(factor(smoke_bin), "0"),
therapy_binary = relevel(factor(therapy_binary), 'untreated')
)
#Define wide data frame with treatment known
M_wide <- M_wide_all %>% filter(therapy_known==1)
Calculate the coefficient and confidence interval of age_scaled(Age) + smoke_bin(Smoke) + race_b + Gender + therapy_binary(Therapy) according to glm generalized linear regression
## forest plot
DTA = c('DNMT3A', 'TET2', 'ASXL1')
DDR = c('PPM1D', 'TP53', 'CHEK2')
SPL = c('SF3B1', 'SRSF2')
OTH = c('JAK2', 'ATM')
gene_list = c(DDR, DTA, SPL, OTH)
#ALL adjusted for treatment
logit_gene_var = list()
for (gene in gene_list) {
logit = glm(
formula = get(gene) ~ age_scaled + smoke_bin + race_b + Gender + therapy_binary,
data = M_wide,
family = "binomial")
logit_data = logit %>% sjPlot::get_model_data(type="est") %>% cbind(Gene = gene)
logit_gene_var = rbind(logit_gene_var, logit_data)
}
# for each gene
D = logit_gene_var %>%
filter(!term %in% c("GenderFemale", "race_b")) %>%
mutate(
term = c(
'therapy_binarytreated' = 'Therapy',
'smoke_bin1' = 'Smoking',
'age_scaled' = 'Age'
)[as.character(term)]
) %>%
mutate(term = factor(term, c("Age", "Therapy", "Smoking"))) %>%
mutate(p_fdr = p.adjust(p.value, method = "fdr")) %>%
mutate(termGene = paste0(term, Gene)) %>%
arrange(estimate, Gene) %>%
mutate(termGene = factor(termGene, levels = termGene)) %>%
mutate(gene_cat = case_when(
Gene %in% DTA ~ 'DTA',
Gene %in% DDR ~ 'DDR',
Gene %in% SPL ~ 'Splicing',
T ~ 'Other'
)
) %>%
mutate(gene_cat = factor(gene_cat, c('DDR', 'DTA', 'Splicing', 'Other'))) %>%
mutate(
q.value = p.adjust(p.value, n = nrow(.), method = 'fdr'),
q.label = paste0(signif(estimate, 2), signif.num(q.value)),
q.star = signif.num(q.value)
)
According to the organized data structure, use custom function to plot
p_forest = plot_forest(
D,
x = "termGene",
label = 'q.star',
eb_w = 0,
eb_s = 0.3,
ps = 1.5,
or_s = 2,
nudge = -0.3,
col = 'gene_cat'
) +
facet_wrap(~term, scale = 'free_y', ncol = 1) +
scale_x_discrete(
breaks = D$termGene,
labels = D$Gene,
expand = c(0.1,0)
) +
xlab('') + ylab('Odds Ratio of CH-PD') +
scale_color_nejm() +
panel_theme +
theme(
axis.text = element_text(size = font_size),
axis.title = element_text(size = font_size),
strip.text = element_text(size = font_size),
legend.position = 'top',
legend.title = element_blank(),
legend.text = element_text(size = font_size/1.2),
legend.key.size = unit(3, "mm")
)
OR with 95% confidence interval for CH-PD(clonal hematopoiesis mutations that we detected were classified as putative cancer-driver mutations) mutation in the ten most commonly mutated genes, with top, increasing age (n = 10,138); middle, for patients
previously exposed to cancer therapy (n = 5,978) compared with those with no exposure (n = 4,160); bottom, for current/former smokers (n = 4,989)
compared with nonsmokers (n = 5,145), in multivariable logistic regression models adjusted for therapy, smoking, ancestry, age, sex and time from
diagnosis to blood draw. Q value (FDR-corrected P value) < 0.05, Q < 0.01, Q < 0.001. Age is expressed as the mean centered values.
Interpretation of results
Age, smoking and cancer treatment have an impact on the clonal hematopoiesis of some genes.
References
Bolton, K.L., Ptashkin, R.N., Gao, T. et al. Cancer therapy shapes the fitness landscape of clonal hematopoiesis. Nat Genet 52, 1219–1226 (2020). https://doi.org/10.1038/s41588-020-00710-0
whoah this weblog is wonderful i love reading your articles.
Keep up the great work! You realize, a lot of individuals are searching round for this information, you could help them greatly.
Hi there, I discovered your web site by way of Google whilst searching
for a similar topic, your web site came up, it appears to be like
good. I have bookmarked it in my google bookmarks.
Hi there, simply changed into alert to your weblog through
Google, and located that it is truly informative.
I am going to be careful for brussels. I will appreciate
in the event you proceed this in future.
A lot of folks will be benefited out of your writing.
Cheers!
Whats up this is somewhat of off topic but I was wanting to know if blogs use
WYSIWYG editors or if you have to manually code with HTML.
I’m starting a blog soon but have no coding knowledge so I wanted
to get advice from someone with experience. Any help would be enormously appreciated!
I’m using markdown