We want to determine the extent to which the MIKK panel shows evidence of introgression with other medaka populations, specifically the allopatric Northern and Korean medaka strains, relative to sympatric Southern medaka strains.
library(here)
source(here::here("code", "scripts", "introgression", "source.R"))
Working directory on EBI cluster: /hps/research1/birney/users/ian/mikk_paper
container_dir=../sing_conts
# load Singularity (version 3.5.0)
module load singularity
for package in $( echo r-base_4.0.4 bcftools_1.9 numpy_1.15.4 bash_3.0.22 bioconductor_3.12 ) ; do
if [[ ! -f $container_dir/$package.sif ]]; then
singularity build \
--remote \
$container_dir/$package.sif \
$package.def
envs/fi ;
done
renv
Install all required packages (using r-base in Singularity container).
container_dir=../sing_conts
baseR=r-base_4.0.4
bsub -Is "singularity shell $container_dir/$baseR"
# Install all required packages
::restore() renv
wget -P code/scripts/introgression https://raw.githubusercontent.com/simonhmartin/genomics_general/master/ABBABABAwindows.py
wget -P code/scripts/introgression https://raw.githubusercontent.com/simonhmartin/genomics_general/master/genomics.py
ftp://ftp.ensembl.org/pub/release-102/emf/ensembl-compara/multiple_alignments/50_fish.epo/README.50_fish.epo
reads:
Alignments are grouped by japanese medaka hdrr chromosome, and then by coordinate system. Alignments containing duplications in japanese medaka hdrr are dumped once per duplicated segment. The files named .other.emf contain alignments that do not include any japanese medaka hdrr region. Each file contains up to 200 alignments.
ftp_dir=ftp://ftp.ensembl.org/pub/release-102/emf/ensembl-compara/multiple_alignments/50_fish.epo/
target_dir=../introgression/release-102
mkdir -p $target_dir/raw
# download, exlcuding *.other* files
wget -P $target_dir/raw $ftp_dir/* -R "*other*"
# unzip into new directory (excluding "other")
$target_dir/unzipped
mkdir -p $target_dir/unzipped
for i in $(find $target_dir/raw/50_fish.epo.[0-9]*); do
name=$(basename $i | cut -f3,4 -d'.');
bsub "zcat $i > $target_dir/unzipped/$name";
done
release 101
is unaffected. Remove all release 102
chr 6 files and replace with release 101
files.# remove release 102 files for chr 6
rm $target_dir/unzipped/6_*
# download chr 6 files from release 101
ftp_dir_101=ftp://ftp.ensembl.org/pub/release-101/emf/ensembl-compara/multiple_alignments/50_fish.epo/
target_dir_101=../introgression/release-101
mkdir -p $target_dir_101/raw
wget -P $target_dir_101/raw $ftp_dir_101/50_fish.epo.6_*
# unzip
mkdir -p $target_dir_101/unzipped
for i in $(find $target_dir_101/raw/*); do
name=$(basename $i | cut -f3,4 -d'.') ;
zcat $i > $target_dir_101/unzipped/$name ;
done
# copy over to release 102 directory
cp $target_dir_101/unzipped/* $target_dir/unzipped
Copy tree to file
tree_file=data/introgression/release_102_tree.txt
awk "NR==58,NR==205" $target_dir/raw/README.50_fish.epo \
> $tree_file
Then manually edit $tree_file
using regex to find spaces and replace them with "_": {bash} (?<=[a-z])( )(?=[a-z])
<- ape::read.tree(file = here::here("data", "introgression", "release_102_tree.txt")) phylo_tree
# Colour all Oryzias
<- phylo_tree$tip.label[grep("Oryzias_latipes", phylo_tree$tip.label)]
ids # get indices of edges descending from MRCA (determined through trial and error)
<- seq(39, 42)
oryzias_nodes <- ifelse(1:length(phylo_tree[["edge.length"]]) %in% oryzias_nodes, "#E84141", "black")
all_med_col # set colours for tip labels
<- ifelse(phylo_tree$tip.label %in% ids, "#E84141", "black")
all_med_tip # plot
::plot.phylo(phylo_tree,
apeuse.edge.length = T,
edge.color = all_med_col,
tip.color = all_med_tip,
font = 4)
# Save to repo
png(file= file.path(plots_dir, "tree_all_olat_highlight.png"),
width=22,
height=25,
units = "cm",
res = 400)
::plot.phylo(phylo_tree,
apeuse.edge.length = T,
edge.color = all_med_col,
tip.color = all_med_tip,
font = 4)
dev.off()
New tree file created manually to extract Oryzias fishes only, and replace reference codes (e.g. “ASM223467v1”) with line names (e.g. “HdrR”).
= here::here("data/introgression/release_102_tree_oryzias_only.txt")
in_file # Read in
<- ape::read.tree(file = in_file)
phylo_tree # Set colours
<- c("#55b6b0", "#f33a56", "#f3b61f", "#f6673a", "#631e68")
phylo_cols # Plot
::plot.phylo(phylo_tree,
apefont = 4,
tip.color = phylo_cols)
= here::here(plots_dir, "tree_oryzias.png")
out_file # Save
png(file=out_file,
width=2700,
height=1720,
units = "px",
res = 400)
::plot.phylo(phylo_tree,
apefont = 4,
tip.color = phylo_cols)
dev.off()
= here::here(plots_dir, "tree_oryzias_with_ancestor.png") out_file
= here::here(plots_dir, "tree_oryzias.png")
in_file
= in_file
tree_path
ggdraw() +
draw_image(tree_path) +
draw_label("x",
x = 0.152,
y = 0.52,
fontface = "bold",
color = "#f77cb5",
size = 25)
ggsave(out_file,
width=15.69,
height=10,
units = "cm",
dpi = 400)
::include_graphics(out_file) knitr
### Oryzias latipes only
New tree file created manually to extract Oryzias latipes fishes only, and replace reference codes (e.g. “ASM223467v1”) with line names (e.g. “HdrR”).
= here::here("data/introgression/release_102_tree_oryzias_latipes_only.txt")
in_file # Read in
<- ape::read.tree(file = in_file)
phylo_tree # Set colours
<- c("#9E2B25", "#f6673a", "#631e68")
phylo_cols # Plot
::plot.phylo(phylo_tree,
apefont = 4,
tip.color = phylo_cols)
= here::here(plots_dir, "tree_oryzias_latipes.png")
out_file # Save
png(file=out_file,
width=2700,
height=1720,
units = "px",
res = 400)
::plot.phylo(phylo_tree,
apefont = 4,
tip.color = phylo_cols)
dev.off()
= here::here(plots_dir, "tree_oryzias_latipes_with_ancestor.png") out_file
= here::here(plots_dir, "tree_oryzias_latipes.png")
in_file
= in_file
tree_path
ggdraw() +
draw_image(tree_path) +
draw_label("x",
x = 0.152,
y = 0.52,
fontface = "bold",
color = "#f77cb5",
size = 25)
ggsave(out_file,
width=15.69,
height=10,
units = "cm",
dpi = 400)
::include_graphics(out_file) knitr
target_dir=../introgression/release-102
segments_dir=$target_dir/segmented
date=20210312
script=code/scripts/introgression/20200907_extract-emf-segments.sh
mkdir -p $segments_dir
for i in $(find $target_dir/unzipped/* ); do
# get basename
bname=$(basename $i);
bname_short=$(echo ${bname::-4} );
# get chromosome
chr=$(echo $bname | cut -f1 -d"_" );
# make directory for each EMF file
new_path=$(echo $segments_dir/$bname_short );
if [ ! -d "$new_path" ]; then
mkdir $new_path;
fi
# get segment count
segment_count=$(grep "^DATA" $i | wc -l );
# get segment start and end for each file
for j in $(seq 1 $segment_count ); do
bsub \
-o ../log/$date\segment_$bname_short\_$j.out \
-e ../log/$date\segment_$bname_short\_$j.err \
"$script $i $j $new_path "
done;
done
# How many files?
find $segments_dir/*/*.data.txt | wc -l
# 8951
find $in_dir/*/*_1.data.txt | wc -l
# 4341
find $in_dir/*/*_-1.data.txt | wc -l
# 4610
snakemake
snmk_proj="introgression"
module load singularity
conda activate snakemake
snakemake \
--jobs 5000 \
--latency-wait 100 \
--cluster-config code/snakemake/$snmk_proj/config/cluster.json \
--cluster 'bsub -g /snakemake_bgenie -J {cluster.name} -n {cluster.n} -M {cluster.memory} -o {cluster.output} -e {cluster.error}' \
--keep-going \
--rerun-incomplete \
--use-conda \
--use-singularity \
-s code/snakemake/$snmk_proj/Snakefile \
-p
= here::here("data", "introgression", "20210315_f_stat_final.txt")
data_file # Read in data
<- read.table(data_file,
final_df header = T,
sep = "\t",
as.is = T)
<- final_df %>%
final_df ::mutate(across(P2,
dplyr~factor(.x, levels = fish_order))) %>%
::mutate(chr = factor(chr, levels = chr_order))
dplyr
::kable(head(final_df)) knitr
P1 | P2 | P3 | chr | d_stat | z_score | admix_f | f_ci_lower | f_ci_upper |
---|---|---|---|---|---|---|---|---|
javanicus | HdrR | MIKK | all | 0.9072306 | 324.06735 | 0.7093524 | 0.6943606 | 0.7243441 |
javanicus | HdrR | MIKK | 1 | 0.9422313 | 162.46960 | 0.8090713 | 0.7776743 | 0.8404682 |
javanicus | HdrR | MIKK | 2 | 0.9128441 | 57.55129 | 0.7668972 | 0.6931469 | 0.8406474 |
javanicus | HdrR | MIKK | 3 | 0.8869725 | 76.53532 | 0.6217246 | 0.5501172 | 0.6933320 |
javanicus | HdrR | MIKK | 4 | 0.9318431 | 72.86056 | 0.7503846 | 0.6643958 | 0.8363734 |
javanicus | HdrR | MIKK | 5 | 0.8879805 | 69.67407 | 0.6565419 | 0.5865893 | 0.7264945 |
melastigma
and javanicus
<- final_df %>%
cor_df # filter for when P1 is another Oryzias, and P2
::filter(P1 %in% c("javanicus", "melastigma") & P2 != "MIKK" & P3 == "MIKK") %>%
dplyr# pivot to put the admixture_f stat for melastigma and javanicus in the same row
::pivot_wider(id_cols = c("P2", "chr"),
tidyrnames_from = P1,
values_from = c(admix_f, f_ci_lower, f_ci_upper))
$chr <- as.character(cor_df$chr)
cor_df$chr <- ifelse(cor_df$chr == "all", "genome-wide", cor_df$chr)
cor_df<- c(seq(1,24), "genome-wide")
chr_order_plot $chr <- factor(cor_df$chr, levels = chr_order_plot)
cor_df
<- cor_df %>%
cor_df_means # apply across rows
::rowwise() %>%
dplyr# get means for f and CIs
::mutate(mean_f = mean(c(admix_f_javanicus, admix_f_melastigma)),
dplyrmean_ci_upper = mean(c(f_ci_upper_javanicus, f_ci_upper_melastigma)),
mean_ci_lower = mean(c(f_ci_lower_javanicus, f_ci_lower_melastigma))) %>%
# set stats at a maximum of 1
::mutate(across(c("mean_f", "mean_ci_upper", "mean_ci_lower"),
dplyr~dplyr::if_else(.x > 1,
1,
.x)))
::kable(head(cor_df_means)) knitr
P2 | chr | admix_f_javanicus | admix_f_melastigma | f_ci_lower_javanicus | f_ci_lower_melastigma | f_ci_upper_javanicus | f_ci_upper_melastigma | mean_f | mean_ci_upper | mean_ci_lower |
---|---|---|---|---|---|---|---|---|---|---|
HdrR | genome-wide | 0.7093524 | 0.6860648 | 0.6943606 | 0.6712495 | 0.7243441 | 0.7008800 | 0.6977086 | 0.7126121 | 0.6828051 |
HdrR | 1 | 0.8090713 | 0.7831495 | 0.7776743 | 0.7466954 | 0.8404682 | 0.8196036 | 0.7961104 | 0.8300359 | 0.7621849 |
HdrR | 2 | 0.7668972 | 0.7621796 | 0.6931469 | 0.6999464 | 0.8406474 | 0.8244128 | 0.7645384 | 0.8325301 | 0.6965466 |
HdrR | 3 | 0.6217246 | 0.5968322 | 0.5501172 | 0.5217981 | 0.6933320 | 0.6718664 | 0.6092784 | 0.6825992 | 0.5359576 |
HdrR | 4 | 0.7503846 | 0.7399951 | 0.6643958 | 0.6811117 | 0.8363734 | 0.7988785 | 0.7451899 | 0.8176260 | 0.6727538 |
HdrR | 5 | 0.6565419 | 0.6241430 | 0.5865893 | 0.5565435 | 0.7264945 | 0.6917426 | 0.6403425 | 0.7091186 | 0.5715664 |
= cor_df_means %>%
fstat_plot ggplot(aes(P2, mean_f, fill = P2)) +
geom_col() +
geom_errorbar(aes(ymin = mean_ci_lower,
ymax = mean_ci_upper),
position = position_dodge(0.9),
width = 0.25) +
guides(fill = F) +
facet_wrap(~chr) +
ylim(0,1) +
ylab(expression(paste("Mean ", italic("f"), " statistic"))) +
theme_cowplot(font_size = 8) +
scale_fill_manual(values = pal_abba)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
fstat_plot
= here::here(plots_dir, "20210315_f_stat")
out_file
# PNG
ggsave(filename = paste(out_file, ".png", sep = ""),
device = "png",
width = 24.75,
height = 19.5,
units = "cm",
dpi = 500)
# SVG
ggsave(filename = paste(out_file, ".svg", sep = ""),
device = "svg",
width = 24.75,
height = 19.5,
units = "cm")
= here::here("data", "introgression", "abba_sliding_final", "50000_100.txt")
in_file # Read in data
= readr::read_csv(in_file) %>%
df ::arrange(p1, p2, scaffold, start)
dplyr
# Convert fd to 0 if D < 0
$fd = ifelse(df$D < 0,
df0,
$fd)
df
# Change names
= df %>%
df ::mutate(p2 = recode(df$p2, hdrr = "HdrR", hni = "HNI", hsok = "HSOK")) dplyr
%>%
df ::filter(p1 == "melastigma") %>%
dplyrggplot() +
geom_line(aes(mid, fd, colour = p2)) +
facet_wrap(~scaffold, nrow = 24, ncol = 1) +
scale_colour_manual(values = pal_abba) +
theme_bw(base_size = 10) +
scale_x_continuous(breaks = c(0, 5000000, 10000000, 15000000, 20000000, 25000000, 30000000, 35000000),
labels = scales::comma) +
xlab("Base position") +
ylab(bquote(italic(f[d]))) +
labs(colour = "P2")
= here::here(plots_dir, "20210317_abba_sliding.png")
out_file
ggsave(filename = out_file,
device = "png",
width = 24.75,
height = 50,
units = "cm",
dpi = 300)
# Get chromosome lengths
= read.table(here("data", "Oryzias_latipes.ASM223467v1.dna.toplevel.fa_chr_counts.txt"),
med_chr_lens col.names = c("chr", "end"))
# Add start
$start = 1
med_chr_lens# Reorder
= med_chr_lens %>%
med_chr_lens ::select(chr, start, end)
dplyr# Create custom genome
= regioneR::toGRanges(med_chr_lens) med_genome
= here::here("data", "introgression", "abba_sliding_final", "50000_100.txt")
in_file # Read in data
= readr::read_csv(in_file) %>%
df ::arrange(p1, p2, scaffold, start)
dplyr
# Convert fd to 0 if D < 0
$fd = ifelse(df$D < 0,
df0,
$fd)
df
# Change names
= df %>%
df ::mutate(p2 = recode(df$p2, hdrr = "HdrR", hni = "HNI", hsok = "HSOK"))
dplyr
# Get df with mean of melastigma/javanicus
= df %>%
df_kp pivot_wider(id_cols = c(scaffold, start, end, mid, p2), names_from = p1, values_from = fd) %>%
# get mean of melastigma/javanicus
::mutate(mean_fd = rowMeans(dplyr::select(., melastigma, javanicus), na.rm = T)) %>%
dplyr::arrange(p2, scaffold, start)
dplyr
::kable(head(df_kp)) knitr
scaffold | start | end | mid | p2 | javanicus | melastigma | mean_fd |
---|---|---|---|---|---|---|---|
1 | 650001 | 700000 | 666130 | HdrR | NA | 0.1231 | 0.12310 |
1 | 700001 | 750000 | 721651 | HdrR | NA | 0.2357 | 0.23570 |
1 | 750001 | 800000 | 774678 | HdrR | NA | 0.1636 | 0.16360 |
1 | 950001 | 1000000 | 974734 | HdrR | NA | 0.2203 | 0.22030 |
1 | 1000001 | 1050000 | 1028780 | HdrR | NA | 0.2935 | 0.29350 |
1 | 1050001 | 1100000 | 1075868 | HdrR | 0.4066 | 0.2985 | 0.35255 |
= "/nfs/research/birney/users/ian/mikk_genome/introgression/hni_hsok.txt.gz"
in_file # Read in file on local
= read.table(in_file,
ol_ranges_df header = T,
sep = "\t",
as.is = T)
= ol_ranges_df %>%
ol_ranges_df_long ::pivot_longer(cols = c(hni, hsok), names_to = "line", values_to = "present")
tidyr
= split(ol_ranges_df_long, f = ol_ranges_df_long$line)
ol_ranges_list
= lapply(ol_ranges_list, function(x){
ol_ranges_list # remove NAs
= x %>%
df ::drop_na(present)
tidyr# convert to GRanges object
= GenomicRanges::makeGRangesFromDataFrame(df,
ol_ranges ignore.strand = T,
seqnames.field = "chr",
start.field = "pos",
end.field = "pos")
return(ol_ranges)
})
= here::here("data/introgression/mikk.txt.gz")
in_file # Read in file on local
= read.table(in_file,
mikk_ranges_df col.names = c("chr", "pos"),
sep = "\t",
as.is = T)
# Convert to GRanges object
= GenomicRanges::makeGRangesFromDataFrame(mikk_ranges_df,
mikk_ranges ignore.strand = T,
seqnames.field = "chr",
start.field = "pos",
end.field = "pos")
# Get list of exons from biomaRt
## Select dataset
= biomaRt::useEnsembl(biomart = "ensembl", dataset = "olatipes_gene_ensembl")
olat_mart ## Get attributes of interest (exon ID, chr, start, end)
<- biomaRt::getBM(attributes = c("chromosome_name", "ensembl_gene_id", "ensembl_transcript_id", "transcript_start", "transcript_end", "transcript_length", "ensembl_exon_id", "rank", "strand", "exon_chrom_start", "exon_chrom_end", "cds_start", "cds_end"),
exons mart = olat_mart)
## Convert exons to GRanges
= GenomicRanges::makeGRangesFromDataFrame(exons,
ex_ranges ignore.strand = T,
seqnames.field = "chromosome_name",
start.field = "exon_chrom_start",
end.field = "exon_chrom_end")
= file.path(plots_dir, "20210318_fd_with_density_all.png") file_out
# Save
png(file=file_out,
width=8500,
height=13500,
units = "px",
res = 400)
# Plot
= plotKaryotype(med_genome)
kp # Add base numbers
::kpAddBaseNumbers(kp, tick.dist = 5000000, cex = 0.3)
karyoploteR# Add data backgrounds
::kpDataBackground(kp, r0=0, r1 = 1, color = "white")
karyoploteR# Add axis label
kpAxis(kp, r0=0.3, r1 = 1, cex = 0.4)
# Add fd data
::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HNI"],
x = df_kp$mid[df_kp$p2 == "HNI"],
y = df_kp$mean_fd[df_kp$p2 == "HNI"],
col = "#F6673A",
r0=0.3, r1 = 1)
::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HdrR"],
x = df_kp$mid[df_kp$p2 == "HdrR"],
y = df_kp$mean_fd[df_kp$p2 == "HdrR"],
col = "#F3B61F",
r0=0.3, r1 = 1)
::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HSOK"],
x = df_kp$mid[df_kp$p2 == "HSOK"],
y = df_kp$mean_fd[df_kp$p2 == "HSOK"],
col = "#631E68",
r0=0.3, r1 = 1)
# Add SNP density data
kpPlotDensity(kp, data=mikk_ranges, col = "#49A379",
r0=0, r1=0.1,
window.size = 25000)
kpPlotDensity(kp, data=ol_ranges_list$hni, col = "#F6673A",
r0=0.1, r1=0.2,
window.size = 25000)
kpPlotDensity(kp, data=ol_ranges_list$hsok, col = "#631E68",
r0=0.2, r1=0.3,
window.size = 25000)
#kpPlotDensity(kp, data=ol_ranges_list$hdrr, col = "#F3B61F",
# r0=0.45, r1=0.6,
# window.size = 25000)
# Add exon density to ideogram
kpPlotDensity(kp, data=ex_ranges, col = "#f77cb5",
data.panel = "ideogram",
window.size = 25000,
r0 = 0.5, r1 = 1)
kpPlotDensity(kp, data=ex_ranges, col = "#f77cb5",
data.panel = "ideogram",
window.size = 25000,
r0 = 0.5, r1 = 0)
# Add labels
kpAddLabels(kp, labels="MIKK",
r0=0, r1=0.05,
label.margin = 0.001,
cex = 0.4)
kpAddLabels(kp, labels="HNI",
r0=0.1, r1=0.15,
label.margin = 0.001,
cex = 0.4)
kpAddLabels(kp, labels="HSOK",
r0=0.2, r1=0.25,
label.margin = 0.001,
cex = 0.4)
#kpAddLabels(kp, labels="HdrR",
# r0=0.45, r1=0.6,
# cex = 0.4)
kpAddLabels(kp, labels=bquote(italic(f[d])),
r0=0.3, r1=1,
label.margin = 0.035,
cex = 0.6)
dev.off()
::include_graphics(file_out) knitr
= file.path(plots_dir, "20210318_fd_with_density_chr_4.png") out_file
png(file=out_file,
width=5500,
height=1186,
units = "px",
res = 400)
# Plot
= plotKaryotype(med_genome, chromosomes = "4", cex = 1.5)
kp # Add base numbers
::kpAddBaseNumbers(kp, tick.dist = 5000000, cex = 0.7)
karyoploteR# Add data backgrounds
::kpDataBackground(kp, r0=0, r1 = 1, color = "white")
karyoploteR# Add axis label
kpAxis(kp, r0=0.3, r1 = 1, cex = 0.8)
# Add fd data
= 2
lwd ::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HNI"],
x = df_kp$mid[df_kp$p2 == "HNI"],
y = df_kp$mean_fd[df_kp$p2 == "HNI"],
col = "#F6673A",
r0=0.3, r1 = 1,
lwd = lwd)
::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HdrR"],
x = df_kp$mid[df_kp$p2 == "HdrR"],
y = df_kp$mean_fd[df_kp$p2 == "HdrR"],
col = "#F3B61F",
r0=0.3, r1 = 1,
lwd = lwd)
::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HSOK"],
x = df_kp$mid[df_kp$p2 == "HSOK"],
y = df_kp$mean_fd[df_kp$p2 == "HSOK"],
col = "#631E68",
r0=0.3, r1 = 1,
lwd = lwd)
# Add SNP density data
kpPlotDensity(kp, data=mikk_ranges, col = "#49A379",
r0=0, r1=0.1,
window.size = 25000)
kpPlotDensity(kp, data=ol_ranges_list$hni, col = "#F6673A",
r0=0.1, r1=0.2,
window.size = 25000)
kpPlotDensity(kp, data=ol_ranges_list$hsok, col = "#631E68",
r0=0.2, r1=0.3,
window.size = 25000)
#kpPlotDensity(kp, data=ol_ranges_list$hdrr, col = "#F3B61F",
# r0=0.45, r1=0.6,
# window.size = 25000)
# Add exon density to ideogram
kpPlotDensity(kp, data=ex_ranges, col = "#f77cb5",
data.panel = "ideogram",
window.size = 25000,
r0 = 0.5, r1 = 1)
kpPlotDensity(kp, data=ex_ranges, col = "#f77cb5",
data.panel = "ideogram",
window.size = 25000,
r0 = 0.5, r1 = 0)
# Add labels
kpAddLabels(kp, labels="MIKK",
r0=0, r1=0.05,
label.margin = 0.001,
cex = 0.5)
kpAddLabels(kp, labels="HNI",
r0=0.1, r1=0.15,
label.margin = 0.001,
cex = 0.5)
kpAddLabels(kp, labels="HSOK",
r0=0.2, r1=0.25,
label.margin = 0.001,
cex = 0.5)
#kpAddLabels(kp, labels="HdrR",
# r0=0.45, r1=0.6,
# cex = 0.4)
kpAddLabels(kp, labels=bquote(italic(f[d])),
r0=0.3, r1=1,
label.margin = 0.035,
cex = 1)
dev.off()
::include_graphics(out_file) knitr
Created with Vectr and saved here: plots/introgression/20210318_abba_diagram.svg
= here::here(plots_dir, "abba_diagram.png")
abba_diagram = here::here(plots_dir, "tree_oryzias_with_ancestor.png")
tree = here::here(plots_dir, "20210318_fd_with_density_chr_4.png")
karyo_chr4
= ggdraw() +
final_abba draw_image(tree,
x = 0, y = .7, width = .4, height = .35, vjust = .1, hjust = -.1, scale = 1.2) +
draw_image(karyo_chr4,
x = 0, y = 0, width = 1, height = 0.3, scale = 1.2) +
draw_plot(fstat_plot,
x = .4, y = .3, width = .6, height = .7) +
draw_image(abba_diagram,
x = 0, y = .3, width = .4, height = .35, vjust = -.05, scale = 1.1) +
draw_plot_label(label = c("A", "B", "C", "D"), size = 16,
x = c(0, 0, .38, 0), y = c(1, .7, 1, .3))
final_abba
ggsave(here::here(plots_dir, "20210318_final_figure.png"),
width = 35,
height = 21.875,
units = "cm",
dpi = 500)
= here::here("data/introgression/abba_sliding_final_with_icab/min-sites-250.txt") target_file
Sanity check with counts of sites:
::read_csv(target_file) %>%
readr# add sliding window length
::mutate(window_length_kb = (end - start + 1) / 1000) %>%
dplyr# filter for 500 kb windows
::filter(window_length_kb == 500) %>%
dplyr::count(p1, p2) dplyr
## Rows: 12862 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): p1, p2
## dbl (11): scaffold, start, end, mid, sites, sitesUsed, ABBA, BABA, D, fd, fdM
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 × 3
## p1 p2 n
## <chr> <chr> <int>
## 1 hdrr hni 1395
## 2 hdrr hsok 1408
## 3 hdrr icab 1440
## 4 icab hdrr 1440
## 5 icab hni 1396
## 6 icab hsok 1408
As suggested by Simon Martin here: https://github.com/simonhmartin/genomics_general#abba-baba-statistics-in-sliding-windows > fd gives meaningless values (<0 or >1) if D is negative. If there is no excess of shared derived alleles between P2 and P3 (indicated by a positive D), then the excess cannot be quantified. fd values for windows with negative D should therefore either be discarded or converted to zero, depending on your hypothesis.
::read_csv(target_file) %>%
readr# add sliding window length
::mutate(window_length_kb = (end - start + 1) / 1000) %>%
dplyr# filter for 500 kb windows
::filter(window_length_kb == 500) %>%
dplyr# recode lines
::mutate(across(c("p1", "p2"), ~factor(.x, levels = c("icab", "hdrr", "hni", "hsok"))),
dplyracross(c("p1", "p2"), ~recode(.x, icab = "iCab", hdrr = "HdrR", hni = "HNI", hsok = "HSOK"))) %>%
::group_by(p1, p2) %>%
dplyr::summarise(n_pos_d = length(which(D > 0))) %>%
dplyrggplot() +
geom_col(aes(p2, n_pos_d, fill = p2)) +
facet_wrap(~p1) +
scale_fill_manual(values = pal_abba) +
xlab("P2") +
ylab("Number of windows (sites) with positive D") +
ggtitle("Choice of P1 (iCab or HdrR)") +
theme_cowplot() +
labs(fill = "P2")
ggsave(here::here("docs/plots/introgression/20210429_p1_hdrr_v_icab_counts_valid_sites.png"),
device = "png",
width=15.69,
height=10,
units = "cm",
dpi = 400)
::read_csv(target_file) %>%
readr# add sliding window length
::mutate(window_length_kb = (end - start + 1) / 1000) %>%
dplyr# filter for 500 kb windows
::filter(window_length_kb == 500) %>%
dplyr# recode lines
::mutate(across(c("p1", "p2"), ~factor(.x, levels = c("icab", "hdrr", "hni", "hsok"))),
dplyracross(c("p1", "p2"), ~recode(.x, icab = "iCab", hdrr = "HdrR", hni = "HNI", hsok = "HSOK"))) %>%
# remove all rows where D < 0
::filter(D > 0) %>%
dplyrggplot() +
geom_histogram(aes(fd, fill = p2), bins = 50) +
facet_wrap(vars(p1, p2)) +
scale_fill_manual(values = pal_abba) +
xlab("P2") +
# ylab("Number of windows (sites) with positive D") +
ggtitle(expression(italic(f[d]))) +
theme_cowplot()
ggsave(here::here("docs/plots/introgression/20210429_p1_hdrr_v_icab_fd_distribution.png"),
device = "png",
width=15.69,
height=10,
units = "cm",
dpi = 400)
::read_csv(target_file) %>%
readr# add sliding window length
::mutate(window_length_kb = (end - start + 1) / 1000) %>%
dplyr# filter for 500 kb windows
::filter(window_length_kb == 500) %>%
dplyr# recode lines
::mutate(across(c("p1", "p2"), ~factor(.x, levels = c("icab", "hdrr", "hni", "hsok"))),
dplyracross(c("p1", "p2"), ~recode(.x, icab = "iCab", hdrr = "HdrR", hni = "HNI", hsok = "HSOK"))) %>%
# remove all rows where D < 0
::filter(D > 0) %>%
dplyr::filter(p1 == "HdrR") %>%
dplyr::group_by(p2) %>%
dplyr::summarise(mean(fd)) dplyr
## # A tibble: 3 × 2
## p2 `mean(fd)`
## <fct> <dbl>
## 1 iCab 0.254
## 2 HNI 0.0938
## 3 HSOK 0.120
= readr::read_csv(target_file) %>%
mikk_abba_final # add sliding window length
::mutate(window_length_kb = (end - start + 1) / 1000) %>%
dplyr# filter for 500 kb windows
::filter(window_length_kb == 500) %>%
dplyr# recode `fd` as 0 if `D` is negative
::mutate(fd = ifelse(D < 0, 0, fd))
dplyr
# Is iCab or HdrR closer to MIKK?
%>%
mikk_abba_final ::filter(p2 %in% c("icab", "hdrr")) %>%
dplyr::group_by(p1, p2) %>%
dplyr::summarise(length(which(fd == 0))) dplyr
## # A tibble: 2 × 3
## # Groups: p1 [2]
## p1 p2 `length(which(fd == 0))`
## <chr> <chr> <int>
## 1 hdrr icab 598
## 2 icab hdrr 958
There are fewer 0s when HdrR is P1, which suggests that iCab is more closely related to the MIKK panel. But you want a P1 that is further away so that you’ll get more data points, which is why we’ll likely go with HdrR.
= mikk_abba_final %>%
mikk_abba_final # recode lines
::mutate(p2 = factor(p2, levels = c("hdrr", "icab", "hni", "hsok")),
dplyrp2 = recode(p2, hdrr = "HdrR", icab = "iCab", hni = "HNI", hsok = "HSOK")) %>%
::arrange(p2, scaffold, start) %>%
dplyr::select(scaffold, mid_1 = mid, mid_2 = mid, fd, p1, p2) %>%
dplyr::mutate(scaffold = paste("chr", scaffold, sep ="")) %>%
dplyrsplit(., f = .$p1) %>%
::map(., function(P1) split(P1, f = P1$p2)) %>%
purrr# Remove empty data frames for hdrr-hdrr and icab-icab population combinations
::map(., function(P1) P1[purrr::map_lgl(P1, function(P2) nrow(P2) != 0)]) purrr
= here::here("docs/plots/introgression/20210427_introgression_circos_MIKK_ABBA_p1-hdrr.png") out_plot
= mikk_abba_final[["hdrr"]]
target_list
png(out_plot,
width = 20,
height = 20,
units = "cm",
res = 500)
# Set parameters
## Decrease cell padding from default c(0.02, 1.00, 0.02, 1.00)
circos.par(cell.padding = c(0, 0, 0, 0),
track.margin = c(0, 0),
gap.degree = c(rep(1, nrow(chroms) - 1), 6))
# Initialize plot
circos.initializeWithIdeogram(chroms,
plotType = c("axis", "labels"),
major.by = 1e7,
axis.labels.cex = 0.25*par("cex"))
# Print label in center
text(0, 0, "MIKK panel\nintrogression with\niCab, HNI,\nand\nHSOK")
###############
# Introgression
###############
= 0
counter
::map(target_list, function(P2){
purrr# Set counter
<<- counter + 1
counter
circos.genomicTrack(P2,
panel.fun = function(region, value, ...){
circos.genomicLines(region,
1]],
value[[col = pal_abba[[names(target_list[counter])]],
area = T,
border = karyoploteR::darker(pal_abba[[names(target_list[counter])]],
amount = 80))
# Add baseline
circos.xaxis(h = "bottom",
labels = F,
major.tick = F)
},track.height = 0.1,
bg.border = NA,
ylim = c(0, 1))
# Add axis for introgression
circos.yaxis(side = "right",
at = c(.5, 1),
labels.cex = 0.25*par("cex"),
tick.length = 2
)
# Add y-axis label for introgression
if (counter == 2) {
circos.text(0, 0.5,
labels = expression(italic(f[d])),
sector.index = "chr1",
# facing = "clockwise",
adj = c(3, 0.5),
cex = 0.4*par("cex"))
}
# Add y-axis label for introgression
circos.text(0, 0.5,
labels = names(target_list)[counter],
sector.index = "chr1",
facing = "clockwise",
adj = c(.5, 0),
cex = 0.6*par("cex"))
})
circos.clear()
dev.off()
::include_graphics(out_plot) knitr
= here::here("docs/plots/introgression/20210427_introgression_circos_MIKK_ABBA_p1-icab.png") out_plot
= mikk_abba_final[["icab"]]
target_list
png(out_plot,
width = 20,
height = 20,
units = "cm",
res = 500)
# Set parameters
## Decrease cell padding from default c(0.02, 1.00, 0.02, 1.00)
circos.par(cell.padding = c(0, 0, 0, 0),
track.margin = c(0, 0),
gap.degree = c(rep(1, nrow(chroms) - 1), 6))
# Initialize plot
circos.initializeWithIdeogram(chroms,
plotType = c("axis", "labels"),
major.by = 1e7,
axis.labels.cex = 0.25*par("cex"))
# Print label in center
text(0, 0, "MIKK panel\nintrogression with\nHdrR, HNI,\nand\nHSOK")
###############
# Introgression
###############
= 0
counter
::map(target_list, function(P2){
purrr# Set counter
<<- counter + 1
counter
circos.genomicTrack(P2,
panel.fun = function(region, value, ...){
circos.genomicLines(region,
1]],
value[[col = pal_abba[[names(target_list[counter])]],
area = T,
border = karyoploteR::darker(pal_abba[[names(target_list[counter])]]))
# Add baseline
circos.xaxis(h = "bottom",
labels = F,
major.tick = F)
},track.height = 0.1,
bg.border = NA,
ylim = c(0, 1))
# Add axis for introgression
circos.yaxis(side = "right",
at = c(.5, 1),
labels.cex = 0.25*par("cex"),
tick.length = 2
)
# Add y-axis label for introgression
if (counter == 2) {
circos.text(0, 0.5,
labels = expression(italic(f[d])),
sector.index = "chr1",
# facing = "clockwise",
adj = c(3, 0.5),
cex = 0.4*par("cex"))
}
# Add y-axis label for introgression
circos.text(0, 0.5,
labels = names(target_list)[counter],
sector.index = "chr1",
facing = "clockwise",
adj = c(.5, 0),
cex = 0.6*par("cex"))
})
circos.clear()
dev.off()
::include_graphics(out_plot) knitr
= here::here("docs/plots/introgression/20220209_introgression_circos_MIKK_ABBA_p1-hdrr.png") out_plot
= mikk_abba_final[["hdrr"]]
target_list
# Reset palette
<- c("#F3B61F", "#F3B61F", "#631E68", "#F6673A", "#F33A56", "#55B6B0", "#621B00")
pal_abba names(pal_abba) <- c("iCab", "HdrR", "HSOK", "HNI", "melastigma", "javanicus", "Kaga")
png(out_plot,
width = 8.5,
height = 8.5,
units = "in",
res = 700)
# Set parameters
## Decrease cell padding from default c(0.02, 1.00, 0.02, 1.00)
circos.par(cell.padding = c(0, 0, 0, 0),
track.margin = c(0, 0),
gap.degree = c(rep(1, nrow(chroms) - 1), 9))
# Initialize plot
circos.initializeWithIdeogram(chroms,
plotType = c("axis", "labels"),
major.by = 1e7,
axis.labels.cex = 0.45*par("cex"))
# Print label in center
text(0, 0, "MIKK panel\nintrogression with\niCab, HNI,\nand\nHSOK")
###############
# Introgression
###############
= 0
counter
::map(target_list, function(P2){
purrr# Set counter
<<- counter + 1
counter
circos.genomicTrack(P2,
panel.fun = function(region, value, ...){
circos.genomicLines(region,
1]],
value[[col = pal_abba[[names(target_list[counter])]],
area = T,
border = karyoploteR::darker(pal_abba[[names(target_list[counter])]],
amount = 80))
# Add baseline
circos.xaxis(h = "bottom",
labels = F,
major.tick = F)
},track.height = 0.1,
bg.border = NA,
ylim = c(0, 1))
# Add axis for introgression
circos.yaxis(side = "right",
at = c(.5, 1),
labels.cex = 0.5*par("cex"),
tick.length = 2
)
# Add y-axis label for introgression
if (counter == 2) {
circos.text(0, 0.5,
labels = expression(italic(f[d])),
sector.index = "chr1",
# facing = "clockwise",
adj = c(2.5, 0.5),
cex = 0.8*par("cex"))
}
# Add y-axis label for introgression
circos.text(0, 0.5,
labels = names(target_list)[counter],
sector.index = "chr1",
facing = "clockwise",
adj = c(.5, 0),
cex = 0.7*par("cex"))
})
circos.clear()
dev.off()
::include_graphics(out_plot) knitr
= here::here(plots_dir, "20210505_abba_diagram.png")
abba_diagram = here::here(plots_dir, "tree_oryzias_latipes_with_ancestor.png")
tree = here::here(plots_dir, "20220209_introgression_circos_MIKK_ABBA_p1-hdrr.png")
circos_abba
= ggdraw() +
final_abba draw_image(tree,
x = 0, y = .5, width = .4, height = .55, vjust = 0, hjust = -.1, scale = 1.2) +
draw_image(circos_abba,
x = .4, y = 0, width = .6, height = 1, scale = 1.1, vjust = 0) +
draw_image(abba_diagram,
x = 0, y = 0, width = .4, height = .55, vjust = 0, hjust = -.125, scale = 1.2) +
draw_plot_label(label = c("A", "B", "C"), size = 25,
x = c(0, 0, .4), y = c(1, .6, 1))
final_abba
ggsave(here::here(plots_dir, "20220209_introgression_final_figure.pdf"),
device = "pdf",
width = 38,
height = 23.6,
units = "cm")
= here::here("data/introgression/abba_sliding_final_no_131-1", "500000_250.txt")
in_file # Read in data
= readr::read_csv(in_file) %>%
df ::arrange(p1, p2, scaffold, start)
dplyr
# Convert fd to 0 if D < 0
$fd = ifelse(df$D < 0,
df0,
$fd)
df
# Change names
= df %>%
df ::mutate(p2 = recode(df$p2, hdrr = "HdrR", hni = "HNI", hsok = "HSOK"))
dplyr
# Get df with mean of melastigma/javanicus
= df %>%
df_kp pivot_wider(id_cols = c(scaffold, start, end, mid, p2), names_from = p1, values_from = fd) %>%
# get mean of melastigma/javanicus
::mutate(mean_fd = rowMeans(dplyr::select(., melastigma, javanicus), na.rm = T)) %>%
dplyr::arrange(p2, scaffold, start)
dplyr
::kable(head(df_kp)) knitr
scaffold | start | end | mid | p2 | javanicus | melastigma | mean_fd |
---|---|---|---|---|---|---|---|
1 | 500001 | 1000000 | 770583 | HdrR | NA | 0.1788 | 0.17880 |
1 | 1000001 | 1500000 | 1227968 | HdrR | 0.4058 | 0.2320 | 0.31890 |
1 | 1500001 | 2000000 | 1764457 | HdrR | 0.3459 | 0.2768 | 0.31135 |
1 | 2000001 | 2500000 | 2205406 | HdrR | NA | 0.3024 | 0.30240 |
1 | 2500001 | 3000000 | 2776011 | HdrR | NA | 0.3551 | 0.35510 |
1 | 3000001 | 3500000 | 3243647 | HdrR | 0.3074 | 0.2239 | 0.26565 |
= file.path(plots_dir, "20210409_fd_with_density_chr_4_500kb-window.png") out_file
png(file=out_file,
width=5500,
height=1186,
units = "px",
res = 400)
# Plot
= plotKaryotype(med_genome, chromosomes = "4", cex = 1.5)
kp # Add base numbers
::kpAddBaseNumbers(kp, tick.dist = 5000000, cex = 0.7)
karyoploteR# Add data backgrounds
::kpDataBackground(kp, r0=0, r1 = 1, color = "white")
karyoploteR# Add axis label
kpAxis(kp, r0=0.3, r1 = 1, cex = 0.8)
# Add fd data
= 2
lwd ::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HNI"],
x = df_kp$mid[df_kp$p2 == "HNI"],
y = df_kp$mean_fd[df_kp$p2 == "HNI"],
col = "#F6673A",
r0=0.3, r1 = 1,
lwd = lwd)
::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HdrR"],
x = df_kp$mid[df_kp$p2 == "HdrR"],
y = df_kp$mean_fd[df_kp$p2 == "HdrR"],
col = "#F3B61F",
r0=0.3, r1 = 1,
lwd = lwd)
::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HSOK"],
x = df_kp$mid[df_kp$p2 == "HSOK"],
y = df_kp$mean_fd[df_kp$p2 == "HSOK"],
col = "#631E68",
r0=0.3, r1 = 1,
lwd = lwd)
# Add SNP density data
kpPlotDensity(kp, data=mikk_ranges, col = "#49A379",
r0=0, r1=0.1,
window.size = 25000)
kpPlotDensity(kp, data=ol_ranges_list$hni, col = "#F6673A",
r0=0.1, r1=0.2,
window.size = 25000)
kpPlotDensity(kp, data=ol_ranges_list$hsok, col = "#631E68",
r0=0.2, r1=0.3,
window.size = 25000)
#kpPlotDensity(kp, data=ol_ranges_list$hdrr, col = "#F3B61F",
# r0=0.45, r1=0.6,
# window.size = 25000)
# Add exon density to ideogram
kpPlotDensity(kp, data=ex_ranges, col = "#f77cb5",
data.panel = "ideogram",
window.size = 25000,
r0 = 0.5, r1 = 1)
kpPlotDensity(kp, data=ex_ranges, col = "#f77cb5",
data.panel = "ideogram",
window.size = 25000,
r0 = 0.5, r1 = 0)
# Add labels
kpAddLabels(kp, labels="MIKK",
r0=0, r1=0.05,
label.margin = 0.001,
cex = 0.5)
kpAddLabels(kp, labels="HNI",
r0=0.1, r1=0.15,
label.margin = 0.001,
cex = 0.5)
kpAddLabels(kp, labels="HSOK",
r0=0.2, r1=0.25,
label.margin = 0.001,
cex = 0.5)
kpAddLabels(kp, labels=bquote(italic(f[d])),
r0=0.3, r1=1,
label.margin = 0.035,
cex = 1)
dev.off()
::include_graphics(out_file) knitr
##### chr2
= file.path(plots_dir, "20210409_fd_with_density_chr_2_500kb-window.png") out_file
png(file=out_file,
width=5500,
height=1186,
units = "px",
res = 400)
# Plot
= plotKaryotype(med_genome, chromosomes = "2", cex = 1.5)
kp # Add base numbers
::kpAddBaseNumbers(kp, tick.dist = 5000000, cex = 0.7)
karyoploteR# Add data backgrounds
::kpDataBackground(kp, r0=0, r1 = 1, color = "white")
karyoploteR# Add axis label
kpAxis(kp, r0=0.3, r1 = 1, cex = 0.8)
# Add fd data
= 2
lwd ::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HNI"],
x = df_kp$mid[df_kp$p2 == "HNI"],
y = df_kp$mean_fd[df_kp$p2 == "HNI"],
col = "#F6673A",
r0=0.3, r1 = 1,
lwd = lwd)
::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HdrR"],
x = df_kp$mid[df_kp$p2 == "HdrR"],
y = df_kp$mean_fd[df_kp$p2 == "HdrR"],
col = "#F3B61F",
r0=0.3, r1 = 1,
lwd = lwd)
::kpLines(kp,
karyoploteRchr = df_kp$scaffold[df_kp$p2 == "HSOK"],
x = df_kp$mid[df_kp$p2 == "HSOK"],
y = df_kp$mean_fd[df_kp$p2 == "HSOK"],
col = "#631E68",
r0=0.3, r1 = 1,
lwd = lwd)
# Add SNP density data
kpPlotDensity(kp, data=mikk_ranges, col = "#49A379",
r0=0, r1=0.1,
window.size = 25000)
kpPlotDensity(kp, data=ol_ranges_list$hni, col = "#F6673A",
r0=0.1, r1=0.2,
window.size = 25000)
kpPlotDensity(kp, data=ol_ranges_list$hsok, col = "#631E68",
r0=0.2, r1=0.3,
window.size = 25000)
#kpPlotDensity(kp, data=ol_ranges_list$hdrr, col = "#F3B61F",
# r0=0.45, r1=0.6,
# window.size = 25000)
# Add exon density to ideogram
kpPlotDensity(kp, data=ex_ranges, col = "#f77cb5",
data.panel = "ideogram",
window.size = 25000,
r0 = 0.5, r1 = 1)
kpPlotDensity(kp, data=ex_ranges, col = "#f77cb5",
data.panel = "ideogram",
window.size = 25000,
r0 = 0.5, r1 = 0)
# Add labels
kpAddLabels(kp, labels="MIKK",
r0=0, r1=0.05,
label.margin = 0.001,
cex = 0.5)
kpAddLabels(kp, labels="HNI",
r0=0.1, r1=0.15,
label.margin = 0.001,
cex = 0.5)
kpAddLabels(kp, labels="HSOK",
r0=0.2, r1=0.25,
label.margin = 0.001,
cex = 0.5)
kpAddLabels(kp, labels=bquote(italic(f[d])),
r0=0.3, r1=1,
label.margin = 0.035,
cex = 1)
dev.off()
::include_graphics(out_file) knitr
Use chr4.
= here::here(plots_dir, "abba_diagram.png")
abba_diagram = here::here(plots_dir, "tree_oryzias_with_ancestor.png")
tree = here::here(plots_dir, "20210409_fd_with_density_chr_4_500kb-window.png")
karyo_chr4 = here::here(plots_dir, "20210409_introgression_circos_MIKK_ABBA.png")
circos_abba
= ggdraw() +
final_abba draw_image(tree,
x = 0, y = .7, width = .4, height = .35, vjust = .1, hjust = -.1, scale = 1.2) +
draw_image(karyo_chr4,
x = 0, y = 0, width = 1, height = 0.3, scale = 1.2) +
draw_image(circos_abba,
x = .4, y = .3, width = .6, height = .7, scale = 1.15, vjust = .025) +
draw_image(abba_diagram,
x = 0, y = .3, width = .4, height = .35, vjust = -.05, scale = 1.1) +
draw_plot_label(label = c("A", "B", "C", "D"), size = 25,
x = c(0, 0, .45, 0), y = c(1, .7, 1, .3), color = "#4f0943")
final_abba
ggsave(here::here(plots_dir, "20210409_introgression_final_figure.png"),
final_abba,width = 35,
height = 21.875,
units = "cm",
dpi = 500)