library(here)
source(here::here("code", "scripts", "fecundity", "source.R"))
Data saved here (copied from Felix’s table in the paper): data/fecundity/20210212_inbreeding.csv
= here::here("data", "fecundity", "20210212_inbreeding.csv")
raw_dat = readr::read_csv(raw_dat) inbreed_df
## Rows: 12 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): MIKK, Inbreeding (Generation), N of strains lost
## dbl (5): not productive, infertile male, no males, no females, all cause death
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
= 253
total_crosses
= inbreed_df %>%
inbreed_df_tidy # Remove final row, which makes `N of strains lost` character
::slice(-c(11,12)) %>%
dplyr# Change column names
::select(GENERATION = `Inbreeding (Generation)`,
dplyrN_LOST_TOTAL = `N of strains lost`,
everything()) %>%
# Make N_LOST integer
::mutate(N_LOST_TOTAL = as.integer(N_LOST_TOTAL)) %>%
dplyr# Remove MIKK column
::select(-MIKK) %>%
dplyr# Get cumulative lost and N_SURVIVING
::mutate(N_LOST_CUM = cumsum(N_LOST_TOTAL),
dplyrN_SURVIVING = total_crosses - N_LOST_CUM) %>%
# Replace
::mutate(dplyr::across(c(`not productive`,
dplyr`infertile male`,
`no males`,
`no females`,
`all cause death`),
~tidyr::replace_na(.x, 0))) %>%
# Get cumulative lost for each category
::mutate(dplyr::across(c(`not productive`,
dplyr`infertile male`,
`no males`,
`no females`,
`all cause death`),
~cumsum(.x))) %>%
# Amend GENERATION column
::mutate(GENERATION = stringr::str_extract(GENERATION, "(?:F\\d{1,2})")) %>%
dplyr# Add first row
rbind(., c("F1", rep(0,7), total_crosses)) %>%
# Add rows for F12-14
rbind(., dplyr::filter(., GENERATION == "F11") %>%
::slice(rep(1:n(), each = 3)) %>%
dplyr::mutate(GENERATION = c("F12", "F13", "F14"))) %>%
dplyr# Factorise GENERATION
::mutate(GENERATION = factor(GENERATION, levels = paste("F", seq(1:14), sep = ""))) %>%
dplyr# Order
order(.$GENERATION), ] %>%
.[# Gather
::pivot_longer(cols = c(`not productive`,
tidyr`infertile male`,
`no males`,
`no females`,
`all cause death`,
N_SURVIVING),names_to = "STATUS",
values_to = "N") %>%
# Convert `N` to integer
::mutate(N = as.integer(N))
dplyr
::kable(head(inbreed_df_tidy)) knitr
GENERATION | N_LOST_TOTAL | N_LOST_CUM | STATUS | N |
---|---|---|---|---|
F1 | 0 | 0 | not productive | 0 |
F1 | 0 | 0 | infertile male | 0 |
F1 | 0 | 0 | no males | 0 |
F1 | 0 | 0 | no females | 0 |
F1 | 0 | 0 | all cause death | 0 |
F1 | 0 | 0 | N_SURVIVING | 253 |
= c("Infertile male",
recode_vec "No males",
"No females",
"Not productive",
"All-cause death",
"Reproducing")
names(recode_vec) = c("infertile male",
"no males",
"no females",
"not productive",
"all cause death",
"N_SURVIVING")
= inbreed_df_tidy %>%
test # geom_area() seems to need `x` to be numeric
::mutate(GENERATION = as.integer(gsub("F", "", GENERATION))) %>%
dplyr# recode STATUS values and order
::mutate(STATUS = dplyr::recode(STATUS, !!!recode_vec),
dplyrSTATUS = factor(STATUS, levels = recode_vec))# %>%
= ggplot(test) +
out2 geom_area(aes(x=GENERATION,
y=N,
fill = STATUS)) +
# geom_text(aes(x=GENERATION, y=N, label = STATUS)) +
scale_fill_viridis_d(option = "magma") +
scale_x_continuous(breaks = unique(test$GENERATION),
labels = paste("F", unique(test$GENERATION), sep = "")) +
theme_cowplot(font_size = 10) +
xlab("Generation") +
ylab("Number of MIKK panel lines") +
labs(fill = "Status") +
theme(panel.grid = element_blank())
# theme(legend.position=c(0.2, 0.3),
# legend.box.background = element_blank())
out2
= c("#FC4E07", "#FFBF00", "#0BC166", "#00AFBB", "#360568", "#D84797")
reviewer_pal
= ggplot(test) +
out3 geom_area(aes(x=GENERATION,
y=N,
fill = STATUS)) +
scale_fill_manual(values = reviewer_pal) +
scale_x_continuous(breaks = unique(test$GENERATION),
labels = paste("F", unique(test$GENERATION), sep = "")) +
theme_cowplot(font_size = 16) +
xlab("Generation") +
ylab("Number of MIKK panel lines") +
labs(fill = "Status") +
theme(panel.grid = element_blank())
out3
= here::here("data", "fecundity", "20210205_semiquantitative.xlsx")
in_file = readxl::read_xlsx(in_file, range = "A1:C81")
df_semi = here::here("data", "fecundity", "20210208_semiquant.csv")
out_file
# Get strain levels
= unique(df_semi$Pair)
strain_levels
# Create recode vector
= c("Feb 2019", "Jul 2020")
date_recode names(date_recode) = c("2/19", "7/20")
= c(0, 1, 2, 3, 4, 5)
recode_vec_1 names(recode_vec_1) = c(0, "o", "x", "x/", "xx", "xxx")
= c("Not producing",
recode_vec_2 "<3 eggs; not every day",
"<5 eggs; not every day",
"1-3 eggs per day",
"1-5 eggs per day",
"5-10 eggs per day")
names(recode_vec_2) = c(0, 1, 2, 3, 4, 5)
= gsub("; ", ";\n", recode_vec_2)
recode_vec_3 = gsub("eggs per", "eggs\nper", recode_vec_3)
recode_vec_3
# Tidy
= df_semi %>%
semi_out # pivot fecundity
::pivot_longer(cols = contains("fecundity"),
tidyrnames_to = "DATE",
names_prefix = "fecundity ",
values_to = "FECUNDITY") %>%
# recode fecundity measures
::mutate(DATE = dplyr::recode(DATE, !!!date_recode),
dplyrFECUNDITY = dplyr::recode(FECUNDITY, "xx/" = "xx"),
FECUNDITY = dplyr::na_if(FECUNDITY, "do not prod. Yet"),
FECUNDITY = ifelse(is.na(FECUNDITY), 0, FECUNDITY),
FECUNDITY = dplyr::recode(FECUNDITY, !!!factor(recode_vec_1)),
KEY = dplyr::recode(FECUNDITY, !!!recode_vec_2)) %>%
# rename STRAIN
::rename(STRAIN = Pair) %>%
dplyr# factorise
::mutate(STRAIN = factor(STRAIN, levels = strain_levels)) %>%
dplyr# write to file
::write_csv(out_file, na = "")
readr
::kable(head(semi_out)) knitr
STRAIN | DATE | FECUNDITY | KEY |
---|---|---|---|
4-1 | Jul 2020 | 2 | <5 eggs; not every day |
4-1 | Feb 2019 | 2 | <5 eggs; not every day |
4-2 | Jul 2020 | 2 | <5 eggs; not every day |
4-2 | Feb 2019 | 4 | 1-5 eggs per day |
5-1 | Jul 2020 | 2 | <5 eggs; not every day |
5-1 | Feb 2019 | 4 | 1-5 eggs per day |
# Process data
= semi_out %>%
final_hor_df ::mutate(KEY = gsub("; ", ";\n", KEY),
dplyrKEY = gsub("eggs per", "eggs\nper", KEY),
KEY = factor(KEY, levels = recode_vec_3)) %>%
::filter(DATE == "Jul 2020") %>%
dplyr::mutate(STRAIN = factor(STRAIN, levels=unique(STRAIN[order(KEY,decreasing = T)]))) %>%
dplyr# remove "Not producing"
::filter(KEY != "Not producing")
dplyr# order by category
# Generate histogram
= final_hor_df %>%
fec_count ggplot() +
geom_histogram(aes(KEY, fill = KEY),
stat = "count") +
scale_fill_viridis_d() +
theme_cowplot(font_size = 16) +
xlab("Fecundity") +
ylab("Number of MIKK panel lines") +
guides(fill = F) +
theme(axis.text = element_text(size = 14))
fec_count
= here::here("data/fecundity/eye_rel_measurements_line_ids.txt")
eye_dat_file
= readr::read_delim(eye_dat_file,
eye_df delim = "\t",
col_types = c("cdc"))
%>%
eye_df ::datatable() DT
# How many of each line and sex?
%>%
eye_df ::count(line, gender) dplyr
## # A tibble: 152 × 3
## line gender n
## <chr> <chr> <int>
## 1 10-1 Female 2
## 2 10-1 Male 3
## 3 104-1 Female 2
## 4 104-1 Male 1
## 5 106-1 Female 2
## 6 106-1 Male 1
## 7 106-2 Female 2
## 8 106-2 Male 1
## 9 11-1 Female 2
## 10 11-1 Male 1
## # … with 142 more rows
# Exclude males and get means
= eye_df %>%
eye_df_filt #dplyr::filter(gender == "Female") %>%
::group_by(line, gender) %>%
dplyr::summarise(MEAN_EYE = mean(eye_rel)) %>%
dplyr::arrange(MEAN_EYE) dplyr
## `summarise()` has grouped output by 'line'. You can override using the `.groups` argument.
%>%
eye_df_filt ggplot() +
geom_histogram(aes(MEAN_EYE), bins = 40) +
facet_wrap(vars(gender)) +
theme_cowplot() +
guides(fill = "none", colour = "none")
= c("#0D1B2A", "#1B263B")
gender_pal_1 = c("#FA9702", "#922294")
gender_pal_2
= eye_df_filt %>%
eye_dens ggplot() +
geom_density(aes(MEAN_EYE, fill = gender, colour = gender)) +
scale_fill_manual(values = gender_pal_2) +
scale_colour_manual(values = darker(gender_pal_2, 70)) +
facet_wrap(vars(gender)) +
theme_cowplot(font_size = 16) +
guides(fill = "none", colour = "none") +
xlab("Mean relative eye size") +
ylab("Density")
eye_dens
# order
= eye_df_filt %>%
eye_df_filt_f ::filter(gender == "Female") %>%
dplyr::arrange(MEAN_EYE)
dplyr
= eye_df_filt_f$line
line_order
%>%
eye_df_filt_f ::mutate(line = factor(line, levels = line_order)) %>%
dplyrggplot() +
geom_col(aes(line, MEAN_EYE, fill = line)) +
scale_fill_viridis_d(option = "magma") +
theme_cowplot() +
guides(fill = "none") +
theme(axis.text.x = element_text(size = 5, angle = 45, hjust = 1))
= ggdraw() +
final draw_plot(out3, x = 0, y = 0.45, width = .55, height = 0.55) +
draw_plot(fec_count, x = .55, y = 0.45, width = .45, height = 0.55) +
draw_plot(eye_dens, x = 0, y = 0, width = 1, height = 0.45) +
draw_plot_label(label = c("A", "B", "C"), size = 24,
x = c(0, 0.55, 0), y = c(1, 1, 0.45))
final
# SVG
ggsave(here::here("docs/plots/fecundity/20220209_final_figure.pdf"),
final,device = "pdf",
units = "in",
height = 12,
width = 16)
# PNG
ggsave(here::here("docs/plots/fecundity/20220209_final_figure.png"),
final,device = "png",
dpi = 400,
units = "in",
height = 12,
width = 16)