library(here)
source(here::here("code", "scripts", "fecundity", "source.R"))Data saved here (copied from Felix’s table in the paper): data/fecundity/20210212_inbreeding.csv
raw_dat = here::here("data", "fecundity", "20210212_inbreeding.csv")
inbreed_df = readr::read_csv(raw_dat)## Rows: 12 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): MIKK, Inbreeding (Generation), N of strains lost
## dbl (5): not productive, infertile male, no males, no females, all cause death
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
total_crosses = 253
inbreed_df_tidy = inbreed_df %>%
# Remove final row, which makes `N of strains lost` character
dplyr::slice(-c(11,12)) %>%
# Change column names
dplyr::select(GENERATION = `Inbreeding (Generation)`,
N_LOST_TOTAL = `N of strains lost`,
everything()) %>%
# Make N_LOST integer
dplyr::mutate(N_LOST_TOTAL = as.integer(N_LOST_TOTAL)) %>%
# Remove MIKK column
dplyr::select(-MIKK) %>%
# Get cumulative lost and N_SURVIVING
dplyr::mutate(N_LOST_CUM = cumsum(N_LOST_TOTAL),
N_SURVIVING = total_crosses - N_LOST_CUM) %>%
# Replace
dplyr::mutate(dplyr::across(c(`not productive`,
`infertile male`,
`no males`,
`no females`,
`all cause death`),
~tidyr::replace_na(.x, 0))) %>%
# Get cumulative lost for each category
dplyr::mutate(dplyr::across(c(`not productive`,
`infertile male`,
`no males`,
`no females`,
`all cause death`),
~cumsum(.x))) %>%
# Amend GENERATION column
dplyr::mutate(GENERATION = stringr::str_extract(GENERATION, "(?:F\\d{1,2})")) %>%
# Add first row
rbind(., c("F1", rep(0,7), total_crosses)) %>%
# Add rows for F12-14
rbind(., dplyr::filter(., GENERATION == "F11") %>%
dplyr::slice(rep(1:n(), each = 3)) %>%
dplyr::mutate(GENERATION = c("F12", "F13", "F14"))) %>%
# Factorise GENERATION
dplyr::mutate(GENERATION = factor(GENERATION, levels = paste("F", seq(1:14), sep = ""))) %>%
# Order
.[order(.$GENERATION), ] %>%
# Gather
tidyr::pivot_longer(cols = c(`not productive`,
`infertile male`,
`no males`,
`no females`,
`all cause death`,
N_SURVIVING),
names_to = "STATUS",
values_to = "N") %>%
# Convert `N` to integer
dplyr::mutate(N = as.integer(N))
knitr::kable(head(inbreed_df_tidy))| GENERATION | N_LOST_TOTAL | N_LOST_CUM | STATUS | N |
|---|---|---|---|---|
| F1 | 0 | 0 | not productive | 0 |
| F1 | 0 | 0 | infertile male | 0 |
| F1 | 0 | 0 | no males | 0 |
| F1 | 0 | 0 | no females | 0 |
| F1 | 0 | 0 | all cause death | 0 |
| F1 | 0 | 0 | N_SURVIVING | 253 |
recode_vec = c("Infertile male",
"No males",
"No females",
"Not productive",
"All-cause death",
"Reproducing")
names(recode_vec) = c("infertile male",
"no males",
"no females",
"not productive",
"all cause death",
"N_SURVIVING")
test = inbreed_df_tidy %>%
# geom_area() seems to need `x` to be numeric
dplyr::mutate(GENERATION = as.integer(gsub("F", "", GENERATION))) %>%
# recode STATUS values and order
dplyr::mutate(STATUS = dplyr::recode(STATUS, !!!recode_vec),
STATUS = factor(STATUS, levels = recode_vec))# %>%
out2 = ggplot(test) +
geom_area(aes(x=GENERATION,
y=N,
fill = STATUS)) +
# geom_text(aes(x=GENERATION, y=N, label = STATUS)) +
scale_fill_viridis_d(option = "magma") +
scale_x_continuous(breaks = unique(test$GENERATION),
labels = paste("F", unique(test$GENERATION), sep = "")) +
theme_cowplot(font_size = 10) +
xlab("Generation") +
ylab("Number of MIKK panel lines") +
labs(fill = "Status") +
theme(panel.grid = element_blank())
# theme(legend.position=c(0.2, 0.3),
# legend.box.background = element_blank())
out2reviewer_pal = c("#FC4E07", "#FFBF00", "#0BC166", "#00AFBB", "#360568", "#D84797")
out3 = ggplot(test) +
geom_area(aes(x=GENERATION,
y=N,
fill = STATUS)) +
scale_fill_manual(values = reviewer_pal) +
scale_x_continuous(breaks = unique(test$GENERATION),
labels = paste("F", unique(test$GENERATION), sep = "")) +
theme_cowplot(font_size = 16) +
xlab("Generation") +
ylab("Number of MIKK panel lines") +
labs(fill = "Status") +
theme(panel.grid = element_blank())
out3in_file = here::here("data", "fecundity", "20210205_semiquantitative.xlsx")
df_semi = readxl::read_xlsx(in_file, range = "A1:C81")
out_file = here::here("data", "fecundity", "20210208_semiquant.csv")
# Get strain levels
strain_levels = unique(df_semi$Pair)
# Create recode vector
date_recode = c("Feb 2019", "Jul 2020")
names(date_recode) = c("2/19", "7/20")
recode_vec_1 = c(0, 1, 2, 3, 4, 5)
names(recode_vec_1) = c(0, "o", "x", "x/", "xx", "xxx")
recode_vec_2 = c("Not producing",
"<3 eggs; not every day",
"<5 eggs; not every day",
"1-3 eggs per day",
"1-5 eggs per day",
"5-10 eggs per day")
names(recode_vec_2) = c(0, 1, 2, 3, 4, 5)
recode_vec_3 = gsub("; ", ";\n", recode_vec_2)
recode_vec_3 = gsub("eggs per", "eggs\nper", recode_vec_3)
# Tidy
semi_out = df_semi %>%
# pivot fecundity
tidyr::pivot_longer(cols = contains("fecundity"),
names_to = "DATE",
names_prefix = "fecundity ",
values_to = "FECUNDITY") %>%
# recode fecundity measures
dplyr::mutate(DATE = dplyr::recode(DATE, !!!date_recode),
FECUNDITY = dplyr::recode(FECUNDITY, "xx/" = "xx"),
FECUNDITY = dplyr::na_if(FECUNDITY, "do not prod. Yet"),
FECUNDITY = ifelse(is.na(FECUNDITY), 0, FECUNDITY),
FECUNDITY = dplyr::recode(FECUNDITY, !!!factor(recode_vec_1)),
KEY = dplyr::recode(FECUNDITY, !!!recode_vec_2)) %>%
# rename STRAIN
dplyr::rename(STRAIN = Pair) %>%
# factorise
dplyr::mutate(STRAIN = factor(STRAIN, levels = strain_levels)) %>%
# write to file
readr::write_csv(out_file, na = "")
knitr::kable(head(semi_out))| STRAIN | DATE | FECUNDITY | KEY |
|---|---|---|---|
| 4-1 | Jul 2020 | 2 | <5 eggs; not every day |
| 4-1 | Feb 2019 | 2 | <5 eggs; not every day |
| 4-2 | Jul 2020 | 2 | <5 eggs; not every day |
| 4-2 | Feb 2019 | 4 | 1-5 eggs per day |
| 5-1 | Jul 2020 | 2 | <5 eggs; not every day |
| 5-1 | Feb 2019 | 4 | 1-5 eggs per day |
# Process data
final_hor_df = semi_out %>%
dplyr::mutate(KEY = gsub("; ", ";\n", KEY),
KEY = gsub("eggs per", "eggs\nper", KEY),
KEY = factor(KEY, levels = recode_vec_3)) %>%
dplyr::filter(DATE == "Jul 2020") %>%
dplyr::mutate(STRAIN = factor(STRAIN, levels=unique(STRAIN[order(KEY,decreasing = T)]))) %>%
# remove "Not producing"
dplyr::filter(KEY != "Not producing")
# order by category
# Generate histogram
fec_count = final_hor_df %>%
ggplot() +
geom_histogram(aes(KEY, fill = KEY),
stat = "count") +
scale_fill_viridis_d() +
theme_cowplot(font_size = 16) +
xlab("Fecundity") +
ylab("Number of MIKK panel lines") +
guides(fill = F) +
theme(axis.text = element_text(size = 14))
fec_counteye_dat_file = here::here("data/fecundity/eye_rel_measurements_line_ids.txt")
eye_df = readr::read_delim(eye_dat_file,
delim = "\t",
col_types = c("cdc"))
eye_df %>%
DT::datatable()# How many of each line and sex?
eye_df %>%
dplyr::count(line, gender)## # A tibble: 152 × 3
## line gender n
## <chr> <chr> <int>
## 1 10-1 Female 2
## 2 10-1 Male 3
## 3 104-1 Female 2
## 4 104-1 Male 1
## 5 106-1 Female 2
## 6 106-1 Male 1
## 7 106-2 Female 2
## 8 106-2 Male 1
## 9 11-1 Female 2
## 10 11-1 Male 1
## # … with 142 more rows
# Exclude males and get means
eye_df_filt = eye_df %>%
#dplyr::filter(gender == "Female") %>%
dplyr::group_by(line, gender) %>%
dplyr::summarise(MEAN_EYE = mean(eye_rel)) %>%
dplyr::arrange(MEAN_EYE)## `summarise()` has grouped output by 'line'. You can override using the `.groups` argument.
eye_df_filt %>%
ggplot() +
geom_histogram(aes(MEAN_EYE), bins = 40) +
facet_wrap(vars(gender)) +
theme_cowplot() +
guides(fill = "none", colour = "none")gender_pal_1 = c("#0D1B2A", "#1B263B")
gender_pal_2 = c("#FA9702", "#922294")
eye_dens = eye_df_filt %>%
ggplot() +
geom_density(aes(MEAN_EYE, fill = gender, colour = gender)) +
scale_fill_manual(values = gender_pal_2) +
scale_colour_manual(values = darker(gender_pal_2, 70)) +
facet_wrap(vars(gender)) +
theme_cowplot(font_size = 16) +
guides(fill = "none", colour = "none") +
xlab("Mean relative eye size") +
ylab("Density")
eye_dens# order
eye_df_filt_f = eye_df_filt %>%
dplyr::filter(gender == "Female") %>%
dplyr::arrange(MEAN_EYE)
line_order = eye_df_filt_f$line
eye_df_filt_f %>%
dplyr::mutate(line = factor(line, levels = line_order)) %>%
ggplot() +
geom_col(aes(line, MEAN_EYE, fill = line)) +
scale_fill_viridis_d(option = "magma") +
theme_cowplot() +
guides(fill = "none") +
theme(axis.text.x = element_text(size = 5, angle = 45, hjust = 1))final = ggdraw() +
draw_plot(out3, x = 0, y = 0.45, width = .55, height = 0.55) +
draw_plot(fec_count, x = .55, y = 0.45, width = .45, height = 0.55) +
draw_plot(eye_dens, x = 0, y = 0, width = 1, height = 0.45) +
draw_plot_label(label = c("A", "B", "C"), size = 24,
x = c(0, 0.55, 0), y = c(1, 1, 0.45))
final# SVG
ggsave(here::here("docs/plots/fecundity/20220209_final_figure.pdf"),
final,
device = "pdf",
units = "in",
height = 12,
width = 16)
# PNG
ggsave(here::here("docs/plots/fecundity/20220209_final_figure.png"),
final,
device = "png",
dpi = 400,
units = "in",
height = 12,
width = 16)