This lab journal replicates the analyses for ‘ceasing to publish’.
package.check
: Check if packages are installed (and
install if not) in R (source).fpackage.check <- function(packages) {
lapply(packages, FUN = function(x) {
if (!require(x, character.only = TRUE)) {
install.packages(x, dependencies = TRUE)
library(x, character.only = TRUE)
}
})
}
fsave <- function(x, file, location="./data/processed/") {
datename <- substr(gsub("[:-]", "", Sys.time()), 1,8)
totalname <- paste(location, datename, file, sep="")
save(x, file = totalname)
}
survival
: general package to create life tables and
survival models, needed for flexsurvflexsurv
: fitting flexible survival regression
modelsboot
: bootstrapping for SEs of AMEstidyverse
: for data manipulationggplot2
: for creating figures 5-7ggpubr
: for combining two figures in one (plot 5)kableExtra
: formatting tablespackages = c("survival", "flexsurv", "boot", "tidyverse", "ggplot2", "ggpubr", "kableExtra")
fpackage.check(packages)
## Loading required package: survival
## Loading required package: flexsurv
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
##
## aml
## Loading required package: kableExtra
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
We use two processed datasets.
df_ppf3
df_ppf5
load(file = "./data/processed/df_stopping.rda")
load(file = "data/processed/df_stopping5.rda")
Kaplan-Meier curve
KM1 <- survfit(Surv(time-1, time, inactive) ~ 1, conf.lower='usual', data = df_ppf3)
KM2 <- survfit(Surv(time-1, time, inactive) ~ gender, conf.lower='usual', data = df_ppf3)
KM3 <- survfit(Surv(time-1, time, inactive) ~ ethnicity2, conf.lower='usual', data = df_ppf3)
plot(KM1)
plot(KM2)
plot(KM3)
#MC1 <- flexsurvreg(Surv((time -1), time, inactive) ~ 1, data = df_ppf3, dist = "gengamma") # commented: no convergence
#MC2 <- flexsurvreg(Surv((time -1), time, inactive) ~ 1, data = df_ppf3, dist = "genf") # commented: no convergence
MC3 <- flexsurvreg(Surv((time -1), time, inactive) ~ 1, data = df_ppf3, dist = "weibull")
MC4 <- flexsurvreg(Surv((time -1), time, inactive) ~ 1, data = df_ppf3, dist = "gamma")
MC5 <- flexsurvreg(Surv((time -1), time, inactive) ~ 1, data = df_ppf3, dist = "exp")
MC6 <- flexsurvreg(Surv((time -1), time, inactive) ~ 1, data = df_ppf3, dist = "llogis")
MC7 <- flexsurvreg(Surv((time -1), time, inactive) ~ 1, data = df_ppf3, dist = "gompertz")
MC8 <- flexsurvreg(Surv((time -1), time, inactive) ~ 1, data = df_ppf3, dist = "lognormal")
testdist <- AIC(MC3, MC4, MC5, MC6, MC7, MC8)
testdist[order(testdist$AIC),]
M1 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender , data = df_ppf3, dist = "lognormal")
M1
## Call:
## flexsurvreg(formula = Surv((time - 1), time, inactive) ~ gender,
## data = df_ppf3, dist = "lognormal")
##
## Estimates:
## data mean est L95% U95% se exp(est) L95% U95%
## meanlog NA 2.19181 2.16380 2.21983 0.01429 NA NA NA
## sdlog NA 1.06590 1.05003 1.08202 0.00816 NA NA NA
## genderwomen 0.32431 -0.19212 -0.23351 -0.15073 0.02112 0.82521 0.79175 0.86008
## gendermissing 0.15973 -0.62256 -0.66863 -0.57649 0.02351 0.53657 0.51241 0.56187
##
## N = 118372, Events: 9951, Censored: 108421
## Total time at risk: 118372
## Log-likelihood = -32564.76, df = 4
## AIC = 65137.52
P-values
## meanlog sdlog genderwomen gendermissing
## 0 0 0 0
M2 <- flexsurvreg(Surv((time -1), time, inactive) ~ ethnicity2 , data = df_ppf3, dist = "lognormal")
M2
## Call:
## flexsurvreg(formula = Surv((time - 1), time, inactive) ~ ethnicity2,
## data = df_ppf3, dist = "lognormal")
##
## Estimates:
## data mean est L95% U95% se exp(est) L95% U95%
## meanlog NA 2.11350 2.09063 2.13636 0.01167 NA NA NA
## sdlog NA 1.07443 1.05840 1.09070 0.00824 NA NA NA
## ethnicity2minority 0.01282 -0.45499 -0.58923 -0.32075 0.06849 0.63445 0.55475 0.72561
## ethnicity2other 0.24807 -0.37859 -0.41738 -0.33980 0.01979 0.68483 0.65877 0.71191
##
## N = 118372, Events: 9951, Censored: 108421
## Total time at risk: 118372
## Log-likelihood = -32721.55, df = 4
## AIC = 65451.1
P-values
## meanlog sdlog ethnicity2minority ethnicity2other
## 0 0 0 0
M3 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender + ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s, data = df_ppf3, dist = "lognormal")
M3
## Call:
## flexsurvreg(formula = Surv((time - 1), time, inactive) ~ gender +
## ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s, data = df_ppf3,
## dist = "lognormal")
##
## Estimates:
## data mean est L95% U95% se
## meanlog NA 2.254134 2.172277 2.335991 0.041765
## sdlog NA 0.847069 0.834590 0.859736 0.006415
## genderwomen 0.324308 -0.084489 -0.118045 -0.050932 0.017121
## gendermissing 0.159734 -0.436816 -0.475709 -0.397922 0.019844
## ethnicity2minority 0.012824 -0.119125 -0.224714 -0.013536 0.053873
## ethnicity2other 0.248065 -0.101543 -0.133789 -0.069298 0.016452
## uniLU 0.016947 -0.054197 -0.173059 0.064666 0.060645
## uniRU 0.211317 -0.205849 -0.284479 -0.127219 0.040118
## uniRUG 0.040846 0.608518 0.457734 0.759303 0.076932
## uniTUD 0.004131 0.367652 0.163204 0.572100 0.104312
## uniTUE 0.023840 0.503403 0.342315 0.664492 0.082190
## uniTI 0.010873 -0.435195 -0.578198 -0.292192 0.072962
## uniUM 0.096568 -0.023207 -0.109659 0.063245 0.044109
## uniUT 0.080543 -0.605701 -0.691606 -0.519796 0.043830
## uniUU 0.090376 -0.181588 -0.265715 -0.097462 0.042922
## uniUvA 0.121828 -0.049879 -0.136390 0.036633 0.044139
## uniVU 0.090748 -0.048208 -0.137242 0.040825 0.045426
## uniWUR 0.158940 -0.000619 -0.085835 0.084596 0.043478
## field2Physical and Mathematical Sciences 0.116016 -0.201981 -0.248623 -0.155340 0.023797
## field2Social and Behavioral Sciences 0.145904 0.193687 0.146858 0.240516 0.023893
## field2Engineering 0.077620 -0.119362 -0.176828 -0.061897 0.029320
## field2Agricultural Sciences 0.085899 0.048752 -0.010329 0.107833 0.030144
## field2Humanities 0.041429 0.293647 0.209210 0.378085 0.043081
## field2missing 0.160798 0.290978 0.243449 0.338508 0.024250
## phd_cohort 15.109840 -0.033922 -0.036236 -0.031609 0.001180
## npubs_prev_s 0.479051 1.500639 1.434418 1.566861 0.033787
## exp(est) L95% U95%
## meanlog NA NA NA
## sdlog NA NA NA
## genderwomen 0.918982 0.888656 0.950343
## gendermissing 0.646090 0.621444 0.671714
## ethnicity2minority 0.887697 0.798745 0.986556
## ethnicity2other 0.903442 0.874775 0.933049
## uniLU 0.947246 0.841088 1.066802
## uniRU 0.813956 0.752406 0.880540
## uniRUG 1.837707 1.580489 2.136786
## uniTUD 1.444340 1.177277 1.771985
## uniTUE 1.654342 1.408203 1.943503
## uniTI 0.647138 0.560908 0.746625
## uniUM 0.977060 0.896139 1.065288
## uniUT 0.545692 0.500771 0.594642
## uniUU 0.833945 0.766658 0.907137
## uniUvA 0.951345 0.872502 1.037312
## uniVU 0.952935 0.871759 1.041670
## uniWUR 0.999381 0.917746 1.088278
## field2Physical and Mathematical Sciences 0.817110 0.779874 0.856124
## field2Social and Behavioral Sciences 1.213716 1.158189 1.271905
## field2Engineering 0.887486 0.837924 0.939980
## field2Agricultural Sciences 1.049960 0.989724 1.113862
## field2Humanities 1.341311 1.232703 1.459487
## field2missing 1.337736 1.275641 1.402853
## phd_cohort 0.966647 0.964413 0.968886
## npubs_prev_s 4.484555 4.197201 4.791582
##
## N = 118372, Events: 9951, Censored: 108421
## Total time at risk: 118372
## Log-likelihood = -30292, df = 26
## AIC = 60636
P-values
## meanlog sdlog
## 0.0000 0.0000
## genderwomen gendermissing
## 0.0000 0.0000
## ethnicity2minority ethnicity2other
## 0.0270 0.0000
## uniLU uniRU
## 0.3715 0.0000
## uniRUG uniTUD
## 0.0000 0.0004
## uniTUE uniTI
## 0.0000 0.0000
## uniUM uniUT
## 0.5988 0.0000
## uniUU uniUvA
## 0.0000 0.2585
## uniVU uniWUR
## 0.2886 0.9886
## field2Physical and Mathematical Sciences field2Social and Behavioral Sciences
## 0.0000 0.0000
## field2Engineering field2Agricultural Sciences
## 0.0000 0.1058
## field2Humanities field2missing
## 0.0000 0.0000
## phd_cohort npubs_prev_s
## 0.0000 0.0000
bootFunc <- function(data, i) {
df <- data[i,] #bootstrap datasets
M <- flexsurv::flexsurvreg(survival::Surv((time -1), time, inactive) ~ gender + ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s, data = df, dist = "lognormal")
suppressWarnings({
dfmaj <- dfmin <- dfmen <- dfwomen <- df
dfwomen$gender <- "women" # one dataset of all women
dfmen$gender <- "men" # one dataset of all men
dfmin$ethnicity2 <- "minority" # one dataset all minority
dfmaj$ethnicity2 <- "majority" # one dataset all majority
dfwomen$gender <- factor(dfwomen$gender, levels=levels(df$gender))
dfmen$gender <- factor(dfmen$gender, levels=levels(df$gender))
dfmin$ethnicity2 <- factor(dfmin$ethnicity2, levels=levels(df$ethnicity2))
dfmaj$ethnicity2 <- factor(dfmaj$ethnicity2, levels=levels(df$ethnicity2))
#calculate the predicted probabilities for gender
pw <- as.numeric(unlist(predict(M, type="response", newdata=dfwomen)))
pm <- as.numeric(unlist(predict(M, type="response", newdata=dfmen)))
# average marginal effects
AME_gender <- mean(pw - pm)
#calculate the predicted probabilities for ethnicity
pmn <- as.numeric(unlist(predict(M, type="response", newdata=dfmin)))
pmj <- as.numeric(unlist(predict(M, type="response", newdata=dfmaj)))
# average marginal effects
AME_ethnicity <- mean(pmn - pmj)
})
c(AME_gender, AME_ethnicity)
#save results
}
b3 <- boot(df_ppf3, bootFunc, R = 999, parallel="snow", ncpus=10)
fsave(b3, file = "boot3.rda", location = "./results/stopping/")
original <- b3$t0
bias <- colMeans(b3$t) - b3$t0
se <- apply(b3$t, 2, sd)
boot.df3 <- data.frame(original=original, bias=bias, se=se)
row.names(boot.df3) <- c("AME_gender", "AME_ethnicity")
boot.df3$t <- (boot.df3$original / boot.df3$se)
round(boot.df3, 5)
## original bias se t
## AME_gender -1.65428 -0.00432 0.31677 -5.22231
## AME_ethnicity -2.18740 0.04751 0.84055 -2.60236
M4 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender + ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s + phd_cohort*gender + phd_cohort*ethnicity2, data = df_ppf3, dist = "lognormal")
M4
## Call:
## flexsurvreg(formula = Surv((time - 1), time, inactive) ~ gender +
## ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s + phd_cohort *
## gender + phd_cohort * ethnicity2, data = df_ppf3, dist = "lognormal")
##
## Estimates:
## data mean est L95% U95% se
## meanlog NA 2.241756 2.152108 2.331405 0.045740
## sdlog NA 0.845462 0.833004 0.858107 0.006404
## genderwomen 0.324308 0.021328 -0.074926 0.117582 0.049110
## gendermissing 0.159734 -0.656435 -0.759920 -0.552949 0.052800
## ethnicity2minority 0.012824 -0.138562 -0.533041 0.255916 0.201268
## ethnicity2other 0.248065 -0.020699 -0.118039 0.076640 0.049664
## uniLU 0.016947 -0.036353 -0.155113 0.082408 0.060593
## uniRU 0.211317 -0.195520 -0.274085 -0.116955 0.040085
## uniRUG 0.040846 0.606243 0.455623 0.756863 0.076848
## uniTUD 0.004131 0.392737 0.188261 0.597213 0.104326
## uniTUE 0.023840 0.515919 0.354808 0.677030 0.082201
## uniTI 0.010873 -0.416125 -0.559084 -0.273166 0.072940
## uniUM 0.096568 -0.004576 -0.091134 0.081982 0.044163
## uniUT 0.080543 -0.584547 -0.670719 -0.498374 0.043966
## uniUU 0.090376 -0.164626 -0.248936 -0.080315 0.043016
## uniUvA 0.121828 -0.034374 -0.121055 0.052307 0.044226
## uniVU 0.090748 -0.033359 -0.122402 0.055684 0.045431
## uniWUR 0.158940 0.010904 -0.074224 0.096032 0.043434
## field2Physical and Mathematical Sciences 0.116016 -0.202237 -0.248801 -0.155672 0.023758
## field2Social and Behavioral Sciences 0.145904 0.190841 0.144093 0.237589 0.023852
## field2Engineering 0.077620 -0.118354 -0.175720 -0.060987 0.029269
## field2Agricultural Sciences 0.085899 0.044306 -0.014673 0.103284 0.030092
## field2Humanities 0.041429 0.293140 0.208847 0.377434 0.043008
## field2missing 0.160798 0.291224 0.243740 0.338708 0.024227
## phd_cohort 15.109840 -0.033855 -0.037081 -0.030628 0.001646
## npubs_prev_s 0.479051 1.496518 1.430389 1.562648 0.033740
## genderwomen:phd_cohort 5.472333 -0.005237 -0.010027 -0.000446 0.002444
## gendermissing:phd_cohort 2.550983 0.012432 0.006944 0.017920 0.002800
## ethnicity2minority:phd_cohort 0.242718 0.000462 -0.017883 0.018808 0.009360
## ethnicity2other:phd_cohort 4.313131 -0.004433 -0.009240 0.000374 0.002453
## exp(est) L95% U95%
## meanlog NA NA NA
## sdlog NA NA NA
## genderwomen 1.021557 0.927813 1.124774
## gendermissing 0.518697 0.467704 0.575251
## ethnicity2minority 0.870609 0.586818 1.291645
## ethnicity2other 0.979513 0.888661 1.079654
## uniLU 0.964300 0.856318 1.085899
## uniRU 0.822407 0.760267 0.889625
## uniRUG 1.833530 1.577155 2.131579
## uniTUD 1.481029 1.207148 1.817048
## uniTUE 1.675177 1.425907 1.968024
## uniTI 0.659598 0.571733 0.760966
## uniUM 0.995435 0.912896 1.085436
## uniUT 0.557359 0.511341 0.607517
## uniUU 0.848211 0.779630 0.922825
## uniUvA 0.966210 0.885985 1.053699
## uniVU 0.967191 0.884793 1.057263
## uniWUR 1.010964 0.928463 1.100795
## field2Physical and Mathematical Sciences 0.816902 0.779735 0.855840
## field2Social and Behavioral Sciences 1.210267 1.154992 1.268188
## field2Engineering 0.888382 0.838853 0.940835
## field2Agricultural Sciences 1.045302 0.985434 1.108807
## field2Humanities 1.340631 1.232256 1.458537
## field2missing 1.338065 1.276013 1.403134
## phd_cohort 0.966712 0.963598 0.969836
## npubs_prev_s 4.466113 4.180324 4.771440
## genderwomen:phd_cohort 0.994777 0.990023 0.999554
## gendermissing:phd_cohort 1.012510 1.006968 1.018082
## ethnicity2minority:phd_cohort 1.000462 0.982276 1.018986
## ethnicity2other:phd_cohort 0.995577 0.990802 1.000374
##
## N = 118372, Events: 9951, Censored: 108421
## Total time at risk: 118372
## Log-likelihood = -30274.33, df = 30
## AIC = 60608.66
P-values
## meanlog sdlog
## 0.0000 0.0000
## genderwomen gendermissing
## 0.6641 0.0000
## ethnicity2minority ethnicity2other
## 0.4912 0.6768
## uniLU uniRU
## 0.5485 0.0000
## uniRUG uniTUD
## 0.0000 0.0002
## uniTUE uniTI
## 0.0000 0.0000
## uniUM uniUT
## 0.9175 0.0000
## uniUU uniUvA
## 0.0001 0.4370
## uniVU uniWUR
## 0.4628 0.8018
## field2Physical and Mathematical Sciences field2Social and Behavioral Sciences
## 0.0000 0.0000
## field2Engineering field2Agricultural Sciences
## 0.0001 0.1409
## field2Humanities field2missing
## 0.0000 0.0000
## phd_cohort npubs_prev_s
## 0.0000 0.0000
## genderwomen:phd_cohort gendermissing:phd_cohort
## 0.0322 0.0000
## ethnicity2minority:phd_cohort ethnicity2other:phd_cohort
## 0.9606 0.0707
bootFunc <- function(data, i) {
df <- data[i,] #bootstrap datasets
M <- flexsurv::flexsurvreg(survival::Surv((time -1), time, inactive) ~ gender + ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s + phd_cohort*gender + phd_cohort*ethnicity2, data = df, dist = "lognormal")
suppressWarnings({
dfmaj <- dfmin <- dfmen <- dfwomen <- df
dfwomen$gender <- "women" # one dataset of all women
dfmen$gender <- "men" # one dataset of all men
dfmin$ethnicity2 <- "minority" # one dataset all minority
dfmaj$ethnicity2 <- "majority" # one dataset all majority
dfwomen$gender <- factor(dfwomen$gender, levels=levels(df$gender))
dfmen$gender <- factor(dfmen$gender, levels=levels(df$gender))
dfmin$ethnicity2 <- factor(dfmin$ethnicity2, levels=levels(df$ethnicity2))
dfmaj$ethnicity2 <- factor(dfmaj$ethnicity2, levels=levels(df$ethnicity2))
#calculate the predicted probabilities for gender
pw <- as.numeric(unlist(predict(M, type="response", newdata=dfwomen)))
pm <- as.numeric(unlist(predict(M, type="response", newdata=dfmen)))
# average marginal effects
AME_gender <- mean(pw - pm)
#calculate the predicted probabilities for ethnicity
pmn <- as.numeric(unlist(predict(M, type="response", newdata=dfmin)))
pmj <- as.numeric(unlist(predict(M, type="response", newdata=dfmaj)))
# average marginal effects
AME_ethnicity <- mean(pmn - pmj)
# cohort interactions
s <- 0.1
# copying our data into separate dataframes
dfwomencp <- dfwomencm <- dfmencp <- dfmencm <- df
dfmincp <- dfmincm <- dfmajcp <- dfmajcm <- df
# assigning gender to each of the dataframes
dfwomencp$gender <- dfwomencm$gender <- "women"
dfmencp$gender <- dfmencm$gender <- "men"
dfwomencp$gender <- factor(dfwomencp$gender, levels=levels(df$gender))
dfwomencm$gender <- factor(dfwomencm$gender, levels=levels(df$gender))
dfmencp$gender <- factor(dfmencp$gender, levels=levels(df$gender))
dfmencm$gender <- factor(dfmencm$gender, levels=levels(df$gender))
# assigning ethnicity to the dataframes
dfmincp$ethnicity2 <- dfmincm$ethnicity2 <- "minority"
dfmajcp$ethnicity2 <- dfmajcm$ethnicity2 <- "majority"
dfmincp$ethnicity2 <- factor(dfmincp$ethnicity2, levels=levels(df$ethnicity2))
dfmincm$ethnicity2 <- factor(dfmincm$ethnicity2, levels=levels(df$ethnicity2))
dfmajcp$ethnicity2 <- factor(dfmajcp$ethnicity2, levels=levels(df$ethnicity2))
dfmajcm$ethnicity2 <- factor(dfmajcm$ethnicity2, levels=levels(df$ethnicity2))
# adding/subtracting small changes PhD cohort
dfwomencp$phd_cohort <- dfmencp$phd_cohort <- df$phd_cohort + s
dfwomencm$phd_cohort <- dfmencm$phd_cohort <- df$phd_cohort - s
dfmajcp$phd_cohort <- dfmincp$phd_cohort <- df$phd_cohort + s
dfmajcm$phd_cohort <- dfmincm$phd_cohort <- df$phd_cohort - s
# calculating predicted probabilities
pwp <- as.numeric(unlist(predict(M, type="response", newdata=dfwomencp)))
pwm <- as.numeric(unlist(predict(M, type="response", newdata=dfwomencm)))
pmp <- as.numeric(unlist(predict(M, type="response", newdata=dfmencp)))
pmm <- as.numeric(unlist(predict(M, type="response", newdata=dfmencm)))
pnp <- as.numeric(unlist(predict(M, type="response", newdata=dfmincp)))
pnm <- as.numeric(unlist(predict(M, type="response", newdata=dfmincm)))
pjp <- as.numeric(unlist(predict(M, type="response", newdata=dfmajcp)))
pjm <- as.numeric(unlist(predict(M, type="response", newdata=dfmajcm)))
# marginal effects
AME_gendercoh <- mean(((pwp - pmp) - (pwm - pmm)) / (2*s))
AME_ethnicitycoh <- mean(((pnp - pjp) - (pnm - pjm)) / (2*s))
})
c(AME_gender, AME_ethnicity, AME_gendercoh, AME_ethnicitycoh)
#save results
}
b4 <- boot(df_ppf3, bootFunc, R = 999, parallel="snow", ncpus=10)
fsave(b4, file = "boot4.rda", location = "./results/stopping/")
original <- b4$t0
bias <- colMeans(b4$t) - b4$t0
se <- apply(b4$t, 2, sd)
boot.df4 <- data.frame(original=original, bias=bias, se=se)
row.names(boot.df4) <- c("AME_gender", "AME_ethnicity", "AME_gender-coh", "AME_ethnicity-coh")
boot.df4$t <- (boot.df4$original / boot.df4$se)
round(boot.df4, 5)
## original bias se t
## AME_gender -0.78358 0.00137 0.51239 -1.52926
## AME_ethnicity -2.43144 0.15562 1.67483 -1.45175
## AME_gender-coh -0.07530 -0.00054 0.06601 -1.14074
## AME_ethnicity-coh 0.09160 -0.01590 0.20778 0.44085
columns <- c(rep(c("B", "C.I."), 4))
rows <- c("<strong>Shape</strong></p>", "<strong>Scale</strong></p>", "Gender: ref=men" ,"Women", "Missing gender", "Ethnicity: ref=majority","Minority", "Other", "University: ref=Erasmus University", "Leiden University", "Radboud University", "University of Groningen", "Delft University of Technology", "Eindhoven University of Technology", "Tilburg University", "Maastricht University", "University of Twente", "Utrecht University", "University of Amsterdam", "Vrije Universiteit Amsterdam", "Wageningen University and Research Centre", "Field: ref=Biological and Health Sciences", "Physical and mathematical sciences", "Social and behavioral sciences", "Engineering", "Agricultural sciences", "Humanities", "Missing field", "<strong>PhD cohort</strong></p>", "<strong>Previous publications</strong></p>", "Cohort interactions","PhD cohort * women", "PhD cohort * missing gender", "PhD cohort * ethnic minority", "PhD cohort * other ethnicity", "<strong>AIC", "<strong>N")
t4 <- data.frame(matrix(nrow=length(rows), ncol=length(columns)))
colnames(t4) <- columns
rownames(t4) <- rows
# Model 1
t4[c(1,2,4,5),1] <- format(round(as.numeric(M1$coef), 2), nsmall=2) # estimates
t4[c(1,2),2] <- paste0("[", format(round(log(M1$res[c(1,2),2]), 2), nsmall=2), " , ", format(round(log(M1$res[c(1,2),3]), 2), nsmall=2), "]") # CI for shape and scale
t4[c(4,5),2] <- paste0("[", format(round(M1$res[c(3,4),2], 2), nsmall=2), " , ", format(round(M1$res[c(3,4),3], 3), nsmall=2), "]") # CI for other covariates
# Model 2
t4[c(1,2,7,8),3] <- format(round(M2$coef, 2), nsmall=2)
t4[c(1,2),4] <- paste0("[", format(round(log(M2$res[c(1,2),2]), 2), nsmall=2), " , ", format(round(log(M2$res[c(1,2),3]), 2), nsmall=2), "]")
t4[c(7,8),4] <- paste0("[", format(round(M2$res[c(3,4),2], 2), nsmall=2), " , ", format(round(M2$res[c(3,4),3], 2), nsmall=2), "]")
# Model 3
t4[c(1,2,4,5,7,8,10:21,23:30),5] <- format(round(M3$coef, 2), nsmall=2)
#t4[c(1,2), 6] <- paste("[", round(log(M3$res[c(1,2),2]), 3), ",", round(log(M3$res[c(1,2),3]), 3), "]")
t4[c(1,2,4,5,7,8,10:21,23:30), 6] <- paste0("[", format(round(M3$res[c(1,2,3:26),2], 2), nsmall=2), " , ", format(round(M3$res[c(1,2,3:26),3], 2), nsmall=2), "]")
# Model 4
t4[c(1,2,4,5,7,8,10:21,23:30,32:35),7] <- format(round(M4$coef, 2), nsmall=2)
#t4[c(1,2), 8] <- paste("[", round(log(M4$res[c(1:2),2]), 3), ",", round(log(M4$res[c(1:2),3]), 3), "]")
t4[c(1,2,4,5,7,8,10:21,23:30,32:35), 8] <- paste0("[", format(round(M4$res[c(1,2,3:30),2], 2), nsmall=2), " , ", format(round(M4$res[c(1,2,3:30),3], 2), nsmall=2), "]")
# AIC
t4[36,1] <- round(M1$AIC, 0)
t4[36,3] <- round(M2$AIC, 0)
t4[36,5] <- round(M3$AIC, 0)
t4[36,7] <- round(M4$AIC, 0)
# sample size
t4[37, c(1,3,5,7)] <- rep(nrow(df_ppf3[!duplicated(df_ppf3$id),]), 4)
t4[is.na(t4)] <- ""
t4 %>%
kable(format = 'html', caption = '<b>Table 4.</b> Log-normal regression analyses on stopping to publish', escape=FALSE) %>%
add_header_above(.,c(' '=1, 'Model 1'=2, 'Model 2'=2, 'Model 3'=2, 'Model 4'=2), escape = FALSE, bold = TRUE) %>%
row_spec(row=c(3,6,9,22,31), bold=T) %>%
kable_classic(full_width = F, html_font = "Cambria") %>%
kable_styling(font_size = 11) -> table4
table4
B | C.I. | B | C.I. | B | C.I. | B | C.I. | |
---|---|---|---|---|---|---|---|---|
Shape | 2.19 | [0.77 , 0.80] | 2.11 | [0.74 , 0.76] | 2.25 | [ 2.17 , 2.34] | 2.24 | [ 2.15 , 2.33] |
Scale | 0.06 | [0.05 , 0.08] | 0.07 | [0.06 , 0.09] | -0.17 | [ 0.83 , 0.86] | -0.17 | [ 0.83 , 0.86] |
Gender: ref=men | ||||||||
Women | -0.19 | [-0.23 , -0.151] | -0.08 | [-0.12 , -0.05] | 0.02 | [-0.07 , 0.12] | ||
Missing gender | -0.62 | [-0.67 , -0.576] | -0.44 | [-0.48 , -0.40] | -0.66 | [-0.76 , -0.55] | ||
Ethnicity: ref=majority | ||||||||
Minority | -0.45 | [-0.59 , -0.32] | -0.12 | [-0.22 , -0.01] | -0.14 | [-0.53 , 0.26] | ||
Other | -0.38 | [-0.42 , -0.34] | -0.10 | [-0.13 , -0.07] | -0.02 | [-0.12 , 0.08] | ||
University: ref=Erasmus University | ||||||||
Leiden University | -0.05 | [-0.17 , 0.06] | -0.04 | [-0.16 , 0.08] | ||||
Radboud University | -0.21 | [-0.28 , -0.13] | -0.20 | [-0.27 , -0.12] | ||||
University of Groningen | 0.61 | [ 0.46 , 0.76] | 0.61 | [ 0.46 , 0.76] | ||||
Delft University of Technology | 0.37 | [ 0.16 , 0.57] | 0.39 | [ 0.19 , 0.60] | ||||
Eindhoven University of Technology | 0.50 | [ 0.34 , 0.66] | 0.52 | [ 0.35 , 0.68] | ||||
Tilburg University | -0.44 | [-0.58 , -0.29] | -0.42 | [-0.56 , -0.27] | ||||
Maastricht University | -0.02 | [-0.11 , 0.06] | 0.00 | [-0.09 , 0.08] | ||||
University of Twente | -0.61 | [-0.69 , -0.52] | -0.58 | [-0.67 , -0.50] | ||||
Utrecht University | -0.18 | [-0.27 , -0.10] | -0.16 | [-0.25 , -0.08] | ||||
University of Amsterdam | -0.05 | [-0.14 , 0.04] | -0.03 | [-0.12 , 0.05] | ||||
Vrije Universiteit Amsterdam | -0.05 | [-0.14 , 0.04] | -0.03 | [-0.12 , 0.06] | ||||
Wageningen University and Research Centre | 0.00 | [-0.09 , 0.08] | 0.01 | [-0.07 , 0.10] | ||||
Field: ref=Biological and Health Sciences | ||||||||
Physical and mathematical sciences | -0.20 | [-0.25 , -0.16] | -0.20 | [-0.25 , -0.16] | ||||
Social and behavioral sciences | 0.19 | [ 0.15 , 0.24] | 0.19 | [ 0.14 , 0.24] | ||||
Engineering | -0.12 | [-0.18 , -0.06] | -0.12 | [-0.18 , -0.06] | ||||
Agricultural sciences | 0.05 | [-0.01 , 0.11] | 0.04 | [-0.01 , 0.10] | ||||
Humanities | 0.29 | [ 0.21 , 0.38] | 0.29 | [ 0.21 , 0.38] | ||||
Missing field | 0.29 | [ 0.24 , 0.34] | 0.29 | [ 0.24 , 0.34] | ||||
PhD cohort | -0.03 | [-0.04 , -0.03] | -0.03 | [-0.04 , -0.03] | ||||
Previous publications | 1.50 | [ 1.43 , 1.57] | 1.50 | [ 1.43 , 1.56] | ||||
Cohort interactions | ||||||||
PhD cohort * women | -0.01 | [-0.01 , 0.00] | ||||||
PhD cohort * missing gender | 0.01 | [ 0.01 , 0.02] | ||||||
PhD cohort * ethnic minority | 0.00 | [-0.02 , 0.02] | ||||||
PhD cohort * other ethnicity | 0.00 | [-0.01 , 0.00] | ||||||
AIC | 65138 | 65451 | 60636 | 60609 | ||||
N | 15021 | 15021 | 15021 | 15021 |
R1 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender , data = df_ppf5, dist = "lognormal")
R1
R2 <- flexsurvreg(Surv((time -1), time, inactive) ~ ethnicity2 , data = df_ppf5, dist = "lognormal")
R2
R3 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender + ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s, data = df_ppf5, dist = "lognormal")
R3
R4 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender + ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s + phd_cohort*gender + phd_cohort*ethnicity2, data = df_ppf5, dist = "lognormal")
R4
AMEs Model 3 (no SEs)
dfmaj <- dfmin <- dfmen <- dfwomen <- df_ppf5
dfwomen$gender <- "women" # one dataset of all women
dfmen$gender <- "men" # one dataset of all men
dfmin$ethnicity2 <- "minority" # one dataset all minority
dfmaj$ethnicity2 <- "majority" # one dataset all majority
dfwomen$gender <- factor(dfwomen$gender, levels=levels(df_ppf5$gender))
dfmen$gender <- factor(dfmen$gender, levels=levels(df_ppf5$gender))
dfmin$ethnicity2 <- factor(dfmin$ethnicity2, levels=levels(df_ppf5$ethnicity2))
dfmaj$ethnicity2 <- factor(dfmaj$ethnicity2, levels=levels(df_ppf5$ethnicity2))
#calculate the predicted probabilities for gender
pw <- as.numeric(unlist(predict(R3, type="response", newdata=dfwomen)))
pm <- as.numeric(unlist(predict(R3, type="response", newdata=dfmen)))
# average marginal effects
AME_gender <- mean(pw - pm)
#calculate the predicted probabilities for ethnicity
pmn <- as.numeric(unlist(predict(R3, type="response", newdata=dfmin)))
pmj <- as.numeric(unlist(predict(R3, type="response", newdata=dfmaj)))
# average marginal effects
AME_ethnicity <- mean(pmn - pmj)
AME_gender
AME_ethnicity
AMEs Model 4 (no SEs)
dfmaj <- dfmin <- dfmen <- dfwomen <- df_ppf5
dfwomen$gender <- "women" # one dataset of all women
dfmen$gender <- "men" # one dataset of all men
dfmin$ethnicity2 <- "minority" # one dataset all minority
dfmaj$ethnicity2 <- "majority" # one dataset all majority
dfwomen$gender <- factor(dfwomen$gender, levels=levels(df_ppf5$gender))
dfmen$gender <- factor(dfmen$gender, levels=levels(df_ppf5$gender))
dfmin$ethnicity2 <- factor(dfmin$ethnicity2, levels=levels(df_ppf5$ethnicity2))
dfmaj$ethnicity2 <- factor(dfmaj$ethnicity2, levels=levels(df_ppf5$ethnicity2))
#calculate the predicted probabilities for gender
pw <- as.numeric(unlist(predict(R4, type="response", newdata=dfwomen)))
pm <- as.numeric(unlist(predict(R4, type="response", newdata=dfmen)))
# average marginal effects
AME_gender <- mean(pw - pm)
#calculate the predicted probabilities for ethnicity
pmn <- as.numeric(unlist(predict(R4, type="response", newdata=dfmin)))
pmj <- as.numeric(unlist(predict(R4, type="response", newdata=dfmaj)))
# average marginal effects
AME_ethnicity <- mean(pmn - pmj)
# cohort interactions
s <- 0.1
# copying our data into separate dataframes
dfwomencp <- dfwomencm <- dfmencp <- dfmencm <- df_ppf5
dfmincp <- dfmincm <- dfmajcp <- dfmajcm <- df_ppf5
# assigning gender to each of the dataframes
dfwomencp$gender <- dfwomencm$gender <- "women"
dfmencp$gender <- dfmencm$gender <- "men"
dfwomencp$gender <- factor(dfwomencp$gender, levels=levels(df_ppf5$gender))
dfwomencm$gender <- factor(dfwomencm$gender, levels=levels(df_ppf5$gender))
dfmencp$gender <- factor(dfmencp$gender, levels=levels(df_ppf5$gender))
dfmencm$gender <- factor(dfmencm$gender, levels=levels(df_ppf5$gender))
# assigning ethnicity to the dataframes
dfmincp$ethnicity2 <- dfmincm$ethnicity2 <- "minority"
dfmajcp$ethnicity2 <- dfmajcm$ethnicity2 <- "majority"
dfmincp$ethnicity2 <- factor(dfmincp$ethnicity2, levels=levels(df_ppf5$ethnicity2))
dfmincm$ethnicity2 <- factor(dfmincm$ethnicity2, levels=levels(df_ppf5$ethnicity2))
dfmajcp$ethnicity2 <- factor(dfmajcp$ethnicity2, levels=levels(df_ppf5$ethnicity2))
dfmajcm$ethnicity2 <- factor(dfmajcm$ethnicity2, levels=levels(df_ppf5$ethnicity2))
# adding/subtracting small changes PhD cohort
dfwomencp$phd_cohort <- dfmencp$phd_cohort <- df_ppf5$phd_cohort + s
dfwomencm$phd_cohort <- dfmencm$phd_cohort <- df_ppf5$phd_cohort - s
dfmajcp$phd_cohort <- dfmincp$phd_cohort <- df_ppf5$phd_cohort + s
dfmajcm$phd_cohort <- dfmincm$phd_cohort <- df_ppf5$phd_cohort - s
# calculating predicted probabilities
pwp <- as.numeric(unlist(predict(R4, type="response", newdata=dfwomencp)))
pwm <- as.numeric(unlist(predict(R4, type="response", newdata=dfwomencm)))
pmp <- as.numeric(unlist(predict(R4, type="response", newdata=dfmencp)))
pmm <- as.numeric(unlist(predict(R4, type="response", newdata=dfmencm)))
pnp <- as.numeric(unlist(predict(R4, type="response", newdata=dfmincp)))
pnm <- as.numeric(unlist(predict(R4, type="response", newdata=dfmincm)))
pjp <- as.numeric(unlist(predict(R4, type="response", newdata=dfmajcp)))
pjm <- as.numeric(unlist(predict(R4, type="response", newdata=dfmajcm)))
# marginal effects
AME_gendercoh <- mean(((pwp - pmp) - (pwm - pmm)) / (2*s))
AME_ethnicitycoh <- mean(((pnp - pjp) - (pnm - pjm)) / (2*s))
Model 3 SEs
bootFunc <- function(data, i) {
df <- data[i,] #bootstrap datasets
M <- flexsurv::flexsurvreg(survival::Surv((time -1), time, inactive) ~ gender + ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s, data = df, dist = "lognormal")
suppressWarnings({
dfmaj <- dfmin <- dfmen <- dfwomen <- df
dfwomen$gender <- "women" # one dataset of all women
dfmen$gender <- "men" # one dataset of all men
dfmin$ethnicity2 <- "minority" # one dataset all minority
dfmaj$ethnicity2 <- "majority" # one dataset all majority
dfwomen$gender <- factor(dfwomen$gender, levels=levels(df$gender))
dfmen$gender <- factor(dfmen$gender, levels=levels(df$gender))
dfmin$ethnicity2 <- factor(dfmin$ethnicity2, levels=levels(df$ethnicity2))
dfmaj$ethnicity2 <- factor(dfmaj$ethnicity2, levels=levels(df$ethnicity2))
#calculate the predicted probabilities for gender
pw <- as.numeric(unlist(predict(M, type="response", newdata=dfwomen)))
pm <- as.numeric(unlist(predict(M, type="response", newdata=dfmen)))
# average marginal effects
AME_gender <- mean(pw - pm)
#calculate the predicted probabilities for ethnicity
pmn <- as.numeric(unlist(predict(M, type="response", newdata=dfmin)))
pmj <- as.numeric(unlist(predict(M, type="response", newdata=dfmaj)))
# average marginal effects
AME_ethnicity <- mean(pmn - pmj)
})
c(AME_gender, AME_ethnicity)
#save results
}
b3 <- boot(df_ppf5, bootFunc, R = 100, parallel="snow", ncpus=10)
fsave(b3, file = "boot3_R1.rda", location = "./results/stopping/robustness/")
original <- b3$t0
bias <- colMeans(b3$t) - b3$t0
se <- apply(b3$t, 2, sd)
boot.df3 <- data.frame(original=original, bias=bias, se=se)
row.names(boot.df3) <- c("AME_gender", "AME_ethnicity")
boot.df3$t <- (boot.df3$original / boot.df3$se)
round(boot.df3, 5)
## original bias se t
## AME_gender -4.23738 -1.02307 NA NA
## AME_ethnicity -6.24852 -2.96893 NA NA
R5 <- flexsurvreg(Surv((time -1), time, inactive) ~ ethnicity3 , data = df_ppf3, dist = "lognormal")
R5
R6 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender + ethnicity3 + uni + field2 + phd_cohort + npubs_prev_s, data = df_ppf3, dist = "lognormal")
R6
R7 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender + ethnicity3 + uni + field2 + phd_cohort + npubs_prev_s + phd_cohort*gender + phd_cohort*ethnicity2, data = df_ppf3, dist = "lognormal")
R7
AMEs Model 3 (no SEs)
dfmaj <- dfmin <- dfmen <- dfwomen <- df_ppf5
dfwomen$gender <- "women" # one dataset of all women
dfmen$gender <- "men" # one dataset of all men
dfmin$ethnicity3 <- "minority" # one dataset all minority
dfmaj$ethnicity3 <- "majority" # one dataset all majority
dfwomen$gender <- factor(dfwomen$gender, levels=levels(df_ppf5$gender))
dfmen$gender <- factor(dfmen$gender, levels=levels(df_ppf5$gender))
dfmin$ethnicity3 <- factor(dfmin$ethnicity3, levels=levels(df_ppf5$ethnicity3))
dfmaj$ethnicity3 <- factor(dfmaj$ethnicity3, levels=levels(df_ppf5$ethnicity3))
#calculate the predicted probabilities for gender
pw <- as.numeric(unlist(predict(R6, type="response", newdata=dfwomen)))
pm <- as.numeric(unlist(predict(R6, type="response", newdata=dfmen)))
# average marginal effects
AME_gender <- mean(pw - pm)
#calculate the predicted probabilities for ethnicity
pmn <- as.numeric(unlist(predict(R6, type="response", newdata=dfmin)))
pmj <- as.numeric(unlist(predict(R6, type="response", newdata=dfmaj)))
# average marginal effects
AME_ethnicity <- mean(pmn - pmj)
AME_gender
AME_ethnicity
AMEs Model 4 (no SEs)
dfmaj <- dfmin <- dfmen <- dfwomen <- df_ppf5
dfwomen$gender <- "women" # one dataset of all women
dfmen$gender <- "men" # one dataset of all men
dfmin$ethnicity3 <- "minority" # one dataset all minority
dfmaj$ethnicity3 <- "majority" # one dataset all majority
dfwomen$gender <- factor(dfwomen$gender, levels=levels(df_ppf5$gender))
dfmen$gender <- factor(dfmen$gender, levels=levels(df_ppf5$gender))
dfmin$ethnicity3 <- factor(dfmin$ethnicity3, levels=levels(df_ppf5$ethnicity3))
dfmaj$ethnicity3 <- factor(dfmaj$ethnicity3, levels=levels(df_ppf5$ethnicity3))
#calculate the predicted probabilities for gender
pw <- as.numeric(unlist(predict(R7, type="response", newdata=dfwomen)))
pm <- as.numeric(unlist(predict(R7, type="response", newdata=dfmen)))
# average marginal effects
AME_gender <- mean(pw - pm)
#calculate the predicted probabilities for ethnicity
pmn <- as.numeric(unlist(predict(R7, type="response", newdata=dfmin)))
pmj <- as.numeric(unlist(predict(R7, type="response", newdata=dfmaj)))
# average marginal effects
AME_ethnicity <- mean(pmn - pmj)
# cohort interactions
s <- 0.1
# copying our data into separate dataframes
dfwomencp <- dfwomencm <- dfmencp <- dfmencm <- df_ppf5
dfmincp <- dfmincm <- dfmajcp <- dfmajcm <- df_ppf5
# assigning gender to each of the dataframes
dfwomencp$gender <- dfwomencm$gender <- "women"
dfmencp$gender <- dfmencm$gender <- "men"
dfwomencp$gender <- factor(dfwomencp$gender, levels=levels(df_ppf5$gender))
dfwomencm$gender <- factor(dfwomencm$gender, levels=levels(df_ppf5$gender))
dfmencp$gender <- factor(dfmencp$gender, levels=levels(df_ppf5$gender))
dfmencm$gender <- factor(dfmencm$gender, levels=levels(df_ppf5$gender))
# assigning ethnicity to the dataframes
dfmincp$ethnicity3 <- dfmincm$ethnicity3 <- "minority"
dfmajcp$ethnicity3 <- dfmajcm$ethnicity3 <- "majority"
dfmincp$ethnicity3 <- factor(dfmincp$ethnicity3, levels=levels(df_ppf5$ethnicity3))
dfmincm$ethnicity3 <- factor(dfmincm$ethnicity3, levels=levels(df_ppf5$ethnicity3))
dfmajcp$ethnicity3 <- factor(dfmajcp$ethnicity3, levels=levels(df_ppf5$ethnicity3))
dfmajcm$ethnicity3 <- factor(dfmajcm$ethnicity3, levels=levels(df_ppf5$ethnicity3))
# adding/subtracting small changes PhD cohort
dfwomencp$phd_cohort <- dfmencp$phd_cohort <- df_ppf5$phd_cohort + s
dfwomencm$phd_cohort <- dfmencm$phd_cohort <- df_ppf5$phd_cohort - s
dfmajcp$phd_cohort <- dfmincp$phd_cohort <- df_ppf5$phd_cohort + s
dfmajcm$phd_cohort <- dfmincm$phd_cohort <- df_ppf5$phd_cohort - s
# calculating predicted probabilities
pwp <- as.numeric(unlist(predict(R7, type="response", newdata=dfwomencp)))
pwm <- as.numeric(unlist(predict(R7, type="response", newdata=dfwomencm)))
pmp <- as.numeric(unlist(predict(R7, type="response", newdata=dfmencp)))
pmm <- as.numeric(unlist(predict(R7, type="response", newdata=dfmencm)))
pnp <- as.numeric(unlist(predict(R7, type="response", newdata=dfmincp)))
pnm <- as.numeric(unlist(predict(R7, type="response", newdata=dfmincm)))
pjp <- as.numeric(unlist(predict(R7, type="response", newdata=dfmajcp)))
pjm <- as.numeric(unlist(predict(R7, type="response", newdata=dfmajcm)))
# marginal effects
AME_gendercoh <- mean(((pwp - pmp) - (pwm - pmm)) / (2*s))
AME_ethnicitycoh <- mean(((pnp - pjp) - (pnm - pjm)) / (2*s))
AME_gender
AME_gendercoh
AME_ethnicity
AME_ethnicitycoh
df_ppf3 %>%
group_by(time) %>%
summarise(event = sum(inactive),
total = n()) %>%
mutate(hazard = event/total) %>%
ggplot(aes(x=time, y = log(-log(1-hazard)))) +
geom_point() +
geom_smooth()
# C-log-log link
R8 <- glm(formula = inactive ~ time + gender,
family = binomial(link="cloglog"),
data = df_ppf3)
R9 <- glm(formula = inactive ~ time + ethnicity2,
family = binomial(link="cloglog"),
data = df_ppf3)
R10 <- glm(formula = inactive ~ time + gender + ethnicity2 + uni + field + phd_cohort + npubs_prev_s,
family = binomial(link="cloglog"),
data = df_ppf3)
R11 <- glm(formula = inactive ~ time + gender + ethnicity2 + uni + field + phd_cohort + npubs_prev_s + gender*phd_cohort + ethnicity2*phd_cohort,
family = binomial(link="cloglog"),
data = df_ppf3)
summary(R8, exp = T)
summary(R9, exp = T)
summary(R10, exp = T)
summary(R11, exp = T)
In cohort 3 (i.e. 1993), Figure 6 shows a divergent gender pattern: women tend to survive much longer than men in this specific cohort. Hence, the observed cohort effect might be due to this exceptional year. We look into this by removing this cohort and rerunning model 4.
# Removing cohort 3
df_nocoh2 <- df_ppf3[df_ppf3$phd_cohort!=2,]
M4_no2 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender + ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s + phd_cohort*gender + phd_cohort*ethnicity2, data = df_nocoh2, dist = "lognormal")
M4_no2
# Removing cohort 0-2
df_nocoh02 <- df_ppf3[df_ppf3$phd_cohort>2,]
M4_no02 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender + ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s + phd_cohort*gender + phd_cohort*ethnicity2, data = df_nocoh02, dist = "lognormal")
M4_no02
# Removing cohort 0-4
df_nocoh04 <- df_ppf3[df_ppf3$phd_cohort>4,]
M4_no04 <- flexsurvreg(Surv((time -1), time, inactive) ~ gender + ethnicity2 + uni + field2 + phd_cohort + npubs_prev_s + phd_cohort*gender + phd_cohort*ethnicity2, data = df_nocoh04, dist = "lognormal")
M4_no04
Copyright © 2023