This lab journal demonstrates how we treat the association between PhD cohort and ‘starting to publish’ flexibly.


Custom functions

  • fpackage.check: Check if packages are installed (and install if not) in R (source).
fpackage.check <- function(packages) {
    lapply(packages, FUN = function(x) {
        if (!require(x, character.only = TRUE)) {
            install.packages(x, dependencies = TRUE)
            library(x, character.only = TRUE)
        }
    })
}

Packages

packages = c("tidyverse", "ggplot2", "ggpubr", "RColorBrewer", "margins", "splines", "splines2", "boot",
    "kableExtra", "DescTools")

fpackage.check(packages)

Input

We use one processed dataset.

load(file = "./data/processed/df_starting.rda")

Given that the probability to start publishing appears to depend non-linearly on PhD cohort (see below), we attempt to deal with the effect of cohort a bit more flexibly. To do so, we uses splines.

figure3
figure3
figure4
figure4

Model 3: Gender, ethnicity and controls

(Non-)linear effects of PhD cohort

We use splines instead of polynomials, because cubic effects of cohort on ‘starting to publish’ do not capture the trend very accurately, but using higher order polynomials is not advisable. Therefore, we investigate the use of splines with several knots, determining the splines based on a spline-only model.

# no nodes
test1 <- glm(start_pub ~ ns(phd_cohort, 3), family = binomial, data = df_starting)

test2 <- glm(start_pub ~ bSpline(phd_cohort, df = 3), family = binomial, data = df_starting)

knots5 <- quantile(df_starting$phd_cohort, probs = seq(0, 1, 0.25))[2:4]
test3 <- glm(start_pub ~ bSpline(phd_cohort, knots = knots5, df = 3), family = binomial, data = df_starting)

knots6 <- quantile(df_starting$phd_cohort, probs = seq(0, 1, 0.2))[2:5]
test4 <- glm(start_pub ~ bSpline(phd_cohort, knots = knots6, df = 3), family = binomial, data = df_starting)

test5 <- glm(start_pub ~ ns(phd_cohort, knots = knots5, df = 3), family = binomial, data = df_starting)

test6 <- glm(start_pub ~ ns(phd_cohort, knots = knots6, df = 3), family = binomial, data = df_starting)
res <- AIC(test1, test2, test3, test4, test5, test6)
res[order(res$AIC), ]
#>       df      AIC
#> test4  8 81571.21
#> test3  7 81591.71
#> test6  6 81605.42
#> test5  5 81615.85
#> test1  4 81642.83
#> test2  4 81651.75

Let us visually inspect the top3.

df_starting %>%
    group_by(phd_cohort) %>%
    summarise(y = mean(as.numeric(start_pub))) -> df_starting2

nd <- data.frame(phd_cohort = c(0:29))
nd$y1 <- predict(test4, type = "response", newdata = nd)
nd$y2 <- predict(test3, type = "response", newdata = nd)
nd$y3 <- predict(test6, type = "response", newdata = nd)

ggplot() + geom_line(data = df_starting2, aes(x = phd_cohort, y = y)) + geom_point(data = df_starting2,
    aes(x = phd_cohort, y = y)) + geom_line(data = nd, aes(x = phd_cohort, y = y1), col = "red") + geom_point(data = nd,
    aes(x = phd_cohort, y = y1), col = "red") + geom_line(data = nd, aes(x = phd_cohort, y = y2), col = "blue") +
    geom_point(data = nd, aes(x = phd_cohort, y = y2), col = "blue") + geom_line(data = nd, aes(x = phd_cohort,
    y = y3), col = "purple") + geom_point(data = nd, aes(x = phd_cohort, y = y3), col = "purple")

They all more or less predict the trend. We go for the blue line, the following model

knots5 <- quantile(df_starting$phd_cohort, probs=seq(0,1,0.25))[2:4]
test3 <- glm(start_pub ~ bSpline(phd_cohort, knots=knots5,df=3), family = binomial, data = df_starting)

This would imply we go for the following model 3:

glm(start_pub ~ gender + ethnicity2 + uni + bSpline(phd_cohort, knots=knots5,df=3), data = df_starting, family = binomial)
LS0tDQp0aXRsZTogIkFwcGVuZGl4IDE6IHNwbGluZXMnIg0KZGF0ZTogIkxhc3QgY29tcGlsZWQgb24gYHIgZm9ybWF0KFN5cy50aW1lKCksICclQiwgJVknKWAiDQpvdXRwdXQ6IA0KICBodG1sX2RvY3VtZW50Og0KICAgIGNzczogdHdlYWtzLmNzcw0KICAgIHRvYzogIHRydWUNCiAgICB0b2NfZmxvYXQ6IHRydWUNCiAgICBudW1iZXJfc2VjdGlvbnM6IGZhbHNlDQogICAgY29kZV9mb2xkaW5nOiBzaG93DQogICAgY29kZV9kb3dubG9hZDogeWVzDQoNCi0tLQ0KDQoNCg0KYGBge3IsIGdsb2JhbHNldHRpbmdzLCBlY2hvPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCByZXN1bHRzPSJoaWRlIn0NCg0KbGlicmFyeShrbml0cikNCm9wdHNfY2h1bmskc2V0KHRpZHkub3B0cz1saXN0KHdpZHRoLmN1dG9mZj0xMDApLHRpZHk9VFJVRSwgd2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UsY29tbWVudCA9ICIjPiIsIGNhY2hlPVRSVUUsIGNsYXNzLnNvdXJjZT1jKCJ0ZXN0IiksIGNsYXNzLm91dHB1dD1jKCJ0ZXN0MiIpLCBjYWNoZS5sYXp5ID0gRkFMU0UpDQpvcHRpb25zKHdpZHRoID0gMTAwKQ0KcmdsOjpzZXR1cEtuaXRyKCkNCg0KY29sb3JpemUgPC0gZnVuY3Rpb24oeCwgY29sb3IpIHtzcHJpbnRmKCI8c3BhbiBzdHlsZT0nY29sb3I6ICVzOyc+JXM8L3NwYW4+IiwgY29sb3IsIHgpIH0NCg0KYGBgDQoNCmBgYHtyIGtsaXBweSwgZWNobz1GQUxTRSwgaW5jbHVkZT1UUlVFLCBldmFsPVRSVUV9DQprbGlwcHk6OmtsaXBweShwb3NpdGlvbiA9IGMoJ3RvcCcsICdyaWdodCcpKQ0KI2tsaXBweTo6a2xpcHB5KGNvbG9yID0gJ2RhcmtyZWQnKQ0KI2tsaXBweTo6a2xpcHB5KHRvb2x0aXBfbWVzc2FnZSA9ICdDbGljayB0byBjb3B5JywgdG9vbHRpcF9zdWNjZXNzID0gJ0RvbmUnKQ0KYGBgDQoNCg0KLS0tLQ0KDQpUaGlzIGxhYiBqb3VybmFsIGRlbW9uc3RyYXRlcyBob3cgd2UgdHJlYXQgdGhlIGFzc29jaWF0aW9uIGJldHdlZW4gUGhEIGNvaG9ydCBhbmQgJ3N0YXJ0aW5nIHRvIHB1Ymxpc2gnIGZsZXhpYmx5LiANCiAgDQoNCi0tLS0NCg0KYGBge3IsIGVjaG89RkFMU0V9DQoNCnJtKGxpc3QgPSBscygpKQ0KDQpgYGANCg0KDQoNCiMgQ3VzdG9tIGZ1bmN0aW9ucw0KDQotIGBmcGFja2FnZS5jaGVja2A6IENoZWNrIGlmIHBhY2thZ2VzIGFyZSBpbnN0YWxsZWQgKGFuZCBpbnN0YWxsIGlmIG5vdCkgaW4gUiAoW3NvdXJjZV0oaHR0cHM6Ly92YmFsaWdhLmdpdGh1Yi5pby92ZXJpZnktdGhhdC1yLXBhY2thZ2VzLWFyZS1pbnN0YWxsZWQtYW5kLWxvYWRlZC8pKS4gIA0KDQoNCmBgYHtyLCByZXN1bHRzPSdoaWRlJ30NCg0KZnBhY2thZ2UuY2hlY2sgPC0gZnVuY3Rpb24ocGFja2FnZXMpIHsNCiAgbGFwcGx5KHBhY2thZ2VzLCBGVU4gPSBmdW5jdGlvbih4KSB7DQogICAgaWYgKCFyZXF1aXJlKHgsIGNoYXJhY3Rlci5vbmx5ID0gVFJVRSkpIHsNCiAgICAgIGluc3RhbGwucGFja2FnZXMoeCwgZGVwZW5kZW5jaWVzID0gVFJVRSkNCiAgICAgIGxpYnJhcnkoeCwgY2hhcmFjdGVyLm9ubHkgPSBUUlVFKQ0KICAgIH0NCiAgfSkNCn0NCg0KDQpgYGANCg0KDQotLS0gIA0KDQojIFBhY2thZ2VzDQoNCmBgYHtyLCByZXN1bHRzPSdoaWRlJ30NCg0KcGFja2FnZXMgPSBjKCJ0aWR5dmVyc2UiLCJnZ3Bsb3QyIiwgImdncHViciIsIlJDb2xvckJyZXdlciIsICJtYXJnaW5zIiwgInNwbGluZXMiLCAic3BsaW5lczIiLCAiYm9vdCIsICJrYWJsZUV4dHJhIiwgIkRlc2NUb29scyIpDQoNCmZwYWNrYWdlLmNoZWNrKHBhY2thZ2VzKQ0KDQpgYGANCg0KDQotLS0gDQoNCiMgSW5wdXQNCg0KDQoNCldlIHVzZSBvbmUgcHJvY2Vzc2VkIGRhdGFzZXQuDQoNCiogW2RmX3N0YXJ0aW5nLnJkYV0oaHR0cHM6Ly9naXRodWIuY29tL2FtbXVsZGVycy9hbWF0dGVyb2Z0aW1lL2RhdGEvcHJvY2Vzc2VkL2RmX3N0YXJ0aW5nLnJkYSk6IGRhdGFzZXQgb2YgUGhEcyB3aXRoIGFsbCByZWxldmFudCB2YXJpYWJsZXM6IGdlbmRlciArIGV0aG5pY2l0eSArIHVuaXZlcnNpdHkgKyBQaEQgeWVhciAgDQogICAgLSBGb3IgY29uc3RydWN0aW9uIG9mIHRoaXMgZGF0YXNldCBzZWUgW0RlcGVuZGVudCBWYXJpYWJsZXMgOiBTdGFydGluZyBhbmQgU3RvcHBpbmcgdG8gUHVibGlzaF0oZGF0YXByZXBhcmF0aW9uLmh0bWwpDQogICAgLSBuYW1lIG9mIGRhdGFzZXQ6IGBkZl9zdGFydGluZ2AgDQogICAgDQoNCmBgYHtyIGRhdGF9DQoNCmxvYWQoZmlsZSA9ICIuL2RhdGEvcHJvY2Vzc2VkL2RmX3N0YXJ0aW5nLnJkYSIpDQoNCmBgYA0KDQoNCi0tLS0NCg0KR2l2ZW4gdGhhdCB0aGUgcHJvYmFiaWxpdHkgdG8gc3RhcnQgcHVibGlzaGluZyBhcHBlYXJzIHRvIGRlcGVuZCBub24tbGluZWFybHkgb24gUGhEIGNvaG9ydCAoc2VlIGJlbG93KSwgd2UgYXR0ZW1wdCB0byBkZWFsIHdpdGggdGhlIGVmZmVjdCBvZiBjb2hvcnQgYSBiaXQgbW9yZSBmbGV4aWJseS4gVG8gZG8gc28sIHdlIHVzZXMgc3BsaW5lcy4gDQoNCiFbZmlndXJlM10oLi9vdXRwdXQvc3RhcnRpbmcvcGxvdDMuanBnKQ0KDQohW2ZpZ3VyZTRdKC4vb3V0cHV0L3N0YXJ0aW5nL3Bsb3Q0LmpwZykNCg0KDQojIE1vZGVsIDM6IEdlbmRlciwgZXRobmljaXR5IGFuZCBjb250cm9scw0KDQojIyAoTm9uLSlsaW5lYXIgZWZmZWN0cyBvZiBQaEQgY29ob3J0DQoNCldlIHVzZSBzcGxpbmVzIGluc3RlYWQgb2YgcG9seW5vbWlhbHMsIGJlY2F1c2UgY3ViaWMgZWZmZWN0cyBvZiBjb2hvcnQgb24gJ3N0YXJ0aW5nIHRvIHB1Ymxpc2gnIGRvIG5vdCBjYXB0dXJlIHRoZSB0cmVuZCB2ZXJ5IGFjY3VyYXRlbHksIGJ1dCB1c2luZyBoaWdoZXIgb3JkZXIgcG9seW5vbWlhbHMgaXMgbm90IGFkdmlzYWJsZS4gVGhlcmVmb3JlLCB3ZSBpbnZlc3RpZ2F0ZSB0aGUgdXNlIG9mIHNwbGluZXMgd2l0aCBzZXZlcmFsIGtub3RzLCBkZXRlcm1pbmluZyB0aGUgc3BsaW5lcyBiYXNlZCBvbiBhIHNwbGluZS1vbmx5IG1vZGVsLiANCg0KYGBge3J9DQoNCiNubyBub2Rlcw0KdGVzdDEgPC0gZ2xtKHN0YXJ0X3B1YiB+IG5zKHBoZF9jb2hvcnQsIDMpLCBmYW1pbHkgPSBiaW5vbWlhbCwgZGF0YSA9IGRmX3N0YXJ0aW5nKQ0KDQp0ZXN0MiA8LSBnbG0oc3RhcnRfcHViIH4gIGJTcGxpbmUocGhkX2NvaG9ydCwgZGY9MyksIGZhbWlseSA9IGJpbm9taWFsLCBkYXRhID0gZGZfc3RhcnRpbmcpDQoNCmtub3RzNSA8LSBxdWFudGlsZShkZl9zdGFydGluZyRwaGRfY29ob3J0LCBwcm9icz1zZXEoMCwxLDAuMjUpKVsyOjRdDQp0ZXN0MyA8LSBnbG0oc3RhcnRfcHViIH4gYlNwbGluZShwaGRfY29ob3J0LCBrbm90cz1rbm90czUsZGY9MyksIGZhbWlseSA9IGJpbm9taWFsLCBkYXRhID0gZGZfc3RhcnRpbmcpDQoNCmtub3RzNiA8LSBxdWFudGlsZShkZl9zdGFydGluZyRwaGRfY29ob3J0LCBwcm9icz1zZXEoMCwxLDAuMjApKVsyOjVdDQp0ZXN0NCA8LSBnbG0oc3RhcnRfcHViIH4gYlNwbGluZShwaGRfY29ob3J0LCBrbm90cz1rbm90czYsZGY9MyksIGZhbWlseSA9IGJpbm9taWFsLCBkYXRhID0gZGZfc3RhcnRpbmcpDQoNCnRlc3Q1IDwtIGdsbShzdGFydF9wdWIgfiBucyhwaGRfY29ob3J0LCBrbm90cz1rbm90czUsZGY9MyksIGZhbWlseSA9IGJpbm9taWFsLCBkYXRhID0gZGZfc3RhcnRpbmcpDQoNCnRlc3Q2IDwtIGdsbShzdGFydF9wdWIgfiBucyhwaGRfY29ob3J0LCBrbm90cz1rbm90czYsZGY9MyksIGZhbWlseSA9IGJpbm9taWFsLCBkYXRhID0gZGZfc3RhcnRpbmcpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KcmVzIDwtIEFJQyh0ZXN0MSwgdGVzdDIsIHRlc3QzLCB0ZXN0NCwgdGVzdDUsIHRlc3Q2KQ0KcmVzW29yZGVyKHJlcyRBSUMpLF0NCmBgYA0KDQoNCkxldCB1cyB2aXN1YWxseSBpbnNwZWN0IHRoZSB0b3AzLiANCmBgYHtyfQ0KDQpkZl9zdGFydGluZyAlPiUgDQogIGdyb3VwX2J5KHBoZF9jb2hvcnQpICU+JSANCiAgc3VtbWFyaXNlKHkgPSBtZWFuKGFzLm51bWVyaWMoc3RhcnRfcHViKSkpIC0+IGRmX3N0YXJ0aW5nMg0KDQpuZCA8LSBkYXRhLmZyYW1lKHBoZF9jb2hvcnQ9YygwOjI5KSkNCm5kJHkxIDwtIHByZWRpY3QodGVzdDQsIHR5cGU9InJlc3BvbnNlIiwgbmV3ZGF0YSA9IG5kKQ0KbmQkeTIgPC0gcHJlZGljdCh0ZXN0MywgdHlwZT0icmVzcG9uc2UiLCBuZXdkYXRhID0gbmQpDQpuZCR5MyA8LSBwcmVkaWN0KHRlc3Q2LCB0eXBlPSJyZXNwb25zZSIsIG5ld2RhdGEgPSBuZCkNCg0KZ2dwbG90KCkgKw0KICBnZW9tX2xpbmUoZGF0YT1kZl9zdGFydGluZzIsIGFlcyh4PXBoZF9jb2hvcnQsIHk9eSkpICsNCiAgZ2VvbV9wb2ludChkYXRhPWRmX3N0YXJ0aW5nMiwgYWVzKHg9cGhkX2NvaG9ydCwgeT15KSkgKw0KICBnZW9tX2xpbmUoZGF0YT1uZCwgYWVzKHg9cGhkX2NvaG9ydCwgeT15MSksIGNvbD0icmVkIikgKw0KICBnZW9tX3BvaW50KGRhdGE9bmQsIGFlcyh4PXBoZF9jb2hvcnQsIHk9eTEpLCBjb2w9InJlZCIpICsgDQogIGdlb21fbGluZShkYXRhPW5kLCBhZXMoeD1waGRfY29ob3J0LCB5PXkyKSwgY29sPSJibHVlIikgKw0KICBnZW9tX3BvaW50KGRhdGE9bmQsIGFlcyh4PXBoZF9jb2hvcnQsIHk9eTIpLCBjb2w9ImJsdWUiKSArDQogIGdlb21fbGluZShkYXRhPW5kLCBhZXMoeD1waGRfY29ob3J0LCB5PXkzKSwgY29sPSJwdXJwbGUiKSArDQogIGdlb21fcG9pbnQoZGF0YT1uZCwgYWVzKHg9cGhkX2NvaG9ydCwgeT15MyksIGNvbD0icHVycGxlIikNCiAgDQpgYGANCg0KVGhleSBhbGwgbW9yZSBvciBsZXNzIHByZWRpY3QgdGhlIHRyZW5kLiBXZSBnbyBmb3IgdGhlIGJsdWUgbGluZSwgdGhlIGZvbGxvd2luZyBtb2RlbA0KDQpgYGANCmtub3RzNSA8LSBxdWFudGlsZShkZl9zdGFydGluZyRwaGRfY29ob3J0LCBwcm9icz1zZXEoMCwxLDAuMjUpKVsyOjRdDQp0ZXN0MyA8LSBnbG0oc3RhcnRfcHViIH4gYlNwbGluZShwaGRfY29ob3J0LCBrbm90cz1rbm90czUsZGY9MyksIGZhbWlseSA9IGJpbm9taWFsLCBkYXRhID0gZGZfc3RhcnRpbmcpDQpgYGANCg0KDQpUaGlzIHdvdWxkIGltcGx5IHdlIGdvIGZvciB0aGUgZm9sbG93aW5nIG1vZGVsIDM6DQoNCmBgYA0KZ2xtKHN0YXJ0X3B1YiB+IGdlbmRlciArIGV0aG5pY2l0eTIgKyB1bmkgKyBiU3BsaW5lKHBoZF9jb2hvcnQsIGtub3RzPWtub3RzNSxkZj0zKSwgZGF0YSA9IGRmX3N0YXJ0aW5nLCBmYW1pbHkgPSBiaW5vbWlhbCkNCg0KYGBgDQoNCg==


Copyright © 2023