Title: | Early Growth Genetics Longitudinal Analysis |
---|---|
Description: | Tools for longitudinal analysis within the EGG (Early Growth Genetics) Consortium (<http://egg-consortium.org/>). |
Authors: | Mickaël Canouil [aut, cre] , Nicole Warrington [aut] , Kimberley Burrows [aut] , Anni Heiskala [aut] |
Maintainer: | Mickaël Canouil <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.0.3 |
Built: | 2024-11-11 15:19:38 UTC |
Source: | https://github.com/mcanouil/eggla |
A dataset containing the age, sex, weight, height and BMI for 100 individuals. Measurements performed from birth to 17 years old.
bmigrowth
bmigrowth
A data frame with 1050 rows and 6 variables:
(character) ID using three digits.
(numeric) age in years.
(integer) sex with 1: male and 0: female.
(numeric) weight in kilograms.
(integer) height in centimetres.
(numeric) Body Mass Index in kilograms per quare metre.
Compute adiposity peak (AP) and adiposity rebound (AR).
compute_apar( fit, from = c("predicted", "observed"), start = 0.25, end = 10, step = 0.01, filter = NULL )
compute_apar( fit, from = c("predicted", "observed"), start = 0.25, end = 10, step = 0.01, filter = NULL )
fit |
A model object from a statistical model
such as from a call |
from |
A string indicating the type of data to be used for the AP and AR computation, either "predicted" or "observed". Default is "predicted". |
start |
The start of the time window to compute AP and AR. |
end |
The end of the time window to compute AP and AR. |
step |
The step to increment the sequence. |
filter |
A string following |
A data.table
object.
library(eggla) data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) head(compute_apar(fit = res, from = "predicted")[AP | AR]) # Comparing observed and predicted values library(data.table) library(ggplot2) library(patchwork) list_gg <- melt( data = rbindlist( l = lapply( X = (function(.x) `names<-`(.x, .x))(c("predicted", "observed")), FUN = compute_apar, fit = res ), idcol = "from" )[ AP | AR ][ j = what := fifelse(paste(AP, AR) %in% paste(FALSE, TRUE), "AR", "AP") ], id.vars = c("from", "egg_id", "what"), measure.vars = c("egg_ageyears", "egg_bmi") )[ j = list(gg = list({ dt <- dcast(data = .SD, formula = egg_id + what ~ from) range_xy <- range(dt[, c("observed", "predicted")], na.rm = TRUE) ggplot(data = dt) + aes(x = observed, y = predicted, colour = what) + geom_abline(intercept = 0, slope = 1) + geom_segment(aes(xend = observed, yend = observed), alpha = 0.5) + geom_point() + scale_colour_manual(values = c("#E69F00FF", "#56B4E9FF")) + labs( x = sprintf("Observed: %s", sub(".*_", "", toupper(variable))), y = sprintf("Predicted: %s", sub(".*_", "", toupper(variable))), colour = NULL, title = sub(".*_", "", toupper(variable)) ) + coord_cartesian(xlim = range_xy, ylim = range_xy) })), by = "variable" ] wrap_plots(list_gg[["gg"]], guides = "collect")
library(eggla) data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) head(compute_apar(fit = res, from = "predicted")[AP | AR]) # Comparing observed and predicted values library(data.table) library(ggplot2) library(patchwork) list_gg <- melt( data = rbindlist( l = lapply( X = (function(.x) `names<-`(.x, .x))(c("predicted", "observed")), FUN = compute_apar, fit = res ), idcol = "from" )[ AP | AR ][ j = what := fifelse(paste(AP, AR) %in% paste(FALSE, TRUE), "AR", "AP") ], id.vars = c("from", "egg_id", "what"), measure.vars = c("egg_ageyears", "egg_bmi") )[ j = list(gg = list({ dt <- dcast(data = .SD, formula = egg_id + what ~ from) range_xy <- range(dt[, c("observed", "predicted")], na.rm = TRUE) ggplot(data = dt) + aes(x = observed, y = predicted, colour = what) + geom_abline(intercept = 0, slope = 1) + geom_segment(aes(xend = observed, yend = observed), alpha = 0.5) + geom_point() + scale_colour_manual(values = c("#E69F00FF", "#56B4E9FF")) + labs( x = sprintf("Observed: %s", sub(".*_", "", toupper(variable))), y = sprintf("Predicted: %s", sub(".*_", "", toupper(variable))), colour = NULL, title = sub(".*_", "", toupper(variable)) ) + coord_cartesian(xlim = range_xy, ylim = range_xy) })), by = "variable" ] wrap_plots(list_gg[["gg"]], guides = "collect")
time_model()
.Compute area under the curves for "clubic slope", "linear splines" and "cubic splines"
fitted using time_model()
.
compute_aucs( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]] )
compute_aucs( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]] )
fit |
A model object from a statistical model such as
from a call to |
method |
The type of model provided in |
period |
The intervals knots on which AUCs are to be computed. |
knots |
The knots as defined |
A data.frame
with AUC for each individuals/samples.
data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) head(compute_aucs( fit = ls_mod, method = "linear_splines", period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17)#, # knots = list( # "cubic_slope" = NULL, # "linear_splines" = c(0.75, 5.5, 11), # "cubic_splines" = c(1, 8, 12) # )[[method]] ))
data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) head(compute_aucs( fit = ls_mod, method = "linear_splines", period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17)#, # knots = list( # "cubic_slope" = NULL, # "linear_splines" = c(0.75, 5.5, 11), # "cubic_splines" = c(1, 8, 12) # )[[method]] ))
time_model()
.Based on computed area under the curves (i.e., compute_aucs()
)
and slopes (i.e., compute_slopes()
) for several intervals using
a model fitted by time_model()
, compute the correlations between
each intervals derived parameters.
compute_correlations( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]] )
compute_correlations( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]] )
fit |
A model object from a statistical model such as
from a call to |
method |
The type of model provided in |
period |
The intervals knots on which AUCs are to be computed. |
knots |
The knots as defined |
A list
object with correlations between each intervals derived parameters.
data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) compute_correlations( fit = ls_mod, method = "linear_splines", period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17)#, # knots = list( # "cubic_slope" = NULL, # "linear_splines" = c(0.75, 5.5, 11), # "cubic_splines" = c(1, 8, 12) # )[[method]] )
data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) compute_correlations( fit = ls_mod, method = "linear_splines", period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17)#, # knots = list( # "cubic_slope" = NULL, # "linear_splines" = c(0.75, 5.5, 11), # "cubic_splines" = c(1, 8, 12) # )[[method]] )
time_model()
.Based on computed area under the curves (i.e., compute_aucs()
)
and slopes (i.e., compute_slopes()
) for several intervals using
a model fitted by time_model()
, compute an outlier detection.
For details, see methods iqr
and zscore
of performance::check_outliers()
.
compute_outliers( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]], from = c("predicted", "observed"), start = 0.25, end = 10, step = 0.01, filter = NULL, outlier_method = "iqr", outlier_threshold = list(iqr = 2) )
compute_outliers( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]], from = c("predicted", "observed"), start = 0.25, end = 10, step = 0.01, filter = NULL, outlier_method = "iqr", outlier_threshold = list(iqr = 2) )
fit |
A model object from a statistical model such as
from a call to |
method |
The type of model provided in |
period |
The intervals knots on which AUCs are to be computed. |
knots |
The knots as defined |
from |
A string indicating the type of data to be used for the AP and AR computation, either "predicted" or "observed". Default is "predicted". |
start |
The start of the time window to compute AP and AR. |
end |
The end of the time window to compute AP and AR. |
step |
The step to increment the sequence. |
filter |
A string following |
outlier_method |
The outlier detection method(s). Default is |
outlier_threshold |
A list containing the threshold values for each method (e.g.,
|
A data.frame
listing the individuals which are not outliers based on several criteria.
data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "cubic_splines" ) head(compute_outliers( fit = ls_mod, method = "cubic_splines", period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17)#, # knots = list( # "cubic_slope" = NULL, # "linear_splines" = c(0.75, 5.5, 11), # "cubic_splines" = c(1, 8, 12) # )[[method]] )[Outlier != 0])
data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "cubic_splines" ) head(compute_outliers( fit = ls_mod, method = "cubic_splines", period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17)#, # knots = list( # "cubic_slope" = NULL, # "linear_splines" = c(0.75, 5.5, 11), # "cubic_splines" = c(1, 8, 12) # )[[method]] )[Outlier != 0])
time_model()
.Comoute average slopes for "clubic slope", "linear splines" and "cubic splines"
fitted using time_model()
.
compute_slopes( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]] )
compute_slopes( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]] )
fit |
A model object from a statistical model such as from a call to |
method |
The type of model provided in |
period |
The intervals knots on which slopes are to be computed. |
knots |
The knots as defined |
A data.frame
with slopes for each individuals/samples.
data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) head(compute_slopes( fit = ls_mod, method = "linear_splines", period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17)#, # knots = list( # "cubic_slope" = NULL, # "linear_splines" = c(0.75, 5.5, 11), # "cubic_splines" = c(1, 8, 12) # )[[method]] ))
data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) head(compute_slopes( fit = ls_mod, method = "linear_splines", period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17)#, # knots = list( # "cubic_slope" = NULL, # "linear_splines" = c(0.75, 5.5, 11), # "cubic_splines" = c(1, 8, 12) # )[[method]] ))
egg_model()
.Derived areas under the curve (AUCs) for differentintervals based
on a fitted cubic splines mixed-effects model from egg_model()
.
This function is a specific version of compute_aucs
designed to work specifically on egg_model()
.
egg_aucs( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )
egg_aucs( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )
fit |
A model object from a statistical model
such as from a call to |
period |
The intervals knots on which AUCs are to be computed. |
knots |
The knots as defined |
A data.frame
with AUC for each individuals/samples.
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) head( egg_aucs( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) ) )
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) head( egg_aucs( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) ) )
egg_model()
.Based on computed area under the curves (i.e., egg_aucs()
)
and slopes (i.e., egg_slopes()
) for several intervals using
a model fitted by egg_model()
, compute the correlations between
each intervals derived parameters.
egg_correlations( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12), start = 0.25, end = 10, step = 0.01, filter = NULL )
egg_correlations( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12), start = 0.25, end = 10, step = 0.01, filter = NULL )
fit |
A model object from a statistical model
such as from a call to |
period |
The intervals knots on which slopes are to be computed. |
knots |
The knots as defined |
start |
The start of the time window to compute AP and AR. |
end |
The end of the time window to compute AP and AR. |
step |
The step to increment the sequence. |
filter |
A string following |
A data.table
object with correlations between each intervals derived parameters.
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) egg_correlations( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) egg_correlations( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )
Fit a cubic splines mixed model regression
with three splines parametrisation as random effect.
This function is a specific version of time_model()
.
egg_model( formula, data, id_var, random_complexity = "auto", use_car1 = FALSE, knots = c(1, 8, 12), quiet = FALSE )
egg_model( formula, data, id_var, random_complexity = "auto", use_car1 = FALSE, knots = c(1, 8, 12), quiet = FALSE )
formula |
An object of class "formula": a symbolic description of the model to be fitted with, time component as the first term in the right-hand side. |
data |
A data.frame containing the variables defined in formula. |
id_var |
A string indicating the name of the variable to be used as the individual identifier. |
random_complexity |
A numeric (1-3) indicating the complexity of the random effect term.
Default, |
use_car1 |
A logical indicating whether to use continuous auto-correlation, i.e., CAR(1) as correlation structure. |
knots |
The knots defining the splines. |
quiet |
A logical indicating whether to suppress the output. |
An object of class "lme" representing the linear mixed-effects model fit.
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) sres <- as.data.frame(summary(res)[["tTable"]]) rownames(sres) <- sub("gsp\\(.*\\)\\)", "gsp(...)", rownames(sres)) sres
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) sres <- as.data.frame(summary(res)[["tTable"]]) rownames(sres) <- sub("gsp\\(.*\\)\\)", "gsp(...)", rownames(sres)) sres
egg_model()
.Based on computed area under the curves (i.e., egg_aucs()
)
and slopes (i.e., egg_slopes()
) for several intervals using
a model fitted by egg_model()
, compute an outlier detection.
For details, see methods iqr
and zscore
of performance::check_outliers()
.
egg_outliers( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12), from = c("predicted", "observed"), start = 0.25, end = 10, step = 0.01, filter = NULL, outlier_method = "iqr", outlier_threshold = list(iqr = 2) )
egg_outliers( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12), from = c("predicted", "observed"), start = 0.25, end = 10, step = 0.01, filter = NULL, outlier_method = "iqr", outlier_threshold = list(iqr = 2) )
fit |
A model object from a statistical model
such as from a call to |
period |
The intervals knots on which slopes are to be computed. |
knots |
The knots as defined |
from |
A string indicating the type of data to be used for the AP and AR computation, either "predicted" or "observed". Default is "predicted". |
start |
The start of the time window to compute AP and AR. |
end |
The end of the time window to compute AP and AR. |
step |
The step to increment the sequence. |
filter |
A string following |
outlier_method |
The outlier detection method(s). Default is |
outlier_threshold |
A list containing the threshold values for each method (e.g.,
|
A data.frame
listing the individuals which are not outliers based on several criteria.
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) head(egg_outliers( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )[Outlier != 0])
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) head(egg_outliers( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )[Outlier != 0])
egg_model()
.Derived slopes for different intervals based on a fitted
cubic splines mixed-effects model from egg_model()
.
This function a specific version of compute_slopes
designed to work specifically on egg_model()
.
egg_slopes( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )
egg_slopes( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )
fit |
A model object from a statistical model
such as from a call to |
period |
The intervals knots on which slopes are to be computed. |
knots |
The knots as defined |
A data.frame
with slopes for each individuals/samples.
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) head( egg_slopes( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) ) )
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) head( egg_slopes( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) ) )
From https://github.com/gmonette/spida2 because namespace and dependencies are not properly listed. Source: https://github.com/gmonette/spida2/blob/master/R/gsp.R, https://github.com/gmonette/spida2/blob/master/R/gsp_util.R
gsp( x, knots, degree = 3, smoothness = pmax(pmin(degree[-1], degree[-length(degree)]) - 1, -1), lin = NULL, periodic = FALSE, intercept = 0, signif = 3 )
gsp( x, knots, degree = 3, smoothness = pmax(pmin(degree[-1], degree[-length(degree)]) - 1, -1), lin = NULL, periodic = FALSE, intercept = 0, signif = 3 )
x |
value(s) where spline is evaluated. |
knots |
vector of knots. |
degree |
vector giving the degree of the spline in each interval. Note
the number of intervals is equal to the number of knots + 1. A value of 0
corresponds to a constant in the interval. If the spline should evaluate to
0 in the interval, use the |
smoothness |
vector with the degree of smoothness at each knot
(0 = continuity, 1 = smoothness with continuous first derivative, 2 = continuous
second derivative, etc. The value -1 allows a discontinuity at the knot.
A scalar is recycled so its length equals the number of knots. Alternatively,
a list of length equal to the number of knots. Each element of the list is a
vector of the orders of derivatives which are required to be smooth. THis allows
non-sequential constraints, e.g., to have the same first and second derivative
on either side of a knot but a possible discontinuity and change in
higher-order derivatives, the vector would be c(1,2). Note that if a list is used,
all elements must provide all desired constraints. That is the list argument corresponding
to |
lin |
provides a matrix specifying additional linear contraints on the 'full' parametrization consisting of blocks of polynomials of degree equal to max(degree) in each of the length(knots)+1 intervals of the spline. See below for examples of a spline that is 0 outside of its boundary knots. |
periodic |
if TRUE generates a period spline on the base interval (0,max(knots)). A constraint is generated so that the coefficients generate the same values to the right of max(knots) as they do to the right of 0. Note that all knots should be strictly positive. |
intercept |
value(s) of x at which the spline has value 0, i.e., the value(s) of x for which yhat is estimated by the intercept term in the model. The default is 0. If NULL, the spline is not constrained to evaluate to 0 for any x. |
signif |
number of significant digits used to label coefficients. |
gsp
returns a matrix generating a spline.
Monette, G. [email protected]
simd <- data.frame( age = rep(1:50, 2), y = sin(2 * pi * (1:100) / 5) + rnorm(100), G = rep(c("male", "female"), c(50, 50)) ) sp <- function(x) { gsp(x, knots = c(10, 25, 40), degree = c(1, 2, 2, 1), smoothness = c(1, 1 ,1)) } summary(lm(formula = y ~ sp(age) * G, data = simd))
simd <- data.frame( age = rep(1:50, 2), y = sin(2 * pi * (1:100) / 5) + rnorm(100), G = rep(c("male", "female"), c(50, 50)) ) sp <- function(x) { gsp(x, knots = c(10, 25, 40), degree = c(1, 2, 2, 1), smoothness = c(1, 1 ,1)) } summary(lm(formula = y ~ sp(age) * G, data = simd))
time_model()
.Plot derived area under the curves from a model fitted by time_model()
.
plot_aucs( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]] )
plot_aucs( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]] )
fit |
A model object from a statistical model such as
from a call to |
method |
The type of model provided in |
period |
The intervals knots on which AUCs are to be computed. |
knots |
The knots as defined |
A patchwork
ggplot2
object.
library(ggplot2) library(eggla) data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) plot_aucs( fit = ls_mod, method = "linear_splines" )
library(ggplot2) library(eggla) data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) plot_aucs( fit = ls_mod, method = "linear_splines" )
egg_model()
.Plot derived area under the curves from a model fitted by egg_model()
.
plot_egg_aucs( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )
plot_egg_aucs( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )
fit |
A model object from a statistical model
such as from a call to |
period |
The intervals knots on which AUCs are to be computed. |
knots |
The knots as defined |
A patchwork
ggplot2
object.
library(ggplot2) library(eggla) data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) plot_egg_aucs( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17) )
library(ggplot2) library(eggla) data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) plot_egg_aucs( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17) )
egg_model()
.Plot derived slopes from a model fitted by egg_model()
.
plot_egg_slopes( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )
plot_egg_slopes( fit, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = c(1, 8, 12) )
fit |
A model object from a statistical model
such as from a call to |
period |
The intervals knots on which slopes are to be computed. |
knots |
The knots as defined |
A patchwork
ggplot2
object.
library(ggplot2) library(eggla) data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) plot_egg_slopes( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17) )
library(ggplot2) library(eggla) data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) plot_egg_slopes( fit = res, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17) )
Plot several residuals plots for diagnostics.
plot_residuals(x, y, fit)
plot_residuals(x, y, fit)
x |
A length one character vector with the main covariate name
(i.e., right-hand side), as defined in |
y |
A length one character vector with the variable name to be explained
(i.e., left-hand side), as defined in |
fit |
A model object from a statistical model
such as from a call |
A patchwork
ggplot2
object.
library(ggplot2) library(patchwork) library(eggla) data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) plot_residuals( x = "age", y = "log(bmi)", fit = res ) + plot_annotation( title = "Cubic Splines (Random Linear Splines) - BMI - Female", tag_levels = "A" )
library(ggplot2) library(patchwork) library(eggla) data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) plot_residuals( x = "age", y = "log(bmi)", fit = res ) + plot_annotation( title = "Cubic Splines (Random Linear Splines) - BMI - Female", tag_levels = "A" )
time_model()
.Plot derived slopes from a model fitted by time_model()
.
plot_slopes( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]] )
plot_slopes( fit, method, period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]] )
fit |
A model object from a statistical model such as
from a call to |
method |
The type of model provided in |
period |
The intervals knots on which AUCs are to be computed. |
knots |
The knots as defined |
A patchwork
ggplot2
object.
library(ggplot2) library(eggla) data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) plot_slopes( fit = ls_mod, method = "linear_splines" )
library(ggplot2) library(eggla) data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) plot_slopes( fit = ls_mod, method = "linear_splines" )
Predict BMI values a cubic splines mixed model regression
with three splines parametrisation as random effect.
This function also works for any model obtained using time_model()
.
predict_bmi(fit, start = 0.25, end = 10, step = 0.01, filter = NULL)
predict_bmi(fit, start = 0.25, end = 10, step = 0.01, filter = NULL)
fit |
A model object from a statistical model
such as from a call |
start |
The start of the time window to compute AP and AR. |
end |
The end of the time window to compute AP and AR. |
step |
The step to increment the sequence. |
filter |
A string following |
A data.table
object.
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) predict_bmi(res)[] ## For multiple sources of measures or multiple measures at one age set.seed(1234) dta <- bmigrowth[bmigrowth[["sex"]] == 0, ] dta[["source"]] <- c("A", "B")[rbinom(n = nrow(dta), size = 1, prob = 0.65) + 1] res <- egg_model( formula = log(bmi) ~ age + source, data = dta, id_var = "ID", random_complexity = 1 ) predict_bmi(res)[order(egg_id, egg_ageyears)] predict_bmi(res, filter = "source == 'A'")[order(egg_id, egg_ageyears)]
data("bmigrowth") res <- egg_model( formula = log(bmi) ~ age, data = bmigrowth[bmigrowth[["sex"]] == 0, ], id_var = "ID", random_complexity = 1 ) predict_bmi(res)[] ## For multiple sources of measures or multiple measures at one age set.seed(1234) dta <- bmigrowth[bmigrowth[["sex"]] == 0, ] dta[["source"]] <- c("A", "B")[rbinom(n = nrow(dta), size = 1, prob = 0.65) + 1] res <- egg_model( formula = log(bmi) ~ age + source, data = dta, id_var = "ID", random_complexity = 1 ) predict_bmi(res)[order(egg_id, egg_ageyears)] predict_bmi(res, filter = "source == 'A'")[order(egg_id, egg_ageyears)]
Format VCF file(s) by filtering out all variants not satisfaying "–min-alleles 2 –max-alleles 2 –types snps" and setting IDs (if no annotation file using VEP is provided) with "%CHROM:%POS:%REF:%ALT" (see https://samtools.github.io/bcftools/). GWAS is performed on the formatted VCF file(s) by PLINK2 software (https://www.cog-genomics.org/plink/2.0).
run_eggla_gwas( data, results, id_column, traits = c("slope_.*", "auc_.*", "^AP_.*", "^AR_.*"), covariates, vcfs, working_directory, vep_file = NULL, use_info = TRUE, bin_path = list(bcftools = "/usr/bin/bcftools", plink2 = "/usr/bin/plink2"), bcftools_view_options = NULL, build = "38", strand = "+", info_type = "IMPUTE2 info score via 'bcftools +impute-info'", threads = 1, quiet = FALSE, clean = TRUE )
run_eggla_gwas( data, results, id_column, traits = c("slope_.*", "auc_.*", "^AP_.*", "^AR_.*"), covariates, vcfs, working_directory, vep_file = NULL, use_info = TRUE, bin_path = list(bcftools = "/usr/bin/bcftools", plink2 = "/usr/bin/plink2"), bcftools_view_options = NULL, build = "38", strand = "+", info_type = "IMPUTE2 info score via 'bcftools +impute-info'", threads = 1, quiet = FALSE, clean = TRUE )
data |
Path to the phenotypes stored as a CSV file. |
results |
Paths to the zip archives or directories generated by |
id_column |
Name of the column where sample/individual IDs are stored. |
traits |
One or multiple traits, i.e., columns' names from |
covariates |
One or several covariates, i.e., columns' names from |
vcfs |
Path to the "raw" VCF file(s) containing the genotypes of the individuals to be analysed. |
working_directory |
Directory in which computation will occur and where output files will be saved. |
vep_file |
Path to the VEP annotation file to be used to set variants RSIDs and add gene SYMBOL, etc. |
use_info |
A logical indicating whether to extract all informations stored in the "INFO" field. |
bin_path |
A named list containing the path to the PLINK2 and BCFtools binaries For PLINK2, an URL to the binary can be provided (see https://www.cog-genomics.org/plink/2.0). |
bcftools_view_options |
A string or a vector of strings (which will be pass to |
build |
Build of the genome on which the SNP is orientated. Default is "38". |
strand |
Orientation of the site to the human genome strand used. Should be "+" (default). |
info_type |
Type of information provided in the INFO column, e.g., "IMPUTE2 info score via 'bcftools +impute-info'", |
threads |
Number of threads to be used by some BCFtools and PLINK2 commands. |
quiet |
A logical indicating whether to suppress the output. |
clean |
A logical indicating whether to clean intermediary files or not. |
Path to results file.
if (interactive()) { data("bmigrowth") bmigrowth_csv <- file.path(tempdir(), "bmigrowth.csv") fwrite( x = bmigrowth, file = bmigrowth_csv ) results_archives <- run_eggla_lmm( data = fread( file = file.path(tempdir(), "bmigrowth.csv"), colClasses = list(character = "ID") ), id_variable = "ID", age_days_variable = NULL, age_years_variable = "age", weight_kilograms_variable = "weight", height_centimetres_variable = "height", sex_variable = "sex", covariates = NULL, male_coded_zero = FALSE, random_complexity = 1, parallel = FALSE, parallel_n_chunks = 1, working_directory = tempdir() ) run_eggla_gwas( data = fread( file = file.path(tempdir(), "bmigrowth.csv"), colClasses = list(character = "ID") ), results = results_archives, id_column = "ID", traits = c("slope_.*", "auc_.*", "^AP_.*", "^AR_.*"), covariates = c("sex"), vcfs = list.files( path = system.file("vcf", package = "eggla"), pattern = "\\.vcf$|\\.vcf.gz$", full.names = TRUE ), working_directory = tempdir(), vep_file = NULL, bin_path = list( bcftools = "/usr/bin/bcftools", plink2 = "/usr/bin/plink2" ), threads = 1 ) }
if (interactive()) { data("bmigrowth") bmigrowth_csv <- file.path(tempdir(), "bmigrowth.csv") fwrite( x = bmigrowth, file = bmigrowth_csv ) results_archives <- run_eggla_lmm( data = fread( file = file.path(tempdir(), "bmigrowth.csv"), colClasses = list(character = "ID") ), id_variable = "ID", age_days_variable = NULL, age_years_variable = "age", weight_kilograms_variable = "weight", height_centimetres_variable = "height", sex_variable = "sex", covariates = NULL, male_coded_zero = FALSE, random_complexity = 1, parallel = FALSE, parallel_n_chunks = 1, working_directory = tempdir() ) run_eggla_gwas( data = fread( file = file.path(tempdir(), "bmigrowth.csv"), colClasses = list(character = "ID") ), results = results_archives, id_column = "ID", traits = c("slope_.*", "auc_.*", "^AP_.*", "^AR_.*"), covariates = c("sex"), vcfs = list.files( path = system.file("vcf", package = "eggla"), pattern = "\\.vcf$|\\.vcf.gz$", full.names = TRUE ), working_directory = tempdir(), vep_file = NULL, bin_path = list( bcftools = "/usr/bin/bcftools", plink2 = "/usr/bin/plink2" ), threads = 1 ) }
Perform Daymont's quality-control for BMI,
fit a cubic splines mixed model regression
with linear splines as random effect,
save model object, generates residuals figures fot model validity,
derived area under the curve and slopes for male and femal.
This function is a wrapper around egg_model()
, egg_slopes()
and egg_aucs()
.
run_eggla_lmm( data, id_variable, age_days_variable, age_years_variable, weight_kilograms_variable, height_centimetres_variable, sex_variable, covariates, male_coded_zero = FALSE, random_complexity = "auto", use_car1 = FALSE, knots = c(1, 8, 12), period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), start = 0.25, end = 10, step = 0.01, filter = NULL, outlier_method = "iqr", outlier_threshold = list(iqr = 2), outlier_exclude = TRUE, parallel = FALSE, parallel_n_chunks = 1, working_directory = getwd(), quiet = FALSE, clean = TRUE )
run_eggla_lmm( data, id_variable, age_days_variable, age_years_variable, weight_kilograms_variable, height_centimetres_variable, sex_variable, covariates, male_coded_zero = FALSE, random_complexity = "auto", use_car1 = FALSE, knots = c(1, 8, 12), period = c(0, 0.5, 1.5, 3.5, 6.5, 10, 12, 17), start = 0.25, end = 10, step = 0.01, filter = NULL, outlier_method = "iqr", outlier_threshold = list(iqr = 2), outlier_exclude = TRUE, parallel = FALSE, parallel_n_chunks = 1, working_directory = getwd(), quiet = FALSE, clean = TRUE )
data |
Phenotypes data that inherits from |
id_variable |
Name of the column where sample/individual IDs are stored. |
age_days_variable |
Name of the column where age in days is stored.
|
age_years_variable |
Name of the column where age in years is stored.
|
weight_kilograms_variable |
Name of the column where weight in kilograms is stored. |
height_centimetres_variable |
Name of the column where height in centimetres is stored. |
sex_variable |
Name of the column where sex is stored. |
covariates |
A vector of columns' names to be used as covariates.
|
male_coded_zero |
Is male coded "0" (and female coded "1")? |
random_complexity |
A numeric (1-3) indicating the complexity of the random effect term.
Default, |
use_car1 |
A logical indicating whether to use continuous auto-correlation, i.e., CAR(1) as correlation structure. |
knots |
The knots defining the splines. |
period |
The intervals knots on which slopes are to be computed. |
start |
The start of the time window to compute AP and AR. |
end |
The end of the time window to compute AP and AR. |
step |
The step to increment the sequence. |
filter |
A string following |
outlier_method |
The outlier detection method(s). Default is |
outlier_threshold |
A list containing the threshold values for each method (e.g.,
|
outlier_exclude |
Whether or not the values/individuals flagged as being outliers should be excluded.
Default is |
parallel |
Determines if |
parallel_n_chunks |
Specify the number of batches (in |
working_directory |
Directory in which computation will occur and where output files will be saved. |
quiet |
A logical indicating whether to suppress the output. |
clean |
A logical indicating whether to clean |
Path to zip archives.
if (interactive()) { data("bmigrowth") fwrite( x = bmigrowth, file = file.path(tempdir(), "bmigrowth.csv") ) res <- run_eggla_lmm( data = fread(file.path(tempdir(), "bmigrowth.csv")), id_variable = "ID", age_days_variable = NULL, age_years_variable = "age", weight_kilograms_variable = "weight", height_centimetres_variable = "height", sex_variable = "sex", covariates = NULL, random_complexity = 1, working_directory = tempdir() ) }
if (interactive()) { data("bmigrowth") fwrite( x = bmigrowth, file = file.path(tempdir(), "bmigrowth.csv") ) res <- run_eggla_lmm( data = fread(file.path(tempdir(), "bmigrowth.csv")), id_variable = "ID", age_days_variable = NULL, age_years_variable = "age", weight_kilograms_variable = "weight", height_centimetres_variable = "height", sex_variable = "sex", covariates = NULL, random_complexity = 1, working_directory = tempdir() ) }
Fit a mixed model regression with "cubic slope", "linear splines" or "cubic splines" as fixed and random effects.
time_model( x, y, cov = NULL, data, method = c("cubic_slope", "linear_splines", "cubic_splines"), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]], use_car1 = FALSE, id_var = "ID", quiet = FALSE )
time_model( x, y, cov = NULL, data, method = c("cubic_slope", "linear_splines", "cubic_splines"), knots = list(cubic_slope = NULL, linear_splines = c(0.75, 5.5, 11), cubic_splines = c(1, 8, 12))[[method]], use_car1 = FALSE, id_var = "ID", quiet = FALSE )
x |
A length one character vector with the main covariate name (i.e., right-hand side). |
y |
A length one character vector with the variable name to be explained (i.e., left-hand side). |
cov |
A vector of addtional/optional covariates names to included in the fixed effect part of the linear mixed-effects models. |
data |
A data.frame containing the variables named in |
method |
The type of model, i.e., one of |
knots |
The knots defining the splines for |
use_car1 |
A logical indicating whether to use continuous auto-correlation, i.e., CAR(1) as correlation structure. |
id_var |
A string indicating the name of the variable to be used as the individual identifier. |
quiet |
A logical indicating whether to suppress the output. |
An object of class "lme" representing the linear mixed-effects model fit.
data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) sres <- as.data.frame(summary(ls_mod)[["tTable"]]) rownames(sres) <- sub("gsp\\(.*\\)\\)", "gsp(...)", rownames(sres)) sres
data("bmigrowth") ls_mod <- time_model( x = "age", y = "log(bmi)", cov = NULL, data = bmigrowth[bmigrowth[["sex"]] == 0, ], method = "linear_splines" ) sres <- as.data.frame(summary(ls_mod)[["tTable"]]) rownames(sres) <- sub("gsp\\(.*\\)\\)", "gsp(...)", rownames(sres)) sres