From c0de3e94a5b56c6ee75c3f05c440f52e097482ac Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 23 Apr 2026 13:08:55 -0700 Subject: [PATCH 1/5] Use cheapr for masking and subsetting Add cheapr (>= 1.5.0) to Imports and refactor analyse-helpers.R to use cheapr utilities. The validity mask is now accumulated into a single logical vector, indices are computed with cheapr::which_, and subsetting is done with cheapr::sset. Scalar columns are assembled via cheapr::fast_df and concatenated with cheapr::col_c instead of repeated logical indexing and Reduce, simplifying the code and improving efficiency. --- DESCRIPTION | 3 ++- R/analyse-helpers.R | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6e72b9a..b6bea25 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,8 @@ Imports: pbmcapply, rhdf5, tibble, - tidyr + tidyr, + cheapr (>= 1.5.0) RoxygenNote: 7.3.3 Roxygen: list(markdown = TRUE) Suggests: diff --git a/R/analyse-helpers.R b/R/analyse-helpers.R index cb8ae44..e8186d5 100644 --- a/R/analyse-helpers.R +++ b/R/analyse-helpers.R @@ -426,7 +426,7 @@ response_vals <- scalars(ctx$modelarray)[[ctx$scalar]][i_element, ] # Start the validity mask with the response scalar - masks <- list(is.finite(response_vals)) + valid_mask <- is.finite(response_vals) # Read and reorder additional attached scalars scalar_values <- list() @@ -443,22 +443,25 @@ } scalar_values[[sname]] <- s_vals - masks[[length(masks) + 1L]] <- is.finite(s_vals) + valid_mask <- valid_mask & is.finite(s_vals) } # Intersection mask across all scalars - valid_mask <- Reduce("&", masks) - num_valid <- sum(valid_mask) + which_valid <- cheapr::which_(valid_mask) + num_valid <- length(which_valid) if (!(num_valid > num.subj.lthr)) { return(list(dat = NULL, sufficient = FALSE, num_valid = num_valid)) } # Build filtered data.frame - dat <- ctx$phenotypes[valid_mask, , drop = FALSE] - for (sname in ctx$attached_scalars) { - dat[[sname]] <- scalar_values[[sname]][valid_mask] - } + dat <- cheapr::sset(ctx$phenotypes, which_valid) + scalar_col_list <- lapply( + stats::setNames(ctx$attached_scalars, ctx$attached_scalars), + function(sname) scalar_values[[sname]][which_valid] + ) + scalar_cols <- cheapr::fast_df(.args = scalar_col_list) + dat <- cheapr::col_c(dat, scalar_cols) list(dat = dat, sufficient = TRUE, num_valid = num_valid) } From 46c7f28d99e4fc55ca4bf817368e13f84b674abe Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 23 Apr 2026 16:02:47 -0700 Subject: [PATCH 2/5] Update analyse.R Allow scalar to be inferred or passed. --- R/analyse.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/analyse.R b/R/analyse.R index 5b40c2b..531b7db 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -149,7 +149,11 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar = NULL, element.subs ...) { # Validation ---- .validate_modelarray_input(data) - scalar <- .resolve_formula_scalar(formula, data, scalar) + + if (is.null(scalar)){ + scalar <- .resolve_formula_scalar(formula, data, scalar) + } + element.subset <- .validate_element_subset(element.subset, data, scalar) phenotypes <- .align_phenotypes(data, phenotypes, scalar) @@ -474,7 +478,11 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar = NULL, element.sub ...) { # Validation ---- .validate_modelarray_input(data) - scalar <- .resolve_formula_scalar(formula, data, scalar) + + if (is.null(scalar)){ + scalar <- .resolve_formula_scalar(formula, data, scalar) + } + element.subset <- .validate_element_subset(element.subset, data, scalar) phenotypes <- .align_phenotypes(data, phenotypes, scalar) From 2182a0bbb631123122d28c6195c62b17e33a30d9 Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 23 Apr 2026 16:34:41 -0700 Subject: [PATCH 3/5] Use cheapr rep_len_ and fast_df Replace uses of rep(NaN, n) with cheapr::rep_len_(NaN, nL) and replace rbind+as.data.frame on result matrices with construction of column lists and cheapr::fast_df(.args = col_list). This improves performance, avoids unnecessary row-binding/coercion overhead, and ensures correct length typing. Changes touch R/ModelArray_Constructor.R (analyseOneElement.*) and R/analyse.R (ModelArray.lm, ModelArray.gam, ModelArray.wrap and reduced-model assembly). --- R/ModelArray_Constructor.R | 16 ++++++++-------- R/analyse.R | 32 ++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index 625a55e..0cc69ac 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -503,7 +503,7 @@ analyseOneElement.lm <- function(i_element, if (flag_initiate) { return(list(column_names = NaN, list.terms = NaN)) } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + onerow <- c(i_element - 1, cheapr::rep_len_(NaN, num.stat.output - 1L)) return(onerow) } } @@ -545,7 +545,7 @@ analyseOneElement.lm <- function(i_element, if (flag_initiate) { return(list(column_names = NaN, list.terms = NaN)) } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + onerow <- c(i_element - 1, cheapr::rep_len_(NaN, num.stat.output - 1L)) return(onerow) } } @@ -693,7 +693,7 @@ analyseOneElement.gam <- function(i_element, sp.criterion.attr.name = NaN )) } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + onerow <- c(i_element - 1, cheapr::rep_len_(NaN, num.stat.output - 1L)) return(onerow) } } @@ -739,7 +739,7 @@ analyseOneElement.gam <- function(i_element, sp.criterion.attr.name = NaN )) } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + onerow <- c(i_element - 1, cheapr::rep_len_(NaN, num.stat.output - 1L)) return(onerow) } } @@ -976,7 +976,7 @@ analyseOneElement.wrap <- function(i_element, if (flag_initiate) { return(list(column_names = NaN)) } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + onerow <- c(i_element - 1, cheapr::rep_len_(NaN, num.stat.output - 1L)) return(onerow) } } @@ -1019,7 +1019,7 @@ analyseOneElement.wrap <- function(i_element, if (flag_initiate) { return(list(column_names = NaN)) } else { - onerow <- c(i_element - 1, rep(NaN, (num.stat.output - 1))) + onerow <- c(i_element - 1, cheapr::rep_len_(NaN, num.stat.output - 1L)) return(onerow) } } @@ -1033,7 +1033,7 @@ analyseOneElement.wrap <- function(i_element, if (on_error == "skip" || on_error == "debug") { warning(paste0("analyseOneElement.wrap at element ", i_element, ": ", msg)) if (flag_initiate) return(list(column_names = NaN)) - return(c(i_element - 1, rep(NaN, (num.stat.output - 1)))) + return(c(i_element - 1, cheapr::rep_len_(NaN, num.stat.output - 1L))) } stop(msg) } @@ -1057,7 +1057,7 @@ analyseOneElement.wrap <- function(i_element, if (on_error == "skip" || on_error == "debug") { warning(paste0("analyseOneElement.wrap at element ", i_element, ": ", msg)) if (flag_initiate) return(list(column_names = NaN)) - return(c(i_element - 1, rep(NaN, (num.stat.output - 1)))) + return(c(i_element - 1, cheapr::rep_len_(NaN, num.stat.output - 1L))) } stop(msg) } diff --git a/R/analyse.R b/R/analyse.R index 531b7db..ad613b0 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -304,9 +304,10 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar = NULL, element.subs return(invisible(NULL)) } - df_out <- do.call(rbind, fits_all) - df_out <- as.data.frame(df_out) - colnames(df_out) <- column_names + result_mat <- do.call(rbind, fits_all) + col_list <- lapply(seq_len(ncol(result_mat)), function(j) result_mat[, j]) + names(col_list) <- column_names + df_out <- cheapr::fast_df(.args = col_list) df_out <- .correct_pvalues(df_out, list.terms, correct.p.value.terms, var.terms) df_out <- .correct_pvalues(df_out, "model", correct.p.value.model, var.model) @@ -703,9 +704,10 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar = NULL, element.sub return(invisible(NULL)) } - df_out <- do.call(rbind, fits_all) - df_out <- as.data.frame(df_out) - colnames(df_out) <- column_names + result_mat <- do.call(rbind, fits_all) + col_list <- lapply(seq_len(ncol(result_mat)), function(j) result_mat[, j]) + names(col_list) <- column_names + df_out <- cheapr::fast_df(.args = col_list) # P-value corrections ---- df_out <- .correct_pvalues(df_out, list.smoothTerms, correct.p.value.smoothTerms, var.smoothTerms) @@ -764,9 +766,13 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar = NULL, element.sub ... ) - reduced.model.df_out <- do.call(rbind, reduced.model.fits) - reduced.model.df_out <- as.data.frame(reduced.model.df_out) - colnames(reduced.model.df_out) <- reduced.model.column_names + result_mat_reduced <- do.call(rbind, reduced.model.fits) + col_list_reduced <- lapply( + seq_len(ncol(result_mat_reduced)), + function(j) result_mat_reduced[, j] + ) + names(col_list_reduced) <- reduced.model.column_names + reduced.model.df_out <- cheapr::fast_df(.args = col_list_reduced) ## Compute delta adj R-sq and partial R-sq ---- delta_col <- paste0(changed.rsq.term.shortFormat, ".delta.adj.rsq") @@ -1112,8 +1118,10 @@ ModelArray.wrap <- function(FUN, data, phenotypes, scalar, element.subset = NULL return(invisible(NULL)) } - df_out <- do.call(rbind, fits_all) - df_out <- as.data.frame(df_out) - colnames(df_out) <- column_names + # Better proposed change: + result_mat <- do.call(rbind, fits_all) + col_list <- lapply(seq_len(ncol(result_mat)), function(j) result_mat[, j]) + names(col_list) <- column_names + df_out <- cheapr::fast_df(.args = col_list) df_out } From 6b3d679630f9bbdf33e39a5ea695633c36a2f445 Mon Sep 17 00:00:00 2001 From: araikes Date: Sun, 26 Apr 2026 16:56:18 -0700 Subject: [PATCH 4/5] Update analyse.R Revert change so that tests pass. --- R/analyse.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/R/analyse.R b/R/analyse.R index ad613b0..09e3620 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -150,9 +150,7 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar = NULL, element.subs # Validation ---- .validate_modelarray_input(data) - if (is.null(scalar)){ - scalar <- .resolve_formula_scalar(formula, data, scalar) - } + scalar <- .resolve_formula_scalar(formula, data, scalar) element.subset <- .validate_element_subset(element.subset, data, scalar) phenotypes <- .align_phenotypes(data, phenotypes, scalar) @@ -480,9 +478,7 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar = NULL, element.sub # Validation ---- .validate_modelarray_input(data) - if (is.null(scalar)){ - scalar <- .resolve_formula_scalar(formula, data, scalar) - } + scalar <- .resolve_formula_scalar(formula, data, scalar) element.subset <- .validate_element_subset(element.subset, data, scalar) phenotypes <- .align_phenotypes(data, phenotypes, scalar) From ca73ef91ff9b49dad70c9f9ef53b813b7efc7c13 Mon Sep 17 00:00:00 2001 From: araikes Date: Sun, 26 Apr 2026 16:56:44 -0700 Subject: [PATCH 5/5] Set scalar default to NULL in Rd docs --- man/ModelArray.gam.Rd | 2 +- man/ModelArray.lm.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/man/ModelArray.gam.Rd b/man/ModelArray.gam.Rd index c662f91..e08e336 100644 --- a/man/ModelArray.gam.Rd +++ b/man/ModelArray.gam.Rd @@ -8,7 +8,7 @@ ModelArray.gam( formula, data, phenotypes, - scalar, + scalar = NULL, element.subset = NULL, full.outputs = FALSE, var.smoothTerms = c("statistic", "p.value"), diff --git a/man/ModelArray.lm.Rd b/man/ModelArray.lm.Rd index 20c55e9..af6dd6d 100644 --- a/man/ModelArray.lm.Rd +++ b/man/ModelArray.lm.Rd @@ -8,7 +8,7 @@ ModelArray.lm( formula, data, phenotypes, - scalar, + scalar = NULL, element.subset = NULL, full.outputs = FALSE, var.terms = c("estimate", "statistic", "p.value"),