32 `node1` from `function`

The examples in Chapter 32 require (see the explanation of the function name conflict in Section 7.4)

library(maxEff)
# Loading required package: groupedHyperframe
# Registered S3 method overwritten by 'pROC':
#   method   from            
#   plot.roc spatstat.explore

search path & loadedNamespaces on author’s computer

search()
#  [1] ".GlobalEnv"                "package:maxEff"            "package:groupedHyperframe" "package:stats"             "package:graphics"          "package:grDevices"         "package:utils"            
#  [8] "package:datasets"          "package:methods"           "Autoloads"                 "package:base"
loadedNamespaces() |> sort.int()
#   [1] "abind"             "base"              "caret"             "class"             "cli"               "cluster"           "codetools"         "compiler"          "data.table"       
#  [10] "datasets"          "deldir"            "digest"            "doParallel"        "dplyr"             "evaluate"          "farver"            "fastmap"           "fastmatrix"       
#  [19] "foreach"           "future"            "future.apply"      "generics"          "geomtextpath"      "GET"               "ggplot2"           "globals"           "glue"             
#  [28] "goftest"           "gower"             "graphics"          "grDevices"         "grid"              "gridExtra"         "groupedHyperframe" "gtable"            "hardhat"          
#  [37] "htmltools"         "htmlwidgets"       "ipred"             "iterators"         "jsonlite"          "knitr"             "lattice"           "lava"              "lifecycle"        
#  [46] "listenv"           "lubridate"         "magrittr"          "MASS"              "Matrix"            "matrixStats"       "maxEff"            "methods"           "ModelMetrics"     
#  [55] "nlme"              "nnet"              "otel"              "parallel"          "parallelly"        "patchwork"         "pillar"            "pkgconfig"         "plyr"             
#  [64] "polyclip"          "pracma"            "pROC"              "prodlim"           "purrr"             "R6"                "RColorBrewer"      "Rcpp"              "recipes"          
#  [73] "reshape2"          "rlang"             "rmarkdown"         "rpart"             "rstudioapi"        "S7"                "scales"            "SpatialPack"       "spatstat.data"    
#  [82] "spatstat.explore"  "spatstat.geom"     "spatstat.random"   "spatstat.sparse"   "spatstat.univar"   "spatstat.utils"    "splines"           "stats"             "stats4"           
#  [91] "stringi"           "stringr"           "survival"          "systemfonts"       "tensor"            "textshaping"       "tibble"            "tidyselect"        "timechange"       
# [100] "timeDate"          "tools"             "utils"             "vctrs"             "viridisLite"       "withr"             "xfun"              "yaml"

Function maxEff::node1() (v0.2.1) creates a dichotomizing rule $\mathcal{D}$ of a numeric or factor predictor using the first node of a recursive partitioning and regression tree (rpart.object), and returns an object of S3 class 'node1', which inherits from the S3 class 'function'.

Listing 32.1 shows the S3 methods rpart::*.rpart (Therneau and Atkinson 2025, v4.1.24),

Listing 32.1: S3 methods rpart::*.rpart

Code

suppressPackageStartupMessages(library(rpart))
.S3methods(class = 'rpart', all.names = TRUE) |> 
  attr(which = 'info', exact = TRUE)
#                   visible                from     generic  isS4
# labels.rpart        FALSE registered S3method      labels FALSE
# meanvar.rpart       FALSE registered S3method     meanvar FALSE
# model.frame.rpart   FALSE registered S3method model.frame FALSE
# plot.rpart          FALSE registered S3method        plot FALSE
# post.rpart          FALSE registered S3method        post FALSE
# predict.rpart       FALSE registered S3method     predict FALSE
# print.rpart         FALSE registered S3method       print FALSE
# prune.rpart          TRUE               rpart       prune FALSE
# residuals.rpart     FALSE registered S3method   residuals FALSE
# summary.rpart       FALSE registered S3method     summary FALSE
# text.rpart          FALSE registered S3method        text FALSE

Listing 32.2 shows the exported S3 methods base::*.function and graphics::*.function in R version 4.5.2 (2025-10-31) (R Core Team 2025),

Listing 32.2: S3 methods base::*.function and graphics::*.function

Code

.S3methods(class = 'function', all.names = TRUE) |> 
  attr(which = 'info', exact = TRUE) |>
  subset.data.frame(subset = (from %in% c('base', 'graphics')))
#                    visible     from   generic  isS4
# all.equal.function    TRUE     base all.equal FALSE
# as.list.function      TRUE     base   as.list FALSE
# plot.function         TRUE graphics      plot FALSE
# print.function        TRUE     base     print FALSE

Package maxEff (v0.2.1) implements the following S3 methods to the class 'node1' (Table 32.1),

Table 32.1: S3 methods maxEff::*.node1 (v0.2.1)

	visible	generic	isS4
`get_cutoff.node1`	TRUE	`maxEff::get_cutoff`	FALSE
`labels.node1`	TRUE	`base::labels`	FALSE
`predict.node1`	TRUE	`stats::predict`	FALSE

32.1 Examples

Data set stagec from package rpart contains 146 subjects with Stage C prostate cancer. Listing 32.3 uses the first 135 subjects as the training set, and the last 11 subjects as the test set.

Listing 32.3: Data: training set stagec0 and test set stagec1

stagec0 = rpart::stagec[1:135,] # training set
stagec1 = rpart::stagec[-(1:135),] # test set

32.1.1 `numeric` Predictor

Listing 32.4 creates a recursive partitioning model of the numeric variable age with the endpoint of progression-free survival in the training set stagec0, with parameters

cp = .Machine$double.eps, to ensure at least one node/split of the partitioning tree.
maxdepth = 2L, to reduce the computation cost as we need only the first node.

Listing 32.4: Review: rpart::rpart(), numeric predictor

rp0a = rpart::rpart(
  formula = survival::Surv(pgtime, pgstat) ~ age, data = stagec0, 
  cp = .Machine$double.eps, maxdepth = 2L
)

Review: a recursive-partitioning object rp0a

rp0a
# n= 135 
# 
# node), split, n, deviance, yval
#       * denotes terminal node
# 
# 1) root 135 180.12030 1.0000000  
#   2) age>=53.5 127 163.45310 0.9464647  
#     4) age>=58.5 106 132.43310 0.8919508 *
#     5) age< 58.5 21  30.23755 1.2072260 *
#   3) age< 53.5 8  14.26926 1.8265100 *

Listing 32.5 creates a dichotomizing rule D0a for the numeric variable age based on the first node in the recursive-partitioning model rp0a.

Listing 32.5: Example: node1(), numeric predictor

D0a = rp0a |>
  node1()

Listing 32.6 displays the dichomizing rule D0a and its enclosure environment using the S3 method base::print.function(),

Listing 32.6: Review: base::print.function() on D0a

D0a
# function (newx = age) 
# {
#     ret <- (newx >= 53.5)
#     ret0 <- na.omit(ret)
#     if ((length(ret0) > 1L) && (all(ret0) || !any(ret0))) 
#         warning("Dichotomized values are all-0 or all-1")
#     return(ret)
# }
# <environment: 0x3161550b0>
# attr(,"class")
# [1] "node1_numeric" "node1"         "function"

Listing 32.7 uses the dichotomizing rule D0a as an R function.

Listing 32.7: Example: using D0a

stagec1$age |> 
  D0a()
#  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE

32.1.2 `factor` Predictor

Listing 32.8 creates a recursive partitioning model of the factor variable ploidy with the endpoint of progression-free survival in the training set stagec0, with parameters

cp = .Machine$double.eps, to ensure at least one node/split of the partitioning tree.
maxdepth = 2L, to reduce the computation cost as we need only the first node.

Listing 32.8: Review: rpart::rpart(), factor predictor

rp0b = rpart::rpart(
  formula = survival::Surv(pgtime, pgstat) ~ ploidy, data = stagec0, 
  cp = .Machine$double.eps, maxdepth = 2L
)

Review: a recursive-partitioning object rp0b

rp0b
# n= 135 
# 
# node), split, n, deviance, yval
#       * denotes terminal node
# 
# 1) root 135 180.12030 1.0000000  
#   2) ploidy=diploid 59  61.84909 0.5141615 *
#   3) ploidy=tetraploid,aneuploid 76 105.86350 1.4277700  
#     6) ploidy=tetraploid 66  85.67126 1.3135490 *
#     7) ploidy=aneuploid 10  18.42951 2.0876620 *

Listing 32.9 creates a dichotomizing rule D0b for the factor variable ploidy based on the first node in the recursive partitioning tree rp0b.

Listing 32.9: Example: node1(), factor predictor

D0b = rp0b |>
  node1()

Listing 32.10 displays the dichomizing rule D0b and its enclosure environment using the S3 method base::print.function(),

Listing 32.10: Review: base::print.function() on D0b

D0b
# function (newx = ploidy) 
# {
#     ret <- unclass(newx) %in% c(1L)
#     ret0 <- na.omit(ret)
#     if ((length(ret0) > 1L) && (all(ret0) || !any(ret0))) 
#         warning("Dichotomized values are all-0 or all-1")
#     return(ret)
# }
# <environment: 0x313b611a0>
# attr(,"class")
# [1] "node1_factor" "node1"        "function"

Listing 32.11 uses the dichotomizing rule D0b as an R function.

Listing 32.11: Example: using D0b

stagec1$ploidy |> 
  D0b()
#  [1]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE

32.2 Labels

The S3 method labels.node1() returns a human-friendly character text to describe the dichotomizing rule $\mathcal{D}$.

Listing 32.12: Example: labels.node1() on D0a

D0a |> 
  labels()
# [1] "age>=53.5"

Listing 32.13: Example: labels.node1() on D0b

D0b |> 
  labels()
# [1] "ploidy in levels c(1L)"

32.3 Predict

The S3 method predict.node1() dichotomizes the corresponding numeric or factor variable in the test set, using the dichotomizing rule $\mathcal{D}$ determined by the training set.

Listing 32.14 dichotomizes the numeric variable age in the test set stagec1, using the dichotomizing rule D0a determined by the training set stagec0.

Listing 32.14: Example: predict.node1() of D0a on stagec1

D0a |> 
  predict(newdata = stagec1)
#  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE

Listing 32.15 dichotomizes the factor variable ploidy in the test set stagec1, using the dichotomizing rule D0b determined by the training set stagec0. Note that readers must ensure that the factor variable being dichotomized has the same levels in the training and the test set.

Listing 32.15: Example: predict.node1() of D0b on stagec1**

stopifnot(identical(levels(stagec0$ploidy), levels(stagec1$ploidy)))
D0b |> 
  predict(newdata = stagec1)
#  [1]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE

32.4 Get Cutoff

The (tentatively named) S3 method get_cutoff.node1() returns the numeric cutoff value of the dichotomizing rule $\mathcal{D}$ of a numeric predictor.

Listing 32.16: Example: get_cutoff.node1() of D0a

D0a |> 
  get_cutoff()
# [1] 53.5

32.5 Enclosure `environment`

Section 32.5 is not intended for casual R users!

A non-.Primitive R function is a closure that consists of a function and the environment in which it was created. Function node1() intentionally cleans up the enclosure environment of the returned dichotomizing rule $\mathcal{D}$. Listing 32.17 lists the objects in the enclosure environment of the dichomizing rule D0a.

Listing 32.17: Advanced: enclosure environment of D0a

D0a |>
  environment() |>
  ls(envir = _, all.names = TRUE)
# [1] ".fn"

Listing 32.6 shows that the name of the numeric variable, e.g., age, is stored as the formals argument of the parameter newx. This is an advanced R programming trick! The evaluation of the dichotomizing rule D0a discovers the variable age in

the global environment .GlobalEnv
the enclosure environment of the dichotomizing rule, if its parent.environment is a namespace or the .GlobalEnv

Advanced: evaluate in .GlobalEnv

age = stagec1$age
D0a()
#  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
rm(age)

Advanced: evaluate in environment(D0a), child of package namespace

ev = environment(D0a)
stopifnot(
  isNamespace(parent.env(ev)),
  getNamespaceName(parent.env(ev)) == 'maxEff'
)
ls(envir = ev)
# character(0)
assign(x = 'age', value = stagec1$age, envir = ev)
D0a()
#  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
rm(age, envir = ev)
ls(envir = environment(D0a))
# character(0)
rm(ev)

Advanced: evaluate in new.env(parent = .GlobalEnv)

ev = new.env(parent = .GlobalEnv)
stopifnot(identical(parent.env(ev), .GlobalEnv))
assign(x = 'age', value = stagec1$age, envir = ev)
ls(envir = ev)
# [1] "age"
D0a. = D0a; environment(D0a.) = ev # correct
D0a.()
#  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
tryCatch(expr = {
  as.function(D0a, envir = ev)() # wrong
}, error = identity)
# <simpleError in as.function(D0a, envir = ev)(): object 'age' not found>
tryCatch(expr = {
  eval(D0a(), envir = ev) # wrong
}, error = identity)
# <simpleError in D0a(): object 'age' not found>
tryCatch(expr = {
  eval(D0a(), enclos = ev) # wrong
}, error = identity)  
# <simpleError in D0a(): object 'age' not found>
rm(ev, D0a.)

Advanced: evaluate in list2env(., parent = .GlobalEnv)

ev = stagec1 |> 
  list2env(parent = .GlobalEnv) 
stopifnot(identical(parent.env(ev), .GlobalEnv))
D0a. = D0a; environment(D0a.) = ev
D0a.()
#  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
rm(ev, D0a.)

Advanced: do not evaluate in as.environment(), child of emptyenv() !

ev = stagec1 |>
  as.environment()
stopifnot(identical(parent.env(ev), emptyenv()))
D0a. = D0a; environment(D0a.) = ev
tryCatch(expr = {
  D0a.()
}, error = identity)
# <simpleError in {    ret <- (newx >= 53.5)    ret0 <- na.omit(ret)    if ((length(ret0) > 1L) && (all(ret0) || !any(ret0)))         warning("Dichotomized values are all-0 or all-1")    return(ret)}: could not find function "{">
rm(ev, D0a.)