“discretization” package
- FSelectorRcpp: ‘Rcpp’ Implementation of ‘FSelector’ Entropy-Based Feature Selection Algorithms with a Sparse Matrix Support
 
| 
x
 | 
| 
Sepal.Length
 | 
| 
Sepal.Width
 | 
| 
Petal.Length
 | 
| 
Petal.Width
 | 
| 
Species
 | 
| Species | Sepal.Length | N |
|---|---|---|
| setosa | (-Inf,5.55] | 47 |
| setosa | (5.55,6.15] | 3 |
| versicolor | (-Inf,5.55] | 11 |
| versicolor | (5.55,6.15] | 23 |
| versicolor | (6.15, Inf] | 16 |
| virginica | (-Inf,5.55] | 1 |
| virginica | (5.55,6.15] | 10 |
| virginica | (6.15, Inf] | 39 |
             attr_importance
Sepal.Length       0.4521286
Sepal.Width        0.2672750
Petal.Length       0.9402853
Petal.Width        0.9554360
Species ~ Petal.Width + Petal.Length
<environment: 0x562ca312cdd0>
 
Discretize Continuous Variables based on the Outcome
library(FSelectorRcpp)
# Build a reproducible toy data set: two independent standard-normal draws,
# then an outcome that is quadratic in `x` plus one fifth of the noise term.
set.seed(123456)
n <- 300
dt <- data.table(
  x = rnorm(n),
  noise = rnorm(n)
)
# Functional form of `:=`; adds `outcome` to `dt` by reference.
dt <- dt[, `:=`(outcome = x^2 + noise / 5)]
#' Discretize a continuous predictor and append it as a new column
#'
#' Drops the rows in which `predictor` is missing, bins the remaining values
#' with `FSelectorRcpp::discretize()` under an `equalsizeControl(num)` control,
#' and stores the result in a column named `<predictor>_discretized`.
#'
#' @param data A data.table or data.frame containing both columns.
#' @param predictor Name (single string) of the column to discretize.
#' @param outcome Name (single string) of the column passed to
#'   `FSelectorRcpp::discretize()` as `y`.
#' @param num Value forwarded to `FSelectorRcpp::equalsizeControl()`.
#' @return `data` restricted to rows with a non-missing predictor, with the
#'   `<predictor>_discretized` column added; returned invisibly.
discretization <- function(data, predictor, outcome, num = 5) {
  stopifnot(
    is.data.frame(data),
    is.character(predictor), length(predictor) == 1L,
    is.character(outcome), length(outcome) == 1L
  )
  # Fail early with a readable message instead of an obscure error raised
  # from inside the discretization routine.
  absent <- setdiff(c(predictor, outcome), names(data))
  if (length(absent) > 0L) {
    stop(
      "Column(s) not found in `data`: ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Explicit row indexing: a bare `data[keep]` selects *columns* on a plain
  # data.frame (and on a data.table used from non-data.table-aware code).
  keep <- !is.na(data[[predictor]])
  data <- data[keep, ]

  predictor_discretized <- paste0(predictor, "_discretized")
  # Namespaced control constructor so the function does not depend on
  # FSelectorRcpp being attached with library().
  binned <- FSelectorRcpp::discretize(
    data[[predictor]],
    data[[outcome]],
    all = TRUE,
    control = list(FSelectorRcpp::equalsizeControl(num))
  )
  # The second column of the result holds the discretized predictor.
  data[[predictor_discretized]] <- binned[, 2]
  invisible(data)
}
# Discretize `x` with num = 5 and keep the augmented table as `dt2`.
dt2 <- discretization(
  data = dt,
  predictor = "x",
  outcome = "outcome",
  num = 5
)
# Build a table of `outcome` grouped by the new `x_discretized` column.
# NOTE(review): exact statistics are decided by Wu::Table2_nbyc -- confirm
# against that package's documentation.
crrs <- Wu::Table2_nbyc(
  data = dt2,
  group = "x_discretized",
  var = "outcome",
  digits_c = 3,
  digits_p = 4
)
crrs %>% prt()
 
| Variable | Level | N | Missing | Mean(SD) | p(ANOVA) | Median(IQR) | p(KW) | Mode(Range) |
|---|---|---|---|---|---|---|---|---|
|  | (-Inf,-0.79366717] | 60 | 0 (0.0000%) | 2.13 (1.62) | < 0.0001 | 1.45 (0.955, 2.73) | < 0.0001 | 1.02 (0.239, 7.73) |
|  | (-0.79366717,-0.24060591] | 60 | 0 (0.0000%) | 0.279 (0.271) |  | 0.26 (0.0999, 0.42) |  | 0.201 (-0.247, 0.949) |
|  | (-0.24060591,0.28597898] | 60 | 0 (0.0000%) | 0.047 (0.189) |  | 0.062 (-0.135, 0.166) |  | 0.0884 (-0.363, 0.395) |
|  | (0.28597898,0.88542331] | 60 | 0 (0.0000%) | 0.403 (0.321) |  | 0.386 (0.139, 0.613) |  | 0.542 (-0.133, 1.3) |
|  | (0.88542331, Inf] | 60 | 0 (0.0000%) | 2.03 (1.42) |  | 1.52 (1.11, 2.4) |  | 1.24 (0.605, 6.84) |