“discretization” package
- FSelectorRcpp: ‘Rcpp’ Implementation of ‘FSelector’ Entropy-Based Feature Selection Algorithms with a Sparse Matrix Support
| x |
|---|
| Sepal.Length |
| Sepal.Width |
| Petal.Length |
| Petal.Width |
| Species |
| Species | Sepal.Length | N |
|---|---|---|
| setosa | (-Inf,5.55] | 47 |
| setosa | (5.55,6.15] | 3 |
| versicolor | (-Inf,5.55] | 11 |
| versicolor | (5.55,6.15] | 23 |
| versicolor | (6.15, Inf] | 16 |
| virginica | (-Inf,5.55] | 1 |
| virginica | (5.55,6.15] | 10 |
| virginica | (6.15, Inf] | 39 |
             attr_importance
Sepal.Length       0.4521286
Sepal.Width        0.2672750
Petal.Length       0.9402853
Petal.Width        0.9554360
Species ~ Petal.Width + Petal.Length
<environment: 0x562ca312cdd0>
Discretize Continuous Variables based on the Outcome
# Simulated example: `outcome` is a quadratic function of `x` plus scaled noise.
library(FSelectorRcpp)

# Fix the RNG state so the simulated draws are reproducible.
set.seed(123456)
n <- 300

# `x` is drawn before `noise`; keep this order so the seeded values match.
dt <- data.table(
  x = rnorm(n),
  noise = rnorm(n)
)

# `:=` adds the column to `dt` by reference, so no re-assignment is needed.
dt[, outcome := x^2 + noise / 5]
#' Discretize a continuous predictor and append the result as a new column
#'
#' Drops the rows in which `predictor` is missing, passes the remaining
#' predictor and outcome values to `FSelectorRcpp::discretize()` with an
#' `equalsizeControl(num)` control, and stores the discretized predictor in a
#' new column named `<predictor>_discretized`.
#'
#' @param data A data.frame (including data.table) containing both columns.
#' @param predictor Single string: name of the column to discretize.
#' @param outcome Single string: name of the column passed to `discretize()`
#'   as `y`.
#' @param num Value forwarded to `FSelectorRcpp::equalsizeControl()`;
#'   presumably the number of bins -- confirm against the package
#'   documentation.
#' @return Invisibly, `data` without the rows where `predictor` is `NA` and
#'   with the additional `<predictor>_discretized` column.
discretization <- function(data, predictor, outcome, num = 5) {
  stopifnot(
    is.data.frame(data),
    is.character(predictor), length(predictor) == 1L,
    is.character(outcome), length(outcome) == 1L
  )

  # Fail early with a readable message instead of letting a mistyped column
  # name surface later as an obscure error.
  missing_cols <- setdiff(c(predictor, outcome), names(data))
  if (length(missing_cols) > 0L) {
    stop(
      "Column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Row-filter on non-missing predictor values. A data.table treats a single
  # logical index as a row filter, whereas a plain data.frame would treat it
  # as a column selection, so the two cases are handled separately.
  keep <- !is.na(data[[predictor]])
  data <- if (inherits(data, "data.table")) {
    data[keep]
  } else {
    data[keep, , drop = FALSE]
  }

  predictor_discretized <- paste0(predictor, "_discretized")

  # Both FSelectorRcpp functions are namespace-qualified so the function also
  # works when the package is installed but not attached.
  binned <- FSelectorRcpp::discretize(
    data[[predictor]],
    data[[outcome]],
    all = TRUE,
    control = list(FSelectorRcpp::equalsizeControl(num))
  )

  # The second column of the discretize() result is used as the discretized
  # predictor.
  data[[predictor_discretized]] <- binned[, 2]

  invisible(data)
}
# Bin `x` with num = 5; the result gains an `x_discretized` column.
dt2 <- discretization(
  data = dt,
  predictor = "x",
  outcome = "outcome",
  num = 5
)

# Summarise `outcome` within each level of the discretized predictor.
crrs <- Wu::Table2_nbyc(
  data = dt2,
  group = "x_discretized",
  var = "outcome",
  digits_c = 3,
  digits_p = 4
)

# Render the summary table.
crrs %>%
  prt()
| Variable | Level | N | Missing | Mean(SD) | p(ANOVA) | Median(IQR) | p(KW) | Mode(Range) |
|---|---|---|---|---|---|---|---|---|
|  | (-Inf,-0.79366717] | 60 | 0 (0.0000%) | 2.13 (1.62) | < 0.0001 | 1.45 (0.955, 2.73) | < 0.0001 | 1.02 (0.239, 7.73) |
|  | (-0.79366717,-0.24060591] | 60 | 0 (0.0000%) | 0.279 (0.271) |  | 0.26 (0.0999, 0.42) |  | 0.201 (-0.247, 0.949) |
|  | (-0.24060591,0.28597898] | 60 | 0 (0.0000%) | 0.047 (0.189) |  | 0.062 (-0.135, 0.166) |  | 0.0884 (-0.363, 0.395) |
|  | (0.28597898,0.88542331] | 60 | 0 (0.0000%) | 0.403 (0.321) |  | 0.386 (0.139, 0.613) |  | 0.542 (-0.133, 1.3) |
|  | (0.88542331, Inf] | 60 | 0 (0.0000%) | 2.03 (1.42) |  | 1.52 (1.11, 2.4) |  | 1.24 (0.605, 6.84) |