- The source code and accompanying documentation are available in the repository: https://github.com/mdtuhinsheikh/MLSurv.
- The lecture slides are available at https://mdtuhinsheikh.github.io/MLSurv/.
# Load the Wisconsin breast cancer data bundled with randomForestSRC,
# drop rows containing missing values, and preview some variable names.
data(breast, package = "randomForestSRC")
breast <- na.omit(breast)
names(breast)[1:10]  # Displaying only ten variable names
## [1] "status" "mean_radius" "mean_texture" ## [4] "mean_perimeter" "mean_area" "mean_smoothness" ## [7] "mean_compactness" "mean_concavity" "mean_concavepoints" ## [10] "mean_symmetry"
# Fit a random survival forest (randomForestSRC) and a bagged classifier
# (ipred), then collect their out-of-bag error rates in a one-column table
# labelled by method.
mod1 <- rfsrc(status ~ ., data = breast, nsplit = 10)
mod2 <- bagging(status ~ ., data = breast, coob = TRUE)
res <- as.data.frame(c(mean(mod1$err.rate[, 1], na.rm = TRUE), mod2$err))
colnames(res) <- "Error Rate"
rownames(res) <- c("RSF", "Bagging")
## Error Rate ## RSF 0.2371134 ## Bagging 0.2731959
# Fix the RNG state so the train/test split below is reproducible,
# then load the Boston housing data from MASS and list its variables.
set.seed(500)
data(Boston, package = "MASS")
names(Boston)
## [1] "crim" "zn" "indus" "chas" "nox" "rm" "age" ## [8] "dis" "rad" "tax" "ptratio" "black" "lstat" "medv"
# Hold out 25% of the rows for testing, fit a linear model to the rest
# (a gaussian glm is equivalent to lm here), and record its test-set MSE.
index <- sample(seq_len(nrow(Boston)), round(0.75 * nrow(Boston)))
train <- Boston[index, ]
test <- Boston[-index, ]
lm.fit <- glm(medv ~ ., data = train)
pr.lm <- predict(lm.fit, test)
MSE.lm <- mean((pr.lm - test$medv)^2)  # same as sum(...)/nrow(test)
# Min-max scale every variable to [0, 1] before training: neuralnet
# converges poorly on unscaled inputs.
maxs <- apply(Boston, 2, max)
mins <- apply(Boston, 2, min)
scaled <- as.data.frame(scale(Boston, center = mins, scale = maxs - mins))
train_ <- scaled[index, ]
test_ <- scaled[-index, ]
# neuralnet() does not accept the "medv ~ ." shorthand, so the full
# formula is assembled from the remaining column names.
n <- names(train_)
f <- as.formula(paste("medv ~", paste(n[!n %in% "medv"], collapse = " + ")))
# Two hidden layers (5 and 3 neurons); linear output for regression.
# Fix: use TRUE rather than the reassignable shortcut T.
nn <- neuralnet(f, data = train_, hidden = c(5, 3), linear.output = TRUE)
# Predict on the scaled test set, back-transform both predictions and the
# observed responses to the original medv scale, and print the test MSE of
# the linear model next to that of the neural network.
pr.nn <- compute(nn, test_[, 1:13])
medv.range <- max(Boston$medv) - min(Boston$medv)
pr.nn_ <- pr.nn$net.result * medv.range + min(Boston$medv)
test.r <- test_$medv * medv.range + min(Boston$medv)
MSE.nn <- sum((test.r - pr.nn_)^2) / nrow(test_)
print(paste(MSE.lm, MSE.nn))
## [1] "31.2630222372615 16.4595537665717"
# Load the veterans' lung cancer trial data shipped with randomForestSRC.
data(veteran, package = "randomForestSRC")
names(veteran)  # Displaying variable names
## [1] "trt" "celltype" "time" "status" "karno" "diagtime" ## [7] "age" "prior"
# Compare a Cox proportional hazards model with a random survival forest
# on the veteran data using the concordance index. For the RSF, the
# C-index is 1 minus the mean out-of-bag error rate.
# Fix: use TRUE rather than the reassignable shortcut T in na.rm.
mod3 <- coxph(Surv(time, status) ~ ., data = veteran, x = TRUE, y = TRUE)
mod4 <- rfsrc(Surv(time, status) ~ ., data = veteran, ntree = 100)
cindex <- as.data.frame(c(concordance(mod3)$concordance,
                          1 - mean(mod4$err.rate, na.rm = TRUE)))
colnames(cindex) <- "C-index"
rownames(cindex) <- c("Cox PH", "RSF")
## C-index ## Cox PH 0.7053612 ## RSF 0.7094312
Breiman, Leo. 1996. “Bagging Predictors.” Machine Learning 24 (2): 123–40.
———. 2001. “Random Forests.” Machine Learning 45 (1): 5–32.
Breiman, Leo, and others. 1998. “Arcing Classifier (with Discussion and a Rejoinder by the Author).” The Annals of Statistics 26 (3): 801–49.
Ching, Travers, Xun Zhu, and Lana X Garmire. 2018. “Cox-Nnet: An Artificial Neural Network Method for Prognosis Prediction of High-Throughput Omics Data.” PLoS Computational Biology 14 (4): e1006076.
Cox, David R. 1972. “Regression Models and Life-Tables.” Journal of the Royal Statistical Society: Series B (Methodological) 34 (2): 187–202.
Faraggi, David, and Richard Simon. 1995. “A Neural Network Model for Survival Data.” Statistics in Medicine 14 (1): 73–82.
Faraggi, David, R Simon, E Yaskil, and A Kramar. 1997. “Bayesian Neural Network Models for Censored Data.” Biometrical Journal 39 (5): 519–32.
Giunchiglia, Eleonora, Anton Nemchenko, and Mihaela van der Schaar. 2018. “RNN-Surv: A Deep Recurrent Model for Survival Analysis.” In International Conference on Artificial Neural Networks, 23–32. Springer.
Goodfellow, Ian, Yoshua Bengio, and Aaron Courville. 2016. Deep Learning. MIT press.
Lee, Changhee, William R Zame, Jinsung Yoon, and Mihaela van der Schaar. 2018. “DeepHit: A Deep Learning Approach to Survival Analysis with Competing Risks.” In Thirty-Second AAAI Conference on Artificial Intelligence.
Polson, Nicholas G, Vadim Sokolov, and others. 2017. “Deep Learning: A Bayesian Perspective.” Bayesian Analysis 12 (4): 1275–1304.
Ranganath, Rajesh, Adler Perotte, Noémie Elhadad, and David Blei. 2016. “Deep Survival Analysis.” arXiv Preprint arXiv:1608.02158.
Wang, Hong, and Lifeng Zhou. 2018. “SurvELM: An R Package for High Dimensional Survival Analysis with Extreme Learning Machine.” Knowledge-Based Systems 160: 28–33.
Wang, Ping, Yan Li, and Chandan K Reddy. 2019. “Machine Learning for Survival Analysis: A Survey.” ACM Computing Surveys (CSUR) 51 (6): 110.