[Education] How does a Particular Drug affect Stroke Complications? (Development of Classification Model)

Doyun’s Journey
Doyun’s Lab
Published in
31 min readAug 31, 2020

Subject : Develop a Stroke Complications Forecasting Model Using a Specific Drug List

Language : SAS, SQL, R

Data : ‘HIRA(건강보험심사평가원)’ 데이터

Model : XGBoost

  • hira_data.csv

> 성별, 나이대, 입원일, 약 평균 복용일, 약 복용 여부, 합병증 여부

  • MEDICINE.csv

> 약품의 주성분에 따라 평균 사용량, 평균 처방기간, 평균 처방량, 약 사용 수

1. Topic introduction

  • 뇌혈관 질환 환자에게 예후가 좋은 약품이 실제로는 많이 처방되지 않고 있으므로, 이러한 약품을 복용했을 때 예후가 좋다는 것을 입증하는 것이 목표
  • 뇌졸중은 의료 기관간 편차가 크며 사망률과 입원일 수도 요양기관간에 차이가 큰 것으로 나타남.

2. Data extraction

> HIRA T200, T300, T400, T530 테이블에서 SQL로 데이터 추출

  • 모델 구축 테이블 생성
libname fs "D:\bigedu";

/* Derive a 5-year age band AGG (1..17) from PAT_AGE for every row of FS.T200.
   Ages 81 and above share band 17. AGG stays missing when PAT_AGE is missing,
   negative, or falls between the integer band edges (e.g. 5.5). */
DATA T200_AGE;
    SET FS.T200;
    IF      0  <= PAT_AGE <= 5  THEN AGG = 1;
    ELSE IF 6  <= PAT_AGE <= 10 THEN AGG = 2;
    ELSE IF 11 <= PAT_AGE <= 15 THEN AGG = 3;
    ELSE IF 16 <= PAT_AGE <= 20 THEN AGG = 4;
    ELSE IF 21 <= PAT_AGE <= 25 THEN AGG = 5;
    ELSE IF 26 <= PAT_AGE <= 30 THEN AGG = 6;
    ELSE IF 31 <= PAT_AGE <= 35 THEN AGG = 7;
    ELSE IF 36 <= PAT_AGE <= 40 THEN AGG = 8;
    ELSE IF 41 <= PAT_AGE <= 45 THEN AGG = 9;
    ELSE IF 46 <= PAT_AGE <= 50 THEN AGG = 10;
    ELSE IF 51 <= PAT_AGE <= 55 THEN AGG = 11;
    ELSE IF 56 <= PAT_AGE <= 60 THEN AGG = 12;
    ELSE IF 61 <= PAT_AGE <= 65 THEN AGG = 13;
    ELSE IF 66 <= PAT_AGE <= 70 THEN AGG = 14;
    ELSE IF 71 <= PAT_AGE <= 75 THEN AGG = 15;
    ELSE IF 76 <= PAT_AGE <= 80 THEN AGG = 16;
    ELSE IF 81 <= PAT_AGE       THEN AGG = 17;
RUN;
/* Visit-day totals per department (author's label: inpatient count by department).
   Sums VST_DDCNT per DGSBJT_CD over claims with form type '021' or '121' whose
   main diagnosis starts with I63, I67, O22 or O87.
   NOTE(review): other inpatient queries in this file filter on ('021', '061') - confirm which is intended. */
PROC SQL;
    CREATE TABLE PRE_RATE AS
    SELECT
        DGSBJT_CD,
        SUM(VST_DDCNT) AS OUT_CNT
    FROM FS.T200
    WHERE SUBSTR(MAIN_SICK, 1, 3) IN ('I63', 'I67', 'O22', 'O87')
      AND FOM_TP_CD IN ('021', '121')
    GROUP BY DGSBJT_CD;
QUIT;
/* Drug statistics for the four main-diagnosis groups (author's label: inpatient), from FS.T300.
   One output row per ingredient code GNL_CD. Because this is a LEFT JOIN from T200,
   claims without any T300 row are collected in a group whose GNL_CD is missing. */
PROC SQL;
    CREATE TABLE MEDICINE AS
    SELECT
        B.GNL_CD,
        AVG(B.TOT_USE_QTY_OR_EXEC_FQ) AS AVG_USE,
        AVG(B.TOT_INJC_DDCNT_EXEC_FQ) AS AVG_DUE,
        AVG(B.DY1_INJC_QTY_EXEC_FQ)   AS AVG_AMT,
        SUM(B.TOT_INJC_DDCNT_EXEC_FQ) AS SUM_DUE
    FROM FS.T200 AS A
    LEFT JOIN FS.T300 AS B
        ON A.MID = B.MID
    WHERE A.FOM_TP_CD IN ('021', '061')
      AND SUBSTR(A.MAIN_SICK, 1, 3) IN ('I63', 'I67', 'O22', 'O87')
    GROUP BY B.GNL_CD;
QUIT;
/* Drug statistics for the four main-diagnosis groups (author's label: inpatient), from FS.T530.
   Same shape as MEDICINE but reads T530 and reports DY1_MDCT_QTY as the daily dose.
   Claims without any T530 row fall into a group whose GNL_CD is missing (LEFT JOIN). */
PROC SQL;
    CREATE TABLE MEDICINE2 AS
    SELECT
        B.GNL_CD,
        AVG(B.TOT_USE_QTY_OR_EXEC_FQ) AS AVG_TOTAL_USE,
        AVG(B.DY1_MDCT_QTY)           AS AVG_1DAY_DOS,
        AVG(B.TOT_INJC_DDCNT_EXEC_FQ) AS AVG_DUE,
        SUM(B.TOT_INJC_DDCNT_EXEC_FQ) AS SUM_DUE
    FROM FS.T200 AS A
    LEFT JOIN FS.T530 AS B
        ON A.MID = B.MID
    WHERE A.FOM_TP_CD IN ('021', '061')
      AND SUBSTR(A.MAIN_SICK, 1, 3) IN ('I63', 'I67', 'O22', 'O87')
    GROUP BY B.GNL_CD;
QUIT;
/* Drug statistics for the four main-diagnosis groups (author's label: outpatient), from FS.T530.
   Identical to MEDICINE2 except that only form type '131' is kept. */
PROC SQL;
    CREATE TABLE MEDICINE2_2 AS
    SELECT
        B.GNL_CD,
        AVG(B.TOT_USE_QTY_OR_EXEC_FQ) AS AVG_TOTAL_USE,
        AVG(B.DY1_MDCT_QTY)           AS AVG_1DAY_DOS,
        AVG(B.TOT_INJC_DDCNT_EXEC_FQ) AS AVG_DUE,
        SUM(B.TOT_INJC_DDCNT_EXEC_FQ) AS SUM_DUE
    FROM FS.T200 AS A
    LEFT JOIN FS.T530 AS B
        ON A.MID = B.MID
    WHERE A.FOM_TP_CD IN ('131')
      AND SUBSTR(A.MAIN_SICK, 1, 3) IN ('I63', 'I67', 'O22', 'O87')
    GROUP BY B.GNL_CD;
QUIT;
/* Visit days of discharged patients (author's label).
   Keeps claims with DGRSLT_TP_CD = '9' and a main diagnosis in I63/I67/O22/O87,
   joined to T400 rows whose SICK_SNO is at most 3.
   NOTE(review): only T200 columns are selected, so a claim that matches several
   T400 rows is emitted once per match - confirm duplicates are intended. */
PROC SQL;
    CREATE TABLE OUT_JID AS
    SELECT
        A.JID       AS JID,
        A.VST_DDCNT AS DAY
    FROM FS.T200 AS A
    LEFT JOIN FS.T400 AS B
        ON A.MID = B.MID
    WHERE A.DGRSLT_TP_CD = '9'
      AND SUBSTR(A.MAIN_SICK, 1, 3) IN ('I63', 'I67', 'O22', 'O87')
      AND B.SICK_SNO <= 3;
QUIT;
/* 2. Basic statistics per year (2015-2018).
   Re-run the queries below once per year, changing the output table suffix
   (2015, 2016, 2017, 2018) and the input table (T200_15, T200_16, T200_17, T200_18). */

/* Number of JID values per 3-character main-diagnosis prefix, ascending by count. */
PROC SQL;
    CREATE TABLE MAIN_SICK_CNT_2018 AS
    SELECT
        SUBSTR(MAIN_SICK, 1, 3) AS SICK,
        COUNT(JID)              AS JID_CNT
    FROM FS.T200_18
    GROUP BY SICK
    ORDER BY JID_CNT;
QUIT;
/* Yearly drug-use trend for claims admitted under the four diagnosis groups
   (form types '021' and '061'). One row per ingredient code GNL_CD.
   Fixes: the statement used to end before GROUP BY (stray ';'), leaving the
   GROUP BY line dangling, and the O87 prefix was typed with a digit zero ('087'). */
PROC SQL;
    CREATE TABLE MED_2018 AS
    SELECT
        B.GNL_CD,
        SUM(B.TOT_USE_QTY_OR_EXEC_FQ) AS SUM_USE,
        AVG(B.TOT_INJC_DDCNT_EXEC_FQ) AS AVG_DUE,
        SUM(B.DY1_INJC_QTY_EXEC_FQ)   AS SUM_AMT,
        SUM(B.TOT_INJC_DDCNT_EXEC_FQ) AS SUM_DUE
    FROM FS.T200_18 AS A
    LEFT JOIN FS.T300_18 AS B
        ON A.MID = B.MID
    WHERE SUBSTR(A.MAIN_SICK, 1, 3) IN ('I63', 'I67', 'O22', 'O87')
      AND A.FOM_TP_CD IN ('021', '061')
    GROUP BY B.GNL_CD;
QUIT;
/* Yearly count of outpatient claims (author's label; form type '031') under the
   four diagnosis groups. Fix: the O87 prefix was typed with a digit zero ('087'),
   so those rows were never counted. */
PROC SQL;
    CREATE TABLE CVT_2018 AS
    SELECT COUNT(JID) AS JID_CNT
    FROM FS.T200_18
    WHERE SUBSTR(MAIN_SICK, 1, 3) IN ('I63', 'I67', 'O22', 'O87')
      AND FOM_TP_CD IN ('031');
QUIT;
/* Yearly patient trend by sex: counts JID per SEX_TP_CD for form type '031'
   claims whose main diagnosis starts with I63, I67, O22 or O87. */
PROC SQL;
    CREATE TABLE CVT_GEN_2018 AS
    SELECT
        COUNT(JID) AS JID_CNT,
        SEX_TP_CD  AS GENDER
    FROM FS.T200_18
    WHERE FOM_TP_CD IN ('031')
      AND SUBSTR(MAIN_SICK, 1, 3) IN ('I63', 'I67', 'O22', 'O87')
    GROUP BY GENDER;
QUIT;
/* Yearly patient trend by age band: counts JID per AGG (5-year band from
   WORK.T200_AGE) for form type '031' claims under the four diagnosis groups.
   Fix: the O87 prefix was typed with a digit zero ('087').
   NOTE(review): the output is named for 2015 while WORK.T200_AGE is built from
   FS.T200 - confirm the input is switched per year as the other queries are. */
PROC SQL;
    CREATE TABLE CVT_AGG_2015 AS
    SELECT
        COUNT(JID) AS JID_CNT,
        AGG        AS AG
    FROM WORK.T200_AGE
    WHERE SUBSTR(MAIN_SICK, 1, 3) IN ('I63', 'I67', 'O22', 'O87')
      AND FOM_TP_CD IN ('031')
    GROUP BY AGG;
QUIT;
/* Patients flagged as having a complication during admission (author's label);
   the JID values in this table are what later marks a patient as "complication = yes".
   Keeps form type '021'/'121' claims whose main diagnosis starts with I60-I63,
   joined to T400 rows with SICK_SNO of at most 3. */
PROC SQL;
    CREATE TABLE COM_SICK_ALL AS
    SELECT
        A.JID       AS JID,
        A.MID       AS MID,
        A.MAIN_SICK AS SICK,
        B.SICK_SNO  AS NUM
    FROM FS.T200 AS A
    LEFT JOIN FS.T400 AS B
        ON A.MID = B.MID
    WHERE SUBSTR(A.MAIN_SICK, 1, 3) IN ('I60', 'I61', 'I62', 'I63')
      AND FOM_TP_CD IN ('021', '121')
      AND B.SICK_SNO <= 3;
QUIT;
/* Ingredient code of every drug given to admitted patients, with diagnosis, sex and age band.
   A patient whose JID appears in COM_SICK_ALL counts as "complication = yes", otherwise "no".
   Only I63, I67, O225 and O873 are used downstream: all O22/O87 codes are extracted here
   and O225/O873 are picked out afterwards.
   The WHERE clause filters on T200_AGE columns, so T300 rows without a matching claim
   never survive; the join is therefore written as INNER JOIN (same result, clearer intent). */
PROC SQL;
    CREATE TABLE TOTAL_IN_PATIENT AS
    SELECT
        B.JID       AS JID,
        A.MID       AS MID,
        B.MAIN_SICK AS SICK,
        A.GNL_CD    AS MED_INFO,
        B.SEX_TP_CD AS GENDER,
        B.AGG       AS AGE
    FROM FS.T300 AS A
    INNER JOIN WORK.T200_AGE AS B
        ON A.MID = B.MID
    WHERE B.FOM_TP_CD IN ('021', '121')
      AND SUBSTR(B.MAIN_SICK, 1, 3) IN ('I63', 'I67', 'O22', 'O87');
QUIT;
/* Quantity used and prescription duration of each administered drug.
   NOTE(review): TOTAL_IN_PATIENT already holds one row per T300 row, so joining
   on MID alone pairs every drug row of a claim with every other one; unmatched
   T300 rows are also kept with a missing JID - confirm this fan-out is intended. */
PROC SQL;
    CREATE TABLE TOTAL_IN_PATIENT_MED AS
    SELECT
        B.JID                    AS JID,
        A.MID                    AS MID,
        A.TOT_USE_QTY_OR_EXEC_FQ AS TOT_USE,
        A.TOT_INJC_DDCNT_EXEC_FQ AS DUE
    FROM FS.T300 AS A
    LEFT JOIN WORK.TOTAL_IN_PATIENT AS B
        ON A.MID = B.MID;
QUIT;

3. Data Analysis

> Target 주상병 코드

- [I636] : 대뇌정맥 혈전증에 의한 비화농성 뇌경색증

- [I676] : 두개내정맥계통의 비화농성 혈전증

- [O225] : 임신중 대뇌정맥혈전증

- [O873] : 산후기중 대뇌정맥혈전증

> 분석 과제

- I60, I61, I62가 주·부상병 3번째 이내에 기록되어 합병증으로 동반되었는지 조사

> 최종 목표

  • 사용한 약품과 다양한 Feature에 따라서 합병증이 발생하는지 안하는지 예측하는 분류 모델 개발
# library
library(tidyverse)
library(dplyr)
library(caret)
library(e1071)
library(randomForest)
library(ROCR)
library(xgboost)
# Data: load the per-ingredient drug statistics exported from SAS
setwd("/Users/doyun/Downloads/")

med1 <- read.csv("MEDICINE.csv")
med2 <- read.csv("MEDICINE2.csv")

# Drop the leading rows (two in med1, one in med2);
# presumably the groups with a blank ingredient code - TODO confirm.
med1 <- med1[-c(1, 2), ]
med2 <- med2[-1, ]

# Rankings by total prescription days (SUM_DUE), largest first
med1_use <- arrange(med1, desc(SUM_DUE))
med2_use <- arrange(med2, desc(SUM_DUE))

# Rankings by average prescription days (AVG_DUE), largest first
med1_due <- arrange(med1, desc(AVG_DUE))
med2_due <- arrange(med2, desc(AVG_DUE))

# Number of ingredient codes
nrow(med1_use)
# Pie charts of SUM_DUE by ingredient code (columns 2-4 are dropped before plotting)
library(googleVis)

# Donut style with the first slice pulled out
donut_opts <- list(
  slices = "{0: {offset: 0.3}}",
  pieHole = 0.4
)

# Top 10 ingredients
plot(gvisPieChart(med1_use[1:10, -(2:4)], options = donut_opts))
plot(gvisPieChart(med2_use[1:10, -(2:4)], options = donut_opts))

# All ingredients
plot(gvisPieChart(med1_use[, -(2:4)]))
plot(gvisPieChart(med2_use[, -(2:4)]))
# Bar charts: the five ingredients with the highest average prescription days.
# Shared theme: black-and-white base with grid lines and backgrounds removed.
bare_theme <- theme_bw() + theme(
  panel.grid.minor = element_blank(),
  panel.grid.major = element_blank(),
  panel.background = element_blank(),
  plot.background = element_blank()
)

# med1: drop columns 2, 4 and 5 before plotting
ggplot(med1_due[1:5, -c(2, 4, 5)],
       aes(x = reorder(GNL_CD, -AVG_DUE), y = AVG_DUE, fill = GNL_CD)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = c("#50C2FF", "#D2E1FF", "#E8F5FF", "#78EAFF", "#1E90FF")) +
  bare_theme

# med2: drop columns 2, 3 and 5 before plotting
ggplot(med2_due[1:5, -c(2, 3, 5)],
       aes(x = reorder(GNL_CD, -AVG_DUE), y = AVG_DUE, fill = GNL_CD)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = c("#1E90FF", "#50C2FF", "#78EAFF", "#E8F5FF", "#D2E1FF")) +
  bare_theme
# Locate each target ingredient code inside med1 / med2.
# Every entry is "<row position> / <code>"; the position part is empty when the code is absent.
med_code <- read.csv("주성분코드_최종.csv")
med_code$주성분코드

loca1 <- c()
loca2 <- c()
for (code in med_code$주성분코드) {
  loca1 <- c(loca1, paste(which(med1$GNL_CD == code), "/", code))
  loca2 <- c(loca2, paste(which(med2$GNL_CD == code), "/", code))
}

# Positions in the T300-based table (MEDICINE)
loca1
# Positions in the T530-based table (MEDICINE2)
loca2

4. Modeling

  • 생성한 모델 : DT, XGBoost, Neural Network, Naïve Bayes Classification, SVM, Logistic Regression, Random Forest
  • 가장 성능이 좋았던 XGBoost 모델 채택
  • 데이터 불균형 문제로 Up-Sampling 진행 후, 모델 구축
  • 모델 평가 방법 : Confusion Matrix, ROC Curve
  • AUC : 0.8635
  • 표시된 약품은 실제로 많이 처방되는 약품이며, 표시되지 않은 약품은 많이 처방되지 않더라도 합병증 발생 여부에 큰 영향을 주는 것을 알 수 있었음
# Locale setup: reset every category to "C", then switch to Korean UTF-8
Sys.setlocale("LC_ALL", "C")
Sys.setlocale(locale = "ko_KR.UTF-8", category = "LC_ALL")
# library
library(tidyverse)
library(dplyr)
library(caret)
library(e1071)
library(randomForest)
library(ROCR)
library(xgboost)
# Data
setwd("/Users/doyun/Downloads/")
data <- read.csv("hira.csv")

# ------------------------------------------------------------------
# Missing values: every NA becomes 0
data[is.na(data)] <- 0

# Data summary
str(data)
summary(data)
nrow(data)

# Type conversion: sex and the complication flag (HB_YN) are categorical
data$HB_YN <- as.factor(data$HB_YN)
data$GENDER <- as.factor(data$GENDER)

# (disabled) convert columns 6-46 to factors as well
# col_name <- colnames(data[6:46])
# data_1 <- lapply(data[col_name], as.factor)
# data_1 <- cbind(data[,1:5], data_1)
# data <- data_1

# Keep only rows whose visit-day count (DAY) is non-zero
data <- subset(data, DAY != 0)
nrow(data)

# Class balance of the target (author's note: roughly 95% vs 5%)
table(data$HB_YN)

# (disabled) one-hot encoding of the target
#one_hot <- transform(data, hb_yes = ifelse(HB_YN == 1, 1, 0),
# hb_no = ifelse(HB_YN== 0, 1, 0))
#one_hot$HB_YN <- NULL
# ------------------------------------------------------------------
# Model - no resampling (baseline on the imbalanced data)
# Seed first so that the random 70/30 partition itself is reproducible.
set.seed(1234)
index <- createDataPartition(y = data$HB_YN, p = 0.7, list = FALSE)
train <- data[index, ]
test <- data[-index, ]

# Logistic regression via caret
model <- train(HB_YN ~ ., data = train, method = "glm")
pred <- predict(model, newdata = test)
confusionMatrix(pred, test$HB_YN)

# Random forest (fit and prediction were fused on one line, which does not parse)
model_rf <- randomForest(HB_YN ~ ., data = train)
pred <- predict(model_rf, newdata = test)
confusionMatrix(pred, test$HB_YN)
# ------------------------------------------------------------------
# Model (Down Sampling) [GLM]
# downSample() returns the predictors plus a `Class` column holding the target.
# Seed first: both the down-sampling and the partition draw random numbers.
set.seed(1234)
x <- downSample(subset(data, select=-HB_YN), data$HB_YN)
table(x$Class)
index <- createDataPartition(y = x$Class, p = 0.7, list = FALSE)
train_down <- x[index, ]
test_down <- x[-index, ]

# Fit and evaluate on the down-sampled split; the unsampled train/test frames
# have no `Class` column, so using them here fails.
model <- train(Class ~ ., data = train_down, method = "glm")
pred <- predict(model, newdata = test_down)
confusionMatrix(pred, test_down$Class)
# ------------------------------------------------------------------
# Model (Up Sampling) [GLM]
# upSample() returns the predictors plus a `Class` column holding the target.
# Seed first: both the up-sampling and the partition draw random numbers.
set.seed(1234)
ups <- upSample(subset(data, select=-HB_YN), data$HB_YN)
# The result is stored in `ups`; `y` is not defined at this point in the script.
table(ups$Class)
index <- createDataPartition(y = ups$Class, p = 0.7, list = FALSE)
train_up <- ups[index, ]
test_up <- ups[-index, ]

# Fit and evaluate on the up-sampled split (not the unsampled train/test frames)
model <- train(Class ~ ., data = train_up, method = "glm")
pred <- predict(model, newdata = test_up)
confusionMatrix(pred, test_up$Class)
summary(model)
# ------------------------------------------------------------------
# Model (Up Sampling) - 1 [GLM], restricted to sex, age, visit days and selected drug flags
model_1 <- train(Class ~ GENDER + AGE + DAY + DRUG_110801ATB + DRUG_111001ACE + DRUG_111001ATE + DRUG_133202ATB + DRUG_136901ATB +
                   DRUG_152103BIJ + DRUG_168602BIJ + DRUG_198403BIJ + DRUG_492501ATB + DRUG_498801ATB + DRUG_498900ATB +
                   DRUG_506100ATB + DRUG_511401ATB + DRUG_511402ATB + DRUG_511403ATB + DRUG_517900ACH + DRUG_617001ATB + DRUG_617002ATB,
                 data = train_up, method = "glm")
summary(model_1)

# Evaluate on the up-sampled hold-out; the unsampled `test` frame has no `Class` column.
pred <- predict(model_1, newdata = test_up)
confusionMatrix(pred, test_up$Class)
# ------------------------------------------------------------------
# Model (Up Sampling) - 1 [RF] (author's note: = 70%)
model_rf_1 <- randomForest(Class ~ ., data = train_up)
pred <- predict(model_rf_1, newdata = test_up, type = "response")
confusionMatrix(pred, test_up$Class)

# ROC curve: ROCR needs continuous scores, so use the predicted probability of
# the second class level rather than the predicted class labels.
prob <- predict(model_rf_1, newdata = test_up, type = "prob")[, 2]
pr <- prediction(prob, test_up$Class)
prf <- performance(pr, measure = "tpr", x.measure = "fpr")
# dev.new() opens a plot window on any platform (win.graph() exists only on Windows)
dev.new(); plot(prf, main='ROC of Test Data')
# ----------------------------------------------------------------------------------------------------------------------------------
# Model (Down Sampling) - 2 [RF] (author's note: = 63.7%)
model_rf_2 <- randomForest(Class ~ ., data = train_down)
pred <- predict(model_rf_2, newdata = test_down)
confusionMatrix(pred, test_down$Class)

# ROC curve on the same hold-out: score with class probabilities and use the
# true labels in test_down$Class (there is no `vote` column in `test`).
prob <- predict(model_rf_2, newdata = test_down, type = "prob")[, 2]
pr <- prediction(prob, test_down$Class)
prf <- performance(pr, measure = "tpr", x.measure = "fpr")
# dev.new() opens a plot window on any platform (win.graph() exists only on Windows)
dev.new(); plot(prf, main='ROC of Test Data')
# ------------------------------------------------------------------
# Model (Up Sampling) - 3 [RF] (author's note: = 67.3%)
# Random forest on sex, age, visit days and the selected drug flags only.
rf3_formula <- Class ~ GENDER + AGE + DAY +
  DRUG_110801ATB + DRUG_111001ACE + DRUG_111001ATE +
  DRUG_133202ATB + DRUG_136901ATB + DRUG_152103BIJ +
  DRUG_168602BIJ + DRUG_198403BIJ + DRUG_492501ATB +
  DRUG_498801ATB + DRUG_498900ATB + DRUG_506100ATB +
  DRUG_511401ATB + DRUG_511402ATB + DRUG_511403ATB +
  DRUG_517900ACH + DRUG_617001ATB + DRUG_617002ATB

model_rf_3 <- randomForest(rf3_formula, data = train_up)

# Confusion matrix on the up-sampled hold-out
pred <- predict(model_rf_3, newdata = test_up)
confusionMatrix(pred, test_up$Class)
# ------------------------------------------------------------------
# Model (Up Sampling) - 1 [xgboost]
# NOTE(review): the data is up-sampled BEFORE the train/test split, so copies of
# the same minority row can land on both sides; the test metrics below are
# likely optimistic - consider splitting first and up-sampling only the training part.
ups <- upSample(subset(data, select=-HB_YN), data$HB_YN)
set.seed(42)
row <- sample(nrow(ups))
ups_shu <- ups[row,]
index <- createDataPartition(y = ups_shu$Class, p = 0.7, list = FALSE)
train_up <- ups_shu[index, ]
test_up <- ups_shu[-index, ]

# Feature matrix and labels; as.numeric() on the factor gives 1/2, hence `y - 1` below
x = train_up %>%
  select(-Class) %>%
  data.matrix
y = as.numeric(train_up$Class)

# `prediction` is an xgb.cv() argument, not an xgboost() one, so it is not passed here
model_xg_1 <- xgboost(data = x, label = y - 1,
                      max.depth = 15, eta = 0.3, nthread = 4, nrounds = 100, objective = "binary:logistic")

test_x = test_up %>%
  select(-Class) %>%
  data.matrix()
test_y = as.numeric(test_up$Class)

# Predicted probabilities and 0/1 classes at a 0.5 threshold.
# The variable `prediction` shares its name with ROCR::prediction(); the call
# below still resolves to the function because R skips non-function objects there.
pred <- predict(model_xg_1, test_x)
prediction <- as.numeric(pred > 0.5)
print(head(prediction))
err <- mean(as.numeric(pred > 0.5) != test_y - 1)
print(paste("test accuracy = ", 1 - err))
caret::confusionMatrix(as.factor(prediction), as.factor(test_y - 1))

# ROC curve (this statement was fused onto the confusionMatrix line, which does not parse)
pr <- prediction(pred, test_y - 1)
prf <- performance(pr, measure = "tpr", x.measure = "fpr")
plot(prf, main='ROC of XGBoost : AUC - 0.86354')
# AUC = 0.8635 as reported by the author
# x axis - FPR (1 - specificity), y axis - TPR (sensitivity)
auc <- performance(pr, measure = "auc")
auc <- auc@y.values[[1]]; auc

# Feature importance
importance <- xgb.importance(model = model_xg_1)
print(xgb.plot.importance(importance_matrix = importance))

# Tree dump as a table
tree_result <- xgb.model.dt.tree(model = model_xg_1)
# ------------------------------------------------------------------
# XGBoost without DAY and AVG_DDCNT.
# Reuses train_up / test_up and the training label vector `y` (values 1/2)
# prepared for model_xg_1, so run this right after that section.
x_1 <- train_up %>%
  select(-Class, -DAY, -AVG_DDCNT) %>%
  data.matrix

# `prediction` is an xgb.cv() argument, not an xgboost() one, so it is not passed here
model_xg_2 <- xgboost(data = x_1, label = y - 1,
                      max.depth = 15, eta = 0.3, nthread = 4, nrounds = 100, objective = "binary:logistic")

test_x_1 = test_up %>%
  select(-Class, -DAY, -AVG_DDCNT) %>%
  data.matrix()
test_y_1 = as.numeric(test_up$Class)

# Predicted probabilities and 0/1 classes at a 0.5 threshold
pred_1 <- predict(model_xg_2, test_x_1)
prediction_1 <- as.numeric(pred_1 > 0.5)
print(head(prediction_1))
err1 <- mean(as.numeric(pred_1 > 0.5) != test_y_1 - 1)
print(paste("test accuracy = ", 1 - err1))
caret::confusionMatrix(as.factor(prediction_1), as.factor(test_y_1 - 1))

# ROC curve (this statement was fused onto the confusionMatrix line, which does not parse)
pr1 <- prediction(pred_1, test_y_1 - 1)
prf1 <- performance(pr1, measure = "tpr", x.measure = "fpr")
plot(prf1, main='ROC of XGBoost : AUC - 0.7465')
# AUC = 0.7465 per the plot title above
# x axis - FPR (1 - specificity), y axis - TPR (sensitivity)
auc1 <- performance(pr1, measure = "auc")
auc1 <- auc1@y.values[[1]]; auc1

# Feature importance
importance <- xgb.importance(model = model_xg_2)
print(xgb.plot.importance(importance_matrix = importance))
# ------------------------------------------------------------------
# Model (Up Sampling) - 2 [xgboost]: 10-fold cross-validation on the shuffled up-sampled data
set.seed(42)
row <- sample(nrow(ups))
ups_shu <- ups[row,]

x = ups_shu %>%
  select(-Class) %>%
  data.matrix
# as.numeric() on the factor gives 1/2, hence `y - 1` for the 0/1 label
y = as.numeric(ups_shu$Class)

model_xg_cv <- xgb.cv(data = x, label = y - 1,
                      nfold = 10, nrounds = 200, early_stopping_rounds = 150, eval_metric = "error",
                      objective = "binary:logistic", verbose = T, prediction = T)

# With binary:logistic the out-of-fold predictions are one probability per row,
# so classes come from a 0.5 threshold. (levels(y) on a numeric vector is NULL and
# max.col() over a single column is always 1, so the earlier lookup could not work.)
pred_df <- data.frame(
  pred = factor(as.integer(as.vector(model_xg_cv$pred) > 0.5), levels = c(0, 1)),
  actual = factor(y - 1, levels = c(0, 1))
)
pred_df %>% select(pred, actual) %>% table
caret::confusionMatrix(pred_df$pred, pred_df$actual)
# Plot train vs. test metric per boosting round for an xgb.cv() result.
#
# model: object returned by xgb.cv(); its `evaluation_log`, `folds` and `niter`
#        elements are read.
# Returns a ggplot object (printed when called at top level).
#
# Assumes the evaluation log columns are laid out as
# iter, train mean, train std, test mean, test std - TODO confirm for multi-metric runs.
cvplot = function(model){
  eval.log = model$evaluation_log
  n_round = nrow(eval.log)

  # Metric name for the y axis, e.g. "train_error_mean" -> "error"
  metric = names(eval.log)[2] %>% gsub('train_','',.) %>% gsub('_mean','',.)

  # Column 2 = train mean, column 4 = test mean.
  # (The train column was previously indexed as `eval.log[.2]`, a typo for `[,2]`.)
  data.frame(error = c(eval.log[[2]], eval.log[[4]]),
             class = rep(c('train', 'test'), each = n_round),
             nround = rep(seq_len(n_round), 2)
  ) %>%
    ggplot(aes(nround, error, col = class)) +
    geom_point(alpha = 0.2) +
    geom_smooth(alpha = 0.4, se = F) +
    theme_bw() +
    ggtitle("XGBoost Cross-validation",
            subtitle = paste0('fold : ', length(model$folds),
                              ' iteration : ', model$niter)
    ) + ylab(metric) + theme(axis.title = element_text(size = 11))
}
cvplot(model_xg_cv)

5. Conclusion

--

--