生存资料校准曲线calibration curve的绘制
生活随笔
收集整理的這篇文章主要介紹了
生存资料校准曲线calibration curve的绘制
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
本文首發于公眾號:醫學和生信筆記
“醫學和生信筆記,專注R語言在臨床醫學中的使用,R語言數據分析和可視化。主要分享R語言做醫學統計學、meta分析、網絡藥理學、臨床預測模型、機器學習、生物信息學等。
前面我們已經講過logistic模型的校準曲線的畫法,這次我們學習生存資料的校準曲線畫法。
- 加載R包和數據
- calibration 方法1
- calibration 方法2
加載R包和數據
library(survival)library(rms)
##?Loading?required?package:?Hmisc
##?Loading?required?package:?lattice
##?Loading?required?package:?Formula
##?Loading?required?package:?ggplot2
##?
##?Attaching?package:?'Hmisc'
##?The?following?objects?are?masked?from?'package:base':
##?
##?????format.pval,?units
##?Loading?required?package:?SparseM
##?
##?Attaching?package:?'SparseM'
##?The?following?object?is?masked?from?'package:base':
##?
##?????backsolve
試試使用自帶數據lung數據集進行演示。
大多數情況下都是使用1代表死亡,0代表刪失,這個數據集用2代表死亡。但有的R包會報錯,需要注意!
rm(list?=?ls())dim(lung)
##?[1]?228??10
str(lung)
##?'data.frame':?228?obs.?of??10?variables:
##??$?inst?????:?num??3?3?3?5?1?12?7?11?1?7?...
##??$?time?????:?num??306?455?1010?210?883?...
##??$?status???:?num??2?2?1?2?2?1?2?2?2?2?...
##??$?age??????:?num??74?68?56?57?60?74?68?71?53?61?...
##??$?sex??????:?num??1?1?1?1?1?1?2?2?1?1?...
##??$?ph.ecog??:?num??1?0?0?1?0?1?2?2?1?2?...
##??$?ph.karno?:?num??90?90?90?90?100?50?70?60?70?70?...
##??$?pat.karno:?num??100?90?90?60?90?80?60?80?80?70?...
##??$?meal.cal?:?num??1175?1225?NA?1150?NA?...
##??$?wt.loss??:?num??NA?15?15?11?0?0?10?1?16?34?...
library(dplyr)
##?
##?Attaching?package:?'dplyr'
##?The?following?objects?are?masked?from?'package:Hmisc':
##?
##?????src,?summarize
##?The?following?objects?are?masked?from?'package:stats':
##?
##?????filter,?lag
##?The?following?objects?are?masked?from?'package:base':
##?
##?????intersect,?setdiff,?setequal,?union
library(tidyr)
df1?<-?lung?%>%?
??mutate(status=ifelse(status?==?1,1,0))
str(lung)
##?'data.frame':?228?obs.?of??10?variables:
##??$?inst?????:?num??3?3?3?5?1?12?7?11?1?7?...
##??$?time?????:?num??306?455?1010?210?883?...
##??$?status???:?num??2?2?1?2?2?1?2?2?2?2?...
##??$?age??????:?num??74?68?56?57?60?74?68?71?53?61?...
##??$?sex??????:?num??1?1?1?1?1?1?2?2?1?1?...
##??$?ph.ecog??:?num??1?0?0?1?0?1?2?2?1?2?...
##??$?ph.karno?:?num??90?90?90?90?100?50?70?60?70?70?...
##??$?pat.karno:?num??100?90?90?60?90?80?60?80?80?70?...
##??$?meal.cal?:?num??1175?1225?NA?1150?NA?...
##??$?wt.loss??:?num??NA?15?15?11?0?0?10?1?16?34?...
calibration 方法1
dd?<-?datadist(df1)options(datadist?=?"dd")
構建cox比例風險模型:
#?1年coxfit1?<-?cph(Surv(time,?status)?~?age?+?sex?+?ph.ecog?+?ph.karno?+?pat.karno,
??????????????data?=?df1,?x=T,y=T,surv?=?T,
??????????????time.inc?=?365?#?1?年
??????????????)
#?m=50表示每次計算50個樣本,一般取4-6個點,u=365和上面的time.inc對應
cal1?<-?calibrate(coxfit1,?cmethod="KM",?method="boot",u=365,m=50,B=500)?
##?Using?Cox?survival?estimates?at?365?Days
然后就是畫圖:
plot(cal1,?????#lwd?=?2,?#?誤差線粗細
?????lty?=?0,?#?誤差線類型,可選0-6
?????errbar.col?=?c("#2166AC"),?#?誤差線顏色
?????xlim?=?c(0.4,1),ylim=?c(0.4,1),
?????xlab?=?"Nomogram-prediced?OS?(%)",ylab?=?"Observed?OS?(%)",
?????cex.lab=1.2,?cex.axis=1,?cex.main=1.2,?cex.sub=0.6)?#?字體大小
lines(cal1[,c('mean.predicted',"KM")],?
??????type?=?'b',?#?連線的類型,可以是"p","b","o"
??????lwd?=?3,?#?連線的粗細
??????pch?=?16,?#?點的形狀,可以是0-20
??????col?=?"tomato")?#?連線的顏色
box(lwd?=?2)?#?邊框粗細
abline(0,1,lty?=?3,?#?對角線為虛線
???????lwd?=?2,?#?對角線的粗細
???????col?=?"grey70"?#?對角線的顏色
???????)?
unnamed-chunk-6-147478960
再介紹一下多個校正曲線圖形畫在一起的方法。
#?2年coxfit2?<-?cph(Surv(time,?status)?~?age?+?sex?+?ph.ecog?+?ph.karno?+?pat.karno,
??????????????data?=?df1,?x=T,y=T,surv?=?T,
??????????????time.inc?=?730?#?2?年
??????????????)
cal2?<-?calibrate(coxfit2,?cmethod="KM",?method="boot",u=730,m=50,B=500)?
##?Using?Cox?survival?estimates?at?730?Days
畫圖:
plot(cal1,lwd?=?2,lty?=?0,errbar.col?=?c("#2166AC"),?????xlim?=?c(0,1),ylim=?c(0,1),
?????xlab?=?"Nomogram-prediced?OS?(%)",ylab?=?"Observed?OS?(%)",
?????col?=?c("#2166AC"),
?????cex.lab=1.2,cex.axis=1,?cex.main=1.2,?cex.sub=0.6)
lines(cal1[,c('mean.predicted',"KM")],
??????type?=?'b',?lwd?=?3,?col?=?c("#2166AC"),?pch?=?16)
plot(cal2,lwd?=?2,lty?=?0,errbar.col?=?c("#B2182B"),
?????xlim?=?c(0,1),ylim=?c(0,1),col?=?c("#B2182B"),add?=?T)
lines(cal2[,c('mean.predicted',"KM")],
??????type?=?'b',?lwd?=?3,?col?=?c("#B2182B"),?pch?=?16)
abline(0,1,?lwd?=?2,?lty?=?3,?col?=?c("#224444"))
legend("bottomright",?#圖例的位置
???????legend?=?c("5-year","8-year"),?#圖例文字
???????col?=c("#2166AC","#B2182B"),?#圖例線的顏色,與文字對應
???????lwd?=?2,#圖例中線的粗細
???????cex?=?1.2,#圖例字體大小
???????bty?=?"n")#不顯示圖例邊框
unnamed-chunk-8-147478960
calibration 方法2
不過這種方法是把多個模型放在一張圖上,不是同一個模型對應多個時間點。
這種方法不能有缺失值。
#?刪除缺失值df2?<-?na.omit(df1)
library(survival)
#?構建模型
cox_fit1?<-?coxph(Surv(time,?status)?~?age?+?sex?+?ph.ecog?+?ph.karno?+?pat.karno,
??????????????data?=?df2,x?=?T,?y?=?T)
cox_fit2?<-?coxph(Surv(time,?status)?~?age?+?ph.ecog?+?ph.karno,
??????????????data?=?df2,x?=?T,?y?=?T)
#?畫圖
library(riskRegression)
##?riskRegression?version?2022.03.22
set.seed(1)
cox_fit_s?<-?Score(list("fit1"?=?cox_fit1,
????????????????????????"fit2"?=?cox_fit2),
???????????????formula?=?Surv(time,?status)?~?1,
???????????????data?=?df2,
???????????????#metrics?=?c("auc","brier"),
???????????????#summary?=?c("risks","IPA","riskQuantile","ibs"),
???????????????plots?=?"calibration",
???????????????#null.model?=?T,
???????????????conf.int?=?T,
???????????????B?=?500,
???????????????M?=?50,
???????????????times=c(700)?#?limit?the?time?range
???????????????)
plotCalibration(cox_fit_s,
????????????????xlab?=?"Predicted?Risk",
????????????????ylab?=?"Observerd?RISK")
##?The?default?method?for?estimating?calibration?curves?based?on?censored?data?has?changed?for?riskRegression?version?2019-9-8?or?higher
##?Set?cens.method="jackknife"?to?get?the?estimate?using?pseudo-values.
##?However,?note?that?the?option?"jackknife"?is?sensititve?to?violations?of?the?assumption?that?the?censoring?is?independent?of?both?the?event?times?and?the?covariates.
##?Set?cens.method="local"?to?suppress?this?message.
當然也是可以用ggplot2畫圖的。
#?獲取數據data_all?<-?plotCalibration(cox_fit_s,plot?=?F)
##?The?default?method?for?estimating?calibration?curves?based?on?censored?data?has?changed?for?riskRegression?version?2019-9-8?or?higher
##?Set?cens.method="jackknife"?to?get?the?estimate?using?pseudo-values.
##?However,?note?that?the?option?"jackknife"?is?sensititve?to?violations?of?the?assumption?that?the?censoring?is?independent?of?both?the?event?times?and?the?covariates.
##?Set?cens.method="local"?to?suppress?this?message.
#?數據轉換
plot_df?<-?bind_rows(data_all$plotFrames)?%>%?
??mutate(fits?=?rep(c("fit1","fit2"),c(56,48)))
#?畫圖
ggplot(plot_df,?aes(Pred,Obs))+
??geom_line(aes(group=fits,color=fits),size=1.2)+
??scale_color_manual(values?=?c("#2166AC","tomato"),name=NULL)+
??scale_x_continuous(limits?=?c(0,1),name?=?"Predicted?Risk")+
??scale_y_continuous(limits?=?c(0,1),name?=?"Observerd?Risk")+
??geom_abline(slope?=?1,intercept?=?0,lty=2)+
??geom_rug(aes(color=fits))+
??theme_bw()
unnamed-chunk-12-147478960
本文首發于公眾號:醫學和生信筆記
“醫學和生信筆記,專注R語言在臨床醫學中的使用,R語言數據分析和可視化。主要分享R語言做醫學統計學、meta分析、網絡藥理學、臨床預測模型、機器學習、生物信息學等。
本文由 mdnice 多平臺發布
總結
以上是生活随笔為你收集整理的生存资料校准曲线calibration curve的绘制的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 崭新的诺基亚:诺记能否借Windows
- 下一篇: FISCO BCOS工程师常用的性能分析