### 12 March 2009 ###

### This example requires "sda" in version 1.1.0 (or later) ###

library("sda")


# Singh et al. (2002) gene expression data ----

data(singh2002)

# Training matrix and the matching class labels shipped with the data set
Xtrain <- singh2002$x
Ytrain <- singh2002$y

dim(Xtrain)     # 102 6033
length(Ytrain)  # 102


# Feature ranking (diagonal covariance, DDA) ----

# Rank features using t-scores (DDA)
ranking.DDA <- sda.ranking(Xtrain, Ytrain, diagonal = TRUE)
ranking.DDA[1:10, ]

# Plot t-scores for the top 40 genes
plot(ranking.DDA, top = 40)

# Number of features with local FDR < 0.8
# (i.e. features useful for prediction)
sum(ranking.DDA[, "lfdr"] < 0.8)  # 166

# Number of features with local FDR < 0.2
# (i.e. significant non-null features)
sum(ranking.DDA[, "lfdr"] < 0.2)  # 53

# Optimal feature set according to the HC score
# (the maximum is searched among the 1000 top-ranked features only)
plot(ranking.DDA[, "HC"], type = "l")
which.max(ranking.DDA[1:1000, "HC"])  # 129


# Feature ranking (full covariance, LDA) ----

# Rank features using cat-scores (LDA)
ranking.LDA <- sda.ranking(Xtrain, Ytrain, diagonal = FALSE)
ranking.LDA[1:10, ]

# Plot cat scores for the top 40 genes
plot(ranking.LDA, top = 40)

# Number of features with local FDR < 0.8
# (i.e. features useful for prediction)
sum(ranking.LDA[, "lfdr"] < 0.8)  # 131

# Number of features with local FDR < 0.2
# (i.e. significant non-null features)
sum(ranking.LDA[, "lfdr"] < 0.2)  # 62

# Optimal feature set according to the HC score
# (the maximum is searched among the 1000 top-ranked features only)
plot(ranking.LDA[, "HC"], type = "l")
which.max(ranking.LDA[1:1000, "HC"])  # 116