### 12 March 2009 ### ### this example requires "sda" in version 1.1.0 (or later) ### library("sda") ############################################# data(khan2001) # create data set containing only the SRBCT samples del.idx = which( khan2001$y == "non-SRBCT" ) srbct.x = khan2001$x[-del.idx,] srbct.y = factor(khan2001$y[-del.idx]) dim(srbct.x) levels(srbct.y) # divide into training and test data Xtrain = srbct.x[1:63,] Ytrain = srbct.y[1:63] Xtest = srbct.x[64:83,] Ytest = srbct.y[64:83] ############################################## ## DDA analysis ############################## ############################################## ### step 1 - feature ranking ra = sda.ranking(Xtrain, Ytrain, fdr=TRUE, plot.fdr=TRUE, diagonal=TRUE) sum( ra[, "lfdr"]< 0.80) # 90 genes included in classifier (by FNDR control) which.max( ra[, "HC"] ) # 138 genes according to HC criterion plot(ra, top=90) # pick the top 90 genes idx = ra[1:90,"idx"] Xtrain2 = Xtrain[,idx] Xtest2 = Xtest[,idx] ### step 2 - training the classifier sda.fit = sda(Xtrain2, Ytrain, diagonal=TRUE) ### step 3 - prediction predict(sda.fit, Xtest2) ynew = predict(sda.fit, Xtest2)$class sum(ynew != Ytest) # 0 ############################################## ## LDA analysis ############################## ############################################## ### step 1 - feature ranking ra = sda.ranking(Xtrain, Ytrain, fdr=TRUE, plot.fdr=TRUE) sum( ra[, "lfdr"]< 0.80) # 89 genes included in classifier (by FNDR control) which.max( ra[, "HC"] ) # 174 genes according to HC criterion plot(ra, top=89) # pick the top 89 genes idx = ra[1:89,"idx"] Xtrain2 = Xtrain[,idx] Xtest2 = Xtest[,idx] ### step 2 - training the classifier sda.fit = sda(Xtrain2, Ytrain) ### step 3 - prediction predict(sda.fit, Xtest2) ynew = predict(sda.fit, Xtest2)$class sum(ynew != Ytest) # 0