wouse: svm_ROC.py Source File

Go to the documentation of this file.
00001 #!/usr/bin/env python
00002 
00003 import pickle
00004 import numpy as np
00005 from scipy import interp
00006 import pylab as pl
00007 
00008 from sklearn import preprocessing as pps, svm
00009 from sklearn.metrics import roc_curve, auc
00010 from sklearn.cross_validation import StratifiedKFold, LeaveOneOut
00011 
# Read the pickled dataset: a dict whose 'data' entry holds the samples and
# whose 'labels' entry holds the matching class labels.
with open('../data/svm_data.pkl', 'rb') as f:
    svm_data = pickle.load(f)
data, labels = svm_data['data'], svm_data['labels']
00016 
00017 scaler = pps.Scaler().fit(data)
00018 print "Mean: ", scaler.mean_
00019 print "Std: ", scaler.std_
00020 data_scaled = scaler.transform(data)
00021 
00022 classifier = svm.SVC(probability=True)
00023 classifier.fit(data_scaled, labels)
00024 
00025 #print "Support Vectors: \r\n", classifier.support_vectors_
00026 print "SV's per class: \r\n", classifier.n_support_
00027 
00028 
00029 ###############################################################################
00030 ## Code below modified from http://scikit-learn.org/stable/auto_examples/plot_roc_crossval.html#example-plot-roc-crossval-py
00031 X, y = data_scaled, np.array(labels)
00032 n_samples, n_features = X.shape
00033 print n_samples, n_features
00034 
###############################################################################
# Classification and ROC analysis
# Run the classifier with stratified cross-validation and plot one ROC curve
# per fold plus their point-wise mean.
cv = StratifiedKFold(y, k=9)

# Shared false-positive-rate grid: every fold's curve is resampled onto it so
# the true-positive rates can be averaged across folds.
mean_fpr = np.linspace(0, 1, n_samples)
# Starts as a scalar; the first `+=` below rebinds it to an array.
mean_tpr = 0.0

for i, (train, test) in enumerate(cv):
    # Refit on this fold's training split and score the held-out samples.
    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
    # Compute the ROC curve and the area under it; column 1 of probas_ is
    # passed to roc_curve as the score.
    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
    # np.interp is the routine the scipy.interp alias forwarded to; calling
    # it directly avoids depending on that alias.
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    mean_tpr[0] = 0.0  # pin the start of the averaged curve at TPR 0
    roc_auc = auc(fpr, tpr)
    pl.plot(fpr, tpr, '--', lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

# Diagonal reference line, labelled 'Luck' in the legend.
pl.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

# Turn the accumulated sum into a mean and pin the end point at TPR 1.
mean_tpr /= len(cv)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
pl.plot(mean_fpr, mean_tpr, 'k-', lw=3,
        label='Mean ROC (area = %0.2f)' % mean_auc)
00060 
# Final figure cosmetics: title and axis names first, then clamp both axes to
# the unit interval, add the legend for the curves plotted above, and display.
pl.title('Receiver Operating Characteristic')
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.xlim([0, 1])
pl.ylim([0, 1])
pl.legend(loc="lower right")
pl.show()