00001
00002
00003 import pickle
00004 import numpy as np
00005 from scipy import interp
00006 import pylab as pl
00007
00008 from sklearn import preprocessing as pps, svm
00009 from sklearn.metrics import roc_curve, auc
00010 from sklearn.cross_validation import StratifiedKFold, LeaveOneOut
00011
# Train an SVC on the pickled data set, print basic statistics, then plot the
# per-fold and mean ROC curves obtained from a stratified 9-fold split.

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this against a svm_data.pkl that comes from a trusted source.
with open('../data/svm_data.pkl', 'rb') as f:
    svm_data = pickle.load(f)
labels = svm_data['labels']
data = svm_data['data']

# Fit the scaler on the whole data set and report its fitted statistics.
# Every print below passes one pre-formatted string inside parentheses, so the
# line parses both as a Python 2 print statement and as a Python 3 call while
# emitting the same text the comma-separated statements produced.
scaler = pps.Scaler().fit(data)
print("%s %s" % ("Mean: ", scaler.mean_))
print("%s %s" % ("Std: ", scaler.std_))
data_scaled = scaler.transform(data)

# Fit once on all scaled samples to report the support-vector counts.
# probability=True is what makes predict_proba() available in the loop below.
classifier = svm.SVC(probability=True)
classifier.fit(data_scaled, labels)

# No separator here: the Python 2 statement emitted no space after "\r\n".
print("%s%s" % ("SV's per class: \r\n", classifier.n_support_))

X, y = data_scaled, np.array(labels)
n_samples, n_features = X.shape
print("%s %s" % (n_samples, n_features))

cv = StratifiedKFold(y, k=9)

# Every fold's ROC curve is resampled onto this shared false-positive-rate
# grid so the true-positive rates can be summed point by point.
mean_fpr = np.linspace(0, 1, n_samples)
# Start from an array (not the scalar 0.0) so the accumulator never changes
# type and can be indexed even before the first fold is added.
mean_tpr = np.zeros_like(mean_fpr)
all_tpr = []

for i, (train, test) in enumerate(cv):
    # The same classifier object is refitted on each training fold.
    probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])

    # Column 1 of the predicted probabilities is used as the ROC score.
    fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
    # np.interp is the function the deprecated scipy.interp alias pointed to.
    mean_tpr += np.interp(mean_fpr, fpr, tpr)
    roc_auc = auc(fpr, tpr)
    pl.plot(fpr, tpr, '--', lw=1,
            label='ROC fold %d (area = %0.2f)' % (i, roc_auc))

# Diagonal reference line, drawn once so the legend has a single entry.
pl.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')

# Average over the folds, then pin both ends of the mean curve to the ROC
# corners (0, 0) and (1, 1).
mean_tpr /= len(cv)
mean_tpr[0] = 0.0
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
pl.plot(mean_fpr, mean_tpr, 'k-', lw=3,
        label='Mean ROC (area = %0.2f)' % mean_auc)

pl.xlim([0, 1])
pl.ylim([0, 1])
pl.xlabel('False Positive Rate')
pl.ylabel('True Positive Rate')
pl.title('Receiver Operating Characteristic')
pl.legend(loc="lower right")
pl.show()