00001 package edu.tum.cs.clustering;
00002 import weka.clusterers.*;
00003 import weka.core.*;
00004 import java.util.*;
00005 import java.math.*;
00006
00012 public interface ClusterNamer<Cl extends Clusterer> {
/**
 * Produces the names for the clusters of the given clusterer.
 *
 * @param clusterer the clusterer whose clusters are to be named
 * @return the cluster names; the implementations in this file return an
 *         array with one entry per cluster
 * @throws Exception if an implementation is unable to produce the names
 */
00013 public String[] getNames(Cl clusterer) throws Exception;
00014
00019 public static class SimplePrefix implements ClusterNamer<Clusterer> {
00020 protected String prefix;
00021
00022 public SimplePrefix(String prefix) {
00023 this.prefix = prefix;
00024 }
00025
00026 public String[] getNames(Clusterer clusterer) throws Exception {
00027 int n = clusterer.numberOfClusters();
00028 String[] names = new String[n];
00029 for(Integer i = 0; i < n; i++)
00030 names[i] = prefix + i.toString();
00031 return names;
00032 }
00033 }
00034
00039 public static class Fixed implements ClusterNamer<Clusterer> {
00040 protected String[] names;
00041
00042 public Fixed(String[] names) {
00043 this.names = names;
00044 }
00045
00046 public String[] getNames(Clusterer clusterer) throws Exception {
00047 if(clusterer.numberOfClusters() != names.length)
00048 throw new Exception("Number of clusters does not match number of names.");
00049 return names;
00050 }
00051 }
00052
00058 public static class MeanStdDev implements ClusterNamer<SimpleKMeans> {
00059 public String[] getNames(SimpleKMeans clusterer) {
00060 int numClusters = clusterer.getNumClusters();
00061 String[] ret = new String[numClusters];
00062 Instances centroids = clusterer.getClusterCentroids();
00063 Instances stdDevs = clusterer.getClusterStandardDevs();
00064 for(int i = 0; i < numClusters; i++)
00065 ret[i] = String.format("~%.2f +/- %.2f", centroids.instance(i).value(0), stdDevs.instance(i).value(0));
00066 return ret;
00067 }
00068 }
00069
00076 public static class Intervals implements ClusterNamer<SimpleKMeans> {
00085 public static double getIntersection(double e1, double s1, double e2, double s2) {
00086 if(s2 == 0)
00087 return e2;
00088 if(s1 == s2)
00089 return (e1 + e2) / 2;
00090 double r1 = 1.0/2/(s1*s1-s2*s2)*(2*s1*s1*e2-2*s2*s2*e1+2*Math.sqrt(-2*s1*s1*e2*s2*s2*e1+s1*s1*s2*s2*e1*e1-2*s1*s1*s1*s1*Math.log(s2/s1)*s2*s2+s2*s2*s1*s1*e2*e2+2*s2*s2*s2*s2*Math.log(s2/s1)*s1*s1));
00091 if((e1 <= r1 && r1 <= e2) || (e2 <= r1 && r1 <= e1))
00092 return r1;
00093 double r2 = 1.0/2/(s1*s1-s2*s2)*(2*s1*s1*e2-2*s2*s2*e1-2*Math.sqrt(-2*s1*s1*e2*s2*s2*e1+s1*s1*s2*s2*e1*e1-2*s1*s1*s1*s1*Math.log(s2/s1)*s2*s2+s2*s2*s1*s1*e2*e2+2*s2*s2*s2*s2*Math.log(s2/s1)*s1*s1));
00094 return r2;
00095 }
00096
00097 public String[] getNames(SimpleKMeans clusterer) {
00098 int numClusters = clusterer.getNumClusters();
00099 String[] ret = new String[numClusters];
00100 double[] centroids = clusterer.getClusterCentroids().attributeToDoubleArray(0);
00101 double[] stdDevs = clusterer.getClusterStandardDevs().attributeToDoubleArray(0);
00102 double[] sortedCentroids = centroids.clone();
00103 Arrays.sort(sortedCentroids);
00104 int[] sortOrder = new int[numClusters];
00105 for(int i = 0; i < numClusters; i++)
00106 for(int j = 0; j < numClusters; j++)
00107 if(centroids[j] == sortedCentroids[i])
00108 sortOrder[i] = j;
00109 boolean lastNoInterv = false;
00110 for(int i = 0; i < numClusters; i++) {
00111 int idx = sortOrder[i];
00112 if(stdDevs[idx] == 0.0) {
00113 ret[idx] = String.format("%.2f", centroids[idx]);
00114 continue;
00115 }
00116 if(i == 0) {
00117 ret[idx] = String.format("< %.2f (~%.2f)", getIntersection(centroids[idx], stdDevs[idx], centroids[sortOrder[1]], stdDevs[sortOrder[1]]), centroids[idx]);
00118 continue;
00119 }
00120 if(i == numClusters-1) {
00121 ret[idx] = String.format("> %.2f (~%.2f)", getIntersection(centroids[idx], stdDevs[idx], centroids[sortOrder[i-1]], stdDevs[sortOrder[i-1]]), centroids[idx]);
00122 continue;
00123 }
00124 double left = getIntersection(centroids[idx], stdDevs[idx], centroids[sortOrder[i-1]], stdDevs[sortOrder[i-1]]);
00125 double right = getIntersection(centroids[idx], stdDevs[idx], centroids[sortOrder[i+1]], stdDevs[sortOrder[i+1]]);
00126 ret[idx] = String.format("%.2f - %.2f (~%.2f)", left, right, centroids[idx]);
00127 }
00128 return ret;
00129 }
00130 }
00131 }
00132