data_process.py
Go to the documentation of this file.
1 '''
2 ROS Anomaly Detector Framework
3 
4 Author:
5  Vedanth Narayanan
6 File:
7  Data Process class
8 Date:
9  4 May, 2018
10 '''
11 
12 import os
13 from sklearn.preprocessing import MinMaxScaler
14 from sklearn.decomposition import PCA
15 from sklearn.utils import shuffle
16 import numpy as np
17 from sklearn.externals import joblib
18 import pandas as pd
19 
20 
21 class DataProcess(object):
22  """
23  Specifically for processing data and getting it ready
24  for anomaly detection.
25  """
26 
27  def __init__(self, comps):
28  self.__scale = MinMaxScaler(feature_range=(0, 1))
29  self.__reduction_size = comps
30 
31  if comps > 0:
32  self.__reduction = PCA(n_components=comps)
33  else:
34  self.__reduction = None
35 
36 
37  def split(self, in_file, x_cols, y_cols):
38  ''' Read csv, to dataframe, split to x and y. '''
39 
40 
41  dataframe = pd.read_csv(in_file, header=None, engine='python')
42  dataset = dataframe.values
43  data = shuffle(dataset)
44 
45  # Gets rid of redundant entries, more accuracte
46  data = np.unique(data, axis=0)
47 
48  dataframe_x = data[:, x_cols]
49  dataframe_y = data[:, y_cols]
50 
51  return dataframe_x, dataframe_y
52 
53 
54  def scaler_fit(self, data):
55  ''' Fit scaling object '''
56  # NOTE: If model exists, load. or fit and save
57 
58  # print os.getcwd()
59  saveloc = '../data/scale.pkl'
60  if os.path.exists(saveloc):
61  self.__scale = joblib.load(saveloc)
62  data = self.__scale.transform(data)
63  else:
64  data = self.__scale.fit_transform(data)
65  joblib.dump(self.__scale, saveloc)
66 
67  return data
68 
69 
70  def scaler_transform(self, data):
71  ''' Data supplied is scaled and returned. '''
72  transformed_data = self.__scale.transform(data)
73  return transformed_data
74 
75 
76  def reduction_fit(self, data):
77  ''' Fit PCA decomposition. '''
78  # NOTE: If model exists, load. or fit and save
79 
80  if self.__reduction:
81  saveloc = '../data/reduce.pkl'
82  if os.path.exists(saveloc):
83  self.__reduction = joblib.load(saveloc)
84  data = self.__reduction.transform(data)
85  else:
86  data = self.__reduction.fit(data)
87  joblib.dump(self.__reduction, saveloc)
88 
89  return data
90 
91 
92  def reduction_transform(self, data):
93  ''' Reduce dimensionality of given data. '''
94 
95  if self.__reduction:
96  transformed_data = self.__reduction.transform(data)
97  return transformed_data
98  return data
99 
100 
102  ''' Return number of components passed to PCA'''
103 
104  return self.__reduction_size
def split(self, in_file, x_cols, y_cols)
Definition: data_process.py:37


mh5_anomaly_detector
Author(s): Vedanth Narayanan
autogenerated on Mon Jun 10 2019 13:49:20