15 from sys
import argv, exit
def err(line_no, msg):
    """Print a diagnostic for one input line to standard output.

    The text written is ``line <line_no>: <msg>`` followed by a newline.

    Args:
        line_no: Identifier of the offending line. It is rendered through
            ``str.format``, so any formattable value is accepted.
        msg: Human-readable description of the problem.

    Returns:
        None.
    """
    # Keep the exact format string: callers and users rely on this wording.
    print("line {0}: {1}".format(line_no, msg))
23 if x.lower().find(
"nan") != -1
or x.lower().find(
"inf") != -1:
30 print(
"Usage: {0} dataset".format(argv[0]))
35 if not os.path.exists(dataset):
36 print(
"dataset {0} not found".format(dataset))
41 for line
in open(dataset,
'r'): 46 err(line_no,
"missing a newline character in the end")
55 if label.find(
',') != -1:
58 for l
in label.split(
','):
61 err(line_no,
"label {0} is not a valid multi-label form".format(label))
67 err(line_no,
"label {0} is not a number".format(label))
70 err(line_no,
"missing label, perhaps an empty line?")
75 for i
in range(len(nodes)):
77 (index, value) = nodes[i].split(
':')
85 err(line_no,
"feature index must be positive; wrong feature {0}".format(nodes[i]))
87 elif index <= prev_index:
88 err(line_no,
"feature indices must be in an ascending order, previous/current features {0} {1}".format(nodes[i-1], nodes[i]))
92 err(line_no,
"feature '{0}' not an <index>:<value> pair, <index> integer, <value> real number ".format(nodes[i]))
100 if error_line_count > 0:
101 print(
"Found {0} lines with error.".format(error_line_count))
107 if __name__ ==
"__main__":