OmniSciDB  c07336695a
analyze-benchmark.py
Go to the documentation of this file.
1 import json
2 import sys
3 from os import listdir
4 import os.path
5 import getopt
6 
# Loads one benchmark JSON file at a time and provides access to its data.
8 
9 
def __init__(self, dir_name, filename_list):
    """Remember the benchmark directory and its file names; no data loaded yet."""
    self.dir_name, self.filename_list = dir_name, filename_list
    self.data = []
15 
# reading the benchmark json file
def load(self, bench_filename):
    """Read one benchmark JSON file and keep its experiments sorted by query_id."""
    assert bench_filename in self.filename_list

    path = self.dir_name + bench_filename
    with open(path) as json_file:
        experiments = json.load(json_file)
    self.data = sorted(
        experiments, key=lambda experiment: experiment["results"]["query_id"]
    )
25 
def getFrontAttribute(self, attribute):
    """Return ``attribute`` from the first loaded experiment.

    Returns the string "None" (not the ``None`` object) when no data is loaded.
    """
    if not self.data:
        return "None"
    return self.data[0]["results"][attribute]
31 
33  return self.getFrontAttribute("run_label")
34 
def getGpuName(self):
    """Name of the GPU the benchmark ran on (from the first experiment)."""
    return self.getFrontAttribute("run_gpu_name")
37 
def getRunTableName(self):
    """Table the benchmark queries ran against (from the first experiment)."""
    return self.getFrontAttribute("run_table")
40 
# return a list of the attribute, from queries in query_names, stored in self.data
def fetchAttribute(self, attribute, query_names):
    """Return ``attribute``'s value for each query in ``query_names``, in order.

    Queries with no matching experiment in ``self.data`` are silently
    skipped, matching the original behavior. A single pass over
    ``self.data`` builds a query_id -> value index, replacing the original
    O(queries x experiments) nested scan; the assert now validates every
    experiment instead of only the ones scanned before a match.
    """
    by_query = {}
    for experiment in self.data:
        results = experiment["results"]
        assert attribute in results, (
            attribute + " is not a valid attribute."
        )
        # Keep the first occurrence: the original scan stopped at the
        # first match for each query name.
        by_query.setdefault(results["query_id"], results[attribute])
    return [by_query[name] for name in query_names if name in by_query]
53 
def fetchQueryNames(self):
    """List the query_id of every loaded experiment, in data order."""
    return [experiment["results"]["query_id"] for experiment in self.data]
59 
60 
62  def __init__(self, ref, sample, attribute):
63  assert isinstance(ref, BenchmarkLoader)
64  assert isinstance(sample, BenchmarkLoader)
65  self.__header_info = [ref.getRunTableName(), attribute]
66  self.__label_name_ref = ref.fetchQueryNames()
67  self.__label_name_sample = sample.fetchQueryNames()
71  assert self.__label_name_ref == self.__label_name_sample
72  self.__attribute_ref = ref.fetchAttribute(
73  attribute, self.__label_name_ref
74  )
75  self.__attribute_sample = sample.fetchAttribute(
76  attribute, self.__label_name_sample
77  )
78 
79  # collects all those queries that does not exist in both of the results
81  for query in self.__label_name_ref:
82  if query not in self.__label_name_sample:
84  self.__label_name_ref.remove(query)
85  for query in self.__label_name_sample:
86  if query not in self.__label_name_ref:
87  self.__missing_queries_ref.append(query)
88  self.__label_name_sample.remove(query)
89 
def printHeader(self):
    """Emit the header fields inline, each preceded by a space, no newline."""
    print("".join(" " + field for field in self.__header_info), end="")
93 
94  def findAnomaliesRatio(self, epsilon):
95  found = False
96  speedup = compute_speedup(
98  )
99  print("Differences outside of %2.0f%%: " % (epsilon * 100), end="")
100  self.printHeader()
101  for i in range(len(speedup)):
102  if abs(speedup[i] - 1.0) > epsilon:
103  if found == False:
104  found = True
105  print(
106  "\n%s: reference = %.2f ms, sample = %.2f ms, speedup = %.2fx"
107  % (
108  self.__label_name_ref[i],
109  self.__attribute_ref[i],
110  self.__attribute_sample[i],
111  speedup[i],
112  ),
113  end="",
114  )
115  if found == False:
116  print(": None", end="")
117  if self.__missing_queries_ref:
118  print("\n*** Missing queries from reference: ", end="")
119  for query in self.__missing_queries_ref:
120  print(query + " ", end="")
121  if self.__missing_queries_sample:
122  print("\n*** Missing queries from sample: ", end="")
123  for query in self.__missing_queries_sample:
124  print(query + " ", end="")
125  print(
126  "\n======================================================================="
127  )
128 
129 
def compute_speedup(x, y):
    """Element-wise ratio x[i] / y[i]; iterates over the shorter of the two."""
    return [ref / sample for ref, sample in zip(x, y)]
136 
137 
139  def __init__(self, ref, sample, attribute, num_items_per_line=5):
140  assert isinstance(ref, BenchmarkLoader)
141  assert isinstance(sample, BenchmarkLoader)
142  self.__header_info = [
143  ref.getRunTableName(),
144  attribute,
145  ref.getGpuName(),
146  ]
147  self.__num_items_per_line = num_items_per_line
148  self.__label_name_ref = ref.fetchQueryNames()
149  self.__label_name_sample = sample.fetchQueryNames()
152  self.collectMissingQueries()
153  assert self.__label_name_ref == self.__label_name_sample
154  self.__attribute_ref = ref.fetchAttribute(
155  attribute, self.__label_name_ref
156  )
157  self.__attribute_sample = sample.fetchAttribute(
158  attribute, self.__label_name_sample
159  )
162 
163  # collects all those queries that does not exist in both of the results
165  for query in self.__label_name_ref:
166  if query not in self.__label_name_sample:
167  self.__missing_queries_sample.append(query)
168  self.__label_name_ref.remove(query)
169  for query in self.__label_name_sample:
170  if query not in self.__label_name_ref:
171  self.__missing_queries_ref.append(query)
172  self.__label_name_sample.remove(query)
173 
def printSolidLine(self, pattern):
    """Print one separator line: ``pattern`` repeated across the table width.

    Width is 11 characters per column, one column per item plus the label
    column. NOTE(review): the dump's indentation is collapsed; this assumes
    the trailing newline follows the full row, producing a single rule line.
    """
    print(pattern * (11 * (self.__num_items_per_line + 1)))
179 
def printHeader(self):
    """Print each header field on its own tab-indented line, then a '=' rule."""
    for field in self.__header_info:
        print("\t" + field)
    self.printSolidLine("=")
184 
186  return self.__ref_line_count * self.__num_items_per_line
187 
def printLine(self, array):
    """Print the current window of ``array`` as one table row.

    The window starts at the current reference offset and spans at most
    ``__num_items_per_line`` entries, clamped to the reference data length.
    Floats print as %10.2f, strings as %10s; anything else is a hard error.
    """
    start = self.getRefElementsPerLine()
    stop = min(
        start + self.__num_items_per_line, len(self.__attribute_ref)
    )
    for idx in range(start, stop):
        value = array[idx]
        if isinstance(value, float):
            print("%10.2f" % value, end="")
        elif isinstance(value, str):
            print("%10s" % value, end="")
        else:
            assert False
    print("")
199 
200  def printAttribute(self):
201  self.printHeader()
202  ref_count = len(self.__attribute_ref)
203  while self.getRefElementsPerLine() < ref_count:
204  print("%10s" % "Queries", end="")
205  self.printLine(self.__label_name_ref)
206  self.printSolidLine("-")
207  print("%10s" % "Reference", end="")
208  self.printLine(self.__attribute_ref)
209  print("%10s" % "Sample", end="")
210  self.printLine(self.__attribute_sample)
211  print("%10s" % "Speedup", end="")
212  self.printLine(
214  )
215  self.printSolidLine("=")
216  self.__ref_line_count += 1
217  print("\n\n\n")
218 
219 
def main(argv):
    """Compare benchmark results of a sample branch against a reference branch.

    Walks the per-GPU result directories, loads each benchmark JSON present
    in both trees, reports per-query differences larger than epsilon, and
    optionally (-p) pretty-prints all results.

    Args:
        argv: command-line arguments, excluding the program name.
    """
    try:
        opts, args = getopt.getopt(
            argv,
            "hs:r:e:a:p",
            [
                "help",
                "sample=",
                "reference=",
                "epsilon=",
                "attribute=",
                "print",
            ],
        )
    # BUG FIX: was ``getopt.GetOptError`` — a nonexistent attribute, so bad
    # arguments raised AttributeError instead of printing the usage line.
    except getopt.GetoptError:
        print(
            "python3 analyze-benchmark.py -s <sample dir> -r <reference dir> -e <epsilon> -a <attribute> -p"
        )
        sys.exit(2)

    dir_artifact_sample = ""
    dir_artifact_ref = ""
    epsilon = 0.05
    query_attribute = (
        "query_total_avg"
    )  # default attribute to use for benchmark comparison

    to_print = False  # printing all the results, disabled by default

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(
                """
    -s/--sample:\t\t\t directory of the results for the benchmarked sample branch
    -r/--reference:\t\t\t directory of the results for the benchmarked reference branch
    -e/--epsilon:\t\t\t ratio tolerance for reporting results outside this range
    -a/--attribute:\t\t\t attribute to be used for benchmark comparison (default: query_total_avg)
    -p/--print:\t\t\t\t print all the results
    """
            )
            sys.exit()
        else:
            if opt in ("-s", "--sample"):
                dir_artifact_sample = arg
                assert os.path.isdir(dir_artifact_sample)
            elif opt in ("-r", "--reference"):
                dir_artifact_ref = arg
                assert os.path.isdir(dir_artifact_ref)
            elif opt in ("-e", "--epsilon"):
                epsilon = float(arg)
            elif opt in ("-a", "--attribute"):
                query_attribute = arg
            elif opt in ("-p", "--print"):
                to_print = True

    # BUG FIX: ``is not ""`` compared object identity with a literal (and is
    # a SyntaxWarning on modern Python); value equality is what is meant.
    assert dir_artifact_ref != ""
    assert dir_artifact_sample != ""
    assert epsilon <= 1

    GPU_list_ref = listdir(dir_artifact_ref)
    GPU_list_sample = listdir(dir_artifact_sample)

    for gpu in GPU_list_ref:
        dir_name_ref = dir_artifact_ref + "/" + gpu + "/Benchmarks"
        filename_list_ref = listdir(dir_name_ref)
        dir_name_ref += "/"

        refBench = BenchmarkLoader(dir_name_ref, filename_list_ref)

        if gpu in GPU_list_sample:
            dir_name_sample = dir_artifact_sample + "/" + gpu + "/Benchmarks"
            filename_list_sample = listdir(dir_name_sample)
            dir_name_sample += "/"

            sampleBench = BenchmarkLoader(
                dir_name_sample, filename_list_sample
            )
            first_header = True
            for index in range(len(filename_list_ref)):
                refBench.load(filename_list_ref[index])
                if filename_list_ref[index] in filename_list_sample:
                    sampleBench.load(filename_list_ref[index])
                    # Print the GPU/version banner once per GPU, before the
                    # first benchmark that exists in both trees.
                    if first_header:
                        print(
                            "\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
                        )
                        print("++++ " + sampleBench.getGpuName())
                        print(
                            "++++ reference("
                            + refBench.getFrontAttribute("run_label")
                            + "): "
                            + refBench.getFrontAttribute("run_version")
                        )
                        print(
                            "++++ sample("
                            + sampleBench.getFrontAttribute("run_label")
                            + "): "
                            + sampleBench.getFrontAttribute("run_version")
                        )
                        print(
                            "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
                        )
                        first_header = False

                    analyzer = BenchAnalyzer(
                        refBench, sampleBench, query_attribute
                    )
                    analyzer.findAnomaliesRatio(epsilon)
                    if to_print:
                        printer = PrettyPrint(
                            refBench, sampleBench, query_attribute
                        )
                        printer.printAttribute()
                else:
                    print(
                        "No sample results for table "
                        + refBench.getRunTableName()
                        + " were found."
                    )
                    print(
                        "======================================================================="
                    )

        else:
            print(
                "\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
            )
            print("++++ No sample results for GPU " + gpu + " were found.")
            print(
                "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
            )
351 
352 
# Script entry point: forward CLI arguments (without the program name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
def __init__(self, ref, sample, attribute)
def __init__(self, dir_name, filename_list)
def findAnomaliesRatio(self, epsilon)
size_t append(FILE *f, const size_t size, int8_t *buf)
Appends the specified number of bytes to the end of the file f from buf.
Definition: File.cpp:136
def getFrontAttribute(self, attribute)
def __init__(self, ref, sample, attribute, num_items_per_line=5)
FILE * open(int fileId)
Opens/creates the file with the given id; returns NULL on error.
Definition: File.cpp:83
def load(self, bench_filename)
def fetchAttribute(self, attribute, query_names)