OmniSciDB  72c90bc290
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
report.py
Go to the documentation of this file.
1 # Copyright 2023 HEAVY.AI, Inc.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 # http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 
15 # Connect to conbench PostgreSQL database and print a summary report to STDOUT
16 # based on a given GIT_COMMIT.
17 # Requires environment variables (see os.environ calls).
18 # Exit code is 0, 1, 2 for SUCCESS, UNSTABLE, or FAILURE Jenkins stage status.
19 
20 import os
21 import psycopg2
22 import re
23 
24 # Parameterize input and output ENV values
25 
# Editable settings
params = {
    'sha': os.environ['GIT_COMMIT'],  # commit to run report on
    'host': os.environ['BENCH_HOST'],  # hostname on which to compare statistics
    'n': 17,  # number of prior master benchmarks to compare against
}

# URL template for one benchmark's detail page; the remaining {} takes a benchmark_id.
# A trailing slash on CONBENCH_URL is dropped so the link never contains "//benchmarks".
benchmark_detail_url = '{}/benchmarks/{{}}/'.format(os.environ['CONBENCH_URL'].rstrip('/'))
32 
# z_score thresholds, applied to the upper side (positive z_scores) only.
# Highlight, without warning, any z_score that falls above the central 99% range.
ZSCORE_CHECK = 2.5758293035489  # statistics.NormalDist().inv_cdf(0.5 + 0.5 * 0.99)
# Warning when any z_score falls above the central 99.9% range.
ZSCORE_WARNING = 3.2905267314919255  # statistics.NormalDist().inv_cdf(0.5 + 0.5 * 0.999)
# Error when any benchmark falls above 4 sigma.
ZSCORE_ERROR = 4.0
39 
# Connect to PostgreSQL DB and open a cursor.
conn = psycopg2.connect(os.environ['CONBENCH_DSN'])
cur = conn.cursor()

# Get single-row info for the overall benchmark run on this commit.
# If several runs match the host and sha, only the most recent one is kept.
cur.execute("""select co.author_avatar, co.author_name, co.branch, co.message, co.repository -- 1 row
  from run r
  join "commit" co on co.id=r.commit_id
  join hardware h on h.id=r.hardware_id
 where h.name=%(host)s and co.sha=%(sha)s
 order by r."timestamp" desc
 limit 1
""", params)
53 
# In python 3 this can/should be replaced w/ import html + html.escape(s, quote=False).
def html_escape(s):
    """Return *s* with the characters &, < and > replaced by HTML entities.

    Quotes are left untouched, so the result is meant for element text, not
    for attribute values. None (e.g. a NULL database column) yields an empty
    string instead of raising AttributeError.
    """
    if s is None:
        return ''
    # '&' must be replaced first, otherwise the entities produced for '<' and '>'
    # would themselves be escaped a second time.
    return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
57 
def get_commit_results(cur):
    """Read the commit/run row pending on *cur* and return the report's template parameters.

    Expects the cursor to hold the result of the single-row commit query
    (author_avatar, author_name, branch, message, repository).

    Returns a dict with the keys author_avatar, author_name, branch,
    commit_message, commit_url, short_sha and host. The author name, branch
    and commit message are HTML-escaped.

    Raises RuntimeError when the query returned no row, i.e. no run exists
    for the requested commit on the requested host.
    """
    row = cur.fetchone()
    if row is None:
        raise RuntimeError('No benchmark run found for commit {} on host {}'.format(
            params['sha'], params['host']))
    author_avatar, author_name, branch, message, repository = row
    return {
        'author_avatar': author_avatar,
        'author_name': html_escape(author_name),
        'branch': html_escape(branch),
        'commit_message': html_escape(message),
        'commit_url': '{}/commit/{}'.format(repository, params['sha']),
        'short_sha': params['sha'][0:8],
        'host': params['host'],
    }


html_params = get_commit_results(cur)
69 
# Get full report for all benchmark tests on this commit.
# The 1234567.89 is a sentinel value designating a test that failed to run
# that is translated to NULL in the SQL, and None in python.
# NOTE(review): z_score divides by ps.stddev; a prior stddev of exactly 0 would
# presumably make PostgreSQL raise a division-by-zero error - confirm whether
# that can occur with real data.
cur.execute("""with main_run as ( -- 1 row
  select r.id, r."timestamp"
  from run r
  join "commit" co on co.id=r.commit_id
  join hardware h on h.id=r.hardware_id
  where h.name=%(host)s and co.sha=%(sha)s
  order by r."timestamp" desc
  limit 1
), main_stats as ( -- |b| rows. |b| = number of benchmarks (TPC-DS has 99)
  select br.id as benchmark_result_id, ca.name, ca.tags->>'benchmark_name' as benchmark_name,
    nullif(br.mean, 1234567.89) as mean
  from main_run mr
  join benchmark_result br on br.run_id=mr.id
  join "case" ca on ca.id=br.case_id
), prior_runs as ( -- n most recent master runs. n is in the limit clause
  select r.id
  from main_run mr
  join run r on r."timestamp" < mr."timestamp"
  join "commit" co on co.id=r.commit_id
  join hardware h on h.id=r.hardware_id
  where h.name=%(host)s and co.branch='heavyai:master'
  order by r."timestamp" desc
  limit %(n)s
), prior_benchmarks as ( -- n*|b| rows
  select ca.name, ca.tags->>'benchmark_name' as benchmark_name, nullif(br.mean, 1234567.89) as mean
  from prior_runs pr
  join benchmark_result br on br.run_id=pr.id
  join "case" ca on ca.id=br.case_id
), prior_stats as ( -- |b| rows
  select pb.name, pb.benchmark_name, avg(pb.mean) as avg, stddev_samp(pb.mean) as stddev
  from prior_benchmarks pb
  group by pb.name, pb.benchmark_name
)
select ms.benchmark_result_id as benchmark_id, ms.name, ms.benchmark_name, ms.mean as duration_ms, -- |b| rows
  ps.avg, ps.stddev, (ms.mean - ps.avg) / ps.stddev as z_score
from main_stats ms
join prior_stats ps on ps.name=ms.name and ps.benchmark_name=ms.benchmark_name
order by ms.name, ms.benchmark_name
""", params)

# Column names of the result set, in select order.
header = [desc[0] for desc in cur.description]
rows = cur.fetchall()

# Everything needed is now in memory; release the database resources.
cur.close()
conn.close()

# Counters filled in while the table cells are rendered.
# A query is considered "fixed" if it was previously broken, or its z_score significantly improved.
stats = {'worst_z_score': 0.0, 'nfixes': 0, 'nchecks': 0, 'nwarnings': 0, 'nerrors': 0}
117 
# Class to format and gather statistics on each row from database.
class Row:
    """One line of the HTML benchmark table: the header line or one result row.

    Rendering the z_score and duration cells of a result row updates the
    module-level ``stats`` counters as a side effect.
    """

    # Captures the word that follows the first "<word>/" in a benchmark_name.
    _STRING_DICT_NAME = re.compile(r'\w+/(\w+)')

    def __init__(self, header, row):
        """Prepare a table line.

        header: column names of the query result, in result order.
        row:    one result tuple, or None to build the header line, in which
                case the cells are the column names themselves.
        """
        # Keep the unmodified column names; fixup_row() needs the original positions.
        self.raw_header = list(header)
        self.header = self.fixup_header(header)
        self.row = self.fixup_header(header) if row is None else self.fixup_row(row)
        self.duration_idx = self.header.index('duration_ms')
        self.avg_idx = self.header.index('avg')
        self.z_score_idx = self.header.index('z_score')

    @staticmethod
    def _element(tag, text, css_class=None):
        """Return <tag>text</tag>, with a class attribute when css_class is given."""
        if css_class is None:
            return '<{0}>{1}</{0}>'.format(tag, text)
        return '<{0} class="{2}">{1}</{0}>'.format(tag, text, css_class)

    # Conditionally format report cells and accumulate stats on z_scores.
    def cell(self, tag, idx, value):
        """Return the HTML for one cell.

        tag:   'th' or 'td'.
        idx:   position of the cell within self.row.
        value: cell content; strings are emitted as-is, None is printed as
               "None", any other value is formatted with 3 decimals.
        """
        if isinstance(value, str):
            return self._element(tag, value)
        if idx == self.z_score_idx:
            return self._z_score_cell(tag, value)
        if value is None:
            return self._element(tag, value)
        text = '{:0.3f}'.format(value)
        if idx == self.duration_idx and self.row[self.avg_idx] is None:
            # A duration exists now but there is no prior average: counted as fixed.
            stats['nfixes'] += 1
            return self._element(tag, text, 'fixed')
        return self._element(tag, text)

    def _z_score_cell(self, tag, value):
        """Classify a z_score against the ZSCORE_* thresholds and update stats."""
        if value is None:
            if self.row[self.avg_idx] is None:
                # Neither a z_score nor a prior average: nothing to compare against.
                return self._element(tag, value)
            # A prior average exists but no z_score could be computed: counted as an error.
            stats['nerrors'] += 1
            return self._element(tag, value, 'error')
        if stats['worst_z_score'] < value:
            stats['worst_z_score'] = value
        text = '{:0.3f}'.format(value)
        if value <= -ZSCORE_ERROR:
            stats['nfixes'] += 1
            return self._element(tag, text, 'fixed')
        if value < ZSCORE_CHECK:
            return self._element(tag, text)
        if value < ZSCORE_WARNING:
            stats['nchecks'] += 1
            return self._element(tag, text, 'check')
        if value < ZSCORE_ERROR:
            stats['nwarnings'] += 1
            return self._element(tag, text, 'warning')
        stats['nerrors'] += 1
        return self._element(tag, text, 'error')

    # Omit columns that are combined with others in the final report.
    def fixup_header(self, header):
        """Return a copy of header without 'benchmark_id' and 'benchmark_name'."""
        fixup = list(header)  # copy, not reference
        fixup.remove('benchmark_id')  # Used in benchmark_detail_url
        fixup.remove('benchmark_name')  # Column "name" is based on both "name" and "benchmark_name".
        return fixup

    # Omit columns that are combined with others in the final report.
    def fixup_row(self, row_tuple):
        """Return row_tuple as a list whose columns line up with fixup_header().

        The name cell becomes a hyperlink to the benchmark's detail page,
        labelled with the combined name and benchmark_name.
        """
        row = list(row_tuple)
        benchmark_id = self.raw_header.index('benchmark_id')
        name = self.raw_header.index('name')
        benchmark_name = self.raw_header.index('benchmark_name')
        # Combine name and benchmark_name together into a single column.
        if row[name] == 'StringDictionaryBenchmark':
            md = self._STRING_DICT_NAME.search(row[benchmark_name])
            # Fall back to the full benchmark_name if it has no "<word>/<word>" part.
            suffix = md.group(1) if md else row[benchmark_name]
            row[name] = 'StringDictionary {}'.format(suffix)
        else:
            row[name] = '{} {}'.format(row[name], row[benchmark_name])
        # Hyperlink benchmark name to the conbench page for the specific benchmark.
        row[name] = '<a href="{}">{}</a>'.format(benchmark_detail_url.format(row[benchmark_id]), row[name])
        # Delete the higher index first so the lower one stays valid, whatever the column order.
        for column in sorted((benchmark_id, benchmark_name), reverse=True):
            del row[column]
        return row

    # Return the html table row (tr).
    def tr(self, tag):
        """Return the whole line as a <tr> element whose cells use the given tag."""
        return '<tr>{}</tr>'.format(''.join(self.cell(tag, idx, value) for idx, value in enumerate(self.row)))
196 
def summary_body_rows(counts=None):
    """Return the <tr> lines of the summary table, joined by newlines.

    counts: mapping with the keys nfixes, nchecks, nwarnings, nerrors and
            worst_z_score; defaults to the module-level ``stats``.
    """
    if counts is None:
        counts = stats
    return "\n".join([
        '<tr><td>Fixed Tests</td><td class="fixed">{nfixes}</td></tr>',
        '<tr><td>Check Tests</td><td class="check">{nchecks}</td></tr>',
        '<tr><td>Warnings</td><td class="warning">{nwarnings}</td></tr>',
        '<tr><td>Errors</td><td class="error">{nerrors}</td></tr>',
        '<tr><td>Worst z_score</td><td>{worst_z_score:0.3f}</td></tr>',
    ]).format(**counts)
205 
# Set html params
# Order matters: rendering the body rows fills in the stats counters, so the
# summary rows must be built after them.
html_params['header_row'] = Row(header, None).tr('th')
html_params['body_rows'] = "\n".join([Row(header, row).tr('td') for row in rows])
html_params['summary_body_rows'] = summary_body_rows()

# Print html report
# Literal CSS braces are doubled because the template goes through str.format().
print("""<!DOCTYPE html>
<html>
<head>
  <title>Benchmarks for {branch} / {short_sha} on {host}</title>
  <style>
body {{ font-family: sans-serif }}
table {{ border-collapse: collapse }}
th {{ text-align: right; padding-right: 1em }}
td {{ font-family: monospace; text-align: right; padding-right: 1em }}
td.fixed {{ background-color: LightGreen }}
td.check {{ background-color: Khaki }}
td.warning {{ background-color: Yellow }}
td.error {{ background-color: Red }}
tr:nth-child(even) {{ background-color: LightCyan }}
  </style>
</head>
<body>
<h1>Benchmarks for {branch} / <a href="{commit_url}">{short_sha}</a> on {host}</h1>
<p><a href="{commit_url}">{commit_message}</a></p>
<!-- img disabled due to strict Content Security Policy for HTML Publisher Jenkins plugin -->
<p><!--img alt="avatar" src="{author_avatar}" height=25-->{author_name}</p>
<table>
{summary_body_rows}
</table>
<table>
{header_row}
{body_rows}
</table>
</body>
</html>
""".format(**html_params))
243 
def error_code(counts=None):
    """Return the process exit code for the accumulated counters.

    counts: mapping with the keys nerrors and nwarnings; defaults to the
            module-level ``stats``.

    Returns 2 if any error was counted, 1 if any warning was counted, else 0.
    """
    if counts is None:
        counts = stats
    if counts['nerrors']:
        return 2
    if counts['nwarnings']:
        return 1
    return 0
251 
# Raise SystemExit directly: the exit() builtin is injected by the site module
# for interactive use and is absent when the interpreter runs without it.
raise SystemExit(error_code())
def tr
Definition: report.py:193
def cell
Definition: report.py:128
std::string join(T const &container, std::string const &delim)
def get_commit_results
Definition: report.py:58
def summary_body_rows
Definition: report.py:197
def html_escape
Definition: report.py:55
duration_idx
Definition: report.py:123
def __init__
Definition: report.py:120
def fixup_row
Definition: report.py:173
def error_code
Definition: report.py:244
def fixup_header
Definition: report.py:166