OmniSciDB  1dac507f6e
run_benchmark Namespace Reference

Functions

def verify_destinations
 
def get_connection
 
def get_run_vars
 
def get_gpu_info
 
def get_machine_info
 
def read_query_files
 
def validate_query_file
 
def execute_query
 
def calculate_query_times
 
def get_mem_usage
 
def run_query
 
def json_format_handler
 
def create_results_dataset
 
def send_results_db
 
def send_results_file_json
 
def send_results_jenkins_bench
 
def send_results_output
 
def process_arguments
 
def benchmark
 

Function Documentation

def run_benchmark.benchmark (   input_arguments)

Definition at line 1338 of file run_benchmark.py.

References create_results_dataset(), get_connection(), get_gpu_info(), get_machine_info(), get_run_vars(), process_arguments(), read_query_files(), run_query(), send_results_db(), send_results_file_json(), send_results_jenkins_bench(), send_results_output(), and verify_destinations().

1339 def benchmark(input_arguments):
1340  # Set input args to vars
1341  args = process_arguments(input_arguments)
1342  verbose = args.verbose
1343  quiet = args.quiet
1344  source_db_user = args.user
1345  source_db_passwd = args.passwd
1346  source_db_server = args.server
1347  source_db_port = args.port
1348  source_db_name = args.name
1349  source_table = args.table
1350  label = args.label
1351  queries_dir = args.queries_dir
1352  iterations = args.iterations
1353  gpu_count = args.gpu_count
1354  gpu_name = args.gpu_name
1355  no_gather_conn_gpu_info = args.no_gather_conn_gpu_info
1356  no_gather_nvml_gpu_info = args.no_gather_nvml_gpu_info
1357  gather_nvml_gpu_info = args.gather_nvml_gpu_info
1358  machine_name = args.machine_name
1359  machine_uname = args.machine_uname
1360  destinations = args.destination
1361  dest_db_user = args.dest_user
1362  dest_db_passwd = args.dest_passwd
1363  dest_db_server = args.dest_server
1364  dest_db_port = args.dest_port
1365  dest_db_name = args.dest_name
1366  dest_table = args.dest_table
1367  dest_table_schema_file = args.dest_table_schema_file
1368  output_file_json = args.output_file_json
1369  output_file_jenkins = args.output_file_jenkins
1370  output_tag_jenkins = args.output_tag_jenkins
1371 
1372  # Hard-coded vars
1373  trim = 0.15
1374  jenkins_thresholds_name = "average"
1375  jenkins_thresholds_field = "query_exec_avg"
1376 
1377  # Set logging output level
1378  if verbose:
1379  logging.basicConfig(level=logging.DEBUG)
1380  elif quiet:
1381  logging.basicConfig(level=logging.WARNING)
1382  else:
1383  logging.basicConfig(level=logging.INFO)
1384 
1385  # Input validation
1386  if iterations <= 1:
1387  # Need > 1 iteration as first iteration is dropped from calculations
1388  logging.error("Iterations must be greater than 1")
1389  exit(1)
1390  if verify_destinations(
1391  destinations=destinations,
1392  dest_db_server=dest_db_server,
1393  output_file_json=output_file_json,
1394  output_file_jenkins=output_file_jenkins,
1395  ):
1396  logging.debug("Destination(s) have been verified.")
1397  else:
1398  logging.error("No valid destination(s) have been set. Exiting.")
1399  exit(1)
1400 
1401  # Establish connection to mapd db
1402  con = get_connection(
1403  db_user=source_db_user,
1404  db_passwd=source_db_passwd,
1405  db_server=source_db_server,
1406  db_port=source_db_port,
1407  db_name=source_db_name,
1408  )
1409  if not con:
1410  exit(1) # Exit if cannot connect to db
1411  # Set run-specific variables (time, uid, etc.)
1412  run_vars = get_run_vars(con=con)
1413  # Set GPU info depending on availability
1414  gpu_info = get_gpu_info(
1415  gpu_name=gpu_name,
1416  no_gather_conn_gpu_info=no_gather_conn_gpu_info,
1417  con=con,
1418  conn_machine_name=run_vars["conn_machine_name"],
1419  no_gather_nvml_gpu_info=no_gather_nvml_gpu_info,
1420  gather_nvml_gpu_info=gather_nvml_gpu_info,
1421  gpu_count=gpu_count,
1422  )
1423  # Set run machine info
1424  machine_info = get_machine_info(
1425  conn_machine_name=run_vars["conn_machine_name"],
1426  machine_name=machine_name,
1427  machine_uname=machine_uname,
1428  )
1429  # Read queries from files, set to queries dir in PWD if not passed in
1430  if not queries_dir:
1431  queries_dir = os.path.join(os.path.dirname(__file__), "queries")
1432  query_list = read_query_files(
1433  queries_dir=queries_dir, source_table=source_table
1434  )
1435  if not query_list:
1436  exit(1)
1437  # Run queries
1438  queries_results = []
1439  for query in query_list["queries"]:
1440  query_result = run_query(
1441  query=query, iterations=iterations, trim=trim, con=con
1442  )
1443  queries_results.append(query_result)
1444  logging.info("Completed all queries.")
1445  logging.debug("Closing source db connection.")
1446  con.close()
1447  # Generate results dataset
1448  results_dataset = create_results_dataset(
1449  run_guid=run_vars["run_guid"],
1450  run_timestamp=run_vars["run_timestamp"],
1451  run_connection=run_vars["run_connection"],
1452  run_machine_name=machine_info["run_machine_name"],
1453  run_machine_uname=machine_info["run_machine_uname"],
1454  run_driver=run_vars["run_driver"],
1455  run_version=run_vars["run_version"],
1456  run_version_short=run_vars["run_version_short"],
1457  label=label,
1458  source_db_gpu_count=gpu_info["source_db_gpu_count"],
1459  source_db_gpu_driver_ver=gpu_info["source_db_gpu_driver_ver"],
1460  source_db_gpu_name=gpu_info["source_db_gpu_name"],
1461  source_db_gpu_mem=gpu_info["source_db_gpu_mem"],
1462  source_table=source_table,
1463  trim=trim,
1464  iterations=iterations,
1465  query_group=query_list["query_group"],
1466  queries_results=queries_results,
1467  )
1468  results_dataset_json = json.dumps(
1469  results_dataset, default=json_format_handler, indent=2
1470  )
1471  successful_results_dataset = [
1472  x for x in results_dataset if x["succeeded"] is not False
1473  ]
1474  successful_results_dataset_results = []
1475  for results_dataset_entry in successful_results_dataset:
1476  successful_results_dataset_results.append(
1477  results_dataset_entry["results"]
1478  )
1479  # Send results to destination(s)
1480  sent_destination = True
1481  if "mapd_db" in destinations:
1482  if not send_results_db(
1483  results_dataset=successful_results_dataset_results,
1484  table=dest_table,
1485  db_user=dest_db_user,
1486  db_passwd=dest_db_passwd,
1487  db_server=dest_db_server,
1488  db_port=dest_db_port,
1489  db_name=dest_db_name,
1490  table_schema_file=dest_table_schema_file,
1491  ):
1492  sent_destination = False
1493  if "file_json" in destinations:
1494  if not send_results_file_json(
1495  results_dataset_json=results_dataset_json,
1496  output_file_json=output_file_json,
1497  ):
1498  sent_destination = False
1499  if "jenkins_bench" in destinations:
1500  if not send_results_jenkins_bench(
1501  results_dataset=successful_results_dataset_results,
1502  thresholds_name=jenkins_thresholds_name,
1503  thresholds_field=jenkins_thresholds_field,
1504  output_tag_jenkins=output_tag_jenkins,
1505  output_file_jenkins=output_file_jenkins,
1506  ):
1507  sent_destination = False
1508  if "output" in destinations:
1509  if not send_results_output(results_dataset_json=results_dataset_json):
1510  sent_destination = False
1511  if not sent_destination:
1512  logging.error("Sending results to one or more destinations failed")
1513  exit(1)
1514  else:
1515  logging.info(
1516  "Successfully loaded query results info into destination(s)"
1517  )
1518 
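A minimal invocation sketch (all flag values are placeholders; assumes run_benchmark.py is importable and a source database is reachable):

    import run_benchmark

    # Flags mirror process_arguments(); --table, --label, and --iterations
    # are required, and iterations must be > 1 (the first one is dropped).
    run_benchmark.benchmark([
        "--server", "localhost",
        "--table", "flights_2008_10k",    # hypothetical source table
        "--label", "nightly-run",         # hypothetical run label
        "--iterations", "5",
        "--destination", "file_json",
        "--output-file-json", "/tmp/results.json",
    ])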


def run_benchmark.calculate_query_times (   kwargs)
  Calculates aggregate query times from all iteration times

  Kwargs:
    total_times(list): List of total time calculations
    execution_times(list): List of execution_time calculations
    results_iter_times(list): List of results_iter_time calculations
    connect_times(list): List of connect_time calculations
    trim(float): Amount to trim from iterations set to gather trimmed
                 values. Enter as decimal corresponding to percent to
                 trim - ex: 0.15 to trim 15%.

  Returns:
    query_execution(dict): Query times
    False(bool): The query failed. Exception should be logged.

Definition at line 423 of file run_benchmark.py.

Referenced by create_results_dataset().

424 def calculate_query_times(**kwargs):
425  """
426  Calculates aggregate query times from all iteration times
427 
428  Kwargs:
429  total_times(list): List of total time calculations
430  execution_times(list): List of execution_time calculations
431  results_iter_times(list): List of results_iter_time calculations
432  connect_times(list): List of connect_time calculations
433  trim(float): Amount to trim from iterations set to gather trimmed
434  values. Enter as decimal corresponding to percent to
435  trim - ex: 0.15 to trim 15%.
436 
437  Returns:
438  query_execution(dict): Query times
439  False(bool): The query failed. Exception should be logged.
440  """
441  trim_size = int(kwargs["trim"] * len(kwargs["total_times"]))
442  return {
443  "total_time_avg": round(numpy.mean(kwargs["total_times"]), 1),
444  "total_time_min": round(numpy.min(kwargs["total_times"]), 1),
445  "total_time_max": round(numpy.max(kwargs["total_times"]), 1),
446  "total_time_85": round(numpy.percentile(kwargs["total_times"], 85), 1),
447  "total_time_trimmed_avg": round(
448  numpy.mean(
449  numpy.sort(kwargs["total_times"])[trim_size:-trim_size]
450  ),
451  1,
452  )
453  if trim_size
454  else round(numpy.mean(kwargs["total_times"]), 1),
455  "total_times": kwargs["total_times"],
456  "execution_time_avg": round(numpy.mean(kwargs["execution_times"]), 1),
457  "execution_time_min": round(numpy.min(kwargs["execution_times"]), 1),
458  "execution_time_max": round(numpy.max(kwargs["execution_times"]), 1),
459  "execution_time_85": round(
460  numpy.percentile(kwargs["execution_times"], 85), 1
461  ),
462  "execution_time_25": round(
463  numpy.percentile(kwargs["execution_times"], 25), 1
464  ),
465  "execution_time_std": round(numpy.std(kwargs["execution_times"]), 1),
466  "execution_time_trimmed_avg": round(
467  numpy.mean(
468  numpy.sort(kwargs["execution_times"])[trim_size:-trim_size]
469  )
470  )
471  if trim_size > 0
472  else round(numpy.mean(kwargs["execution_times"]), 1),
473  "execution_time_trimmed_max": round(
474  numpy.max(
475  numpy.sort(kwargs["execution_times"])[trim_size:-trim_size]
476  )
477  )
478  if trim_size > 0
479  else round(numpy.max(kwargs["execution_times"]), 1),
480  "execution_times": kwargs["execution_times"],
481  "connect_time_avg": round(numpy.mean(kwargs["connect_times"]), 1),
482  "connect_time_min": round(numpy.min(kwargs["connect_times"]), 1),
483  "connect_time_max": round(numpy.max(kwargs["connect_times"]), 1),
484  "connect_time_85": round(
485  numpy.percentile(kwargs["connect_times"], 85), 1
486  ),
487  "results_iter_time_avg": round(
488  numpy.mean(kwargs["results_iter_times"]), 1
489  ),
490  "results_iter_time_min": round(
491  numpy.min(kwargs["results_iter_times"]), 1
492  ),
493  "results_iter_time_max": round(
494  numpy.max(kwargs["results_iter_times"]), 1
495  ),
496  "results_iter_time_85": round(
497  numpy.percentile(kwargs["results_iter_times"], 85), 1
498  ),
499  }
500 
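To make the trimmed statistics concrete, here is the same trim arithmetic run standalone on a made-up sample of times (in ms):

    import numpy

    times = [95.0, 96.0, 97.0, 98.0, 99.0, 100.0, 101.0, 102.0, 110.0, 300.0]
    trim = 0.15                                # trim 15% from each end
    trim_size = int(trim * len(times))         # int(1.5) == 1 sample per end
    trimmed = numpy.sort(times)[trim_size:-trim_size]
    print(round(numpy.mean(times), 1))         # 119.8 - skewed by the 300ms outlier
    print(round(numpy.mean(trimmed), 1))       # 100.4 - outlier trimmed away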


def run_benchmark.create_results_dataset (   kwargs)
  Create results dataset

  Kwargs:
    run_guid(str): Run GUID
    run_timestamp(datetime): Run timestamp
    run_connection(str): Connection string
    run_machine_name(str): Run machine name
    run_machine_uname(str): Run machine uname
    run_driver(str): Run driver
    run_version(str): Version of DB
    run_version_short(str): Shortened version of DB
    label(str): Run label
    source_db_gpu_count(int): Number of GPUs on run machine
    source_db_gpu_driver_ver(str): GPU driver version
    source_db_gpu_name(str): GPU name
    source_db_gpu_mem(str): Amount of GPU mem on run machine
    source_table(str): Table to run query against
    trim(float): Trim decimal to remove from top and bottom of results
    iterations(int): Number of iterations of each query to run
    query_group(str): Query group, usually matches table name
    query_results(dict):::
        query_name(str): Name of query
        query_mapdql(str): Query to run
        query_id(str): Query ID
        query_succeeded(bool): Query succeeded
        query_error_info(str): Query error info
        result_count(int): Number of results returned
        initial_iteration_results(dict):::
            first_execution_time(float): Execution time for first query
                iteration
            first_connect_time(float):  Connect time for first query
                iteration
            first_results_iter_time(float): Results iteration time for
                first query iteration
            first_total_time(float): Total time for first iteration
            first_cpu_mem_usage(float): CPU memory usage for first query
                iteration
            first_gpu_mem_usage(float): GPU memory usage for first query
                iteration
        noninitial_iteration_results(list):::
            execution_time(float): Time (in ms) that pymapd reports
                backend spent on query.
            connect_time(float): Time (in ms) for overhead of query,
                calculated by subtracting backend execution time from
                time spent on the execution function.
            results_iter_time(float): Time (in ms) it took for
                pymapd.fetchone() to iterate through all of the results.
            total_time(float): Time (in ms) from adding all above times.
        query_total_elapsed_time(int): Total elapsed time for query

  Returns:
    results_dataset(list):::
        result_dataset(dict): Query results dataset

Definition at line 743 of file run_benchmark.py.

References calculate_query_times().

Referenced by benchmark().

744 def create_results_dataset(**kwargs):
745  """
746  Create results dataset
747 
748  Kwargs:
749  run_guid(str): Run GUID
750  run_timestamp(datetime): Run timestamp
751  run_connection(str): Connection string
752  run_machine_name(str): Run machine name
753  run_machine_uname(str): Run machine uname
754  run_driver(str): Run driver
755  run_version(str): Version of DB
756  run_version_short(str): Shortened version of DB
757  label(str): Run label
758  source_db_gpu_count(int): Number of GPUs on run machine
759  source_db_gpu_driver_ver(str): GPU driver version
760  source_db_gpu_name(str): GPU name
761  source_db_gpu_mem(str): Amount of GPU mem on run machine
762  source_table(str): Table to run query against
763  trim(float): Trim decimal to remove from top and bottom of results
764  iterations(int): Number of iterations of each query to run
765  query_group(str): Query group, usually matches table name
766  query_results(dict):::
767  query_name(str): Name of query
768  query_mapdql(str): Query to run
769  query_id(str): Query ID
770  query_succeeded(bool): Query succeeded
771  query_error_info(str): Query error info
772  result_count(int): Number of results returned
773  initial_iteration_results(dict):::
774  first_execution_time(float): Execution time for first query
775  iteration
776  first_connect_time(float): Connect time for first query
777  iteration
778  first_results_iter_time(float): Results iteration time for
779  first query iteration
780  first_total_time(float): Total time for first iteration
781  first_cpu_mem_usage(float): CPU memory usage for first query
782  iteration
783  first_gpu_mem_usage(float): GPU memory usage for first query
784  iteration
785  noninitial_iteration_results(list):::
786  execution_time(float): Time (in ms) that pymapd reports
787  backend spent on query.
788  connect_time(float): Time (in ms) for overhead of query,
789  calculated by subtracting backend execution time from
790  time spent on the execution function.
791  results_iter_time(float): Time (in ms) it took for
792  pymapd.fetchone() to iterate through all of the results.
793  total_time(float): Time (in ms) from adding all above times.
794  query_total_elapsed_time(int): Total elapsed time for query
795 
796  Returns:
797  results_dataset(list):::
798  result_dataset(dict): Query results dataset
799  """
800  results_dataset = []
801  for query_results in kwargs["queries_results"]:
802  if query_results["query_succeeded"]:
803  # Aggregate iteration values
804  execution_times, connect_times, results_iter_times, total_times = (
805  [],
806  [],
807  [],
808  [],
809  )
810  for noninitial_result in query_results[
811  "noninitial_iteration_results"
812  ]:
813  execution_times.append(noninitial_result["execution_time"])
814  connect_times.append(noninitial_result["connect_time"])
815  results_iter_times.append(
816  noninitial_result["results_iter_time"]
817  )
818  total_times.append(noninitial_result["total_time"])
819  # Overwrite result count, same for each iteration
820  result_count = noninitial_result["result_count"]
821  # Calculate query times
822  logging.debug(
823  "Calculating times from query " + query_results["query_id"]
824  )
825  query_times = calculate_query_times(
826  total_times=total_times,
827  execution_times=execution_times,
828  connect_times=connect_times,
829  results_iter_times=results_iter_times,
830  trim=kwargs[
831  "trim"
832  ], # Trim top and bottom n% for trimmed calculations
833  )
834  result_dataset = {
835  "name": query_results["query_name"],
836  "mapdql": query_results["query_mapdql"],
837  "succeeded": True,
838  "results": {
839  "run_guid": kwargs["run_guid"],
840  "run_timestamp": kwargs["run_timestamp"],
841  "run_connection": kwargs["run_connection"],
842  "run_machine_name": kwargs["run_machine_name"],
843  "run_machine_uname": kwargs["run_machine_uname"],
844  "run_driver": kwargs["run_driver"],
845  "run_version": kwargs["run_version"],
846  "run_version_short": kwargs["run_version_short"],
847  "run_label": kwargs["label"],
848  "run_gpu_count": kwargs["source_db_gpu_count"],
849  "run_gpu_driver_ver": kwargs["source_db_gpu_driver_ver"],
850  "run_gpu_name": kwargs["source_db_gpu_name"],
851  "run_gpu_mem_mb": kwargs["source_db_gpu_mem"],
852  "run_table": kwargs["source_table"],
853  "query_group": kwargs["query_group"],
854  "query_id": query_results["query_id"],
855  "query_result_set_count": result_count,
856  "query_error_info": query_results["query_error_info"],
857  "query_conn_first": query_results[
858  "initial_iteration_results"
859  ]["first_connect_time"],
860  "query_conn_avg": query_times["connect_time_avg"],
861  "query_conn_min": query_times["connect_time_min"],
862  "query_conn_max": query_times["connect_time_max"],
863  "query_conn_85": query_times["connect_time_85"],
864  "query_exec_first": query_results[
865  "initial_iteration_results"
866  ]["first_execution_time"],
867  "query_exec_avg": query_times["execution_time_avg"],
868  "query_exec_min": query_times["execution_time_min"],
869  "query_exec_max": query_times["execution_time_max"],
870  "query_exec_85": query_times["execution_time_85"],
871  "query_exec_25": query_times["execution_time_25"],
872  "query_exec_stdd": query_times["execution_time_std"],
873  "query_exec_trimmed_avg": query_times[
874  "execution_time_trimmed_avg"
875  ],
876  "query_exec_trimmed_max": query_times[
877  "execution_time_trimmed_max"
878  ],
879  # Render queries not supported yet
880  "query_render_first": None,
881  "query_render_avg": None,
882  "query_render_min": None,
883  "query_render_max": None,
884  "query_render_85": None,
885  "query_render_25": None,
886  "query_render_stdd": None,
887  "query_total_first": query_results[
888  "initial_iteration_results"
889  ]["first_total_time"],
890  "query_total_avg": query_times["total_time_avg"],
891  "query_total_min": query_times["total_time_min"],
892  "query_total_max": query_times["total_time_max"],
893  "query_total_85": query_times["total_time_85"],
894  "query_total_all": query_results[
895  "query_total_elapsed_time"
896  ],
897  "query_total_trimmed_avg": query_times[
898  "total_time_trimmed_avg"
899  ],
900  "results_iter_count": kwargs["iterations"],
901  "results_iter_first": query_results[
902  "initial_iteration_results"
903  ]["first_results_iter_time"],
904  "results_iter_avg": query_times["results_iter_time_avg"],
905  "results_iter_min": query_times["results_iter_time_min"],
906  "results_iter_max": query_times["results_iter_time_max"],
907  "results_iter_85": query_times["results_iter_time_85"],
908  "cpu_mem_usage_mb": query_results[
909  "initial_iteration_results"
910  ]["first_cpu_mem_usage"],
911  "gpu_mem_usage_mb": query_results[
912  "initial_iteration_results"
913  ]["first_gpu_mem_usage"],
914  },
915  "debug": {
916  "query_exec_times": query_times["execution_times"],
917  "query_total_times": query_times["total_times"],
918  },
919  }
920  elif not query_results["query_succeeded"]:
921  result_dataset = {
922  "name": query_results["query_name"],
923  "mapdql": query_results["query_mapdql"],
924  "succeeded": False,
925  }
926  results_dataset.append(result_dataset)
927  logging.debug("All values set for query " + query_results["query_id"])
928  return results_dataset
929 
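As a minimal sketch of the failure branch: a query entry with query_succeeded set to False produces only the short result, and the run_*/GPU kwargs are never read for it (values below are placeholders):

    failed = create_results_dataset(
        trim=0.15,
        queries_results=[{
            "query_name": "query_1.sql",
            "query_mapdql": "SELECT 1;",
            "query_id": "query_1",
            "query_succeeded": False,
            "query_error_info": "timeout",
        }],
    )
    print(failed)  # [{'name': 'query_1.sql', 'mapdql': 'SELECT 1;', 'succeeded': False}]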


def run_benchmark.execute_query (   kwargs)
  Executes a query against the connected db using pymapd
  https://pymapd.readthedocs.io/en/latest/usage.html#querying

  Kwargs:
    query_name(str): Name of query
    query_mapdql(str): Query to run
    iteration(int): Iteration number
    con(class): Connection class

  Returns:
    query_execution(dict):::
      result_count(int): Number of results returned
      execution_time(float): Time (in ms) that pymapd reports
                             backend spent on query.
      connect_time(float): Time (in ms) for overhead of query, calculated
                           by subtracting backend execution time
                           from time spent on the execution function.
      results_iter_time(float): Time (in ms) it took for
                                pymapd.fetchone() to iterate through all
                                of the results.
      total_time(float): Time (in ms) from adding all above times.
    False(bool): The query failed. Exception should be logged.

Definition at line 343 of file run_benchmark.py.

Referenced by run_query().

344 def execute_query(**kwargs):
345  """
346  Executes a query against the connected db using pymapd
347  https://pymapd.readthedocs.io/en/latest/usage.html#querying
348 
349  Kwargs:
350  query_name(str): Name of query
351  query_mapdql(str): Query to run
352  iteration(int): Iteration number
353  con(class): Connection class
354 
355  Returns:
356  query_execution(dict):::
357  result_count(int): Number of results returned
358  execution_time(float): Time (in ms) that pymapd reports
359  backend spent on query.
360  connect_time(float): Time (in ms) for overhead of query, calculated
361  by subtracting backend execution time
362  from time spent on the execution function.
363  results_iter_time(float): Time (in ms) it took for
364  pymapd.fetchone() to iterate through all
365  of the results.
366  total_time(float): Time (in ms) from adding all above times.
367  False(bool): The query failed. Exception should be logged.
368  """
369  start_time = timeit.default_timer()
370  try:
371  # Run the query
372  query_result = kwargs["con"].execute(kwargs["query_mapdql"])
373  logging.debug(
374  "Completed iteration "
375  + str(kwargs["iteration"])
376  + " of query "
377  + kwargs["query_name"]
378  )
379  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
380  logging.exception(
381  "Error running query "
382  + kwargs["query_name"]
383  + " during iteration "
384  + str(kwargs["iteration"])
385  )
386  return False
387 
388  # Calculate times
389  query_elapsed_time = (timeit.default_timer() - start_time) * 1000
390  execution_time = query_result._result.execution_time_ms
391  connect_time = round((query_elapsed_time - execution_time), 1)
392  # Iterate through each result from the query
393  logging.debug(
394  "Counting results from query "
395  + kwargs["query_name"]
396  + " iteration "
397  + str(kwargs["iteration"])
398  )
399  result_count = 0
400  start_time = timeit.default_timer()
401  while query_result.fetchone():
402  result_count += 1
403  results_iter_time = round(
404  ((timeit.default_timer() - start_time) * 1000), 1
405  )
406  query_execution = {
407  "result_count": result_count,
408  "execution_time": execution_time,
409  "connect_time": connect_time,
410  "results_iter_time": results_iter_time,
411  "total_time": execution_time + connect_time + results_iter_time,
412  }
413  logging.debug(
414  "Execution results for query "
415  + kwargs["query_name"]
416  + " iteration "
417  + str(kwargs["iteration"])
418  + ": "
419  + str(query_execution)
420  )
421  return query_execution
422 
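The timing decomposition above can be sketched standalone; the sleep stands in for a query round trip, and 25.0 for the backend-reported execution_time_ms:

    import time
    import timeit

    start_time = timeit.default_timer()
    time.sleep(0.05)                                  # pretend round trip (~50 ms)
    query_elapsed_time = (timeit.default_timer() - start_time) * 1000
    execution_time = 25.0                             # backend time (placeholder)
    connect_time = round(query_elapsed_time - execution_time, 1)
    print(connect_time)                               # client/transport overhead in ms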


def run_benchmark.get_connection (   kwargs)
  Connects to the db using pymapd
  https://pymapd.readthedocs.io/en/latest/usage.html#connecting

  Kwargs:
    db_user(str): DB username
    db_passwd(str): DB password
    db_server(str): DB host
    db_port(int): DB port
    db_name(str): DB name

  Returns:
    con(class): Connection class
    False(bool): The connection failed. Exception should be logged.

Definition at line 61 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), benchmark(), and send_results_db().

61 
62 def get_connection(**kwargs):
63  """
64  Connects to the db using pymapd
65  https://pymapd.readthedocs.io/en/latest/usage.html#connecting
66 
67  Kwargs:
68  db_user(str): DB username
69  db_passwd(str): DB password
70  db_server(str): DB host
71  db_port(int): DB port
72  db_name(str): DB name
73 
74  Returns:
75  con(class): Connection class
76  False(bool): The connection failed. Exception should be logged.
77  """
78  try:
79  logging.debug("Connecting to mapd db...")
80  con = pymapd.connect(
81  user=kwargs["db_user"],
82  password=kwargs["db_passwd"],
83  host=kwargs["db_server"],
84  port=kwargs["db_port"],
85  dbname=kwargs["db_name"],
86  )
87  logging.info("Successfully connected to mapd db")
88  return con
89  except (pymapd.exceptions.OperationalError, pymapd.exceptions.Error):
90  logging.exception("Error connecting to database.")
91  return False
92 
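Hypothetical usage, mirroring the connection defaults used by process_arguments():

    con = get_connection(
        db_user="mapd",
        db_passwd="HyperInteractive",
        db_server="localhost",
        db_port=6274,
        db_name="mapd",
    )
    if not con:
        raise SystemExit("connection failed; see logged exception")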


def run_benchmark.get_gpu_info (   kwargs)
  Gets run machine GPU info

  Kwargs:
    gpu_name(str): GPU name from input param
    no_gather_conn_gpu_info(bool): Do not gather GPU info fields from pymapd connection
    con(class 'pymapd.connection.Connection'): Mapd connection
    conn_machine_name(str): Name of run machine
    no_gather_nvml_gpu_info(bool): Do not gather GPU info using nvml
    gather_nvml_gpu_info(bool): Gather GPU info using nvml
    gpu_count(int): Number of GPUs on run machine

  Returns:
    gpu_info(dict):::
        conn_gpu_count(int): Number of GPUs gathered from pymapd con
        source_db_gpu_count(int): Number of GPUs on run machine
        source_db_gpu_mem(str): Amount of GPU mem on run machine
        source_db_gpu_driver_ver(str): GPU driver version
        source_db_gpu_name(str): GPU name

Definition at line 134 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

135 def get_gpu_info(**kwargs):
136  """
137  Gets run machine GPU info
138 
139  Kwargs:
140  gpu_name(str): GPU name from input param
141  no_gather_conn_gpu_info(bool): Do not gather GPU info fields from pymapd connection
142  con(class 'pymapd.connection.Connection'): Mapd connection
143  conn_machine_name(str): Name of run machine
144  no_gather_nvml_gpu_info(bool): Do not gather GPU info using nvml
145  gather_nvml_gpu_info(bool): Gather GPU info using nvml
146  gpu_count(int): Number of GPUs on run machine
147 
148  Returns:
149  gpu_info(dict):::
150  conn_gpu_count(int): Number of GPUs gathered from pymapd con
151  source_db_gpu_count(int): Number of GPUs on run machine
152  source_db_gpu_mem(str): Amount of GPU mem on run machine
153  source_db_gpu_driver_ver(str): GPU driver version
154  source_db_gpu_name(str): GPU name
155  """
156  # Set GPU info fields
157  conn_gpu_count = None
158  source_db_gpu_count = None
159  source_db_gpu_mem = None
160  source_db_gpu_driver_ver = ""
161  source_db_gpu_name = ""
162  if kwargs["no_gather_conn_gpu_info"]:
163  logging.debug(
164  "--no-gather-conn-gpu-info passed, "
165  + "using blank values for source database GPU info fields "
166  + "[run_gpu_count, run_gpu_mem_mb] "
167  )
168  else:
169  logging.debug(
170  "Gathering source database GPU info fields "
171  + "[run_gpu_count, run_gpu_mem_mb] "
172  + "using pymapd connection info. "
173  )
174  conn_hardware_info = kwargs["con"]._client.get_hardware_info(
175  kwargs["con"]._session
176  )
177  conn_gpu_count = conn_hardware_info.hardware_info[0].num_gpu_allocated
178  if conn_gpu_count == 0 or conn_gpu_count is None:
179  no_gather_nvml_gpu_info = True
180  if conn_gpu_count == 0:
181  logging.warning(
182  "0 GPUs detected from connection info, "
183  + "using blank values for source database GPU info fields "
184  + "If running against cpu-only server, make sure to set "
185  + "--no-gather-nvml-gpu-info and --no-gather-conn-gpu-info."
186  )
187  else:
188  no_gather_nvml_gpu_info = kwargs["no_gather_nvml_gpu_info"]
189  source_db_gpu_count = conn_gpu_count
190  try:
191  source_db_gpu_mem = int(
192  conn_hardware_info.hardware_info[0].gpu_info[0].memory
193  / 1000000
194  )
195  except IndexError:
196  logging.error("GPU memory info not available from connection.")
197  if no_gather_nvml_gpu_info:
198  logging.debug(
199  "--no-gather-nvml-gpu-info passed, "
200  + "using blank values for source database GPU info fields "
201  + "[gpu_driver_ver, run_gpu_name] "
202  )
203  elif (
204  kwargs["conn_machine_name"] == "localhost"
205  or kwargs["gather_nvml_gpu_info"]
206  ):
207  logging.debug(
208  "Gathering source database GPU info fields "
209  + "[gpu_driver_ver, run_gpu_name] "
210  + "from local GPU using pynvml. "
211  )
212  import pynvml
213 
214  pynvml.nvmlInit()
215  source_db_gpu_driver_ver = pynvml.nvmlSystemGetDriverVersion().decode()
216  for i in range(source_db_gpu_count):
217  handle = pynvml.nvmlDeviceGetHandleByIndex(i)
218  # Assume all cards are the same, overwrite name value
219  source_db_gpu_name = pynvml.nvmlDeviceGetName(handle).decode()
220  pynvml.nvmlShutdown()
221  # If gpu_count argument passed in, override gathered value
222  if kwargs["gpu_count"]:
223  source_db_gpu_count = kwargs["gpu_count"]
224  if kwargs["gpu_name"]:
225  source_db_gpu_name = kwargs["gpu_name"]
226  gpu_info = {
227  "conn_gpu_count": conn_gpu_count,
228  "source_db_gpu_count": source_db_gpu_count,
229  "source_db_gpu_mem": source_db_gpu_mem,
230  "source_db_gpu_driver_ver": source_db_gpu_driver_ver,
231  "source_db_gpu_name": source_db_gpu_name,
232  }
233  return gpu_info
234 
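A standalone sketch of the pynvml calls used above (assumes an NVIDIA driver and the pynvml package; older pynvml versions return bytes, hence the .decode() calls in the listing):

    import pynvml

    pynvml.nvmlInit()
    driver_ver = pynvml.nvmlSystemGetDriverVersion()
    gpu_names = []
    for i in range(pynvml.nvmlDeviceGetCount()):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        gpu_names.append(pynvml.nvmlDeviceGetName(handle))
    pynvml.nvmlShutdown()
    print(driver_ver, gpu_names)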


def run_benchmark.get_machine_info (   kwargs)
  Gets run machine info

  Kwargs:
    conn_machine_name(str): Name of machine from pymapd con
    machine_name(str): Name of machine if passed in
    machine_uname(str): Uname of machine if passed in

  Returns:
    machine_info(dict):::
        run_machine_name(str): Run machine name
        run_machine_uname(str): Run machine uname

Definition at line 235 of file run_benchmark.py.


Referenced by run_benchmark_arrow.benchmark(), and benchmark().

236 def get_machine_info(**kwargs):
237  """
238  Gets run machine info
239 
240  Kwargs:
241  conn_machine_name(str): Name of machine from pymapd con
242  machine_name(str): Name of machine if passed in
243  machine_uname(str): Uname of machine if passed in
244 
245  Returns:
246  machine_info(dict):::
247  run_machine_name(str): Run machine name
248  run_machine_uname(str): Run machine uname
249  """
250  # Set machine names, using local info if connected to localhost
251  if kwargs["conn_machine_name"] == "localhost":
252  local_uname = os.uname()
253  # If --machine-name passed in, override pymapd con value
254  if kwargs["machine_name"]:
255  run_machine_name = kwargs["machine_name"]
256  else:
257  if kwargs["conn_machine_name"] == "localhost":
258  run_machine_name = local_uname.nodename.split(".")[0]
259  else:
260  run_machine_name = kwargs["conn_machine_name"]
261  # If --machine-uname passed in, override pymapd con value
262  if kwargs["machine_uname"]:
263  run_machine_uname = kwargs["machine_uname"]
264  else:
265  if kwargs["conn_machine_name"] == "localhost":
266  run_machine_uname = " ".join(local_uname)
267  else:
268  run_machine_uname = ""
269  machine_info = {
270  "run_machine_name": run_machine_name,
271  "run_machine_uname": run_machine_uname,
272  }
273  return machine_info
274 
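What the localhost branch computes, as a standalone sketch (os.uname() is Unix-only):

    import os

    local_uname = os.uname()
    run_machine_name = local_uname.nodename.split(".")[0]   # short hostname
    run_machine_uname = " ".join(local_uname)               # full uname string
    print(run_machine_name, "|", run_machine_uname)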


def run_benchmark.get_mem_usage (   kwargs)
  Calculates memory statistics from mapd_server _client.get_memory call

  Kwargs:
    con(class 'pymapd.connection.Connection'): Mapd connection
    mem_type(str): [gpu, cpu] Type of memory to gather metrics for

  Returns:
    ramusage(dict):::
      usedram(float): Amount of memory (in MB) used
      freeram(float): Amount of memory (in MB) free
      totalallocated(float): Total amount of memory (in MB) allocated
      errormessage(str): Error if returned by get_memory call
      rawdata(list): Raw data returned from get_memory call

Definition at line 501 of file run_benchmark.py.

Referenced by run_benchmark_arrow.run_query(), and run_query().

502 def get_mem_usage(**kwargs):
503  """
504  Calculates memory statistics from mapd_server _client.get_memory call
505 
506  Kwargs:
507  con(class 'pymapd.connection.Connection'): Mapd connection
508  mem_type(str): [gpu, cpu] Type of memory to gather metrics for
509 
510  Returns:
511  ramusage(dict):::
512  usedram(float): Amount of memory (in MB) used
513  freeram(float): Amount of memory (in MB) free
514  totalallocated(float): Total amount of memory (in MB) allocated
515  errormessage(str): Error if returned by get_memory call
516  rawdata(list): Raw data returned from get_memory call
517  """
518  try:
519  con_mem_data_list = kwargs["con"]._client.get_memory(
520  session=kwargs["con"]._session, memory_level=kwargs["mem_type"]
521  )
522  usedram = 0
523  freeram = 0
524  for con_mem_data in con_mem_data_list:
525  page_size = con_mem_data.page_size
526  node_memory_data_list = con_mem_data.node_memory_data
527  for node_memory_data in node_memory_data_list:
528  ram = node_memory_data.num_pages * page_size
529  is_free = node_memory_data.is_free
530  if is_free:
531  freeram += ram
532  else:
533  usedram += ram
534  totalallocated = usedram + freeram
535  if totalallocated > 0:
536  totalallocated = round(totalallocated / 1024 / 1024, 1)
537  usedram = round(usedram / 1024 / 1024, 1)
538  freeram = round(freeram / 1024 / 1024, 1)
539  ramusage = {}
540  ramusage["usedram"] = usedram
541  ramusage["freeram"] = freeram
542  ramusage["totalallocated"] = totalallocated
543  ramusage["errormessage"] = ""
544  except Exception as e:
545  errormessage = "Get memory failed with error: " + str(e)
546  logging.error(errormessage)
547  ramusage = {"errormessage": errormessage}  # dict may not exist yet if get_memory() raised
548  return ramusage
549 
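The per-node arithmetic above reduces to num_pages * page_size bytes, converted to MB; a self-contained sketch with made-up values:

    page_size = 512                     # bytes per page (made-up)
    num_pages = 4096
    ram = num_pages * page_size         # bytes, as summed in the loop above
    ram_mb = round(ram / 1024 / 1024, 1)
    print(ram_mb)                       # 2.0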


def run_benchmark.get_run_vars (   kwargs)
  Gets/sets run-specific vars such as time, uid, etc.

  Kwargs:
    con(class 'pymapd.connection.Connection'): Mapd connection

  Returns:
    run_vars(dict):::
        run_guid(str): Run GUID
        run_timestamp(datetime): Run timestamp
        run_connection(str): Connection string
        run_driver(str): Run driver
        run_version(str): Version of DB
        run_version_short(str): Shortened version of DB
        conn_machine_name(str): Name of run machine

Definition at line 93 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

93 
94 def get_run_vars(**kwargs):
95  """
96  Gets/sets run-specific vars such as time, uid, etc.
97 
98  Kwargs:
99  con(class 'pymapd.connection.Connection'): Mapd connection
100 
101  Returns:
102  run_vars(dict):::
103  run_guid(str): Run GUID
104  run_timestamp(datetime): Run timestamp
105  run_connection(str): Connection string
106  run_driver(str): Run driver
107  run_version(str): Version of DB
108  run_version_short(str): Shortened version of DB
109  conn_machine_name(str): Name of run machine
110  """
111  run_guid = str(uuid.uuid4())
112  logging.debug("Run guid: " + run_guid)
113  run_timestamp = datetime.datetime.now()
114  run_connection = str(kwargs["con"])
115  logging.debug("Connection string: " + run_connection)
116  run_driver = "" # TODO
117  run_version = kwargs["con"]._client.get_version()
118  if "-" in run_version:
119  run_version_short = run_version.split("-")[0]
120  else:
121  run_version_short = run_version
122  conn_machine_name = re.search(r"@(.*?):", run_connection).group(1)
123  run_vars = {
124  "run_guid": run_guid,
125  "run_timestamp": run_timestamp,
126  "run_connection": run_connection,
127  "run_driver": run_driver,
128  "run_version": run_version,
129  "run_version_short": run_version_short,
130  "conn_machine_name": conn_machine_name,
131  }
132  return run_vars
133 
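The conn_machine_name parse relies on the connection repr embedding "@host:"; a self-contained sketch with a string of roughly pymapd's shape (hypothetical):

    import re

    run_connection = "Connection(mapd://mapd:***@localhost:6274/mapd)"
    conn_machine_name = re.search(r"@(.*?):", run_connection).group(1)
    print(conn_machine_name)  # localhost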


def run_benchmark.json_format_handler (   x)

Definition at line 734 of file run_benchmark.py.

735 def json_format_handler(x):
736  # Function to allow json to deal with datetime and numpy int
737  if isinstance(x, datetime.datetime):
738  return x.isoformat()
739  if isinstance(x, numpy.int64):
740  return int(x)
741  raise TypeError("Unknown type")
742 
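Hypothetical usage: json.dumps() calls the handler for any value it cannot serialize natively, as happens when benchmark() dumps the results dataset:

    import datetime
    import json
    import numpy

    payload = {"ts": datetime.datetime.now(), "count": numpy.int64(42)}
    print(json.dumps(payload, default=json_format_handler, indent=2))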
def run_benchmark.process_arguments (   input_arguments)

Definition at line 1112 of file run_benchmark.py.

Referenced by benchmark().

1113 def process_arguments(input_arguments):
1114  # Parse input parameters
1115  parser = ArgumentParser()
1116  optional = parser._action_groups.pop()
1117  required = parser.add_argument_group("required arguments")
1118  parser._action_groups.append(optional)
1119  optional.add_argument(
1120  "-v", "--verbose", action="store_true", help="Turn on debug logging"
1121  )
1122  optional.add_argument(
1123  "-q",
1124  "--quiet",
1125  action="store_true",
1126  help="Suppress script output " + "(except warnings and errors)",
1127  )
1128  required.add_argument(
1129  "-u",
1130  "--user",
1131  dest="user",
1132  default="mapd",
1133  help="Source database user",
1134  )
1135  required.add_argument(
1136  "-p",
1137  "--passwd",
1138  dest="passwd",
1139  default="HyperInteractive",
1140  help="Source database password",
1141  )
1142  required.add_argument(
1143  "-s",
1144  "--server",
1145  dest="server",
1146  default="localhost",
1147  help="Source database server hostname",
1148  )
1149  optional.add_argument(
1150  "-o",
1151  "--port",
1152  dest="port",
1153  type=int,
1154  default=6274,
1155  help="Source database server port",
1156  )
1157  required.add_argument(
1158  "-n",
1159  "--name",
1160  dest="name",
1161  default="mapd",
1162  help="Source database name",
1163  )
1164  required.add_argument(
1165  "-t",
1166  "--table",
1167  dest="table",
1168  required=True,
1169  help="Source db table name",
1170  )
1171  required.add_argument(
1172  "-l",
1173  "--label",
1174  dest="label",
1175  required=True,
1176  help="Benchmark run label",
1177  )
1178  required.add_argument(
1179  "-d",
1180  "--queries-dir",
1181  dest="queries_dir",
1182  help='Absolute path to dir with query files. \
1183  [Default: "queries" dir in same location as script]',
1184  )
1185  required.add_argument(
1186  "-i",
1187  "--iterations",
1188  dest="iterations",
1189  type=int,
1190  required=True,
1191  help="Number of iterations per query. Must be > 1",
1192  )
1193  optional.add_argument(
1194  "-g",
1195  "--gpu-count",
1196  dest="gpu_count",
1197  type=int,
1198  default=None,
1199  help="Number of GPUs. Not required when gathering local gpu info",
1200  )
1201  optional.add_argument(
1202  "-G",
1203  "--gpu-name",
1204  dest="gpu_name",
1205  type=str,
1206  default="",
1207  help="Name of GPU(s). Not required when gathering local gpu info",
1208  )
1209  optional.add_argument(
1210  "--no-gather-conn-gpu-info",
1211  dest="no_gather_conn_gpu_info",
1212  action="store_true",
1213  help="Do not gather source database GPU info fields "
1214  + "[run_gpu_count, run_gpu_mem_mb] "
1215  + "using pymapd connection info. "
1216  + "Use when testing a CPU-only server.",
1217  )
1218  optional.add_argument(
1219  "--no-gather-nvml-gpu-info",
1220  dest="no_gather_nvml_gpu_info",
1221  action="store_true",
1222  help="Do not gather source database GPU info fields "
1223  + "[gpu_driver_ver, run_gpu_name] "
1224  + "from local GPU using pynvml. "
1225  + 'Defaults to True when source server is not "localhost". '
1226  + "Use when testing a CPU-only server.",
1227  )
1228  optional.add_argument(
1229  "--gather-nvml-gpu-info",
1230  dest="gather_nvml_gpu_info",
1231  action="store_true",
1232  help="Gather source database GPU info fields "
1233  + "[gpu_driver_ver, run_gpu_name] "
1234  + "from local GPU using pynvml. "
1235  + 'Defaults to True when source server is "localhost". '
1236  + "Only use when benchmarking against same machine that this script "
1237  + "is run from.",
1238  )
1239  optional.add_argument(
1240  "-m",
1241  "--machine-name",
1242  dest="machine_name",
1243  help="Name of source machine",
1244  )
1245  optional.add_argument(
1246  "-a",
1247  "--machine-uname",
1248  dest="machine_uname",
1249  help="Uname info from " + "source machine",
1250  )
1251  optional.add_argument(
1252  "-e",
1253  "--destination",
1254  dest="destination",
1255  default="mapd_db",
1256  help="Destination type: [mapd_db, file_json, output, jenkins_bench] "
1257  + "Multiple values can be input separated by commas, "
1258  + 'ex: "mapd_db,file_json"',
1259  )
1260  optional.add_argument(
1261  "-U",
1262  "--dest-user",
1263  dest="dest_user",
1264  default="mapd",
1265  help="Destination mapd_db database user",
1266  )
1267  optional.add_argument(
1268  "-P",
1269  "--dest-passwd",
1270  dest="dest_passwd",
1271  default="HyperInteractive",
1272  help="Destination mapd_db database password",
1273  )
1274  optional.add_argument(
1275  "-S",
1276  "--dest-server",
1277  dest="dest_server",
1278  help="Destination mapd_db database server hostname"
1279  + ' (required if destination = "mapd_db")',
1280  )
1281  optional.add_argument(
1282  "-O",
1283  "--dest-port",
1284  dest="dest_port",
1285  type=int,
1286  default=6274,
1287  help="Destination mapd_db database server port",
1288  )
1289  optional.add_argument(
1290  "-N",
1291  "--dest-name",
1292  dest="dest_name",
1293  default="mapd",
1294  help="Destination mapd_db database name",
1295  )
1296  optional.add_argument(
1297  "-T",
1298  "--dest-table",
1299  dest="dest_table",
1300  default="results",
1301  help="Destination mapd_db table name",
1302  )
1303  optional.add_argument(
1304  "-C",
1305  "--dest-table-schema-file",
1306  dest="dest_table_schema_file",
1307  default="results_table_schemas/query-results.sql",
1308  help="Destination table schema file. This must be an executable "
1309  + "CREATE TABLE statement that matches the output of this script. It "
1310  + "is required when creating the results table. Default location is "
1311  + 'in "./results_table_schemas/query-results.sql"',
1312  )
1313  optional.add_argument(
1314  "-j",
1315  "--output-file-json",
1316  dest="output_file_json",
1317  help="Absolute path of .json output file "
1318  + '(required if destination = "file_json")',
1319  )
1320  optional.add_argument(
1321  "-J",
1322  "--output-file-jenkins",
1323  dest="output_file_jenkins",
1324  help="Absolute path of jenkins benchmark .json output file "
1325  + '(required if destination = "jenkins_bench")',
1326  )
1327  optional.add_argument(
1328  "-E",
1329  "--output-tag-jenkins",
1330  dest="output_tag_jenkins",
1331  default="",
1332  help="Jenkins benchmark result tag. "
1333  + 'Optional, appended to table name in "group" field',
1334  )
1335  args = parser.parse_args(args=input_arguments)
1336  return args
1337 
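Hypothetical usage: parse a flag list the way benchmark() does and read the defaults back:

    args = process_arguments(
        ["-t", "flights_2008_10k", "-l", "test-label", "-i", "2"]
    )
    print(args.server, args.port, args.iterations)  # localhost 6274 2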


def run_benchmark.read_query_files (   kwargs)
  Reads query files from the queries directory

  Kwargs:
    queries_dir(str): Directory with query files
    source_table(str): Table to run query against

  Returns:
    query_list(dict):::
        query_group(str): Query group, usually matches table name
        queries(list)
            query(dict):::
                name(str): Name of query
                mapdql(str): Query syntax to run
    False(bool): Unable to find queries dir

Definition at line 275 of file run_benchmark.py.

References validate_query_file().

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

276 def read_query_files(**kwargs):
277  """
278  Reads query files from the queries directory
279 
280  Kwargs:
281  queries_dir(str): Directory with query files
282  source_table(str): Table to run query against
283 
284  Returns:
285  query_list(dict):::
286  query_group(str): Query group, usually matches table name
287  queries(list)
288  query(dict):::
289  name(str): Name of query
290  mapdql(str): Query syntax to run
291  False(bool): Unable to find queries dir
292  """
293  # Read query files contents and write to query_list
294  query_list = {"query_group": "", "queries": []}
295  query_group = kwargs["queries_dir"].split("/")[-1]
296  query_list.update(query_group=query_group)
297  logging.debug("Queries dir: " + kwargs["queries_dir"])
298  try:
299  for query_filename in sorted(os.listdir(kwargs["queries_dir"])):
300  logging.debug("Validating query filename: " + query_filename)
301  if validate_query_file(query_filename=query_filename):
302  with open(
303  kwargs["queries_dir"] + "/" + query_filename, "r"
304  ) as query_filepath:
305  logging.debug(
306  "Reading query with filename: " + query_filename
307  )
308  query_mapdql = query_filepath.read().replace("\n", " ")
309  query_mapdql = query_mapdql.replace(
310  "##TAB##", kwargs["source_table"]
311  )
312  query_list["queries"].append(
313  {"name": query_filename, "mapdql": query_mapdql}
314  )
315  logging.info("Read all query files")
316  return query_list
317  except FileNotFoundError:
318  logging.exception("Could not find queries directory.")
319  return False
320 
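The ##TAB## placeholder in each query file is swapped for the source table; a self-contained sketch of that substitution (table name is a placeholder):

    query_template = "SELECT COUNT(*)\nFROM ##TAB##;"
    query_mapdql = query_template.replace("\n", " ")
    query_mapdql = query_mapdql.replace("##TAB##", "flights_2008_10k")
    print(query_mapdql)  # SELECT COUNT(*) FROM flights_2008_10k;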


def run_benchmark.run_query (   kwargs)
  Takes query name, syntax, and iteration count and calls the
    execute_query function for each iteration. Reports total, iteration,
    and exec timings, memory usage, and failure status.

  Kwargs:
    query(dict):::
        name(str): Name of query
        mapdql(str): Query syntax to run
    iterations(int): Number of iterations of each query to run
    trim(float): Trim decimal to remove from top and bottom of results
    con(class 'pymapd.connection.Connection'): Mapd connection

  Returns:
    query_results(dict):::
        query_name(str): Name of query
        query_mapdql(str): Query to run
        query_id(str): Query ID
        query_succeeded(bool): Query succeeded
        query_error_info(str): Query error info
        result_count(int): Number of results returned
        initial_iteration_results(dict):::
            first_execution_time(float): Execution time for first query
                iteration
            first_connect_time(float):  Connect time for first query
                iteration
            first_results_iter_time(float): Results iteration time for
                first query iteration
            first_total_time(float): Total time for first iteration
            first_cpu_mem_usage(float): CPU memory usage for first query
                iteration
            first_gpu_mem_usage(float): GPU memory usage for first query
                iteration
        noninitial_iteration_results(list):::
            execution_time(float): Time (in ms) that pymapd reports
                backend spent on query.
            connect_time(float): Time (in ms) for overhead of query,
                calculated by subtracting backend execution time from
                time spent on the execution function.
            results_iter_time(float): Time (in ms) it took for
                pymapd.fetchone() to iterate through all of the results.
            total_time(float): Time (in ms) from adding all above times.
        query_total_elapsed_time(int): Total elapsed time for query
    False(bool): The query failed. Exception should be logged.

Definition at line 550 of file run_benchmark.py.

References execute_query() and get_mem_usage().

Referenced by benchmark().

551 def run_query(**kwargs):
552  """
553  Takes query name, syntax, and iteration count and calls the
554  execute_query function for each iteration. Reports total, iteration,
555  and exec timings, memory usage, and failure status.
556 
557  Kwargs:
558  query(dict):::
559  name(str): Name of query
560  mapdql(str): Query syntax to run
561  iterations(int): Number of iterations of each query to run
562  trim(float): Trim decimal to remove from top and bottom of results
563  con(class 'pymapd.connection.Connection'): Mapd connection
564 
565  Returns:
566  query_results(dict):::
567  query_name(str): Name of query
568  query_mapdql(str): Query to run
569  query_id(str): Query ID
570  query_succeeded(bool): Query succeeded
571  query_error_info(str): Query error info
572  result_count(int): Number of results returned
573  initial_iteration_results(dict):::
574  first_execution_time(float): Execution time for first query
575  iteration
576  first_connect_time(float): Connect time for first query
577  iteration
578  first_results_iter_time(float): Results iteration time for
579  first query iteration
580  first_total_time(float): Total time for first iteration
581  first_cpu_mem_usage(float): CPU memory usage for first query
582  iteration
583  first_gpu_mem_usage(float): GPU memory usage for first query
584  iteration
585  noninitial_iteration_results(list):::
586  execution_time(float): Time (in ms) that pymapd reports
587  backend spent on query.
588  connect_time(float): Time (in ms) for overhead of query,
589  calculated by subtracting backend execution time from
590  time spent on the execution function.
591  results_iter_time(float): Time (in ms) it took for
592  pymapd.fetchone() to iterate through all of the results.
593  total_time(float): Time (in ms) from adding all above times.
594  query_total_elapsed_time(int): Total elapsed time for query
595  False(bool): The query failed. Exception should be logged.
596  """
597  logging.info(
598  "Running query: "
599  + kwargs["query"]["name"]
600  + " iterations: "
601  + str(kwargs["iterations"])
602  )
603  query_id = kwargs["query"]["name"].rsplit(".")[
604  0
605  ] # Query ID = filename without extension
606  query_results = {
607  "query_name": kwargs["query"]["name"],
608  "query_mapdql": kwargs["query"]["mapdql"],
609  "query_id": query_id,
610  "query_succeeded": True,
611  "query_error_info": "",
612  "initial_iteration_results": {},
613  "noninitial_iteration_results": [],
614  "query_total_elapsed_time": 0,
615  }
616  query_total_start_time = timeit.default_timer()
617  # Run iterations of query
618  for iteration in range(kwargs["iterations"]):
619  # Gather memory before running query iteration
620  logging.debug("Getting pre-query memory usage on CPU")
621  pre_query_cpu_mem_usage = get_mem_usage(
622  con=kwargs["con"], mem_type="cpu"
623  )
624  logging.debug("Getting pre-query memory usage on GPU")
625  pre_query_gpu_mem_usage = get_mem_usage(
626  con=kwargs["con"], mem_type="gpu"
627  )
628  # Run query iteration
629  logging.debug(
630  "Running iteration "
631  + str(iteration)
632  + " of query "
633  + kwargs["query"]["name"]
634  )
635  query_result = execute_query(
636  query_name=kwargs["query"]["name"],
637  query_mapdql=kwargs["query"]["mapdql"],
638  iteration=iteration,
639  con=kwargs["con"],
640  )
641  # Gather memory after running query iteration
642  logging.debug("Getting post-query memory usage on CPU")
643  post_query_cpu_mem_usage = get_mem_usage(
644  con=kwargs["con"], mem_type="cpu"
645  )
646  logging.debug("Getting post-query memory usage on GPU")
647  post_query_gpu_mem_usage = get_mem_usage(
648  con=kwargs["con"], mem_type="gpu"
649  )
650  # Calculate total (post minus pre) memory usage after query iteration
651  query_cpu_mem_usage = round(
652  post_query_cpu_mem_usage["usedram"]
653  - pre_query_cpu_mem_usage["usedram"],
654  1,
655  )
656  query_gpu_mem_usage = round(
657  post_query_gpu_mem_usage["usedram"]
658  - pre_query_gpu_mem_usage["usedram"],
659  1,
660  )
661  if query_result:
662  query_results.update(
663  query_error_info="" # TODO - interpret query error info
664  )
665  # Assign first query iteration times
666  if iteration == 0:
667  first_execution_time = round(query_result["execution_time"], 1)
668  first_connect_time = round(query_result["connect_time"], 1)
669  first_results_iter_time = round(
670  query_result["results_iter_time"], 1
671  )
672  first_total_time = (
673  first_execution_time
674  + first_connect_time
675  + first_results_iter_time
676  )
677  query_results.update(
678  initial_iteration_results={
679  "first_execution_time": first_execution_time,
680  "first_connect_time": first_connect_time,
681  "first_results_iter_time": first_results_iter_time,
682  "first_total_time": first_total_time,
683  "first_cpu_mem_usage": query_cpu_mem_usage,
684  "first_gpu_mem_usage": query_gpu_mem_usage,
685  }
686  )
687  else:
688  # Put noninitial iterations into query_result list
689  query_results["noninitial_iteration_results"].append(
690  query_result
691  )
692  # Verify no change in memory for noninitial iterations
693  if query_cpu_mem_usage != 0.0:
694  logging.error(
695  (
696  "Noninitial iteration ({0}) of query ({1}) "
697  + "shows non-zero CPU memory usage: {2}"
698  ).format(
699  iteration,
700  kwargs["query"]["name"],
701  query_cpu_mem_usage,
702  )
703  )
704  if query_gpu_mem_usage != 0.0:
705  logging.error(
706  (
707  "Noninitial iteration ({0}) of query ({1}) "
708  + "shows non-zero GPU memory usage: {2}"
709  ).format(
710  iteration,
711  kwargs["query"]["name"],
712  query_gpu_mem_usage,
713  )
714  )
715  else:
716  logging.warning(
717  "Error detected during execution of query: "
718  + kwargs["query"]["name"]
719  + ". This query will be skipped and "
720  + "times will not be reported"
721  )
722  query_results.update(query_succeeded=False)
723  break
724  # Calculate time for all iterations to run
725  query_total_elapsed_time = round(
726  ((timeit.default_timer() - query_total_start_time) * 1000), 1
727  )
728  query_results.update(query_total_elapsed_time=query_total_elapsed_time)
729  logging.info(
730  "Completed all iterations of query " + kwargs["query"]["name"]
731  )
732  return query_results
733 
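Hypothetical usage with an open pymapd connection con (table and file name are placeholders):

    query = {"name": "query_1.sql",
             "mapdql": "SELECT COUNT(*) FROM flights_2008_10k;"}
    results = run_query(query=query, iterations=5, trim=0.15, con=con)
    if results["query_succeeded"]:
        print(results["query_total_elapsed_time"])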


def run_benchmark.send_results_db (   kwargs)
  Send results dataset to a database using pymapd

  Kwargs:
    results_dataset(list):::
        result_dataset(dict): Query results dataset
    table(str): Results destination table name
    db_user(str): Results destination user name
    db_passwd(str): Results destination password
    db_server(str): Results destination server address
    db_port(int): Results destination server port
    db_name(str): Results destination database name
    table_schema_file(str): Path to destination database schema file

  Returns:
    True(bool): Sending results to destination database succeeded
    False(bool): Sending results to destination database failed. Exception
        should be logged.

Definition at line 930 of file run_benchmark.py.

References get_connection().

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

931 def send_results_db(**kwargs):
932  """
933  Send results dataset to a database using pymapd
934 
935  Kwargs:
936  results_dataset(list):
937  result_dataset(dict): Query results dataset
938  table(str): Results destination table name
939  db_user(str): Results destination user name
940  db_passwd(str): Results destination password
941  db_server(str): Results destination server address
942  db_port(int): Results destination server port
943  db_name(str): Results destination database name
944  table_schema_file(str): Path to destination database schema file
945 
946  Returns:
947  True(bool): Sending results to destination database succeeded
948  False(bool): Sending results to destination database failed. Exception
949  should be logged.
950  """
951  # Create dataframe from list of query results
952  logging.debug("Converting results list to pandas dataframe")
953  results_df = DataFrame(kwargs["results_dataset"])
954  # Establish connection to destination db
955  logging.debug("Connecting to destination db")
956  dest_con = get_connection(
957  db_user=kwargs["db_user"],
958  db_passwd=kwargs["db_passwd"],
959  db_server=kwargs["db_server"],
960  db_port=kwargs["db_port"],
961  db_name=kwargs["db_name"],
962  )
963  if not dest_con:
964  logging.exception("Could not connect to destination db.")
965  return False
966  # Load results into db, creating table if it does not exist
967  tables = dest_con.get_tables()
968  if kwargs["table"] not in tables:
969  logging.info("Destination table does not exist. Creating.")
970  try:
971  with open(kwargs["table_schema_file"], "r") as table_schema:
972  logging.debug(
973  "Reading table_schema_file: " + kwargs["table_schema_file"]
974  )
975  create_table_sql = table_schema.read().replace("\n", " ")
976  create_table_sql = create_table_sql.replace(
977  "##TAB##", kwargs["table"]
978  )
979  except FileNotFoundError:
980  logging.exception("Could not find destination table_schema_file.")
981  return False
982  try:
983  logging.debug("Executing create destination table query")
984  dest_con.execute(create_table_sql)
985  logging.debug("Destination table created.")
986  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
987  logging.exception("Error running destination table creation")
988  return False
989  logging.info("Loading results into destination db")
990  try:
991  dest_con.load_table_columnar(
992  kwargs["table"],
993  results_df,
994  preserve_index=False,
995  chunk_size_bytes=0,
996  col_names_from_schema=True,
997  )
998  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
999  logging.exception("Error loading results into destination db")
1000  dest_con.close()
1001  return False
1002  dest_con.close()
1003  return True
1004 

def run_benchmark.send_results_file_json (   kwargs)
  Send results dataset to a local json file

  Kwargs:
    results_dataset_json(str): Json-formatted query results dataset
    output_file_json (str): Location of .json file output

  Returns:
    True(bool): Sending results to json file succeeded
    False(bool): Sending results to json file failed. Exception
        should be logged.

Definition at line 1005 of file run_benchmark.py.


Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1006 def send_results_file_json(**kwargs):
1007  """
1008  Send results dataset to a local json file
1009 
1010  Kwargs:
1011  results_dataset_json(str): Json-formatted query results dataset
1012  output_file_json (str): Location of .json file output
1013 
1014  Returns:
1015  True(bool): Sending results to json file succeeded
1016  False(bool): Sending results to json file failed. Exception
1017  should be logged.
1018  """
1019  try:
1020  logging.debug("Opening json output file for writing")
1021  with open(kwargs["output_file_json"], "w") as file_json_open:
1022  logging.info(
1023  "Writing to output json file: " + kwargs["output_file_json"]
1024  )
1025  file_json_open.write(kwargs["results_dataset_json"])
1026  return True
1027  except IOError:
1028  logging.exception("Error writing results to json output file")
1029  return False
1030 

def run_benchmark.send_results_jenkins_bench (   kwargs)
  Send results dataset to a local json file formatted for use with jenkins
    benchmark plugin: https://github.com/jenkinsci/benchmark-plugin

  Kwargs:
    results_dataset(list):
        result_dataset(dict): Query results dataset
    thresholds_name(str): Name to use for Jenkins result field
    thresholds_field(str): Field to use for query threshold in jenkins
    output_tag_jenkins(str): Jenkins benchmark result tag, for different
        sets from same table
    output_file_jenkins (str): Location of .json jenkins file output

  Returns:
    True(bool): Sending results to json file succeeded
    False(bool): Sending results to json file failed. Exception
        should be logged.

Definition at line 1031 of file run_benchmark.py.


Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1032 def send_results_jenkins_bench(**kwargs):
1033  """
1034  Send results dataset to a local json file formatted for use with jenkins
1035  benchmark plugin: https://github.com/jenkinsci/benchmark-plugin
1036 
1037  Kwargs:
1038  results_dataset(list):
1039  result_dataset(dict): Query results dataset
1040  thresholds_name(str): Name to use for Jenkins result field
1041  thresholds_field(str): Field to use for query threshold in jenkins
1042  output_tag_jenkins(str): Jenkins benchmark result tag, for different
1043  sets from same table
1044  output_file_jenkins (str): Location of .json jenkins file output
1045 
1046  Returns:
1047  True(bool): Sending results to json file succeeded
1048  False(bool): Sending results to json file failed. Exception
1049  should be logged.
1050  """
1051  jenkins_bench_results = []
1052  for result_dataset in kwargs["results_dataset"]:
1053  logging.debug("Constructing output for jenkins benchmark plugin")
1054  jenkins_bench_results.append(
1055  {
1056  "name": result_dataset["query_id"],
1057  "description": "",
1058  "parameters": [],
1059  "results": [
1060  {
1061  "name": result_dataset["query_id"]
1062  + "_"
1063  + kwargs["thresholds_name"],
1064  "description": "",
1065  "unit": "ms",
1066  "dblValue": result_dataset[kwargs["thresholds_field"]],
1067  }
1068  ],
1069  }
1070  )
1071  jenkins_bench_json = json.dumps(
1072  {
1073  "groups": [
1074  {
1075  "name": result_dataset["run_table"]
1076  + kwargs["output_tag_jenkins"],
1077  "description": "Source table: "
1078  + result_dataset["run_table"],
1079  "tests": jenkins_bench_results,
1080  }
1081  ]
1082  }
1083  )
1084  try:
1085  logging.debug("Opening jenkins_bench json output file for writing")
1086  with open(kwargs["output_file_jenkins"], "w") as file_jenkins_open:
1087  logging.info(
1088  "Writing to jenkins_bench json file: "
1089  + kwargs["output_file_jenkins"]
1090  )
1091  file_jenkins_open.write(jenkins_bench_json)
1092  return True
1093  except IOError:
1094  logging.exception("Error writing results to jenkins json output file")
1095  return False
1096 

def run_benchmark.send_results_output (   kwargs)
  Send results dataset to script output

  Kwargs:
    results_dataset_json(str): Json-formatted query results dataset

  Returns:
    True(bool): Sending results to output succeeded

Definition at line 1097 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1098 def send_results_output(**kwargs):
1099  """
1100  Send results dataset to script output
1101 
1102  Kwargs:
1103  results_dataset_json(str): Json-formatted query results dataset
1104 
1105  Returns:
1106  True(bool): Sending results to output succeeded
1107  """
1108  logging.info("Printing query results to output")
1109  print(kwargs["results_dataset_json"])
1110  return True
1111 


def run_benchmark.validate_query_file (   kwargs)
  Validates query file. Currently only checks the query file name

  Kwargs:
    query_filename(str): Name of query file

  Returns:
    True(bool): Query successfully validated
    False(bool): Query failed validation

Definition at line 321 of file run_benchmark.py.

Referenced by read_query_files().

322 def validate_query_file(**kwargs):
323  """
324  Validates query file. Currently only checks the query file name
325 
326  Kwargs:
327  query_filename(str): Name of query file
328 
329  Returns:
330  True(bool): Query successfully validated
331  False(bool): Query failed validation
332  """
333  if not kwargs["query_filename"].endswith(".sql"):
334  logging.warning(
335  "Query filename "
336  + kwargs["query_filename"]
337  + ' is invalid - does not end in ".sql". Skipping'
338  )
339  return False
340  else:
341  return True
342 


def run_benchmark.verify_destinations (   kwargs)
  Verify script output destination(s)

  Kwargs:
    destinations (list): List of destinations
    dest_db_server (str): DB output destination server
    output_file_json (str): Location of .json file output
    output_file_jenkins (str): Location of .json jenkins file output

  Returns:
    True(bool): Destination(s) is/are valid
    False(bool): Destination(s) is/are not valid

Definition at line 17 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

17 
18 def verify_destinations(**kwargs):
19  """
20  Verify script output destination(s)
21 
22  Kwargs:
23  destinations (list): List of destinations
24  dest_db_server (str): DB output destination server
25  output_file_json (str): Location of .json file output
26  output_file_jenkins (str): Location of .json jenkins file output
27 
28  Returns:
29  True(bool): Destination(s) is/are valid
30  False(bool): Destination(s) is/are not valid
31  """
32  if "mapd_db" in kwargs["destinations"]:
33  valid_destination_set = True
34  if kwargs["dest_db_server"] is None:
35  # If dest_server is not set for mapd_db, then exit
36  logging.error(
37  '"dest_server" is required when destination = "mapd_db"'
38  )
39  if "file_json" in kwargs["destinations"]:
40  valid_destination_set = True
41  if kwargs["output_file_json"] is None:
42  # If output_file_json is not set for file_json, then exit
43  logging.error(
44  '"output_file_json" is required when destination = "file_json"'
45  )
46  if "output" in kwargs["destinations"]:
47  valid_destination_set = True
48  if "jenkins_bench" in kwargs["destinations"]:
49  valid_destination_set = True
50  if kwargs["output_file_jenkins"] is None:
51  # If output_file_jenkins is not set for jenkins_bench, then exit
52  logging.error(
53  '"output_file_jenkins" is required '
54  + 'when destination = "jenkins_bench"'
55  )
56  if not valid_destination_set:
57  return False
58  else:
59  return True
60 