Tuesday, August 27, 2019

Automating API Throughput Reports


One of my monthly tasks is a status report that includes statistics on the throughput of our APIs. I do this by using the data gathered by our API Manager. Before I got smart, I would extract the data through the UI week by week, put it into a spreadsheet, chop off the extra headers, and try to convince Excel to produce a set of graphs that were stylized the same way as the previous month's. This often took more time than I liked.

Upon reading about using MatPlotLib and Pandas, I realized this could handily be automated with Python. My first attempt read the data from the CSV I exported from the API Manager, but then I wrote a function to pull it from SQL Server using PyODBC:

def query_api_mgr_monthly(month, year, contract):
    """Query daily API Manager metrics starting at the 1st of the given month.

    Parameters
    ----------
    month, year : int
        Month/year used to build the START_DTS lower bound (M/1/YYYY).
    contract : str
        Prefix filter for CONTRACT_NAME; an empty string matches all contracts.

    Returns
    -------
    pandas.DataFrame
        One row per START_DTS with columns: date, successes, failures,
        minResp, maxResp.
    """
    conn = pyodbc.connect(r'Driver={SQL Server};'
                          'Server=;'
                          'Database=;'
                          'Trusted_Connection=yes')
    try:
        # Parameterized query: never interpolate user-supplied values into
        # SQL text (injection risk, and breaks on quotes in `contract`).
        query_str = (
            "select CONVERT(varchar, START_DTS, 1) [date], "
            "sum(SUCCESS_COUNT) successes, sum(ERROR_COUNT) failures, "
            "avg(MIN_RESP_TIME) minResp, avg(MAX_RESP_TIME) maxResp "
            "from [sql_soa_prod_svc].METRICS_DAILY_VIEW "
            "where START_DTS >= ? "
            "and CONTRACT_NAME like ? "
            "group by START_DTS "
            "order by START_DTS asc"
        )
        start_date = "%s/1/%s" % (month, year)
        contract_pattern = contract + "%"  # prefix match, as before
        apiMgr = pd.read_sql_query(sql=query_str, con=conn,
                                   params=[start_date, contract_pattern])
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

    return apiMgr

There's one function to plot the traffic through the API Manager: the number of successes per day, the number of failures per day, and the monthly average number of transactions per day.

def plot_traffic(apiMgr, title):
    """Plot daily success/failure counts plus a flat monthly-average line.

    Parameters
    ----------
    apiMgr : pandas.DataFrame
        Must contain 'date', 'successes', and 'failures' columns.
    title : str
        Title for the chart.
    """
    # rcParams only affect figures created afterwards, so the size must be
    # set BEFORE the first plt.plot() call (the original set it after, which
    # had no effect on the current figure).
    plt.rcParams["figure.figsize"] = [10, 4]

    # Vectorized Series addition replaces the manual index loop.
    total = apiMgr["successes"] + apiMgr["failures"]
    ave = np.average(total)
    aveline = [ave] * len(apiMgr["successes"])

    # Build graph of daily transaction counts. The average line is plotted
    # against the same date axis so it aligns with the other series.
    plt.plot(apiMgr["date"], apiMgr["successes"], label="successes")
    plt.plot(apiMgr["date"], apiMgr["failures"], label="failures")
    plt.plot(apiMgr["date"], aveline, label="average")

    plt.xlabel('date')
    plt.ylabel("transactions")
    plt.xticks(ticks=apiMgr["date"], labels=apiMgr["date"], rotation="vertical")
    plt.legend()
    plt.title(title)

    plt.show()

Another function to plot the maximum and minimum response times per day: 

def plot_response_times(apiMgr, title):
    """Plot minimum and maximum daily response times against the date axis.

    Parameters
    ----------
    apiMgr : pandas.DataFrame
        Must contain 'date', 'minResp', and 'maxResp' columns.
    title : str
        Title for the chart.
    """
    # Size must be applied before the figure is created; the original read
    # rcParams back into fig_size a second time (copy-paste bug) instead of
    # writing the modified size back, so it was never applied.
    plt.rcParams["figure.figsize"] = [10, 4]

    # Two explicit calls so BOTH series are plotted against the date axis.
    # The original plt.plot(date, minResp, maxResp) drew maxResp against the
    # integer index, not the dates.
    plt.plot(apiMgr["date"], apiMgr["minResp"])
    plt.plot(apiMgr["date"], apiMgr["maxResp"])

    plt.xlabel('date')
    plt.ylabel("response time in milliseconds")
    plt.xticks(ticks=apiMgr["date"], labels=apiMgr["date"], rotation="vertical")
    plt.legend(["minimum", "maximum"])
    plt.title(title)
    plt.show()

Next, I query the database for a summary of the transactions processed every month for the last year:
def query_apiMgr_monthly(month, year, contract):
    """Query daily API Manager metrics starting at the 1st of the given month.

    NOTE(review): this body is identical to query_api_mgr_monthly above —
    the surrounding text suggests this was meant to be an annual summary
    query; confirm against the original script.

    Parameters
    ----------
    month, year : int
        Month/year used to build the START_DTS lower bound (M/1/YYYY).
    contract : str
        Prefix filter for CONTRACT_NAME; an empty string matches all contracts.

    Returns
    -------
    pandas.DataFrame
        One row per START_DTS with columns: date, successes, failures,
        minResp, maxResp.
    """
    conn = pyodbc.connect(r'Driver={SQL Server};'
                          'Server=;'
                          'Database=;'
                          'Trusted_Connection=yes')
    try:
        # Parameterized query instead of string concatenation — avoids SQL
        # injection and quoting problems with the `contract` value.
        query_str = (
            "select CONVERT(varchar, START_DTS, 1) [date], "
            "sum(SUCCESS_COUNT) successes, sum(ERROR_COUNT) failures, "
            "avg(MIN_RESP_TIME) minResp, avg(MAX_RESP_TIME) maxResp "
            "from [sql_soa_prod_svc].METRICS_DAILY_VIEW "
            "where START_DTS >= ? "
            "and CONTRACT_NAME like ? "
            "group by START_DTS "
            "order by START_DTS asc"
        )
        start_date = "%s/1/%s" % (month, year)
        contract_pattern = contract + "%"  # prefix match, as before
        apiMgr = pd.read_sql_query(sql=query_str, con=conn,
                                   params=[start_date, contract_pattern])
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

    return apiMgr

And graph it:

def plot_annual_use(apiMgr):
    """Plot total monthly transaction counts for the year.

    Parameters
    ----------
    apiMgr : pandas.DataFrame
        Must contain 'Month' and 'Count' columns (Count scaled by 1e7,
        per the y-axis label).
    """
    # Apply the figure size before plotting; the original re-read rcParams
    # into fig_size (copy-paste bug) instead of writing the size back, so
    # the 10x4 size was never applied.
    plt.rcParams["figure.figsize"] = [10, 4]

    # Build graph of monthly transaction totals.
    plt.plot(apiMgr["Month"], apiMgr["Count"])
    plt.xlabel('Month')
    plt.ylabel("Total Transactions x 10,000,000")
    plt.xticks(ticks=apiMgr["Month"], labels=apiMgr["Month"], rotation="vertical")
    plt.title("ApiMgr Monthly Totals")
    plt.show()

Finally, the imports and driver function:
import pandas as pd
import matplotlib.pyplot as plt
import pyodbc
import seaborn as sns
import itertools as it
import numpy as np
# Apply seaborn's default theme so every matplotlib chart below is styled
# consistently month over month.
sns.set()

def graph_apiMgr(month, year):
    """Produce the full set of monthly report graphs.

    Parameters
    ----------
    month, year : int
        Reporting period passed through to the query functions.
    """
    # All-contracts traffic and response-time charts for the month.
    apiMgr = query_apiMgr_monthly(month, year, "")
    plot_traffic(apiMgr, "Daily Transaction Count")
    plot_response_times(apiMgr, "Minimum and Maximum Response Times")

    # Year-at-a-glance totals.
    apiMgr = query_apiMgr_annual()
    plot_annual_use(apiMgr)

    # Per-contract charts. The original called query_akana_monthly, which
    # is not defined anywhere in this file — query_apiMgr_monthly is the
    # matching (renamed) function.
    apiMgr = query_apiMgr_monthly(month, year, "contract1")
    plot_traffic(apiMgr, "contract1 Daily Transaction Count")
    plot_response_times(apiMgr, "contract1 Minimum and Maximum Response Times")

  

This now gives me some nice, consistent graphs in just the time it takes to run the script in Spyder.







No comments:

Post a Comment