One of my monthly tasks is a status
report that includes statistics on the throughput of our APIs. I do this by
using the data gathered by our API Manager. Before I got smart, I would extract
the data through the UI week by week, put it into a spreadsheet, chop off the
extra headers, and try to convince Excel to produce a set of graphs that were
stylized the same way as the previous month's. This often took more time than I
liked.
Upon reading about using Matplotlib
and Pandas, I realized this could handily be automated with Python. My first
attempt read the data from a CSV exported from the API Manager, but then I
wrote a function to pull it directly from SQL Server using PyODBC:
def query_api_mgr_monthly(month, year, contract):
    """Return daily API Manager metrics for one month as a DataFrame.

    Parameters
    ----------
    month, year : int or str
        Month/year used to build the lower-bound start date (month/1/year).
        NOTE(review): there is no upper bound, so rows from later months are
        included too — presumably filtered downstream; confirm against callers.
    contract : str
        Contract-name prefix; matched with a SQL LIKE 'contract%' pattern.
        An empty string matches every contract.

    Returns
    -------
    pandas.DataFrame with columns date, successes, failures, minResp, maxResp.
    """
    conn = pyodbc.connect(r'Driver={SQL Server};'
                          'Server=;'
                          'Database=;'
                          'Trusted_Connection=yes')
    try:
        # Parameterized query: the original concatenated month/year/contract
        # straight into the SQL string, which is injection-prone and breaks
        # on contracts containing quotes.
        query_str = (
            "select CONVERT(varchar, START_DTS, 1) [date], "
            "sum(SUCCESS_COUNT) successes, "
            "sum(ERROR_COUNT) failures, "
            "avg(MIN_RESP_TIME) minResp, "
            "avg(MAX_RESP_TIME) maxResp "
            "from [sql_soa_prod_svc].METRICS_DAILY_VIEW "
            "where START_DTS >= ? "
            "and CONTRACT_NAME like ? "
            "group by START_DTS "
            "order by START_DTS asc"
        )
        start_date = "%s/1/%s" % (month, year)
        apiMgr = pd.read_sql_query(sql=query_str, con=conn,
                                   params=(start_date, contract + "%"))
    finally:
        # The original leaked the connection; always release it.
        conn.close()
    return apiMgr
There's one function to plot the
traffic through the API Manager: the number of successes per day, the number
of failures per day, and the monthly average number of transactions per day.
def plot_traffic(apiMgr, title):
    """Plot daily success and failure counts with the monthly average.

    Parameters
    ----------
    apiMgr : pandas.DataFrame
        Must contain 'date', 'successes' and 'failures' columns
        (as returned by the query_* functions in this file).
    title : str
        Chart title.
    """
    # Size the figure *before* drawing: the original mutated
    # rcParams["figure.figsize"] after plotting, which cannot resize the
    # already-created figure.
    plt.figure(figsize=(10, 4))
    # Average daily transaction volume; vectorized Series addition replaces
    # the original per-index list comprehension.
    total = apiMgr["successes"] + apiMgr["failures"]
    ave = np.average(total)
    days = len(apiMgr["successes"])
    # Daily counts plus a flat reference line at the average, all plotted
    # against the date axis (the original average line had no x values).
    plt.plot(apiMgr["date"], apiMgr["successes"], label="successes")
    plt.plot(apiMgr["date"], apiMgr["failures"], label="failures")
    plt.plot(apiMgr["date"], [ave] * days, label="average")
    plt.xlabel('date')
    plt.ylabel("transactions")
    plt.xticks(ticks=apiMgr["date"], labels=apiMgr["date"],
               rotation="vertical")
    plt.legend()
    plt.title(title)
    plt.show()
Another function to plot the maximum
and minimum response times per day:
def plot_response_times(apiMgr, title):
    """Plot the minimum and maximum response times per day.

    Parameters
    ----------
    apiMgr : pandas.DataFrame
        Must contain 'date', 'minResp' and 'maxResp' columns.
    title : str
        Chart title.
    """
    # Size the figure up front. The original's final line read rcParams back
    # into fig_size instead of assigning it, so the 10x4 size never applied.
    plt.figure(figsize=(10, 4))
    # Plot each series explicitly against the date axis: the original
    # plt.plot(x, y1, y2) call made matplotlib plot maxResp against its
    # integer index rather than the dates.
    plt.plot(apiMgr["date"], apiMgr["minResp"], label="minimum")
    plt.plot(apiMgr["date"], apiMgr["maxResp"], label="maximum")
    plt.xlabel('date')
    plt.ylabel("response time in milliseconds")
    plt.xticks(ticks=apiMgr["date"], labels=apiMgr["date"],
               rotation="vertical")
    plt.legend()
    plt.title(title)
    plt.show()
Next, I query the database for
a summary of the transactions processed every month for the last year:
def query_apiMgr_monthly(month, year, contract):
    """Return daily API Manager metrics for one month as a DataFrame.

    NOTE(review): this duplicates query_api_mgr_monthly above — the prose
    says an *annual* summary query belongs here; confirm which was intended.

    Parameters
    ----------
    month, year : int or str
        Month/year used to build the lower-bound start date (month/1/year).
    contract : str
        Contract-name prefix; matched with a SQL LIKE 'contract%' pattern.
        An empty string matches every contract.

    Returns
    -------
    pandas.DataFrame with columns date, successes, failures, minResp, maxResp.
    """
    conn = pyodbc.connect(r'Driver={SQL Server};'
                          'Server=;'
                          'Database=;'
                          'Trusted_Connection=yes')
    try:
        # Parameterized query instead of string concatenation (injection-safe,
        # and robust to quotes in the contract name).
        query_str = (
            "select CONVERT(varchar, START_DTS, 1) [date], "
            "sum(SUCCESS_COUNT) successes, "
            "sum(ERROR_COUNT) failures, "
            "avg(MIN_RESP_TIME) minResp, "
            "avg(MAX_RESP_TIME) maxResp "
            "from [sql_soa_prod_svc].METRICS_DAILY_VIEW "
            "where START_DTS >= ? "
            "and CONTRACT_NAME like ? "
            "group by START_DTS "
            "order by START_DTS asc"
        )
        start_date = "%s/1/%s" % (month, year)
        apiMgr = pd.read_sql_query(sql=query_str, con=conn,
                                   params=(start_date, contract + "%"))
    finally:
        # The original never closed the connection.
        conn.close()
    return apiMgr
And Graph it:
def plot_annual_use(apiMgr):
    """Plot total transactions processed per month for the year.

    Parameters
    ----------
    apiMgr : pandas.DataFrame
        Must contain 'Month' and 'Count' columns, where Count is
        presumably pre-scaled by 10,000,000 (per the y-axis label) —
        TODO confirm against the annual query.
    """
    # Size the figure before drawing; the original's final line read
    # rcParams back into fig_size instead of assigning it, so the 10x4
    # size was never applied. (The original comment here was a copy-paste
    # from the response-time plot; this chart shows monthly totals.)
    plt.figure(figsize=(10, 4))
    plt.plot(apiMgr["Month"], apiMgr["Count"])
    plt.xlabel('Month')
    plt.ylabel("Total Transactions x 10,000,000")
    plt.xticks(ticks=apiMgr["Month"], labels=apiMgr["Month"],
               rotation="vertical")
    plt.title("ApiMgr Monthly Totals")
    plt.show()
Finally, the imports and driver function:
import pandas as pd
import matplotlib.pyplot as plt
import pyodbc
import seaborn as sns
import itertools as it
import numpy as np
# Apply seaborn's default styling to all subsequent matplotlib plots,
# keeping every month's graphs consistently themed.
sns.set()
def graph_apiMgr(month, year):
    """Produce the full set of monthly report graphs.

    Parameters
    ----------
    month, year : int or str
        Reporting period, forwarded to the query functions.
    """
    # Overall traffic across all contracts.
    apiMgr = query_apiMgr_monthly(month, year, "")
    plot_traffic(apiMgr, "Daily Transaction Count")
    plot_response_times(apiMgr, "Minimum and Maximum Response Times")
    # Year-to-date monthly totals.
    apiMgr = query_apiMgr_annual()
    plot_annual_use(apiMgr)
    # contract1 only. The original called query_akana_monthly, which is not
    # defined anywhere in this file — evidently a leftover from before the
    # akana -> apiMgr rename; query_apiMgr_monthly is the intended function.
    apiMgr = query_apiMgr_monthly(month, year, "contract1")
    plot_traffic(apiMgr, "contract1 Daily Transaction Count")
    plot_response_times(apiMgr, "contract1 Minimum and Maximum Response Times")
This now gives me some nice, consistent graphs in just the time it takes to run the script in Spyder.
No comments:
Post a Comment