Monday, August 31, 2009

Look, everybody! It's a SimPy three tier eBiz simulation! But now with extra statistics! Wowee!

Added some more statistics to the output, such as waiting queue length and component utilization, by using SimPy Monitors. I couldn't easily use the monitors for the page response times due to the split-up nature of the calls to multiple nodes, but for .waitQ lengths and node utilizations they worked out perfectly.
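For anyone wondering how the utilization numbers fall out of the Monitors: each completion observes its service demand in an SMon, so the mean observed demand times the number of observations approximates total busy time. Here is a tiny standalone sketch of that calculation with made-up numbers (2,000 completions at 0.030 seconds each over 100 simulated seconds); the full listing below does the same thing per component.

from SimPy.Simulation import *

simTime = 100.0               # hypothetical simulated seconds
svc = Monitor()               # one observation per completed service
for s in [0.030] * 2000:      # hypothetical: 2000 completions at 0.030 s each
    svc.observe(s)

# busy time ~= mean observed demand * number of observations
utilization = (svc.mean() * len(svc)) / simTime * 100
print "Utilization : ", utilization, "%"   # -> 60.0 %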



#!/usr/bin/env python

from SimPy.Simulation import *
from random import Random, expovariate, uniform
import sys

class Metrics:

    metrics = dict()

    def Add(self, metricName, frameNumber, value):
        if self.metrics.has_key(metricName):
            if self.metrics[metricName].has_key(frameNumber):
                self.metrics[metricName][frameNumber].append(value)
            else:
                self.metrics[metricName][frameNumber] = list()
                self.metrics[metricName][frameNumber].append(value)
        else:
            self.metrics[metricName] = dict()
            self.metrics[metricName][frameNumber] = list()
            self.metrics[metricName][frameNumber].append(value)

    def Keys(self):
        return self.metrics.keys()

    def Mean(self, metricName):
        valueArray = list()
        if self.metrics.has_key(metricName):
            for frame in self.metrics[metricName].keys():
                for values in range(len(self.metrics[metricName][frame])):
                    valueArray.append(self.metrics[metricName][frame][values])

            sum = 0.0
            for i in range(len(valueArray)):
                sum += valueArray[i]

            if len(self.metrics[metricName]) != 0:
                return sum/len(self.metrics[metricName])
            else:
                return 0 # Need to learn python throwing exceptions
        else:
            return 0

class G:

    numWS = 1
    numAS = 1
    numDS = 1

    Rnd = Random(12345)

    PageNames = ["Entry", "Home", "Search", "View", "Login", "Create", "Bid", "Exit" ]

    Entry = 0
    Home = 1
    Search = 2
    View = 3
    Login = 4
    Create = 5
    Bid = 6
    Exit = 7

    WS = 0
    AS = 1
    DS = 2

    CPU = 0
    DISK = 1

    WS_CPU = 0
    WS_DISK = 1
    AS_CPU = 2
    AS_DISK = 3
    DS_CPU = 4
    DS_DISK = 5

    metrics = Metrics()

    #            e  h  s  v  l  c  b  e
    HitCount = [ 0, 0, 0, 0, 0, 0, 0, 0]

    Resources = [[ Resource(1), Resource(1) ],   # WS CPU and DISK
                 [ Resource(1), Resource(1) ],   # AS CPU and DISK
                 [ Resource(1), Resource(1) ]]   # DS CPU and DISK

    QMon = [[ Monitor(1), Monitor(1)],   # WS CPU and DISK
            [ Monitor(1), Monitor(1)],   # AS CPU and DISK
            [ Monitor(1), Monitor(1)]]   # DS CPU and DISK

    SMon = [[ Monitor(1), Monitor(1)],   # WS CPU and DISK
            [ Monitor(1), Monitor(1)],   # AS CPU and DISK
            [ Monitor(1), Monitor(1)]]   # DS CPU and DISK

    #                  Enter  Home   Search View   Login  Create Bid    Exit
    ServiceDemand = [ [0.000, 0.008, 0.009, 0.011, 0.060, 0.012, 0.015, 0.000],   # WS_CPU
                      [0.000, 0.030, 0.010, 0.010, 0.010, 0.010, 0.010, 0.000],   # WS_DISK
                      [0.000, 0.000, 0.030, 0.035, 0.025, 0.045, 0.040, 0.000],   # AS_CPU
                      [0.000, 0.000, 0.008, 0.080, 0.009, 0.011, 0.012, 0.000],   # AS_DISK
                      [0.000, 0.000, 0.010, 0.009, 0.015, 0.070, 0.045, 0.000],   # DS_CPU
                      [0.000, 0.000, 0.035, 0.018, 0.050, 0.080, 0.090, 0.000] ]  # DS_DISK

    # Type B shopper
    #                      0     1     2     3     4     5     6     7
    TransitionMatrix = [ [0.00, 1.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00],   # 0
                         [0.00, 0.00, 0.70, 0.00, 0.10, 0.00, 0.00, 0.20],   # 1
                         [0.00, 0.00, 0.45, 0.15, 0.10, 0.00, 0.00, 0.30],   # 2
                         [0.00, 0.00, 0.00, 0.00, 0.40, 0.00, 0.00, 0.60],   # 3
                         [0.00, 0.00, 0.00, 0.00, 0.00, 0.30, 0.55, 0.15],   # 4
                         [0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00],   # 5
                         [0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00],   # 6
                         [0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00] ]  # 7

class RandomPath:

    def RowSum(self, Vector):
        rowSum = 0.0
        for i in range(len(Vector)):
            rowSum += Vector[i]
        return rowSum

    def NextPage(self, T, i):
        rowSum = self.RowSum(T[i])
        randomValue = G.Rnd.uniform(0, rowSum)

        sumT = 0.0

        for j in range(len(T[i])):
            sumT += T[i][j]
            if randomValue < sumT:
                break
        return j

class ExecuteFrame(Process):

    def __init__(self, frameNumber, resource, QMon, SMon, serviceDemand, nodeName, pageName):
        Process.__init__(self)
        self.frame = frameNumber
        self.resource = resource
        self.serviceDemand = serviceDemand
        self.nodeName = nodeName
        self.pageName = pageName
        self.QMon = QMon
        self.SMon = SMon

    def execute(self):
        StartUpTime = now()

        yield request, self, self.resource
        yield hold, self, self.serviceDemand

        self.QMon.observe(len(self.resource.waitQ))
        self.SMon.observe(self.serviceDemand)

        yield release, self, self.resource

        R = now() - StartUpTime

        G.metrics.Add(self.pageName, self.frame, R)

class CallPage(Process):

    def __init__(self, frameNumber, node, pageName):
        Process.__init__(self)
        self.frame = frameNumber
        self.StartUpTime = 0.0
        self.currentPage = node
        self.pageName = pageName

    def execute(self):

        if self.currentPage != G.Exit:

            print >> sys.stderr, "Working on Frame # ", self.frame, " @ ", now() , " for page ", self.pageName

            self.StartUpTime = now()

            if G.ServiceDemand[G.WS_CPU][self.currentPage] > 0.0:
                wsCPU = ExecuteFrame(self.frame,
                                     G.Resources[G.WS][G.CPU],
                                     G.QMon[G.WS][G.CPU],
                                     G.SMon[G.WS][G.CPU],
                                     G.ServiceDemand[G.WS_CPU][self.currentPage]/G.numWS,
                                     "wsCPU", self.pageName)
                activate(wsCPU, wsCPU.execute())

            if G.ServiceDemand[G.WS_DISK][self.currentPage] > 0.0:
                wsDISK = ExecuteFrame(self.frame,
                                      G.Resources[G.WS][G.DISK],
                                      G.QMon[G.WS][G.DISK],
                                      G.SMon[G.WS][G.DISK],
                                      G.ServiceDemand[G.WS_DISK][self.currentPage]/G.numWS,
                                      "wsDISK", self.pageName)
                activate(wsDISK, wsDISK.execute())

            if G.ServiceDemand[G.AS_CPU][self.currentPage] > 0.0:
                asCPU = ExecuteFrame(self.frame,
                                     G.Resources[G.AS][G.CPU],
                                     G.QMon[G.AS][G.CPU],
                                     G.SMon[G.AS][G.CPU],
                                     G.ServiceDemand[G.AS_CPU][self.currentPage]/G.numAS,
                                     "asCPU",
                                     self.pageName)
                activate(asCPU, asCPU.execute())

            if G.ServiceDemand[G.AS_DISK][self.currentPage] > 0.0:
                asDISK = ExecuteFrame(self.frame,
                                      G.Resources[G.AS][G.DISK],
                                      G.QMon[G.AS][G.DISK],
                                      G.SMon[G.AS][G.DISK],
                                      G.ServiceDemand[G.AS_DISK][self.currentPage]/G.numAS,
                                      "asDISK",
                                      self.pageName)
                activate(asDISK, asDISK.execute())

            if G.ServiceDemand[G.DS_CPU][self.currentPage] > 0.0:
                dsCPU = ExecuteFrame(self.frame,
                                     G.Resources[G.DS][G.CPU],
                                     G.QMon[G.DS][G.CPU],
                                     G.SMon[G.DS][G.CPU],
                                     G.ServiceDemand[G.DS_CPU][self.currentPage]/G.numDS,
                                     "dsCPU",
                                     self.pageName)
                activate(dsCPU, dsCPU.execute())

            if G.ServiceDemand[G.DS_DISK][self.currentPage] > 0.0:
                dsDISK = ExecuteFrame(self.frame,
                                      G.Resources[G.DS][G.DISK],
                                      G.QMon[G.DS][G.DISK],
                                      G.SMon[G.DS][G.DISK],
                                      G.ServiceDemand[G.DS_DISK][self.currentPage]/G.numDS,
                                      "dsDISK",
                                      self.pageName)
                activate(dsDISK, dsDISK.execute())

            G.HitCount[self.currentPage] += 1

        yield hold, self, 0.00001

class Generator(Process):
    def __init__(self, rate, maxT):
        Process.__init__(self)
        self.name = "Generator"
        self.rate = rate
        self.maxT = maxT
        self.g = Random(11335577)
        self.i = 0
        self.currentPage = G.Home

    def execute(self):
        while (now() < self.maxT):
            self.i += 1
            p = CallPage(self.i, self.currentPage, G.PageNames[self.currentPage])
            activate(p, p.execute())
            yield hold, self, self.g.expovariate(self.rate)
            randomPath = RandomPath()

            if self.currentPage == G.Exit:
                self.currentPage = G.Home
            else:
                self.currentPage = randomPath.NextPage(G.TransitionMatrix, self.currentPage)

def main():

    Lambda = 4.026*float(sys.argv[1])
    maxSimTime = float(sys.argv[2])

    initialize()
    g = Generator(Lambda, maxSimTime)
    activate(g, g.execute())

    simulate(until=maxSimTime)

    print >> sys.stderr, "Simulated Seconds : ", maxSimTime

    print >> sys.stderr, "Page Hits :"
    for i in range(len(G.PageNames)):
        print >> sys.stderr, "\t", G.PageNames[i], " = ", G.HitCount[i]

    print >> sys.stderr, "Throughput : "
    for i in range(len(G.PageNames)):
        print >> sys.stderr, "\t", G.PageNames[i], " = ", G.HitCount[i]/maxSimTime

    print >> sys.stderr, "Mean Response Times:"

    for i in G.metrics.Keys():
        print >> sys.stderr, "\t", i, " = ", G.metrics.Mean(i)

    print >> sys.stderr, "Component Waiting Queues:"
    print >> sys.stderr, "\tWeb Server CPU          : ", G.QMon[G.WS][G.CPU].mean()
    print >> sys.stderr, "\tWeb Server DISK         : ", G.QMon[G.WS][G.DISK].mean()
    print >> sys.stderr, "\tApplication Server CPU  : ", G.QMon[G.AS][G.CPU].mean()
    print >> sys.stderr, "\tApplication Server DISK : ", G.QMon[G.AS][G.DISK].mean()
    print >> sys.stderr, "\tDatabase Server CPU     : ", G.QMon[G.DS][G.CPU].mean()
    print >> sys.stderr, "\tDatabase Server DISK    : ", G.QMon[G.DS][G.DISK].mean()

    print >> sys.stderr, "Component Utilization:"
    print >> sys.stderr, "\tWeb Server CPU          : ", ((G.SMon[G.WS][G.CPU].mean()*len(G.QMon[G.WS][G.CPU]))/maxSimTime)*100
    print >> sys.stderr, "\tWeb Server DISK         : ", ((G.SMon[G.WS][G.DISK].mean()*len(G.QMon[G.WS][G.DISK]))/maxSimTime)*100
    print >> sys.stderr, "\tApplication Server CPU  : ", ((G.SMon[G.AS][G.CPU].mean()*len(G.QMon[G.AS][G.CPU]))/maxSimTime)*100
    print >> sys.stderr, "\tApplication Server DISK : ", ((G.SMon[G.AS][G.DISK].mean()*len(G.QMon[G.AS][G.DISK]))/maxSimTime)*100
    print >> sys.stderr, "\tDatabase Server CPU     : ", ((G.SMon[G.DS][G.CPU].mean()*len(G.QMon[G.DS][G.CPU]))/maxSimTime)*100
    print >> sys.stderr, "\tDatabase Server DISK    : ", ((G.SMon[G.DS][G.DISK].mean()*len(G.QMon[G.DS][G.DISK]))/maxSimTime)*100

    print >> sys.stderr, "Total Component Hits :"
    print >> sys.stderr, "\tWeb Server CPU          : ", len(G.QMon[G.WS][G.CPU])
    print >> sys.stderr, "\tWeb Server DISK         : ", len(G.QMon[G.WS][G.DISK])
    print >> sys.stderr, "\tApplication Server CPU  : ", len(G.QMon[G.AS][G.CPU])
    print >> sys.stderr, "\tApplication Server DISK : ", len(G.QMon[G.AS][G.DISK])
    print >> sys.stderr, "\tDatabase Server CPU     : ", len(G.QMon[G.DS][G.CPU])
    print >> sys.stderr, "\tDatabase Server DISK    : ", len(G.QMon[G.DS][G.DISK])

    print >> sys.stderr, "Total Component Throughput :"
    print >> sys.stderr, "\tWeb Server CPU          : ", len(G.QMon[G.WS][G.CPU])/maxSimTime
    print >> sys.stderr, "\tWeb Server DISK         : ", len(G.QMon[G.WS][G.DISK])/maxSimTime
    print >> sys.stderr, "\tApplication Server CPU  : ", len(G.QMon[G.AS][G.CPU])/maxSimTime
    print >> sys.stderr, "\tApplication Server DISK : ", len(G.QMon[G.AS][G.DISK])/maxSimTime
    print >> sys.stderr, "\tDatabase Server CPU     : ", len(G.QMon[G.DS][G.CPU])/maxSimTime
    print >> sys.stderr, "\tDatabase Server DISK    : ", len(G.QMon[G.DS][G.DISK])/maxSimTime

    print >> sys.stderr, "Mean Component Svc Demand :"
    print >> sys.stderr, "\tWeb Server CPU          : ", G.SMon[G.WS][G.CPU].mean()
    print >> sys.stderr, "\tWeb Server DISK         : ", G.SMon[G.WS][G.DISK].mean()
    print >> sys.stderr, "\tApplication Server CPU  : ", G.SMon[G.AS][G.CPU].mean()
    print >> sys.stderr, "\tApplication Server DISK : ", G.SMon[G.AS][G.DISK].mean()
    print >> sys.stderr, "\tDatabase Server CPU     : ", G.SMon[G.DS][G.CPU].mean()
    print >> sys.stderr, "\tDatabase Server DISK    : ", G.SMon[G.DS][G.DISK].mean()

    print G.HitCount[G.Home]/maxSimTime, ",", G.metrics.Mean("Home"), ",", G.metrics.Mean("View"), ",", G.metrics.Mean("Search"), ",", G.metrics.Mean("Login"), ",", G.metrics.Mean("Create"), ",", G.metrics.Mean("Bid")

if __name__ == '__main__':
    main()

Sunday, August 30, 2009

Improved single queue SimPy code

In this blog entry I had a comparison of PDQ and SimPy for a single queue. The code that I wrote was pretty cheezy and based upon a very early example from Norm Matloff's Introduction to Discrete-Event Simulation and the SimPy Language.

After writing the SimPy code to solve for the three tier eBiz solution I wanted to go back and correct some of my initial code with the single queue.

Below is what I feel is better code than my original single queue solution.




#!/usr/bin/env python

from SimPy.Simulation import *
from random import Random, expovariate, uniform
import sys
import time

class G:
    # Rnd = random.Random(time.mktime(time.localtime()))
    Rnd = Random(12345)
    MyQueue = Resource(1)
    QMon = Monitor()
    ServiceTime = 0.50
    TotalCalls = 0L
    TotalResidence = 0L
    TotalWait = 0L
    TotalService = 0L

class WorkLoad(Process):

    def __init__(self):
        Process.__init__(self)
        self.StartUpTime = 0.0

    def Run(self):
        self.StartUpTime = now()
        yield request, self, G.MyQueue
        G.TotalWait += now() - self.StartUpTime
        yield hold, self, G.ServiceTime
        G.QMon.observe(len(G.MyQueue.waitQ))
        G.TotalResidence += now() - self.StartUpTime
        yield release, self, G.MyQueue
        G.TotalCalls += 1

class Generator(Process):

    def __init__(self, Lambda):
        Process.__init__(self)
        self.Lambda = Lambda

    def execute(self):

        while 1:
            yield hold, self, G.Rnd.expovariate(self.Lambda)
            W = WorkLoad()
            activate(W, W.Run())

def main():

    Lambda = float(sys.argv[1])
    MaxSimTime = 10000.00

    G.TotalCalls = 0L
    G.TotalResidence = 0L
    G.TotalWait = 0L
    G.TotalService = 0L

    initialize()

    print >> sys.stderr, "MaxSimTime = ", MaxSimTime

    g = Generator(Lambda)
    activate(g, g.execute())

    simulate(until=MaxSimTime)

    print Lambda, ",", MaxSimTime, ",", G.TotalCalls, ",", G.TotalCalls/MaxSimTime, ",", G.TotalWait, ",", G.TotalResidence, ",", G.TotalResidence/G.TotalCalls, ",", G.TotalWait/G.TotalCalls, ",", (G.TotalResidence/G.TotalCalls) - (G.TotalWait/G.TotalCalls) , ",", ((G.ServiceTime * G.TotalCalls) / MaxSimTime) * 100 , "%", ",", G.QMon.mean()

    print >> sys.stderr, "Ideal Throughput     : ", Lambda
    print >> sys.stderr, "Simulated Seconds    : ", MaxSimTime
    print >> sys.stderr, "Number of calls      : ", G.TotalCalls
    print >> sys.stderr, "Throughput           : ", G.TotalCalls/MaxSimTime
    print >> sys.stderr, "Total Wait Time      : ", G.TotalWait
    print >> sys.stderr, "Total Residence Time : ", G.TotalResidence
    print >> sys.stderr, "Mean Residence Time  : ", G.TotalResidence/G.TotalCalls
    print >> sys.stderr, "Mean Wait Time       : ", G.TotalWait/G.TotalCalls
    print >> sys.stderr, "Mean Service Time    : ", (G.TotalResidence/G.TotalCalls) - (G.TotalWait/G.TotalCalls)
    print >> sys.stderr, "Total Utilization    : ", ((G.ServiceTime * G.TotalCalls) / MaxSimTime) * 100 , " %"
    print >> sys.stderr, "Mean WaitQ           : ", G.QMon.mean()

if __name__ == '__main__': main()


The results are the same as before, but now instead of creating a gajillion objects to activate, only one "thread" is created that in turn makes the calls to the queue. Much faster than my original code and laid out better.

Friday, August 28, 2009

Hey, everybody! Let's use SimPy to simulate performance of a three tier eBiz solution!

Back in this blog post, I wrote about using PDQ-R to analyze the performance of a three tier eBiz solution.

I had been getting into learning how to simulate queuing networks with SimPy and this week I got my three tier SimPy solution up and running and crunching numbers.

The SimPy solution was MUCH more involved than the PDQ solution and it took me a bit to wrap my head around all the goodies I needed to know before I started getting good results.

I started by looking at the jackson.py code and got an idea of how to write the code that was needed to get the job done.

Like the jackson.py code, I also had three computers, but unlike the jackson.py code I had two nodes per computer that represented the CPU and Disk. Getting that set up was the real tricky part and I had to bang my head against the SimPy brick wall for a while until I broke through and figured the solution out.

Like the jackson.py code, I use a Process to control the rate that I hit web pages by invoking another process that sets up the hits to the individual components that are also processes. One of the big lessons that I learned is that if the service demand for a component is zero, don't make a yield request call for the component resource.

You'd think that I would have figured that out earlier but it totally slipped past me and it was really kicking my buttocks. Most of the page response times were looking good, but in the PDQ analysis the HomePage didn't have a long response time, since it only has service demand on the Web CPU and Disk, and I wasn't seeing that in my SimPy solution.

Here is what I was seeing in my SimPy solution for page response time for the Home page:



I had some other chores to do and as I was walking past my machine I noticed some code out of the corner of my eye and it hit me like a ton of bricks. I was making a yield request for each component even if there was no service demand for the component, and that yield request is a blocking operation. Well, duh! A few if statements fixed that problem and a quick test showed that the Home Page response time was no longer diverging from the PDQ analysis like in the above graph!



Wow, what a difference!

The biggest thing that I got out of doing the SimPy solution was that all component requests need to be in their own Processes, and if there is no service demand for a component then it should not be called at all.
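The guard itself is nothing fancy. In the listing below every component hit ends up wrapped like this (this is the web server CPU case, excerpted from that listing), so a zero-demand component never issues a blocking yield request:

if G.ServiceDemand[G.WS_CPU][self.currentPage] > 0.0:
    wsCPU = DoWork(self.frame, G.Resources[G.WS][G.CPU], G.ServiceDemand[G.WS_CPU][self.currentPage]/G.numWS, "wsCPU", self.pageName)
    activate(wsCPU, wsCPU.execute())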

Perhaps next week I'll add the queuing output and component utilization into my SimPy code for more left/right comparisons against PDQ. But overall, I'm fairly happy with my solution, though there is some code that needs to be cleaned up.

Down below is my code and the comparisons of my SimPy solution versus my PDQ solution.



#!/usr/bin/env python

from SimPy.Simulation import *
from random import Random, expovariate, uniform
import sys

class Metrics:

    metrics = dict()

    def Add(self, metricName, frameNumber, value):
        if self.metrics.has_key(metricName):
            if self.metrics[metricName].has_key(frameNumber):
                self.metrics[metricName][frameNumber].append(value)
            else:
                self.metrics[metricName][frameNumber] = list()
                self.metrics[metricName][frameNumber].append(value)
        else:
            self.metrics[metricName] = dict()
            self.metrics[metricName][frameNumber] = list()
            self.metrics[metricName][frameNumber].append(value)

    def Keys(self):
        return self.metrics.keys()

    def Mean(self, metricName):
        valueArray = list()
        if self.metrics.has_key(metricName):
            for frame in self.metrics[metricName].keys():
                for values in range(len(self.metrics[metricName][frame])):
                    valueArray.append(self.metrics[metricName][frame][values])

            sum = 0.0
            for i in range(len(valueArray)):
                sum += valueArray[i]

            if len(self.metrics[metricName]) != 0:
                return sum/len(self.metrics[metricName])
            else:
                return 0 # Need to learn python throwing exceptions
        else:
            return 0

class RandomPath:

    def RowSum(self, Vector):
        rowSum = 0.0
        for i in range(len(Vector)):
            rowSum += Vector[i]
        return rowSum

    def NextPage(self, T, i):
        rowSum = self.RowSum(T[i])
        randomValue = G.Rnd.uniform(0, rowSum)

        sumT = 0.0

        for j in range(len(T[i])):
            sumT += T[i][j]
            if randomValue < sumT:
                break

        return j

class G:

    numWS = 1
    numAS = 1
    numDS = 2

    Rnd = Random(12345)

    PageNames = ["Entry", "Home", "Search", "View", "Login", "Create", "Bid", "Exit" ]

    Entry = 0
    Home = 1
    Search = 2
    View = 3
    Login = 4
    Create = 5
    Bid = 6
    Exit = 7

    WS = 0
    AS = 1
    DS = 2

    CPU = 0
    DISK = 1

    WS_CPU = 0
    WS_DISK = 1
    AS_CPU = 2
    AS_DISK = 3
    DS_CPU = 4
    DS_DISK = 5

    metrics = Metrics()

    #            e  h  s  v  l  c  b  e
    HitCount = [ 0, 0, 0, 0, 0, 0, 0, 0]

    Resources = [[ Resource(1), Resource(1) ],   # WS CPU and DISK
                 [ Resource(1), Resource(1) ],   # AS CPU and DISK
                 [ Resource(1), Resource(1) ]]   # DS CPU and DISK

    #                  Enter  Home   Search View   Login  Create Bid    Exit
    ServiceDemand = [ [0.000, 0.008, 0.009, 0.011, 0.060, 0.012, 0.015, 0.000],   # WS_CPU
                      [0.000, 0.030, 0.010, 0.010, 0.010, 0.010, 0.010, 0.000],   # WS_DISK
                      [0.000, 0.000, 0.030, 0.035, 0.025, 0.045, 0.040, 0.000],   # AS_CPU
                      [0.000, 0.000, 0.008, 0.080, 0.009, 0.011, 0.012, 0.000],   # AS_DISK
                      [0.000, 0.000, 0.010, 0.009, 0.015, 0.070, 0.045, 0.000],   # DS_CPU
                      [0.000, 0.000, 0.035, 0.018, 0.050, 0.080, 0.090, 0.000] ]  # DS_DISK

    # Type B shopper
    #                      0     1     2     3     4     5     6     7
    TransitionMatrix = [ [0.00, 1.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00],   # 0
                         [0.00, 0.00, 0.70, 0.00, 0.10, 0.00, 0.00, 0.20],   # 1
                         [0.00, 0.00, 0.45, 0.15, 0.10, 0.00, 0.00, 0.30],   # 2
                         [0.00, 0.00, 0.00, 0.00, 0.40, 0.00, 0.00, 0.60],   # 3
                         [0.00, 0.00, 0.00, 0.00, 0.00, 0.30, 0.55, 0.15],   # 4
                         [0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00],   # 5
                         [0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00],   # 6
                         [0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00] ]  # 7

class DoWork(Process):

    def __init__(self, i, resource, serviceDemand, nodeName, pageName):
        Process.__init__(self)
        self.frame = i
        self.resource = resource
        self.serviceDemand = serviceDemand
        self.nodeName = nodeName
        self.pageName = pageName

    def execute(self):
        StartUpTime = now()
        yield request, self, self.resource
        yield hold, self, self.serviceDemand
        yield release, self, self.resource
        R = now() - StartUpTime

        G.metrics.Add(self.pageName, self.frame, R)

class CallPage(Process):

    def __init__(self, i, node, pageName):
        Process.__init__(self)
        self.frame = i
        self.StartUpTime = 0.0
        self.currentPage = node
        self.pageName = pageName

    def execute(self):

        if self.currentPage != G.Exit:

            print >> sys.stderr, "Working on Frame # ", self.frame, " @ ", now() , " for page ", self.pageName

            self.StartUpTime = now()

            if G.ServiceDemand[G.WS_CPU][self.currentPage] > 0.0:
                wsCPU = DoWork(self.frame, G.Resources[G.WS][G.CPU], G.ServiceDemand[G.WS_CPU][self.currentPage]/G.numWS, "wsCPU", self.pageName)
                activate(wsCPU, wsCPU.execute())

            if G.ServiceDemand[G.WS_DISK][self.currentPage] > 0.0:
                wsDISK = DoWork(self.frame, G.Resources[G.WS][G.DISK], G.ServiceDemand[G.WS_DISK][self.currentPage]/G.numWS, "wsDISK", self.pageName)
                activate(wsDISK, wsDISK.execute())

            if G.ServiceDemand[G.AS_CPU][self.currentPage] > 0.0:
                asCPU = DoWork(self.frame, G.Resources[G.AS][G.CPU], G.ServiceDemand[G.AS_CPU][self.currentPage]/G.numAS, "asCPU", self.pageName)
                activate(asCPU, asCPU.execute())

            if G.ServiceDemand[G.AS_DISK][self.currentPage] > 0.0:
                asDISK = DoWork(self.frame, G.Resources[G.AS][G.DISK], G.ServiceDemand[G.AS_DISK][self.currentPage]/G.numAS, "asDISK", self.pageName)
                activate(asDISK, asDISK.execute())

            if G.ServiceDemand[G.DS_CPU][self.currentPage] > 0.0:
                dsCPU = DoWork(self.frame, G.Resources[G.DS][G.CPU], G.ServiceDemand[G.DS_CPU][self.currentPage]/G.numDS, "dsCPU", self.pageName)
                activate(dsCPU, dsCPU.execute())

            if G.ServiceDemand[G.DS_DISK][self.currentPage] > 0.0:
                dsDISK = DoWork(self.frame, G.Resources[G.DS][G.DISK], G.ServiceDemand[G.DS_DISK][self.currentPage]/G.numDS, "dsDISK", self.pageName)
                activate(dsDISK, dsDISK.execute())

            G.HitCount[self.currentPage] += 1

        yield hold, self, 0.00001 # Needed to prevent an error. Doesn't add any blocking to the six queues above


class Generator(Process):
    def __init__(self, rate, maxT, maxN):
        Process.__init__(self)
        self.name = "Generator"
        self.rate = rate
        self.maxN = maxN
        self.maxT = maxT
        self.g = Random(11335577)
        self.i = 0
        self.currentPage = G.Home

    def execute(self):
        while (now() < self.maxT):
            self.i += 1
            p = CallPage(self.i, self.currentPage, G.PageNames[self.currentPage])
            activate(p, p.execute())
            yield hold, self, self.g.expovariate(self.rate)
            randomPath = RandomPath()

            if self.currentPage == G.Exit:
                self.currentPage = G.Home
            else:
                self.currentPage = randomPath.NextPage(G.TransitionMatrix, self.currentPage)

def main():

    maxWorkLoad = 10000
    Lambda = 4.026*float(sys.argv[1])
    maxSimTime = float(sys.argv[2])

    initialize()
    g = Generator(Lambda, maxSimTime, maxWorkLoad)
    activate(g, g.execute())

    simulate(until=maxSimTime)

    print >> sys.stderr, "Simulated Seconds : ", maxSimTime

    print >> sys.stderr, "Page Hits :"
    for i in range(len(G.PageNames)):
        print >> sys.stderr, "\t", G.PageNames[i], " = ", G.HitCount[i]

    print >> sys.stderr, "Throughput : "
    for i in range(len(G.PageNames)):
        print >> sys.stderr, "\t", G.PageNames[i], " = ", G.HitCount[i]/maxSimTime

    print >> sys.stderr, "Mean Response Times:"

    for i in G.metrics.Keys():
        print >> sys.stderr, "\t", i, " = ", G.metrics.Mean(i)

    print G.HitCount[G.Home]/maxSimTime, ",", G.metrics.Mean("Home"), ",", G.metrics.Mean("View"), ",", G.metrics.Mean("Search"), ",", G.metrics.Mean("Login"), ",", G.metrics.Mean("Create"), ",", G.metrics.Mean("Bid")

if __name__ == '__main__': main()









Like my previous comparison of SimPy versus PDQ for a single queue, the PDQ results are slightly higher, so no major surprises there.

Monday, August 24, 2009

Hey, everybody! Let's do a quick comparison of SimPy and PDQ for a single queue!

On the side I have slowly been looking into SimPy for queue simulation and I finally got around to doing a comparison of a single queue written up with SimPy and with PDQ.

The scenario is very simple. A single queue with a service time of 0.5 seconds with one visit for a service demand total of 0.5 seconds.

The PDQ solution is very simple to write up and only took a few minutes:




library("pdq");

lambda <- seq(1, 20, 1);
lambda <- lambda / 10;

for (rate in lambda) {
Init("Single Queue");

CreateOpen("WorkLoad", rate);

CreateNode("MyQueue", CEN, FCFS);

SetDemand("MyQueue", "WorkLoad" , 0.5);

SetWUnit("Trans");
SetTUnit("Second");

Solve(CANON);
#Report();
print(sprintf("%f, %f, %f",
rate,
GetResidenceTime("MyQueue","WorkLoad", TRANS),
GetUtilization("MyQueue", "WorkLoad", TRANS)));
};


The SimPy solution took a bit longer for me to write up and to make sure I was doing things correctly. I thought it was neat that I was able to set up a lambda into the queue that was greater than the queue was capable of processing; in this case the maximum lambda is 2 requests per second, given by the reciprocal of the 0.5 second service time, 1/0.5 == 2.

PDQ gives a response time of "Inf" for 2.0 requests per second, whereas the SimPy solution will crunch away but never get above 2 requests per second and will result in unbounded queueing, as we'd expect.
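For reference, the single open FCFS queue that PDQ solves here behaves like M/M/1, where the response time is R = S / (1 - lambda*S) and blows up as lambda*S approaches 1. A quick throwaway sketch of that formula (my own helper, not part of PDQ or SimPy):

# open M/M/1 response time, R = S / (1 - rho), with rho = lambda * S
S = 0.5   # service time in seconds

def response_time(lam):
    rho = lam * S
    if rho >= 1.0:
        return float('inf')   # saturated: this is PDQ's "Inf"
    return S / (1.0 - rho)

for lam in (0.6, 1.0, 1.9, 2.0):
    print lam, response_time(lam)
# -> 0.714..., 1.0, 10.0, inf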

I still need to work on my SimPy solution to add in more metrics collection, but it's a start. After I wrap my brain around metrics collection I'm going to write a solution to one of the previous problems that I worked on, related to Jackson networks and the three tier E-biz solution.

One of the interesting things about SimPy is that I had to create a large number of worker "threads" to hit the queue over a long period of time to get to the correct lambda that I had specified. If I only created one worker "thread" there was no queueing taking place and wait time was always zero, as you'd normally expect. I played around a lot with the number of worker "threads" and found that for my open network between 10,000 and 100,000 workers worked out just fine and that 10,000 seconds was an adequate time frame for the solution.

One of the first things I had to handle with the large number of worker "threads" was to control the inter-arrival rate so that the combined requests per second of all the workers would come out to the correct value. After that, it was just playing around with the code to find a good combination of simulation time and worker count.
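The scaling itself is simple: if the target arrival rate is lambda and there are N workers, each worker draws its inter-arrival times from an exponential distribution with mean N/lambda, so the superposition of all N workers arrives at roughly lambda requests per second. For example, with lambda = 2 req/sec and 10,000 workers, each worker sleeps an average of 10,000/2 = 5,000 seconds between its own requests, which is exactly what the expovariate(1.0/(InterArrivalRate*MaxWorkLoad)) line in the code below works out to.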




#!/usr/bin/env python

from SimPy.Simulation import *
from random import Random, expovariate, uniform
import sys
import time

class G:
    Rnd = Random(12345)
    MyQueue = Resource(1)
    QMon = Monitor()
    ServiceTime = 0.50

class WorkLoad(Process):

    TotalCalls = 0L
    TotalResidence = 0L
    TotalWait = 0L
    TotalService = 0L

    def __init__(self):
        Process.__init__(self)
        self.StartUpTime = 0.0

    def Run(self, MaxWorkLoad, Lambda):
        InterArrivalRate = (1.0/Lambda)
        while 1:
            WaitTime = G.Rnd.expovariate(1.0/(InterArrivalRate*MaxWorkLoad))
            yield hold, self, WaitTime
            self.StartUpTime = now()
            yield request, self, G.MyQueue
            WorkLoad.TotalWait += now() - self.StartUpTime      # W
            yield hold, self, G.ServiceTime                     # S
            G.QMon.observe(len(G.MyQueue.waitQ))
            WorkLoad.TotalResidence += now() - self.StartUpTime # R = W + S
            WorkLoad.TotalCalls += 1
            yield release, self, G.MyQueue

def main():

    MaxWorkLoad = 1000
    Lambda = float(sys.argv[1])
    MaxSimTime = 1000.00

    WorkLoad.TotalCalls = 0L
    WorkLoad.TotalResidence = 0L
    WorkLoad.TotalWait = 0L
    WorkLoad.TotalService = 0L

    initialize()

    print "MaxWorkLoad = ", MaxWorkLoad
    print "MaxSimTime = ", MaxSimTime

    for I in range(MaxWorkLoad):
        W = WorkLoad()
        activate(W, W.Run(MaxWorkLoad, Lambda))

    TotalWorkLoad = I + 1

    simulate(until=MaxSimTime)

    print "Ideal Throughput     : ", Lambda
    print "Simulated Seconds    : ", MaxSimTime
    print "TotalWorkLoads       : ", TotalWorkLoad
    print "Number of calls      : ", WorkLoad.TotalCalls
    print "Throughput           : ", WorkLoad.TotalCalls/MaxSimTime
    print "Total Wait Time      : ", WorkLoad.TotalWait
    print "Total Residence Time : ", WorkLoad.TotalResidence
    print "Mean Residence Time  : ", WorkLoad.TotalResidence/WorkLoad.TotalCalls
    print "Mean Wait Time       : ", WorkLoad.TotalWait/WorkLoad.TotalCalls
    print "Mean Service Time    : ", (WorkLoad.TotalResidence/WorkLoad.TotalCalls) - (WorkLoad.TotalWait/WorkLoad.TotalCalls)
    print "Total Utilization    : ", ((G.ServiceTime * WorkLoad.TotalCalls) / MaxSimTime) * 100 , " %"
    print "Mean waitQ           : ", G.QMon.mean()

if __name__ == '__main__': main()


I still have a lot of learning to do with SimPy, that's for sure. It's been almost a decade since I've messed with Python and I am really rusty. Time to take care of that.

Here is the comparison of metrics for utilization and response time for the single queue with both solutions in handy, dandy line graph form:

Utilization:



No surprises here as both lines are almost the same.

Response Time:



This is the big surprise for me. As utilization increases, the response times between SimPy and PDQ start to diverge. At low utilizations they're almost the same, but look at around 30% utilization (0.6 requests per sec) and the divergence starts. I knew that there would be some differences between SimPy and PDQ but I didn't expect this much of a difference. I need to go over the SimPy solution and make sure I didn't fat finger anything as this is my first SimPy solution for a queue.

Queue Waiting:



Like the Response Time graph, we show that PDQ is gonkulating more queuing as a result of higher expected response times than the SimPy model.

While PDQ will respond with "Inf" for 2.0 requests per second and error out with anything greater than 2 req/sec, the SimPy solution just keeps crunching, with throughput asymptotic to 2 requests per second. I can push the SimPy solution to 1.9999 requests per second but never hit exactly 2.

Here is an example of the SimPy solution with 2.1 requests per second:




auswipe@auswipe-desktop:~/Desktop/pbe/simpy$ ./q.py 2.1
MaxWorkLoad = 10000
MaxSimTime = 10000.0
Ideal Throughput : 2.1
Simulated Seconds : 10000.0
TotalWorkLoads : 10000
Number of calls : 19976
Throughput : 1.9976
Total Wait Time : 1996958.71394
Total Residence Time : 2006728.44111
Mean Residence Time : 100.45697042
Mean Wait Time : 99.9678971738
Mean Service Time : 0.489073246233
Total Utilization : 99.88 %
Mean WaitQ : 18.6269640142


I've still got a long way to go with SimPy but I thought this was a decent first attempt with a simple problem.

Saturday, August 8, 2009

Manual model building TeamQuest Model gotcha

A while back I started to play around with Perl::PDQ and decided in order to help me learn the R language that I'd just stick with PDQ-R for now. At work I don't have a system yet in place where I can perform measurements and comparisons of actual response times versus PDQ-R results.

I did the next best thing for me, which was to compare the results in a case study. As with some of my previous blog entries I was using case studies from Performance By Design.

For my first attempt at PDQ-R coding I was using the baseline from Chapter 5: Case Study I: A Database Service. This chapter is pretty nifty because it covers the topic of cluster analysis for workload analysis with three classes of workload. That led me to learning the basics of cluster analysis with R.

Here was my solution for the baseline found in 5.4 of Performance By Design.




library("pdq");

# Total req/sec into the open queue model
lambda_into_system <- 1.33;

# Split up the req/sec into each class based upon
# previously supplied ratios

coeff <- c(0.33/1.33, 0.53/1.33, 0.47/1.33);

lambda <- coeff * lambda_into_system;

# Class1,Class2,Class3
cpu_demand <- c(0.096, 0.615, 0.193);
disk1_demand <- c(0.088, 0.683, 0.763);
disk2_demand <- c(0.119, 0.795, 0.400);

workStreamName <- 1:3;


Init("Chapter 5.4");

for (n in 1:3) {
workStreamName[n] <- sprintf("class_%d", n);
CreateOpen(workStreamName[n], lambda[n]);
};

CreateNode("CPU", CEN, FCFS);
CreateNode("Disk1", CEN, FCFS);
CreateNode("Disk2", CEN, FCFS);

for (n in 1:3) {
SetDemand("CPU", workStreamName[n], cpu_demand[n] );
SetDemand("Disk1", workStreamName[n], disk1_demand[n]);
SetDemand("Disk2", workStreamName[n], disk2_demand[n]);
};

SetWUnit("Trans");
SetTUnit("Second");

Solve(CANON);
#Report();

response <- 1:3;

for (n in 1:3) {
response[n] <- GetResponse(TRANS, workStreamName[n] );
};

for (n in 1:3) {
print(sprintf(" PDQ-R computation shows response time for class %d is %f seconds", n, response[n]));
};


Which yields the results of:



> source('baseline.R')
[1] " PDQ-R computation shows response time for class 1 is 0.864179 seconds"
[1] " PDQ-R computation shows response time for class 2 is 6.105397 seconds"
[1] " PDQ-R computation shows response time for class 3 is 4.535833 seconds"


These values match the results in the book and the available Excel spreadsheet the authors developed in support of the book.

After I coded my PDQ-R solution I started to look into TeamQuest Model for capacity analysis and decided to do the same case study and see if the numbers agreed between all three sources.

I developed my TeamQuest Model and set up the visits and service times based upon the computed service demand for all three classes of workload:



However, the results did not match at all!



I contacted the TeamQuest folks with what I had done and showed my initial work both with PDQ-R and computations by hand and figured that I was doing something wrong with TeamQuest Model. I wanted to know what was up with the difference in values.

TeamQuest tech support finally got back to me with the solution. It appears that TeamQuest Model expects the service time to be set to 0.001 seconds and the number of visits modified to meet the service demand desired. I must've overlooked that in the TeamQuest Model tutorial but sure enough, it works. After sending some e-mails back and forth with TeamQuest it appears that the 0.001 second service time limit is only for CPUs. I am told that it is OK to use the actual visits and service times with AR IO's, but I haven't tested it out yet.
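In other words, the service demand gets expressed entirely through the visit count: a class with 0.096 seconds of CPU demand, for example, would be entered as 96 visits at 0.001 seconds each, since 96 x 0.001 = 0.096. (The 0.096 figure is just the Class 1 CPU demand from the baseline above, used here for illustration.)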



Results with:



Apparently if the TeamQuest agent is used to automatically create models based upon hardware and system utilization, this is set automatically and is a non-issue. It only shows up when building a model from scratch. It's an easy fix but an unexpected issue.

Sunday, August 2, 2009

Hey, everybody! Let's analyze the performance of a three tier system with PDQ-R!

This post is a continuation of this post that I did earlier this week.

Continuing the solution for Case Study IV: An E-Business Service from the book, Performance by Design: Computer Capacity Planning By Example.

In the previous post I discuss how to take the transition probability matrix and work backwards toward the original series of linear equations for solving the number of visits to a series of web pages. In this case study there are actually two types of visitors that result in two transition probability matrices that must be utilized. There are 25% of Type A visitors and 75% of Type B visitors.
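The linear-algebra step itself is small: with b as the vector that injects one visit into the Entry state and P as a transition probability matrix, the visit counts satisfy V = b + P^T V, so V = (I - P^T)^(-1) b, which is exactly what the VisitsByTransitionMatrix() function in the code below computes with solve(). As a sanity check, here is the same calculation done with numpy against the Type B matrix (my own throwaway check, not part of the PDQ-R solution):

import numpy as np

# Type B transition matrix (Entry, Home, Search, View, Login, Create, Bid, Exit)
P = np.array([[0.00, 1.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00],
              [0.00, 0.00, 0.70, 0.00, 0.10, 0.00, 0.00, 0.20],
              [0.00, 0.00, 0.45, 0.15, 0.10, 0.00, 0.00, 0.30],
              [0.00, 0.00, 0.00, 0.00, 0.40, 0.00, 0.00, 0.60],
              [0.00, 0.00, 0.00, 0.00, 0.00, 0.30, 0.55, 0.15],
              [0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00],
              [0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 1.00],
              [0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00]])

b = np.zeros(8)
b[0] = 1.0                       # one visit enters the system at Entry

# V = (I - P^T)^-1 b, i.e. solve (I - P^T) V = b
V = np.linalg.solve(np.eye(8) - P.T, b)
print V                          # visits per system entry, per page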

Each tier of the hypothetical e-biz service is made up by a single CPU and a single disk drive. A matrix is supplied with the total service demand for each component by each page that is hit by visitors.

While it is simple to write some code to analyze web logs to generate the transition probability matrix based upon customer traffic it is very difficult to isolate the total demand at each component with chaotic customer traffic. But that is why we have load testing tools that are available to us. In a pseudo-production environment we are capable of simulating customer traffic to one page at a time and calculating the total demand for components. In this particular case only the CPU and disk drives are being modeled but for a real service we'd want to model the CPU, disk drives, memory system, network system, etc.

After running simulated customer traffic against isolated page hits we could generate a similar demand matrix for components and use it for what-if analysis.
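One handy relationship for that exercise is the Utilization Law: while a single page is being driven in isolation, the service demand of a component is just its measured utilization divided by the page throughput, D = U / X. For instance (made-up numbers), if the application server CPU sits at 60% busy while the Search page is pushed at 20 hits per second, the Search demand on that CPU is roughly 0.60 / 20 = 0.030 seconds, which is how a demand table like the one used in the code below gets filled in.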

I went ahead and kept my solution in R even though I thought that a Perl solution would be more elegant (did I just use Perl and elegant in the same sentence?). I designed my solution as two separate programs: one to spit out numbers, and another to generate graphs of page response times and component utilization. Both pieces of code make use of PDQ-R and allow for a variable number of web servers, application servers and database servers.




# Solution parameters

gamma <- 10.96; # Rate into system
numWS <- 1; # Number of Web Servers
numAS <- 1; # Number of Application Servers
numDS <- 1; # Number of Database Servers

# external library
library("pdq");

# Constants #

E <- 1;
H <- 2;
S <- 3;
V <- 4;
G <- 5;
C <- 6;
B <- 7;
X <- 8;

PAGE_NAMES <- c("Enter", "HomePage", "Search", "ViewBids", "Login", "CreateAuction", "PlaceBid", "Exit");
COMPONENTS <- c("CPU", "Disk");
SERVER_TYPES <- c("WS", "AS", "DS");

WS_CPU <- 1;
WS_DISK <- 2;
AS_CPU <- 3;
AS_DISK <- 4;
DS_CPU <- 5;
DS_DISK <- 6;

# Functions used in solution

VisitsByTransitionMatrix <- function(M, B) {
A <- t(M);
A <- -1 * A;
for (i in 1:sqrt(length(A))) {
j <- i;
A[i,j] <- A[i,j] + 1;
};
return(solve(A,B));
};

CalculateLambda <- function(gamma, f_a, f_b, V_a, V_b, index) {
return (
gamma*((f_a*V_a[index]) + (f_b*V_b[index]))
);
};


f_a <- 0.25; # Fraction of TypeA users
f_b <- 1 - f_a; # Fraction of TypeB users

lambda <- 1:X; # Array of lambda for each page

SystemInput <- matrix(c(1,0,0,0,0,0,0,0),nrow=8,ncol=1) # 8.3, Figure 8.2, page 208
TypeA <- matrix(c(0,1,0,0,0,0,0,0,0,0,0.7,0,0.1,0,0,
0.2,0,0,0.4,0.2,0.15,0,0,0.25,0,0,
0,0,0.65,0,0,0.35,0,0,0,0,0,0.3,0.6,
0.1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
0,0,0,0,0,0,0,0), ncol=8, nrow=8, byrow=TRUE); # 8.4, Table 8.1, page 209
TypeB <- matrix(c(0,1,0,0,0,0,0,0,0,0,0.7,0,0.1,0,0,
0.2,0,0,0.45,0.15,0.1,0,0,0.3,0,0,
0,0,0.4,0,0,0.6,0,0,0,0,0,0.3,0.55,
0.15,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,0), nrow=8, ncol=8, byrow=TRUE); # 8.4, Table 8.2, page 210
DemandTable <- matrix(c(0,0.008,0.009,0.011,0.06,0.012,0.015,
0,0,0.03,0.01,0.01,0.01,0.01,0.01,0,
0,0,0.03,0.035,0.025,0.045,0.04,0,0,
0,0.008,0.08,0.009,0.011,0.012,0,0,
0,0.01,0.009,0.015,0.07,0.045,0,0,0,
0.035,0.018,0.05,0.08,0.09,0), ncol=8, nrow=6, byrow=TRUE); # 8.4, Table 8.4, page 212 (with modifications)

VisitsA <- VisitsByTransitionMatrix(TypeA, SystemInput);
VisitsB <- VisitsByTransitionMatrix(TypeB, SystemInput);

lambda[E] <- 0; # Not used in calculations
lambda[H] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, H);
lambda[S] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, S);
lambda[V] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, V);
lambda[G] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, G);
lambda[C] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, C);
lambda[B] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, B);
lambda[X] <- 0 # Not used in calculations

Init("e_biz_service");

# Define workstreams

for (n in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[n]);
CreateOpen(workStreamName, lambda[n]);
};

# Define Web Server Queues

for (i in 1:numWS) {
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("WS_%d_%s", i, COMPONENTS[j]);
CreateNode(nodeName, CEN, FCFS);
};
};

# Define Application Server Queues

for (i in 1:numAS) {
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("AS_%d_%s", i, COMPONENTS[j]);
CreateNode(nodeName, CEN, FCFS);
};
};

# Define Database Server Queues

for (i in 1:numDS) {
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("DS_%d_%s", i, COMPONENTS[j]);
CreateNode(nodeName, CEN, FCFS);
};
};

# Set Demand for the Web Servers

for (i in 1:numWS) {
demandIndex <- WS_CPU;
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("WS_%d_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
SetDemand(nodeName, workStreamName, (DemandTable[demandIndex + (j-1), k])/numWS);
};
};
};

# Set Demand for the App Servers

for (i in 1:numAS) {
demandIndex <- AS_CPU;
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("AS_%d_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
SetDemand(nodeName, workStreamName, (DemandTable[demandIndex + (j-1), k])/numAS);
};
};
};

# Set Demand for the Database Servers

for (i in 1:numDS) {
demandIndex <- DS_CPU;
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("DS_%d_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
SetDemand(nodeName, workStreamName, (DemandTable[demandIndex + (j-1), k])/numDS);
};
};
};

SetWUnit("Trans");
SetTUnit("Second");

Solve(CANON);

print("Arrival Rates for each page:");

for (i in H:B) {
print(sprintf("%s = %f", PAGE_NAMES[i], lambda[i]));
};

print("[-------------------------------------------------]");

print("Page Response Times");

for (i in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[i]);
print(sprintf("%s = %f seconds.", PAGE_NAMES[i], GetResponse(TRANS, workStreamName)));
};

print("[-------------------------------------------------]");

print("Component Utilizations");

for (i in 1:numWS) {
for (j in 1:length(COMPONENTS)) {
totalUtilization <- 0;
nodeName <- sprintf("WS_%s_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
totalUtilization <- totalUtilization + GetUtilization(nodeName, workStreamName, TRANS);
};
print(sprintf("%s = %3.2f %%", nodeName, totalUtilization * 100));
};
};

for (i in 1:numAS) {
for (j in 1:length(COMPONENTS)) {
totalUtilization <- 0;
nodeName <- sprintf("AS_%s_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
totalUtilization <- totalUtilization + GetUtilization(nodeName, workStreamName, TRANS);
};
print(sprintf("%s = %3.2f %%", nodeName, totalUtilization * 100));
};
};

for (i in 1:numDS) {
for (j in 1:length(COMPONENTS)) {
totalUtilization <- 0;
nodeName <- sprintf("DS_%s_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
totalUtilization <- totalUtilization + GetUtilization(nodeName, workStreamName, TRANS);
};
print(sprintf("%s = %3.2f %%", nodeName, totalUtilization * 100));
};
};



Here is a bit of sample output with 10.96 users entering the system per second:




[1] "Arrival Rates for each page:"
[1] "HomePage = 10.960000"
[1] "Search = 13.658485"
[1] "ViewBids = 2.208606"
[1] "Login = 3.664958"
[1] "CreateAuction = 1.099487"
[1] "PlaceBid = 2.074180"
[1] "[-------------------------------------------------]"
[1] "Page Response Times"
[1] "HomePage = 0.083517 seconds."
[1] "Search = 1.612366 seconds."
[1] "ViewBids = 1.044683 seconds."
[1] "Login = 2.323417 seconds."
[1] "CreateAuction = 3.622690 seconds."
[1] "PlaceBid = 3.983755 seconds."
[1] "[-------------------------------------------------]"
[1] "Component Utilizations"
[1] "WS_1_CPU = 49.91 %"
[1] "WS_1_Disk = 55.59 %"
[1] "AS_1_CPU = 71.11 %"
[1] "AS_1_Disk = 35.59 %"
[1] "DS_1_CPU = 38.17 %"
[1] "DS_1_Disk = 97.57 %"


Take a look at that database server disk utilization. Almost 100%! That isn't any good. Let's run the model with two database servers just to ease up on the poor drives!

I modify the line that reads "numDS <- 1; # Number of Database Servers" to read "numDS <- 2; # Number of Database Servers" and let 'er rip:




[1] "Arrival Rates for each page:"
[1] "HomePage = 10.960000"
[1] "Search = 13.658485"
[1] "ViewBids = 2.208606"
[1] "Login = 3.664958"
[1] "CreateAuction = 1.099487"
[1] "PlaceBid = 2.074180"
[1] "[-------------------------------------------------]"
[1] "Page Response Times"
[1] "HomePage = 0.083517 seconds."
[1] "Search = 0.237452 seconds."
[1] "ViewBids = 0.336113 seconds."
[1] "Login = 0.358981 seconds."
[1] "CreateAuction = 0.462042 seconds."
[1] "PlaceBid = 0.440903 seconds."
[1] "[-------------------------------------------------]"
[1] "Component Utilizations"
[1] "WS_1_CPU = 49.91 %"
[1] "WS_1_Disk = 55.59 %"
[1] "AS_1_CPU = 71.11 %"
[1] "AS_1_Disk = 35.59 %"
[1] "DS_1_CPU = 19.09 %"
[1] "DS_1_Disk = 48.78 %"
[1] "DS_2_CPU = 19.09 %"
[1] "DS_2_Disk = 48.78 %"


That's better. There is a significant reduction in page response time to boot:




Page            1 DS        2 DS        Diff
HomePage        0.083517    0.083517     0.000000
Search          1.612366    0.237452    -1.374914
ViewBids        1.044683    0.336113    -0.708570
Login           2.323417    0.358981    -1.964436
CreateAuction   3.622690    0.462042    -3.160648
PlaceBid        3.983755    0.440903    -3.542852


Holy smoke! Adding that second database server makes a heck of a difference, doesn't it?

Being able to pull up numbers is great, but it doesn't have the impact that a good graph does. I love graphs! They are so great for conveying information to non-technical folks.

So, to do that I took my original solution and did a code spin, fold and mutilation to allow the generation of graphs. Here is the code that I wrote to generate the graphs.




# Solution parameters

maxGamma <- 11.2 # Maximum rate into system
steppingValue <- 0.1;

numWS <- 1; # Number of Web Servers
numAS <- 1; # Number of Application Servers
numDS <- 1; # Number of Database Servers

# external library
library("pdq");

# Constants

E <- 1;
H <- 2;
S <- 3;
V <- 4;
G <- 5;
C <- 6;
B <- 7;
X <- 8;

PAGE_NAMES <- c("Enter", "HomePage", "Search", "ViewBids", "Login", "CreateAuction", "PlaceBid", "Exit");
COMPONENTS <- c("CPU", "Disk");
SERVER_TYPES <- c("WS", "AS", "DS");

WS_CPU <- 1;
WS_DISK <- 2;
AS_CPU <- 3;
AS_DISK <- 4;
DS_CPU <- 5;
DS_DISK <- 6;

# Functions used in solution

VisitsByTransitionMatrix <- function(M, B) {
A <- t(M);
A <- -1 * A;
for (i in 1:sqrt(length(A))) {
j <- i;
A[i,j] <- A[i,j] + 1;
};
return(solve(A,B));
};

CalculateLambda <- function(gamma, f_a, f_b, V_a, V_b, index) {
return (
gamma*((f_a*V_a[index]) + (f_b*V_b[index]))
);
};


f_a <- 0.25; # Fraction of TypeA users
f_b <- 1 - f_a; # Fraction of TypeB users

lambda <- 1:X; # Array of lambda for each page

SystemInput <- matrix(c(1,0,0,0,0,0,0,0),nrow=8,ncol=1) # 8.3, Figure 8.2, page 208
TypeA <- matrix(c(0,1,0,0,0,0,0,0,0,0,0.7,0,0.1,0,0,
0.2,0,0,0.4,0.2,0.15,0,0,0.25,0,0,
0,0,0.65,0,0,0.35,0,0,0,0,0,0.3,0.6,
0.1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
0,0,0,0,0,0,0,0), ncol=8, nrow=8, byrow=TRUE); # 8.4, Table 8.1, page 209
TypeB <- matrix(c(0,1,0,0,0,0,0,0,0,0,0.7,0,0.1,0,0,
0.2,0,0,0.45,0.15,0.1,0,0,0.3,0,0,
0,0,0.4,0,0,0.6,0,0,0,0,0,0.3,0.55,
0.15,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,0), nrow=8, ncol=8, byrow=TRUE); # 8.4, Table 8.2, page 210
DemandTable <- matrix(c(0,0.008,0.009,0.011,0.06,0.012,0.015,
0,0,0.03,0.01,0.01,0.01,0.01,0.01,0,
0,0,0.03,0.035,0.025,0.045,0.04,0,0,
0,0.008,0.08,0.009,0.011,0.012,0,0,
0,0.01,0.009,0.015,0.07,0.045,0,0,0,
0.035,0.018,0.05,0.08,0.09,0), ncol=8, nrow=6, byrow=TRUE); # 8.4, Table 8.4, page 212 (with modifications)

VisitsA <- VisitsByTransitionMatrix(TypeA, SystemInput);
VisitsB <- VisitsByTransitionMatrix(TypeB, SystemInput);

numSteps <- (maxGamma/steppingValue)+1;
#numSteps <- (maxGamma/steppingValue);
numElements <- numSteps*X;
numUElements <- numSteps*(3*length(COMPONENTS));

steppingArray <- 1:numSteps;
responseArray <- 1:numElements;
utilArray <- 1:numUElements;

responseArray <- responseArray * 0;
utilArray <- utilArray * 0;

componentList <- length(COMPONENTS)*length(SERVER_TYPES);

entryNumber <- 1;
for (serverType in SERVER_TYPES) {
for (serverComponent in COMPONENTS) {
componentList[entryNumber] <- sprintf("%s %s", serverType, serverComponent);
entryNumber <- entryNumber + 1;
};
};

dim(responseArray) = c(X, round(numElements/X));
dim(utilArray) = c(3*length(COMPONENTS), round(numUElements/(3*length(COMPONENTS))));

loopCount <- 1;

for (gamma in seq(0, maxGamma, steppingValue)) {

steppingArray[loopCount] <- gamma;

lambda[E] <- 0; # Not used in calculations
lambda[H] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, H);
lambda[S] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, S);
lambda[V] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, V);
lambda[G] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, G);
lambda[C] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, C);
lambda[B] <- CalculateLambda(gamma, f_a, f_b, VisitsA, VisitsB, B);
lambda[X] <- 0 # Not used in calculations

Init("e_biz_service");

# Define workstreams

for (n in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[n]);
CreateOpen(workStreamName, lambda[n]);
};

# Define Web Server Queues

for (i in 1:numWS) {
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("WS_%d_%s", i, COMPONENTS[j]);
CreateNode(nodeName, CEN, FCFS);
};
};

# Define Application Server Queues

for (i in 1:numAS) {
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("AS_%d_%s", i, COMPONENTS[j]);
CreateNode(nodeName, CEN, FCFS);
};
};

# Define Database Server Queues

for (i in 1:numDS) {
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("DS_%d_%s", i, COMPONENTS[j]);
CreateNode(nodeName, CEN, FCFS);
};
};

# Set Demand for the Web Servers

for (i in 1:numWS) {
demandIndex <- WS_CPU;
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("WS_%d_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
SetDemand(nodeName, workStreamName, (DemandTable[demandIndex + (j-1), k])/numWS);
};
};
};

# Set Demand for the App Servers

for (i in 1:numAS) {
demandIndex <- AS_CPU;
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("AS_%d_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
SetDemand(nodeName, workStreamName, (DemandTable[demandIndex + (j-1), k])/numAS);
};
};
};

# Set Demand for the Database Servers

for (i in 1:numDS) {
demandIndex <- DS_CPU;
for (j in 1:length(COMPONENTS)) {
nodeName <- sprintf("DS_%d_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
SetDemand(nodeName, workStreamName, (DemandTable[demandIndex + (j-1), k])/numDS);
};
};
};

Solve(CANON);

for (i in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[i]);
responseArray[i, loopCount] <- GetResponse(TRANS, workStreamName);
};

uArrayEntries <- 0;
for (i in 1:numWS) {
for (j in 1:length(COMPONENTS)) {
totalUtilization <- 0;
nodeName <- sprintf("WS_%s_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
totalUtilization <- totalUtilization + GetUtilization(nodeName, workStreamName, TRANS);
if (i == 1) {
utilArray[uArrayEntries+j, loopCount] <- totalUtilization*100;
};
};
};
};

uArrayEntries <- 2;
for (i in 1:numAS) {
for (j in 1:length(COMPONENTS)) {
totalUtilization <- 0;
nodeName <- sprintf("AS_%s_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
totalUtilization <- totalUtilization + GetUtilization(nodeName, workStreamName, TRANS);
if (i == 1) {
utilArray[uArrayEntries+j, loopCount] <- totalUtilization * 100;
};
};
};
};

uArrayEntries <- 4;
for (i in 1:numDS) {
for (j in 1:length(COMPONENTS)) {
totalUtilization <- 0;
nodeName <- sprintf("DS_%s_%s", i, COMPONENTS[j]);
for (k in H:B) {
workStreamName <- sprintf("%s", PAGE_NAMES[k]);
totalUtilization <- totalUtilization + GetUtilization(nodeName, workStreamName, TRANS);
if (i == 1) {
utilArray[uArrayEntries+j, loopCount] <- totalUtilization * 100;
};
};
};
};

loopCount <- loopCount + 1;

};

# Generate Response Time Graph

loopCount <- 0;
for (i in H:B) {
arrayLength = numElements/X;
if (loopCount == 0) {
jpeg(file=sprintf("response_time_%d_WS_%d_AS_%d_DS.jpg", numWS, numAS, numDS), height=768, width=1024, quality=100);
plot(steppingArray,
responseArray[i, 1:arrayLength],
xlab="Hits Per Second into System",
ylab="Response Time",
col=i,
ylim=c(0,ceiling(max(responseArray))),
type="l",
pch=i,
lwd=4);
title(main=sprintf("Page Response Times for E-Biz with %d WS, %d AS and %d DS", numWS, numAS, numDS), font.main=2);
} else {
lines(steppingArray, responseArray[i, 1:arrayLength], col=i, pch=i, lwd=4);
};

loopCount <- loopCount + 1;
};
legend(1, ceiling(max(responseArray)), PAGE_NAMES[H:B], col=H:B, lty=1, lwd=4, cex=1.2);
dev.off()

# Graph component utilization

loopCount <- 0;
for (i in 1:length(componentList)) {
arrayLength = numSteps;
if (loopCount == 0) {
jpeg(file=sprintf("resource_utilization_%d_WS_%d_AS_%d_DS.jpg", numWS, numAS, numDS), height=768, width=1024, quality=100);
plot(steppingArray,
utilArray[i, 1:arrayLength],
xlab="Hits Per Second into System",
ylab="Resource Utilization, %",
col=i,
ylim=c(0,100),
type="l",
pch=i,
lwd=4);
title(main=sprintf("Resource Utilization for E-Biz with %d WS, %d AS and %d DS", numWS, numAS, numDS), font.main=2);
} else {
lines(steppingArray, utilArray[i, 1:arrayLength], col=i, pch=i, lwd=4);
};
loopCount <- loopCount + 1;
};
legend(0, 100, componentList, col=1:length(componentList), lty=1, lwd=4, cex=1.2);
dev.off()


The code isn't what I would call elegant at this time as a lot of it is still hardcoded for the specific solution but it is a step in the right direction.

And let's take a look at response time between the single database server and dual load bearing database servers:



Take a look at the effect that the overworked database disk drive has on the response time. At 11.2 customers per second into the site, the response time has shot up to the 30 second mark. Definitely not what you want your customers to have to suffer through, to be sure.

Here is a graph generated with the dual load bearing database server solution:



Holy guacamole is that a heck of an improvement or what? Any management type can look at these two graphs and immediately realize the impact to the system and the need for extra equipment.

And just for good measure, here is the resource utilization with both single and dual database servers:





Even an MBA grad can look at those graphs and realize that there is a problem that needs to be solved. w00t!

Ain't heuristic analysis fun?

Applying PDQ-R to this case study was a great exercise and I'm glad I undertook it. I can't wait to apply this to a real system and compare the results to see how well it works "in the real world." One thing that this type of heuristic analysis won't show is negative performance interaction between pages. In some circumstances I have seen performance problems between pages that were thought to be unrelated: when Page A is put under duress, Page B slows down even though Page A is supposedly not related to Page B. Often it has been found that, somewhere in a spaghetti line of object dependencies, the two pages were in fact related. With the analysis of service demand of pages we can also find the pages that have high service demand.

In the future, along with page response times and normal metric reporting, I think I'll add in service demand as well, so that when pages do start to perform poorly the change in service demand will point to an area to look into at the start of the root cause analysis and assist the developers.

Dr. Gunther has some more examples of applying Perl::PDQ to a variety of systems in his book, Analyzing Computer System Performance with Perl::PDQ. The source to the solutions in the book can be found in the PDQ download. But developing this solution from the ground up really drove some points home for me that will no doubt be useful in my future endeavors.