I spent some more time on it, and I figured something out.
I looked at the data and saw that, instead of getting 1 2 3 4 NULL NULL 5 6 7, I was getting 1 2 3 4 NULL NULL 7 1 2: after the gap, the values started again from the beginning. So, I figured out how to do loops and made a series of vectors: one containing the dates, and one per VM containing its load averages.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env Rscript
# Cairo supplies the CairoPNG() device used for the plot. library() stops
# right here with a clear error if the package is missing; require() would
# only return FALSE and let the script fail later at the CairoPNG() call.
library(Cairo, quietly = TRUE)

# Read the uptime log; read.table() splits each line on whitespace.
logfile <- read.table(".uptime.log")

# Name the eight fields of a log line. Only host, date, time and load are
# used further down.
colnames(logfile) <- c("date", "time", "log", "host", "colon", "load", "x", "y")

# Keep just the columns we need.
needed <- logfile[c("host", "date", "time", "load")]

# Build an hourly key per reading:
# instead of 'YYYY/MM/DD hh:mm:ss', we end up with 'YYYYMMDDhh00'.
needed$datetime <- paste(as.character(needed$date), as.character(needed$time))
# Drop the last six characters (the ':mm:ss' part of the time).
needed$datetime <- sub(".{6}$", "", needed$datetime)
# Drop the date slashes and the space between date and hour.
# NOTE(review): this assumes the 'YYYY/MM/DD hh:mm:ss' layout described above,
# i.e. exactly two slashes and one space per key.
needed$datetime <- gsub("[/ ]", "", needed$datetime)
# Pad with '00' minutes so every key has the form YYYYMMDDhh00.
needed$datetime <- paste0(needed$datetime, "00")

# Final working table: one row per reading, keyed by host and hourly stamp.
final <- needed[c("host", "datetime", "load")]

# Every hourly key present in the log, in order of first appearance.
dates <- unique(final$datetime)
# One table per VM, each built the same way: select the rows whose host
# matches and keep only the datetime and load columns in a single indexing
# step. %in% never yields NA, so rows with a missing host are left out.
genomics     <- final[final$host %in% "genomics",      c("datetime", "load")]
genomicsapps <- final[final$host %in% "genomics-apps", c("datetime", "load")]
genomicsdb   <- final[final$host %in% "genomics-db",   c("datetime", "load")]
genomicstest <- final[final$host %in% "genomics-test", c("datetime", "load")]
# Align one VM's load readings with the full list of hourly keys.
#
#   host_table  data frame with `datetime` and `load` columns
#   all_dates   vector of datetime keys to report on
#
# Returns one value per entry of all_dates, in the same order. match() finds
# the first row of host_table carrying each key; where there is none (or the
# stored load is NA) the value becomes -1, because NA cannot be handled by the
# code that follows and -1 can.
load_by_date <- function(host_table, all_dates) {
  loads <- host_table$load[match(all_dates, host_table$datetime)]
  loads[is.na(loads)] <- -1
  loads
}

# One load vector per VM, all aligned to `dates`. This replaces four
# copy-pasted loops that grew each vector one element at a time.
genomics.v     <- load_by_date(genomics, dates)
genomicsapps.v <- load_by_date(genomicsapps, dates)
genomicsdb.v   <- load_by_date(genomicsdb, dates)
genomicstest.v <- load_by_date(genomicstest, dates)
# Restrict everything to the most recent window, which is all that gets
# plotted: 96 hourly keys, i.e. the last four days.
fourdays <- 96

# Axis tick offsets, one every 8 entries across the window. Derived from
# `fourdays` so the ticks keep covering the window if its length changes.
label_range <- seq(0, fourdays, by = 8)

# tail() keeps the last `fourdays` entries (or all of them if there are fewer).
dates          <- tail(dates, n = fourdays)
genomics.v     <- tail(genomics.v, n = fourdays)
genomicsapps.v <- tail(genomicsapps.v, n = fourdays)
genomicsdb.v   <- tail(genomicsdb.v, n = fourdays)
genomicstest.v <- tail(genomicstest.v, n = fourdays)

# Legend labels and line colours for the plot, in matching order.
# (`names` is kept as the variable name because the plotting code reads it.)
names <- c("genomics", "genomics-apps", "genomics-db", "genomics-test")
p_cols <- c("blue", "red", "orange", "green")

# Vertical extent of the plot: always spans at least -1 (the "no reading"
# marker) to 5, and stretches further if any series goes beyond that.
yrange <- range(-1, 5, genomics.v, genomicstest.v, genomicsdb.v, genomicsapps.v)
# From here on it is plotting: open a PNG device, draw one line per VM, add
# legend, axis, frame and title, then close the device.
CairoPNG(
  filename = "load.png",
  width = 600,
  height = 400,
  pointsize = 12
)

# The first series sets up the plot region. The default x axis is suppressed
# (xaxt = "n") so a custom "hours ago" axis can be drawn below.
plot(
  genomics.v,
  col = p_cols[1],
  type = "l",
  xlab = "time",
  ylab = "load avg",
  ylim = yrange,
  las = 1,
  xaxt = "n",
  sub = "hours ago"
)

# Remaining series are overlaid on the same region.
lines(genomicsapps.v, col = p_cols[2], type = "l")
lines(genomicsdb.v, col = p_cols[3], type = "l")
lines(genomicstest.v, col = p_cols[4], type = "l")

# Where to put the legend box, in x/y coordinates of the plot.
legend_x <- 4
legend_y <- 4.5
legend(
  legend_x, legend_y, names,
  col = p_cols,
  cex = 0.8,
  lty = 1  # solid line for every entry
)

# Label the x axis in "hours ago", counted back from the newest point, which
# sits at x = length(genomics.v). With a full 96-point window this puts the
# ticks at 0, 8, ..., 96 labelled 96, 88, ..., 0; with a shorter history the
# labels still line up with the newest point instead of drifting.
axis(1, at = length(genomics.v) - label_range, labels = label_range)
box()
title(main = "Genomics VM Load Average")

# Close the device explicitly so load.png is flushed to disk.
invisible(dev.off())
Lev suggested that this is not how a real R person would do it. True. But this works, and I know how to plot vectors but not data tables. So, a few more changes (having the date in the title is good) and I can finish it up and put it into my workflow. Yay me.
No comments:
Post a Comment