User Tools

Site Tools


anomalies_detection_log_data
#########################  ANOMALY DETECTION
 
library(AnomalyDetection)
library(xts)
 
# data preparation

# Count access-log rows per calendar minute.
#
# log_rows: data frame as produced by read.table() on the access log. Only
#   column 4 is read; it must hold text of the form "[29/Sep/2015:04:13:00".
#
# Returns a data frame with one row per minute that has at least one request,
# in chronological order, with two columns:
#   hours     POSIXct truncated to the minute (column name kept from the
#             original script even though the resolution is minutes)
#   count(*)  integer number of rows falling in that minute
# Rows whose timestamp cannot be parsed are left out, and an input with zero
# rows yields a zero-row result instead of an error.
#
# strptime()/as.POSIXct() are called without `tz`, so timestamps are
# interpreted in the session time zone.
# NOTE(review): the log's UTC offset (presumably column 5) is ignored - confirm
# that this is acceptable for the data being analysed.
count_requests_per_minute <- function(log_rows) {
  minute_format <- "%Y-%m-%d %H:%M"
  stamps <- strptime(log_rows[, 4], format = "[%d/%b/%Y:%H:%M:%S")
  minute_labels <- format(stamps, minute_format)

  # table() sorts its levels and drops NA, which gives sorted, NA-free
  # timestamps without needing an SQL engine.
  per_minute <- table(minute_labels)

  data.frame(
    hours = as.POSIXct(as.character(names(per_minute)), format = minute_format),
    `count(*)` = as.vector(per_minute),
    check.names = FALSE  # keep the literal "count(*)" column name
  )
}

data <- read.table(
  "/Users/cenzo/Desktop/women-access1.log",
  sep = "",
  header = FALSE
)

# time series decomposition all request
hoursFreqCount <- count_requests_per_minute(data)
ts <- xts(hoursFreqCount$`count(*)`, order.by = hoursFreqCount$hours)
plot(ts)

# gamma = FALSE fits a non-seasonal Holt-Winters model
tsforecast <- HoltWinters(ts, gamma = FALSE)
plot(tsforecast)

# time series decomposition 404

# column 7 is compared against the status code
subts <- subset(data, data[, 7] == "404")
hoursFreqCount <- count_requests_per_minute(subts)

ts404 <- xts(hoursFreqCount$`count(*)`, order.by = hoursFreqCount$hours)
plot(ts404)

# time series decomposition 5*

subts <- subset(data, grepl("^5", data[, 7]))
hoursFreqCount <- count_requests_per_minute(subts)

ts505 <- xts(hoursFreqCount$`count(*)`, order.by = hoursFreqCount$hours)
plot(ts505)

# anomaly detection global and local maxima
# NOTE(review): at this point hoursFreqCount holds the 5xx counts computed just
# above, so both detections below run on 5xx responses only - confirm that this
# (rather than all requests) is what was intended.
hoursFreqCount$`count(*)` <- as.numeric(hoursFreqCount$`count(*)`)
anomalies <- AnomalyDetectionTs(hoursFreqCount, max_anoms = 0.001, plot = TRUE)
anomalies$plot


# anomaly detection 1 day ago using all the rest as training data
anomalies <- AnomalyDetectionTs(
  hoursFreqCount,
  max_anoms = 0.001,
  only_last = "day",
  plot = TRUE
)
anomalies$plot
anomalies_detection_log_data.txt · Last modified: 2015/09/29 04:13 by vincenzo