# 1.install_sale_analysis.r
#SUMMARY#
#1# INSTALL PLOT AND HOLT WINTERS FORECAST 7 DAYS (SALES, INSTALLS)
#2# INCREMENTAL SALES, GLOBAL and NETWORK LEVEL
#3# AVERAGE TIME TO ACTIVATION ANALYSIS
 
library(bigrquery)
library(dplyr)
library(ggplot2)
library(xts)
library(forecast)
library(reshape)
library(scales)
 
 
# BigQuery billing project.
project <- "panda-premium-analytics"

# query adjust.01_mobile_step01_nodup
# Legacy BigQuery SQL: per-device event stream for selected countries, with
# the gap to the previous event (LAG over id, ordered by time) rendered both
# as a dd hh:mm:ss string and in minutes, plus a per-device event rank.
# NOTE: the alias 'timefifference_in_min' is misspelled, but downstream code
# (data$timefifference_in_min, around the time-analysis section) references
# it by that exact name — rename both together or neither.
# FIX: the original WHERE clause was
#   ... or event_country = 'RU' and id is not null
# and legacy SQL binds AND tighter than OR, so 'id is not null' only applied
# to RU rows. Parenthesized so the null-id filter applies to every country.
sql <- "
SELECT
log_date,
id,
time,
event,
right('0' + string(datediff(timestamp(time),timestamp(prev_time))),2) + ' ' + time(sec_to_timestamp((timestamp_to_sec(timestamp(time)) - timestamp_to_sec(timestamp(prev_time))))) as timedifference_as_dd_hh_mm_ss,
(timestamp_to_sec(timestamp(time)) - timestamp_to_sec(timestamp(prev_time)))/60 as timefifference_in_min,
network,
fb_campaign,
campaign,
event_country,
RANK() OVER (PARTITION BY id ORDER BY time ASC ) AS ORDER
FROM (
SELECT
log_date,
id,
time,
event,
LAG(time,1) OVER(PARTITION BY id ORDER BY time) AS prev_time,
network,
fb_campaign,
campaign,
event_country
FROM (
SELECT
log_date,
id,
timestamp AS time,
event,
network,
fb_campaign,
campaign,
event_country
FROM
adjust.1_step1_allcountries_ios

where (event_country = 'IN' or event_country = 'SG' or event_country = 'HK' or event_country = 'RU') and id is not null
) )
ORDER BY
id,
time"

# Pull the complete result set (all pages) into a local data.frame.
data <- query_exec(sql, project = project, max_pages = Inf)
 
 
# Working copies of the raw query result, one per analysis section, each
# with log_date parsed from "%Y%m%d" strings into Date objects.
data_plot         <- data
data_label        <- data
data_distribution <- data

data_plot$log_date         <- as.Date(data_plot$log_date, format = "%Y%m%d")
data_label$log_date        <- as.Date(data_label$log_date, format = "%Y%m%d")
data_distribution$log_date <- as.Date(data_distribution$log_date, format = "%Y%m%d")
 
 
 
# Installs ----
# Daily install counts for devices whose first recorded event is an install,
# restricted to rows tagged IN (India).
data_install <- data_plot %>%
  group_by(id, log_date) %>%
  filter(first(event) == 'install' & event_country == 'IN') %>%
  group_by(log_date) %>%
  summarise(count = sum(event == 'install'))

# FIX: use bare column names inside aes() rather than data_install$... —
# the $ form bypasses ggplot2's data masking and is a documented anti-pattern.
ggplot(data_install, aes(x = log_date, y = count)) +
  geom_line() +
  xlab("time") + ylab("Installs") +
  ggtitle("Installs")

# Time series predictions
# NOTE(review): ts(..., frequency = 7) assumes one row per consecutive day
# with no gaps — confirm the query returns a complete date range.
data_install <- as.data.frame(data_install)
# FIX: renamed 'ts' -> 'install_ts' so the stats::ts() function is not masked.
install_ts <- ts(data_install$count, frequency = 7)
# Multiplicative seasonality: HoltWinters() errors if any count is <= 0.
m <- HoltWinters(install_ts, seasonal = "mult")
plot(m)
plot(install_ts)
plot(fitted(m))

# FIX: spell out 'n.ahead' — the original 'n = 10' only worked via partial
# argument matching. The dead 'Start'/'End'/'Frequency' assignments that
# followed were never read and have been removed.
m_predict <- predict(m, n.ahead = 10)

# Fitted series plus the 10-step-ahead forecast.
plot(m, m_predict)

write.csv(install_ts, '/Users/vincenzo/Desktop/1.install_sale_analysis/ios/IN/ts_in_install_ios.csv')
write.csv(m_predict, '/Users/vincenzo/Desktop/1.install_sale_analysis/ios/IN/ts_in_predict_install_ios.csv')
 
# Purchases ----
# NOTE(review): the original first built data_purchase filtered to
# install-first devices, then immediately overwrote it with the pipeline
# below — a dead store, possibly meant to be chained into this filter.
# The dead store is removed; surviving behavior is unchanged: daily 'sale'
# counts for IN across all devices.
data_purchase <- data %>%
  group_by(id, log_date) %>%
  filter(event == 'sale' & event_country == 'IN') %>%
  group_by(log_date) %>%
  summarise(count = sum(event == 'sale'))

data_purchase <- as.data.frame(data_purchase)
# FIX: renamed 'ts' -> 'purchase_ts' so the stats::ts() function is not masked.
purchase_ts <- ts(data_purchase$count, frequency = 7)
# Multiplicative seasonality: HoltWinters() errors if any count is <= 0.
m <- HoltWinters(purchase_ts, seasonal = "mult")
plot(m)
plot(purchase_ts)
plot(fitted(m))

# FIX: 'n.ahead' spelled out (was 'n = 10' via partial matching); the dead
# Start/End/Frequency assignments were never read and have been removed.
m_predict <- predict(m, n.ahead = 10)

# Fitted series plus the 10-step-ahead forecast.
plot(m, m_predict)

write.csv(purchase_ts, '/Users/vincenzo/Desktop/1.install_sale_analysis/ios/IN/ts_in_order_ios.csv')
write.csv(m_predict, '/Users/vincenzo/Desktop/1.install_sale_analysis/ios/IN/ts_in_predict_order_ios.csv')
 
# Global incremental Purchase (globally) ----
# Per-device sale counts ('label'), aggregated into a distribution of how
# many devices placed 1, 2, 3, ... orders.
data_label <- data
# (log_date intentionally left as a raw string here — it is not used below)

data_label <- as.data.frame(data_label)

# Keep the IN rows of devices whose first recorded event is an install.
data_label <- data_label %>%
  group_by(id) %>%
  filter(first(event) == 'install' & event_country == 'IN')

# label = number of 'sale' events per device (0 for single-event devices,
# which can only be the install itself after the filter above).
data_label <- data_label %>%
  group_by(id) %>%
  mutate(label = if (n() == 1) 0 else as.numeric(sum(event == "sale")))

data_label <- select(data_label, id, event, network, label)

# Distribution: distinct devices sharing each order count.
# FIX: name the column directly in summarise() instead of patching
# names(...)[2] afterwards.
data_count_global <- data_label %>%
  group_by(label) %>%
  summarise(count = n_distinct(id))

# Drop devices with zero orders.
# FIX: compare numerically ('label != 0') instead of against the string "0".
data_count_global <- data_count_global[data_count_global$label != 0, ]

# plot histograms (factorizing first, so bars appear in descending order)
data_count_global$count <- as.numeric(data_count_global$count)
data_count_global$label <- factor(data_count_global$label,
                                  levels = data_count_global$label[order(data_count_global$label,
                                                                         decreasing = TRUE)])

ggplot(data_count_global, aes(x = label, y = count)) +
  geom_bar(stat = "identity") +
  xlab("Count of Orders") + ggtitle("Incremental Order Distribution - Global") +
  ylab("Number of Orders")

write.csv(data_count_global, '/Users/vincenzo/Desktop/1.install_sale_analysis/ios/IN/2.orderdistribution_in_ios_global.csv')
 
# label the users (per-network order distribution) ----
data_label <- data
data_label$log_date <- as.Date(data$log_date, format = "%Y%m%d")

data_label <- as.data.frame(data_label)

# Keep the IN rows of devices whose first recorded event is an install.
data_label <- data_label %>%
  group_by(id) %>%
  filter(first(event) == 'install' & event_country == 'IN')

# label = number of 'sale' events per device.
data_label <- data_label %>%
  group_by(id) %>%
  mutate(label = if (n() == 1) 0 else as.numeric(sum(event == "sale")))

data_label <- select(data_label, id, event, network, label)

# Distinct devices per (order count, network) pair.
# NOTE(review): the rename below targets column 2, which is 'network', NOT
# the count — so the column named 'count' actually holds network names. The
# filter and colour aesthetic below depend on that, and the CSV written at
# the end exposes this header, so the naming is kept as-is.
data_count_network <- data_label %>%
  group_by(label, network) %>%
  summarise(unique_elements = n_distinct(id))
names(data_count_network)[2] <- "count"

# FIX: use %in% instead of chained '==' ORs — '==' propagates NA, so the
# original injected all-NA rows whenever the network value was missing.
# ('Kiss My Ads' appears under two capitalizations in the raw data, so both
# spellings stay in the list.)
big_players <- c('Jampp', 'Facebook Install', 'Google Search', 'Kiss My Ads',
                 'Lemmonet', 'Email', 'Kiss my Ads')
data_count_network_bigplayer <- data_count_network[data_count_network$count %in% big_players, ]

ggplot(data = data_count_network_bigplayer,
       aes(x = label, y = unique_elements, colour = count)) +
  geom_line() + scale_y_log10() +
  xlab("Count of Orders") + ggtitle("Incremental Order Distribution - per Network - Top Players") +
  ylab("Number of Orders")

write.csv(data_count_network, '/Users/vincenzo/Desktop/1.install_sale_analysis/ios/IN/2.networks_events_in_ios.csv')
 
## Time Analysis ----

# round the minutes
# ('timefifference_in_min' matches the misspelled SQL alias from the query
# above — keep the two in sync if either is ever renamed.)
data$timedif_in_min <- round(data$timefifference_in_min, digits = 3)

# Events of IN devices whose first recorded event is an install.
time_diff <- data %>%
  group_by(id) %>%
  filter(first(event) == 'install' & event_country == 'IN')

# FIX: the original path used shell-style escaped spaces ('Mobile\ Reporting'),
# which is an invalid escape sequence in an R string literal (parse error);
# plain spaces are correct. The pointless assignment of write.csv()'s
# invisible NULL return value is dropped too.
write.csv(time_diff, file = '/Users/vincenzo/Documents/OM/Mobile/Mobile Reporting/Working Files/test_heatmap_id_ios.csv')
 
# move to excel but eventually do it in here

# import the results (per-network average activation times, wide format)
test <- read.csv("/Users/vincenzo/Desktop/1.install_sale_analysis/ios/IN/heatmap_in_ios.csv")

# NOTE(review): the original computed test_division <- test / 60 ("time in
# hours") but then melted the raw 'test' anyway, so the conversion was dead
# code and has been removed. If hours were actually intended, melt the
# divided frame instead — confirm with the report owner.

# rescale the data
# reshape::melt keeps the non-numeric 'Network' column as the id variable.
test.m <- melt(test)
# value2: all cell values rescaled to a shared 0-100 range for the heatmap fill.
test.m <- transform(test.m, value2 = rescale(value, to = c(0, 100)))
#test.m <- subset(test.m, Network!="Lemmonet" & Network!="OMG" & Network!="Website" & Network!="Headway Digital")

write.csv(test.m, file = "rescaling.csv")

# Average time to activation per channel
p <- ggplot(test.m, aes(variable, Network)) +
  geom_tile(aes(fill = value2), colour = "white") +
  scale_fill_gradient(low = "lightblue", high = "red", name = "Average Time Activation") +
  xlab("Incremental Purchase") + ggtitle("Average Time to Activation and Incremental Orders")

plot(p)
 
# Same heatmap, built from the time-to-first/second-order export.
test_first <- read.csv("/Users/vincenzo/Desktop/1.install_sale_analysis/ios/IN/heatmap_in_ios_first.csv")

# NOTE(review): the original recomputed test_division here from 'test'
# (not 'test_first' — a copy/paste slip) and never used the result; that
# dead code has been removed.

# rescale the data
# reshape::melt keeps the non-numeric 'Network' column as the id variable.
test.m <- melt(test_first)
test.m <- transform(test.m, value2 = rescale(value, to = c(0, 100)))
#test.m <- subset(test.m, Network!="Lemmonet" & Network!="OMG" & Network!="Website" & Network!="Headway Digital")

# NOTE(review): overwrites the 'rescaling.csv' written by the previous
# section — use a distinct filename if both exports matter.
write.csv(test.m, file = "rescaling.csv")

# Average time to activation per channel
p <- ggplot(test.m, aes(variable, Network)) +
  geom_tile(aes(fill = value2), colour = "white") +
  scale_fill_gradient(low = "lightblue", high = "red", name = "Average Time Activation") +
  xlab("Incremental Purchase") + ggtitle("Average Time to Activation to first and second order")

plot(p)
# 1.install_sale_analysis.r.txt · Last modified: 2015/12/16 12:58 by vincenzo