User Tools

Site Tools


attribution_model_adwords_optimization
# coding: utf-8
 
# In[1]:
 
# Identify non performing keywords that do not contribute in the conversion path, combining Big Query data #
# Define the query and all the parameters needed #
project_id = '74417662869'
dataset_id = '86553600'
startDate = "2015-10-01"
endDate = "2015-10-15"
select_query = 'hits.transaction.transactionId as TransactionID,date,visitStarttime' +                    ',fullVisitorId,trafficSource.campaign as Campaign,' +                   'trafficSource.source as Source,trafficSource.medium as Medium,' +                   'trafficSource.keyword as Keyword,trafficSource.adContent as AdContent,' +                 'trafficSource.adwordsClickInfo.adGroupId as AdGroupID,trafficSource.adwordsClickInfo.customerId as AccountID,trafficSource.adwordsClickInfo.criteriaId as criteriaId'
from_query = 'TABLE_DATE_RANGE([' + dataset_id + '.ga_sessions_],TIMESTAMP("' + startDate + '"),TIMESTAMP("' + endDate + '"))' 
where_query = 'trafficSource.source = "google" and trafficSource.medium = "cpc" and hits.page.hostname = "www.foodpanda.in"'
groupby_query = 'date,visitStarttime,visitId,TransactionID,fullVisitorId,CAMPAIGN,Source,Medium,Keyword,adContent,AdGroupID,AccountID,criteriaId'
 
query = ('SELECT ' + select_query +
             ' FROM ' + from_query + 
             ' WHERE ' + where_query +
             ' GROUP BY ' + groupby_query + ';')
 
 
select_query2 = 'hits.transaction.transactionId as TransactionID,date,visitStarttime,fullVisitorId'
where_query2 = 'hits.transaction.transactionId is not Null and STRING(trafficSource.adwordsClickInfo.customerId) = "8706521887"'
groupby_query2 = '1,2,3,4'
 
query2 = ('SELECT ' + select_query2 +
             ' FROM ' + from_query + 
             ' WHERE ' + where_query2 +
             ' GROUP BY ' + groupby_query2 + ';') 
 
 
# In[ ]:
 
# Load data from BQ
import pandas as pd
project_id = "panda-premium-analytics"
allSessions = pd.read_gbq(query, project_id=project_id, reauth=False)
print "allSessions done"
conversions = pd.read_gbq(query2, project_id="panda-premium-analytics", reauth=False)
print "conversions done"
 
 
# In[3]:
 
from rocketanalytics import GoogleAnalyticsDataClient
client = GoogleAnalyticsDataClient() 
 
# Execute query and get data
result = client.query(**{'ids': 'ga:87004169',
                         'metrics': 'ga:adclicks,ga:adcost, ga:transactions',
                         'dimensions': 'ga:adwordsCustomerID,ga:adwordsCriteriaID,ga:keyword,ga:adwordsAdgroupID',
                         'start_date': '2015-10-01',
                         'end_date': '2015-10-15'})
 
costData = result.to_pandas()
print "GA done"
 
 
# In[6]:
 
# Merge on fullVisitor Id and visitStarttime
merge_1 = allSessions.merge(conversions, on=["fullVisitorId","visitStarttime"], how="left")
len(merge_1.axes[0])
 
 
# In[8]:
 
# Merge on fullVisitor Id again
merge_2 = merge_1.merge(conversions, on=["fullVisitorId"], how="left")
 
 
# In[11]:
 
# get the not null transactions
merge_3_notnull = merge_2[merge_2.TransactionID.notnull()]
len(merge_3_notnull.axes[0])
 
 
# In[12]:
 
#filter if date_x  < date_y 
filtered = merge_3_notnull[merge_3_notnull.visitStarttime_x  <= merge_3_notnull.visitStarttime_y ]
len(filtered.axes[0])
 
 
# In[13]:
 
# Filter by Keyword and Adgroup ID and then drop duplicates
Keyword_notdup = filtered[['Keyword', 'AdGroupID']]
Keyword_notdup = Keyword_notdup.drop_duplicates()
len(Keyword_notdup.axes[0])
 
Keyword_notdup['type'] = 'in'
 
 
# In[14]:
 
# Rename GA columns
costData.rename(columns={'ga:keyword': 'Keyword'}, inplace=True)
costData.rename(columns={'ga:adwordsAdgroupID': 'AdGroupID'}, inplace=True)
 
 
# In[15]:
 
costData.dtypes
 
 
# In[16]:
 
# From object to float
costData['AdGroupID'] = costData['AdGroupID'].convert_objects(convert_numeric=True) 
 
 
# In[18]:
 
costData2 = costData
costKeyword = pd.merge(costData2, Keyword_notdup, on =["Keyword", "AdGroupID"], how ="left")
costKeyword
 
 
# In[21]:
 
Final = costKeyword[costKeyword.type != "in"]
# Final.to_csv('final_keywords_to_pause_in.csv')
Final
 
 
# In[379]:
 
# Check the number of transaction, match the keywords Idp
# per Final e costData
 
Final.to_csv('check_keywords.csv')
 
 
# In[382]:
 
costData.to_csv('check_GA.csv')
attribution_model_adwords_optimization.txt · Last modified: 2015/12/16 12:52 by vincenzo