User Tools

Site Tools


mongodb
import pandas as pd
from pymongo import MongoClient
from datetime import datetime
 
# Connection settings consumed by the query at the bottom of the script.
db = "db_name"                # name of the database to open
collection = "collection"     # name of the collection to aggregate over
host = "host.local"           # MongoDB server hostname
port = 27017                  # MongoDB server TCP port
username = "user"             # credentials; both must be set for authentication
password = "pass"
 
 
# Open connection
def _connect_mongo(host, port, username, password, db):
    """Open a MongoDB connection and return a handle to database ``db``.

    Args:
        host: Server hostname or IP address.
        port: Server TCP port.
        username: User name. Authentication is attempted only when both
            ``username`` and ``password`` are truthy.
        password: Password belonging to ``username``.
        db: Name of the database to return; when authenticating it is also
            placed in the path component of the connection URI.

    Returns:
        The result of indexing the ``MongoClient`` with ``db``.
    """
    if username and password:
        # Imported locally so this function does not depend on a file-level
        # import being added.
        from urllib.parse import quote_plus

        # Credentials must be percent-escaped before being embedded in the
        # URI; otherwise characters such as '@', ':', '/' or '%' corrupt it.
        mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (
            quote_plus(str(username)),
            quote_plus(str(password)),
            host,
            port,
            db,
        )
        conn = MongoClient(mongo_uri)
    else:
        conn = MongoClient(host, port)
    return conn[db]
 
 
# Read DB
def read_mongo(db, collection, filter=None, projection=None, host='localhost', port=27017, username=None, password=None, no_id=True):
    """Run an aggregation on ``db.collection`` and return it as a DataFrame.

    Args:
        db: Name of the database to query.
        collection: Name of the collection to aggregate over.
        filter: First aggregation pipeline stage (e.g. ``{"$match": {...}}``).
            Omitted from the pipeline when ``None`` or empty.
        projection: Second aggregation pipeline stage
            (e.g. ``{"$project": {...}}``). Omitted when ``None`` or empty.
        host: Server hostname, forwarded to ``_connect_mongo``.
        port: Server TCP port, forwarded to ``_connect_mongo``.
        username: Optional user name, forwarded to ``_connect_mongo``.
        password: Optional password, forwarded to ``_connect_mongo``.
        no_id: Accepted for interface compatibility. NOTE: it is currently
            not applied -- the ``_id`` column is kept in the result, exactly
            as before.

    Returns:
        A ``pandas.DataFrame`` built from the list of aggregated documents.
    """
    # Connect to MongoDB (kept under a separate name so the ``db`` argument
    # is not rebound from a database name to a database handle).
    database = _connect_mongo(host=host, port=port, username=username, password=password, db=db)

    # Keep only the stages that were actually supplied: an empty stage
    # document is not a valid pipeline stage, so the previous ``{}`` defaults
    # could never produce a working query.
    pipeline = [stage for stage in (filter, projection) if stage]

    try:
        cursor = database[collection].aggregate(pipeline)
        # Materialise the cursor before the connection is closed below.
        return pd.DataFrame(list(cursor))
    finally:
        # Release the client opened for this call; previously it was leaked.
        database.client.close()
 
# Query definition: a $match stage followed by a $project stage.

# Lower bound (inclusive) applied to httpRequestData.date.
d = datetime(2016, 9, 7)

# $match criteria: status is not "deprecated" and the request date is >= d.
query_filter = {
    "status": {"$ne": "deprecated"},
    "httpRequestData.date": {"$gte": d},
}

# $project mapping: output field name -> path of the source field.
query_project = {
    "date": "$httpRequestData.date",
    "geoLoc": "$geoLocationData.iso",
    "ipAddress": "$httpRequestData.ipAddress",
    "city": "$geoLocationData.city",
    "lat": "$geoLocationData.latitude",
    "long": "$geoLocationData.longitude",
    "isp": "$geoLocationData.isp",
}

# Run the two-stage aggregation and load the result into a DataFrame.
df = read_mongo(
    db,
    collection,
    filter={"$match": query_filter},
    projection={"$project": query_project},
    host=host,
    port=port,
    username=username,
    password=password,
)
mongodb.txt · Last modified: 2016/10/07 16:23 by vincenzo