# In SQL, it is easy to extract the age in years from the birth date:
#     EXTRACT(YEAR FROM age(F.birthdate)) AS Age_in_Years


# Create a binning function.
def binning(col, cut_points, labels=None):
    """Assign each value of *col* to a bin delimited by *cut_points*.

    Args:
        col: Column to bin; must provide ``min()`` / ``max()`` and be
            accepted by ``pd.cut`` (e.g. a pandas Series).
        cut_points: Interior bin edges in increasing order. ``n`` cut
            points produce ``n + 1`` bins.
        labels: One label per bin. When omitted (or empty), the bins are
            labelled ``0 .. n``.

    Returns:
        Whatever ``pd.cut`` returns for *col*: the bin label of each value.
    """
    cut_points = list(cut_points)  # accept any sequence, not only lists
    lowest = col.min()
    highest = col.max()

    # The data's min/max serve as the outer edges, as long as they lie
    # strictly outside the cut points. Otherwise the edge list would be
    # non-monotonic or contain duplicates and pd.cut would raise, so fall
    # back to an open-ended edge. This also covers an empty column, whose
    # min/max are NaN and fail both comparisons.
    if not cut_points or not lowest < cut_points[0]:
        lowest = float("-inf")
    if not cut_points or not highest > cut_points[-1]:
        highest = float("inf")
    break_points = [lowest] + cut_points + [highest]

    # If no labels are provided, use default labels 0 ... n.
    if not labels:
        labels = list(range(len(cut_points) + 1))

    # include_lowest=True keeps values equal to the first edge in the first bin.
    return pd.cut(col, bins=break_points, labels=labels, include_lowest=True)


# Binning age.
cut_points = [25, 35, 45, 55, 65]
labels = ["18 - 25", "26 - 35", "36 - 45", "46 - 55", "56 - 65", "66 +"]

# Apply it to a dataset, creating a new column containing the bins.
# NOTE(review): the ages are read from `applicata` but the result is stored
# on `df` -- confirm whether both names are meant to be the same DataFrame.
df["bins"] = binning(applicata["age_in_years"], cut_points, labels)
# Then group by age bin and count the rows in each bin, and plot the histogram, faceting on the unique values of another column.