I have written function to print output like PROC SUMMARY in python, but needs some help in improvement on output:
Here is the code :
def wmean_grouped2 (group, var_name_in, var_name_weight):
d = group[var_name_in]
w = group[var_name_weight]
return (d * w).sum() / w.sum()
FUNCS = { "mean" : np.mean ,
"sum" : np.sum ,
"count" : np.count_nonzero }
def my_summary2 (
data ,
var_names_in ,
var_names_out ,
var_functions ,
var_name_weight = None ,
var_names_group = None ):
result = pd.DataFrame()
if var_names_group is None:
grouped = data.groupby (lambda x: True)
else:
grouped = data.groupby (var_names_group)
for var_name_in, var_name_out, var_function in \
zip(var_names_in,var_names_out,var_functions):
if var_function == "wsum":
func = lambda x : wmean_grouped2 (x, var_name_in, var_name_weight)
result[var_name_out] = pd.Series(grouped.apply(func))
else:
func = FUNCS[var_function]
result[var_name_out] = grouped[var_name_in].apply(func)
result.loc['Total'] = result.select_dtypes(pd.np.number).sum()
return result
And below is the code to call above function:
int(my_summary2 (
data=df,
var_names_in=["sal","sal"] ,
var_names_out=[
"COUNT","SAL"
] ,
var_functions=["count","sum"] ,
var_name_weight="val_1" ,
var_names_group=['name','age']
))
Here I am grouping on two columns "name" and "age" and below is the output I am getting :
COUNT SAL
(Arik, 32) 1 100
(David, 44) 2 260
(John, 33) 1 200
(John, 34) 1 300
(Peter, 33) 1 100
Total 6 960
In the output, both grouped columns are printing in brackets without column header of name and age. I want to get below output:
name age COUNT SAL
----------------------
Arik 32 1 100
David 44 2 260
John 33 1 200
John 34 1 300
Peter 33 1 100
----------------------
Total 6 960
What I have tried:
I have mentioned in above section what I have tried and what I required.