Source code for publicdata.census.groupby

import numpy as np
from pandas.core.groupby import SeriesGroupBy, DataFrameGroupBy

from publicdata.census.dataframe import CensusDataFrame
from publicdata.census.series import CensusSeries


def groupby(obj, by, **kwds):
    """Create the census-specific groupby object that matches ``obj``.

    :param obj: A ``CensusSeries`` or ``CensusDataFrame`` to be grouped.
    :param by: Grouping specification, passed through unchanged to the
        selected groupby class.
    :param kwds: Extra keyword arguments, passed through unchanged to the
        selected groupby class.
    :return: A ``CensusSeriesGroupBy`` when ``obj`` is a ``CensusSeries``,
        otherwise a ``CensusDataFrameGroupBy`` when ``obj`` is a
        ``CensusDataFrame``.
    :raises TypeError: If ``obj`` is neither of the two census types.
    """
    # Checked in order: the series type first, then the dataframe type.
    dispatch = (
        (CensusSeries, CensusSeriesGroupBy),
        (CensusDataFrame, CensusDataFrameGroupBy),
    )

    for census_type, grouper_cls in dispatch:
        if isinstance(obj, census_type):
            return grouper_cls(obj, by, **kwds)

    raise TypeError('invalid type: %s' % type(obj))  # pragma: no cover
class CensusSeriesGroupBy(SeriesGroupBy):
    """``SeriesGroupBy`` subclass that adds a root-sum-of-squares helper."""

    def sum_rs(self, x):
        """Return the square root of the sum of the squares of ``self.m90``.

        See the ACS General Handbook, Appendix A, "Calculating Margins of
        Error for Derived Estimates"
        (https://www.census.gov/content/dam/Census/library/publications/2008/acs/ACSGeneralHandbook.pdf)
        for a guide to these calculations.

        :param x: Accepted but not used by the calculation.
        :return: ``np.sqrt`` of the summed squares of ``self.m90``.
        """
        # NOTE(review): the argument ``x`` is ignored and the values come from
        # ``self.m90`` -- confirm whether ``x`` was meant to be used instead.
        squared = self.m90 ** 2
        total = sum(squared)
        return np.sqrt(total)
class CensusDataFrameGroupBy(DataFrameGroupBy):
    """``DataFrameGroupBy`` subclass that aggregates ``*_m90`` columns separately.

    ``sum`` and ``mean`` apply one aggregator to ordinary columns and a
    different one to columns whose name ends in ``_m90`` (presumably the
    margin-of-error columns -- confirm against the census dataframe schema).
    """

    def sum_m(self):
        """Return the square root of the sum of the squares of ``self.m90``."""
        # See the ACS General Handbook, Appendix A, "Calculating Margins of
        # Error for Derived Estimates".
        # (https://www.census.gov/content/dam/Census/library/publications/2008/acs/ACSGeneralHandbook.pdf)
        # for a guide to these calculations.
        # NOTE(review): this reads ``self.m90`` on the groupby object itself;
        # confirm that attribute resolves to something that supports ``** 2``.
        return np.sqrt(sum(self.m90 ** 2))

    def aggregate(self, arg, *args, **kwargs):
        """Forward all arguments unchanged to the parent ``aggregate``."""
        return super().aggregate(arg, *args, **kwargs)

    agg = aggregate

    def _m_agg(self, f1, f2):
        """Aggregate with ``f1`` for value columns and ``f2`` for ``*_m90`` columns.

        Builds a ``{column: [aggregator]}`` mapping and hands it to
        ``self.agg``. Grouping-key columns and object-dtype columns are left
        out of the mapping.

        :param f1: Aggregator for every column that is not an ``_m90`` column.
        :param f2: Aggregator for columns whose name ends in ``_m90``.
        :return: Whatever ``self.agg`` returns for the constructed mapping.
        """
        # ``self.keys`` may be a single label or a list/tuple of labels.
        if isinstance(self.keys, (list, tuple)):
            keys = self.keys
        else:
            keys = [self.keys]

        cf = {}

        for c in self.obj.columns:
            if c in keys:
                continue

            if self.obj[c].dtype == object:
                continue  # Skip strings?

            # Column labels are not guaranteed to be strings (e.g. integer
            # labels or MultiIndex tuples); only string labels can carry the
            # '_m90' suffix, so anything else is treated as a value column
            # instead of raising AttributeError on ``.endswith``.
            if isinstance(c, str) and c.endswith('_m90'):
                cf[c] = [f2]
            else:
                cf[c] = [f1]

        return self.agg(cf)

    def sum(self):
        """Sum value columns; combine ``*_m90`` columns with ``sum_rs``."""
        from publicdata.censusreporter.func import sum_rs

        return self._m_agg('sum', sum_rs)

    def mean(self):
        """Average value columns; combine ``*_m90`` columns with ``mean_m``."""
        from publicdata.censusreporter.func import mean_m

        return self._m_agg('mean', mean_m)