# Source code for publicdata.census.dimensions
# Copyright (c) 2017 Civic Knowledge. This file is licensed under the terms of the
# MIT License, included in this distribution as LICENSE
"""
Functions for discovering and using dimensions
"""
import re
# Age ranges.
# Every alternative is wrapped in a named group whose name is also a key of
# ``age_formats``; the unnamed inner groups capture the numeric ages.
age_patterns = re.compile(r'(?P<range>(\d+) to (\d+) years)|'
                          r'(?P<over>(\d+) years and over)|'
                          r'(?P<and>(\d+) and (\d+) years)|'
                          r'(?P<under>Under (\d+) years)|'
                          r'(?P<single>(\d+) years)'
                          )

# Output template for each kind of age phrase matched by ``age_patterns``.
age_formats = {'and': '{:02d},{:02d}',
               'range': '{:02d}-{:02d}',
               'over': '{:02d}+',
               'single': '{:02d}',
               'under': '00-{:02d}'}


def age_range(c):
    """Return the age range described by a column, as a formatted string.

    :param c: object with a ``description`` attribute holding the column's
        text label.
    :return: ``'na'`` when the description is empty, mentions "grand"
        (questions about grandparents), or contains no age phrase recognised
        by ``age_patterns``. Otherwise one of:

        * ``'NN-MM'`` for "N to M years", "N and M years" and, with
          ``NN == MM``, for "N years";
        * ``'NN+'`` for "N years and over";
        * ``'00-NN'`` for "Under N years".
    """
    description = c.description or ''

    # Questions about grandparents
    if 'grand' in description.lower():
        return 'na'

    m = age_patterns.search(description.strip())
    if m is None:
        return 'na'

    # Exactly one alternative matched, so exactly one named group is set.
    kind = next(name for name, text in m.groupdict().items()
                if text is not None)

    # ``m.groups()`` also holds the full text of the named group and ``None``
    # for every alternative that did not match; keep only the numeric captures.
    ages = [int(v) for v in m.groups() if v is not None and v.isdigit()]

    if kind in ('and', 'single'):
        # Both are reported as a range; for 'single' both ends are the same.
        return age_formats['range'].format(ages[0], ages[-1])

    return age_formats[kind].format(*ages)
# Maps a race/ethnicity label, as it appears in a description, to a short
# code. Labels are compared case-insensitively by ``race``.
race_eths = {
    'American Indian and Alaska Native Alone': 'aian',
    'Asian Alone': 'asian',
    'Black or African American Alone': 'black',
    'Hispanic or Latino': 'hisp',
    'Native Hawaiian and Other Pacific Islander Alone': 'nhopi',
    'White alone': 'white',
    'White Alone, Not Hispanic or Latino': 'whitenh',
    'Multiple': 'multi',
    'Some Other Race Alone': 'other',
    'Two or More Races': 'two',
    'Total Population': 'total'
}


def race(desc):
    """Return the short race/ethnicity code for a description, or ``None``.

    The description is searched, case-insensitively, for each label in
    ``race_eths`` in the table's order and the code of the first acceptable
    label is returned.

    A description containing "not" (e.g. "White Alone, Not Hispanic or
    Latino") must not be classified by a label that lacks the negation, such
    as plain "Hispanic or Latino" or "White alone". Those labels are skipped,
    while a label that itself contains "not" may still match. Previously any
    "not" in the description rejected every label, so ``'whitenh'`` could
    never be returned.

    :param desc: description text; ``None`` or an empty string gives ``None``.
    :return: the matching code from ``race_eths``, or ``None`` if no label
        applies.
    """
    if not desc:
        return None

    text = desc.lower()
    negated = 'not' in text

    for label, code in race_eths.items():
        key = label.lower()

        if key not in text:
            continue

        if negated and 'not' not in key:
            # The description negates something this label does not.
            continue

        return code

    return None
def _age_bounds(age):
    """Return ``(age_min, age_max)`` as ints for an ``age_range`` string."""
    if '+' in age:
        # Open-ended range such as '65+': 200 stands in for "no upper limit".
        return int(age[:-1]), 200

    if '-' in age:
        low, high = age.split('-')
        return int(low), int(high)

    # 'na' or anything unrecognised: no age restriction.
    return 0, 200


def classify(c):
    """Classify a column according to race/ethnicity, sex and age.

    The columns of ``c.table`` are walked in order. The most recently seen
    sex and age are carried forward, so a column whose own description
    carries neither inherits them from the columns before it. The carried
    age is reset to ``'na'`` whenever the sex changes.

    NOTE: This doesn't work right for race when the race is in the column
    name, such as b25006. Race is only meaningful when it is in the table
    title.

    :param c: column to classify. It needs a ``name`` and a ``table`` whose
        ``description`` is the table title and whose ``columns`` is an
        iterable of columns, each with ``name`` and ``description``.
    :return: dict with keys ``race_eth``, ``age``, ``age_min``, ``age_max``
        and ``sex``, or ``None`` if no column of the table has ``c``'s name.
        ``age_min``/``age_max`` are ints derived from the returned ``age``;
        ``0`` and ``200`` are used where the age gives no bound.
    """
    current_sex = 'na'
    current_age = 'na'

    race_eth = race(c.table.description) or 'na'

    for c1 in c.table.columns:

        # 'Female' is tested first; the match is case-sensitive.
        if 'Female' in c1.description:
            sex = 'female'
        elif 'Male' in c1.description:
            sex = 'male'
        else:
            sex = None

        if sex and sex != current_sex:
            # A new sex section starts: ages seen so far no longer apply.
            current_sex = sex
            current_age = 'na'

        age = age_range(c1)

        if age != 'na':
            current_age = age

        if c1.name == c.name:
            # Derive the bounds from the age actually reported, so that a
            # column inheriting its age also gets the matching bounds.
            age_min, age_max = _age_bounds(current_age)

            return {
                'race_eth': race_eth,
                'age': current_age,
                'age_min': age_min,
                'age_max': age_max,
                'sex': current_sex
            }

    return None