import unittest
import requests
from publicdata.census.files.url_templates import *
from publicdata.census.files.metafiles import Table, Column, TableShell, TableLookup, TableMeta
from publicdata.census.files.generators import SequenceFile, GeoFile, Table as TableGenerator
from rowgenerators import parse_app_url
from itertools import islice
import csv
import logging
[docs]class TestGenerators(unittest.TestCase):
[docs] def setUp(self):
import warnings
warnings.simplefilter('ignore')
[docs] def test_tableshell(self):
ts = TableShell(2016, 1)
ts._process()
self.assertEqual(1319, len(ts.tables))
self.assertEqual(['b15002h', 'c15002h', 'b15002i', 'c15002i', 'b15003',
'c15003', 'b15010', 'c15010', 'c15010a', 'c15010b'],
list(ts.tables.keys())[500:510])
self.assertEqual(ts.tables['c16004'].title,
'AGE BY LANGUAGE SPOKEN AT HOME BY ABILITY TO SPEAK ENGLISH FOR THE POPULATION 5 YEARS AND OVER')
with open('/tmp/tables_shell.csv', 'w') as f:
w = csv.writer(f)
w.writerow(Table.csv_header)
w.writerows(t.row for t in ts.tables.values())
[docs] def test_tablelookup(self):
ts = TableLookup(2016, 1)
ts._process()
self.assertEqual(1310, len(ts.tables))
self.assertEqual(['c15002d', 'c15002e', 'c15002f', 'c15002g', 'c15002h',
'c15002i', 'c15003', 'c15010', 'c15010a', 'b15011'],
list(ts.tables.keys())[500:510])
self.assertEqual(ts.tables['c16004'].title,
'Age By Language Spoken At Home By Ability To Speak English For The Population 5 Years And Over')
with open('/tmp/tables_lookup.csv', 'w') as f:
w = csv.writer(f)
w.writerow(Table.csv_header)
w.writerows(t.row for t in ts.tables.values())
[docs] def test_geo(self):
tm = GeoFile(2016, 5, 'RI', 140, 1)
for row in islice(tm,10):
print(row)
[docs] def test_table(self):
import geoid.core
tm = TableGenerator(2016, 5, 'CA', geoid.core.names['tract'], 'B01001')
tracts = list(tm)
self.assertEqual(8058, len(tracts))
lens = [len(row) for row in tracts]
self.assertTrue(all(x == lens[0] for x in lens))
rows = list(islice(tm,5))
self.assertEqual(('geoid', 'b01001_001_m90', 'b01001_004', 'b01001_006_m90', 'b01001_009',
'b01001_011_m90', 'b01001_014', 'b01001_016_m90', 'b01001_019', 'b01001_021_m90',
'b01001_024', 'b01001_026_m90', 'b01001_029', 'b01001_031_m90', 'b01001_034',
'b01001_036_m90', 'b01001_039', 'b01001_041_m90', 'b01001_044', 'b01001_046_m90',
'b01001_049'),
rows[0][::5])
self.assertEqual(('14000US06001400100', 'CA', '001', 'Census Tract 4001, Alameda County, California',
3018, 195),
rows[1][:6])
# Checksum a few rows
self.assertEqual(6561, sum(rows[1][7:]))
self.assertEqual(9061, sum(rows[4][7:]))
[docs] def test_appurl(self):
from publicdata.census.util import sub_geoids, sub_summarylevel
from rowgenerators import parse_app_url
from publicdata.census.exceptions import CensusParsingException
#self.assertEqual(245,list(parse_app_url('census://2016/5/RI/140/B17001').generator))
#self.assertEqual(245, list(parse_app_url('census://RI/140/B17001').generator))
with self.assertRaises(ValueError):
sub_geoids('foobar')
u = parse_app_url('census://RI/140/B17001')
self.assertEqual('B17001', u.tableid)
self.assertEqual('04000US44', u.geoid)
u = parse_app_url('census://B17001/140/RI')
self.assertEqual('B17001', u.tableid)
self.assertEqual('04000US44', u.geoid)
u = parse_app_url('census://140/RI/B17001')
self.assertEqual('B17001', u.tableid)
self.assertEqual('04000US44', u.geoid)
with self.assertRaises(CensusParsingException):
parse_app_url('census://B17001/Frop/140')
with self.assertRaises(CensusParsingException):
parse_app_url('census://BINGO/RI/140')
[docs] def test_appurl_US(self):
from rowgenerators import parse_app_url
from rowgenerators.appurl.web.download import logger as download_logger
from publicdata.census.files import logger
logging.basicConfig()
logger.setLevel(logging.DEBUG)
# Iterate over all counties in the US
u = parse_app_url('census://2016/5/US/county/B01003')
rows = list(u.generator)
states = set()
counties = set()
for row in rows[1:]:
states.add(row[1])
counties.add(row[3])
from collections import Counter
c = Counter(row[3] for row in rows[1:])
for k, v in c.items():
if v > 1:
print(k,v)
self.assertEqual(52, len(states))
self.assertEqual(3220, len(counties))
self.assertEqual(3220,len(rows[1:]))
[docs] def test_sequence(self):
sf = SequenceFile(2016,5,'RI',140, 3 )
h, f, m = list(zip(sf.file_headers, sf.descriptions, sf.columns))[60]
self.assertEqual('B01001G_028', h)
self.assertEqual('SEX BY AGE (TWO OR MORE RACES) for People Who Are Two Or More Races - Female: - 55 to 64 '
'years',
f)
for h,f,m in list(zip(sf.file_headers, sf.descriptions, sf.columns)):
self.assertEqual(h, m.unique_id)
[docs] def test_dataframe(self):
from publicdata.census.files.appurl import CensusFile
from rowgenerators import parse_app_url
u = parse_app_url('census://2016/5/RI/140/B01002')
print(type(u))
g = u.generator
rows = list(g)
self.assertEqual(245,len(rows))
df = u.generator.dataframe()
self.assertEqual(9708, int(df['b01002_001'].sum()))
self.assertEqual(809, int(df['b01002_001_m90'].sum()))
self.assertEqual(9375, int(df['b01002_002'].sum()))
self.assertEqual(1171, int(df['b01002_002_m90'].sum()))
[docs] def test_geo_dataframe(self):
u = parse_app_url('census://2016/5/RI/140/B01002')
self.assertEqual(244, len(u.geoframe().geometry))
u = parse_app_url('censusgeo://2016/5/RI/140')
self.assertEqual(244, len(u.geoframe().geometry))
[docs] def test_age_dimensions(self):
'''Check that there are not tables with 'year' in the title that don't get a parsed age range'''
tm = TableMeta(2016, 5)
age_tables = []
for t_id, table in tm.tables.items():
if 'by age' in table.title.lower():
age_tables.append(t_id)
for at in age_tables:
u = parse_app_url('census://2016/5/RI/40/{}'.format(at.lower()))
g = u.generator
t = g.table
parse_errors = []
for c in t.columns:
if '_m90' not in c.unique_id and \
'year' in c.description and \
not c.age_range and \
'1 year ago' not in c.description and \
not( 'year-round' in c.description or 'year - round' in c.description) :
parse_errors.append(c)
for parse_error in parse_errors:
print(parse_error.row)
self.assertEqual(0, len(parse_errors))
[docs] def test_race_dimensions(self):
table_ids = ['B03002','C02003', 'B02017']
table_ids = ['b02008', 'b02010', 'b02015']
table_ids = ['B05010','B17001', 'B17001a', 'B17001b', 'B17001i']
for t_id in table_ids:
u = parse_app_url('census://2016/5/RI/40/{}'.format(t_id.lower()))
g = u.generator
t = g.table
print('---- ', t.table.title)
for i, c in enumerate(t.columns):
if '_m90' not in c.unique_id and i > 3:
row = [c.unique_id, c.sex, c.race, c.age, c.description]
print(row)
if i > 30:
break
[docs] def test_pov_dimensions(self):
tm = TableMeta(2016, 5)
for t_id, table in tm.tables.items():
u = parse_app_url('census://2016/5/RI/40/{}'.format(t_id.lower()))
g = u.generator
t = g.table
for i, c in enumerate(t.columns):
if '_m90' not in c.unique_id and 'poverty' in c.long_description:
print(c.unique_id, c.poverty_status, c.long_description)
[docs] def test_dimensions(self):
tm = TableMeta(2016, 5)
with open('/tmp/dimensions.csv', 'w') as f:
w = csv.writer(f)
w.writerow("id sex race age pov description".split())
for t_id, table in tm.tables.items():
u = parse_app_url('census://2016/5/RI/40/{}'.format(t_id.lower()))
g = u.generator
t = g.table
w.writerow([t_id, table.title])
for i, c in enumerate(t.columns):
if '_m90' not in c.unique_id and i > 3:
row = [c.unique_id, c.sex, c.race, c.age, c.poverty_status, c.description]
print(row)
w.writerow(row)
if __name__ == '__main__':
unittest.main()