| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2
3 '''
4 Transformation utilities for csv (or csv-like) generated rows.
5
6 The standard csv module is very useful for parsing tabular data in CSV format.
7 Typically though, one or more transformations need to be applied to the generated
8 rows before they are ready to be used; for instance "convert the 3rd column to int,
9 the 5th to float and ignore all the rest". This module provides an easy way to
10 specify such transformations upfront instead of coding them every time by hand.
11
12 Two classes are currently available, L{SequenceTransformer} and L{MappingTransformer},
13 that represent each row as a list (like C{csv.reader}) or dict (like C{csv.DictReader}),
14 respectively.
15
16 @requires: Python 2.3 or later.
17 '''
18
19
20 __all__ = ['RowTransformer', 'SequenceTransformer', 'MappingTransformer']
21 __author__ = 'George Sakkis <george.sakkis AT gmail DOT com>'
22
23 # Python 2.3 support
24 try: set
25 except NameError:
26 from sets import Set as set
27
28 #======= RowTransformer ================================================
29
31 '''Abstract base transformer class.'''
32
34 '''Specifies the transformations to apply for each row.
35
36 @param key_adaptors: Specifies the adaptor to transform each column.
37 A column is identified by some C{key} and C{adaptor} is either a
38 callable C{f(x)} or None (equivalent to the identity C{lambda x:x}).
39 @type key_adaptors: Sequence of (key,adaptor) pairs
40
41 @keyword default: An adaptor for all columns not specified explicitly in
42 C{key_adaptors}.
43 @type default: Callable C{f(x)} or C{None}
44
45 @keyword include: The columns to include for each row:
46 - If given, only the items at the respective columns are included,
47 in the same order.
48 - Otherwise if C{default} is given, all columns are included.
49 - Otherwise, if neither C{include} nor C{default} is given,
50 only the keys specified in C{key_adaptors} are included.
51 @type include: Iterable of keys
52
53 @keyword exclude: The columns to exclude for each row. This takes precedence
54 over all other options: a column specified in C{exclude} will B{never}
55 be included in the transformed rows.
56 @type exclude: Iterable of keys
57 '''
58 self._exclude = set(kwds.get('exclude',()))
59 self._key2Adaptor = {}
60 include = []
61 for key,adaptor in key_adaptors:
62 if key in self._key2Adaptor:
63 raise ValueError('More than one adaptors for column %r' % key)
64 include.append(key)
65 self._key2Adaptor[key] = adaptor
66 self._default_adaptor = kwds.get('default', None)
67 if 'include' in kwds:
68 self._include = kwds['include']
69 elif 'default' not in kwds:
70 # include only the explicitly specified columns
71 self._include = include
72 else: # include all columns
73 self._include = []
74
84
85
86 #======= SequenceTransformer ===================================================
87
89 '''A L{RowTransformer} that expects and returns rows as I{sequences}.
90
91 Examples:
92
93 >>> import csv
94 >>> rows = list(csv.reader(["1,3.34,4-3.2j,John",
95 ... "4,4,4,4",
96 ... "0,-1.1,3.4,None"]))
97
98 >>> # by default, SequenceTransformer returns each row as is
99 >>> list(SequenceTransformer()(rows)) == rows
100 True
101
102 >>> # transform and return the first two columns only
103 >>> for row in SequenceTransformer(int,float)(rows):
104 ... print row
105 [1, 3.3399999999999999]
106 [4, 4.0]
107 [0, -1.1000000000000001]
108
109 >>> # as before, but keep the remaining columns too
110 >>> for row in SequenceTransformer(int, float, default=None)(rows):
111 ... print row
112 [1, 3.3399999999999999, '4-3.2j', 'John']
113 [4, 4.0, '4', '4']
114 [0, -1.1000000000000001, '3.4', 'None']
115
116 >>> # as before, but in reverse column order
117 >>> for row in SequenceTransformer(int, float, default=None,
118 ... include=reversed(xrange(4)))(rows):
119 ... print row
120 ['John', '4-3.2j', 3.3399999999999999, 1]
121 ['4', '4', 4.0, 4]
122 ['None', '3.4', -1.1000000000000001, 0]
123
124 >>> # transform the second column and leave the rest as is
125 >>> for row in SequenceTransformer((1,float), default=None)(rows):
126 ... print row
127 ['1', 3.3399999999999999, '4-3.2j', 'John']
128 ['4', 4.0, '4', '4']
129 ['0', -1.1000000000000001, '3.4', 'None']
130
131 >>> # transform and return the 4th and the 2nd column, in this order
132 >>> for row in SequenceTransformer((3,str),(1,float))(rows):
133 ... print row
134 ['John', 3.3399999999999999]
135 ['4', 4.0]
136 ['None', -1.1000000000000001]
137
138 >>> # exclude the 4th column and eval() the rest (XXX: Use eval for trusted data only)
139 >>> for row in SequenceTransformer(default=eval, exclude=[3])(rows):
140 ... print row
141 [1, 3.3399999999999999, (4-3.2000000000000002j)]
142 [4, 4, 4]
143 [0, -1.1000000000000001, 3.3999999999999999]
144 '''
145
147 '''Specifies what transformations to apply to each row.
148
149 @param adaptors: The adaptors for selected columns. The i-th adaptor can be:
150 - None: C{row[i]} will be left as is.
151 - A callable C{f(x)}: C{row[i]} will be transformed by f to C{f(row[i])}.
152 - A pair C{(j,A)}: C{row[j]} will be transformed by adaptor A, where
153 A can be C{None} or a callable C{f(x)} as above. C{i} is ignored in
154 this case.
155
156 @keyword include: It can be:
157 - An iterable of indices: Only the items at the respective columns
158 are included (except for those that are also in C{exclude}).
159 - A positive integer N: shortcut for C{xrange(N)}.
160
161 @keyword default,exclude: See L{RowTransformer.__init__}
162 '''
163 key_adaptors = []
164 for i,adaptor in enumerate(adaptors):
165 # check if 'adaptor' is actually an (i,adaptor) pair or not
166 try: i,adaptor = adaptor
167 except: pass
168 if not (isinstance(i,int) and i>=0):
169 raise ValueError('Indices must be non-negative integers '
170 '(%r given)' % i)
171 key_adaptors.append((i,adaptor))
172 # convert 'include' to a range if an integer is passed
173 if isinstance(kwds.get('include'), int):
174 kwds['include'] = xrange(kwds['include'])
175 RowTransformer.__init__(self, key_adaptors, **kwds)
176
178 '''Transform the given rows by this transformer.
179
180 @param rows: An iterable of sequences.
181 @return: An iterator over the transformed rows as lists.
182 '''
183 exclude = self._exclude
184 get_adaptor = self._key2Adaptor.get
185 default = self._default_adaptor
186 if self._include: # include selected columns
187 indexed_adaptors = [(j,get_adaptor(j,default))
188 for j in self._include if j not in exclude]
189 for row in rows:
190 new_row = [None] * len(indexed_adaptors)
191 for i,(j,adaptor) in enumerate(indexed_adaptors):
192 if adaptor is None:
193 new_row[i] = row[j]
194 else:
195 new_row[i] = adaptor(row[j])
196 yield new_row
197 else: # include all (non-excluded) columns
198 excluded = object()
199 adaptors = []
200 for row in rows:
201 new_row = []; append = new_row.append
202 for i,value in enumerate(row):
203 try: adaptor = adaptors[i]
204 except IndexError:
205 # this will typically be raised only for the first row
206 if i in exclude:
207 adaptor = excluded
208 else:
209 adaptor = get_adaptor(i,default)
210 adaptors.append(adaptor)
211 if adaptor is not excluded:
212 if adaptor is None:
213 append(value)
214 else:
215 append(adaptor(value))
216 yield new_row
217
218 #======= MappingTransformer ====================================================
219
221 '''A L{RowTransformer} that expects and returns rows as I{mappings}.
222
223 Examples:
224
225 >>> import csv
226 >>> rows = list(csv.DictReader(["1,3.34,4-3.2j,John",
227 ... "4,4,4,4",
228 ... "0,-1.1,3.4,None" ],
229 ... fieldnames="IFCS"))
230
231 >>> # by default, MappingTransformer returns each row as is
232 >>> list(MappingTransformer()(rows)) == rows
233 True
234
235 >>> # transform and return the first two columns only
236 >>> for row in MappingTransformer({'I':int,'F':float})(rows):
237 ... print row
238 {'I': 1, 'F': 3.3399999999999999}
239 {'I': 4, 'F': 4.0}
240 {'I': 0, 'F': -1.1000000000000001}
241
242 >>> # as before, but keep the remaining columns too
243 >>> for row in MappingTransformer({'I':int, 'F':float}, default=None)(rows):
244 ... print row
245 {'I': 1, 'C': '4-3.2j', 'S': 'John', 'F': 3.3399999999999999}
246 {'I': 4, 'C': '4', 'S': '4', 'F': 4.0}
247 {'I': 0, 'C': '3.4', 'S': 'None', 'F': -1.1000000000000001}
248
249 >>> # transform the 'F' column and leave the rest as is
250 >>> for row in MappingTransformer({'F':float}, default=None)(rows):
251 ... print row
252 {'I': '1', 'C': '4-3.2j', 'S': 'John', 'F': 3.3399999999999999}
253 {'I': '4', 'C': '4', 'S': '4', 'F': 4.0}
254 {'I': '0', 'C': '3.4', 'S': 'None', 'F': -1.1000000000000001}
255
256 >>> # transform and return the 'F' and 'S' columns
257 >>> for row in MappingTransformer({'S':str,'F':float})(rows):
258 ... print row
259 {'S': 'John', 'F': 3.3399999999999999}
260 {'S': '4', 'F': 4.0}
261 {'S': 'None', 'F': -1.1000000000000001}
262
263 >>> # exclude the 'S' column and eval() the rest (XXX: Use eval for trusted data only)
264 >>> for row in MappingTransformer(default=eval, exclude=['S'])(rows):
265 ... print row
266 {'I': 1, 'C': (4-3.2000000000000002j), 'F': 3.3399999999999999}
267 {'I': 4, 'C': 4, 'F': 4}
268 {'I': 0, 'C': 3.3999999999999999, 'F': -1.1000000000000001}
269 '''
270
272 '''Specifies what transformations to apply to each row.
273
274 @param adaptors: A mapping from column names to adaptors.
275 @keyword default,include,exclude: See L{RowTransformer.__init__}
276 '''
277 RowTransformer.__init__(self, adaptors.items(), **kwds)
278
280 '''Transform the given rows by this transformer.
281
282 @param rows: An iterable of mappings.
283 @return: An iterator over the transformed rows as dicts.
284 '''
285 exclude = self._exclude
286 get_adaptor = self._key2Adaptor.get
287 default = self._default_adaptor
288 if self._include: # include selected columns
289 key_adaptors = [(key,get_adaptor(key,default))
290 for key in self._include if key not in exclude]
291 for row in rows:
292 new_row = {}
293 for key,adaptor in key_adaptors:
294 if adaptor is None:
295 new_row[key] = row[key]
296 else:
297 new_row[key] = adaptor(row[key])
298 yield new_row
299 else: # include all (non-excluded) columns
300 excluded = object()
301 key2adaptor = {}
302 for row in rows:
303 new_row = {}
304 for key in row:
305 try: adaptor = key2adaptor[key]
306 except KeyError:
307 if key in exclude:
308 adaptor = excluded
309 else:
310 adaptor = get_adaptor(key,default)
311 key2adaptor[key] = adaptor
312 if adaptor is not excluded:
313 if adaptor is None:
314 new_row[key] = row[key]
315 else:
316 new_row[key] = adaptor(row[key])
317 yield new_row
318
319
320 if __name__ == '__main__':
321 import doctest
322 doctest.testmod()
323
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0beta1 on Sat Dec 15 13:59:06 2007 | http://epydoc.sourceforge.net |