8.4. CSV DictReader¶
csv.DictReader: an iterator of dict rows (wrap it in list() to get list[dict])
8.4.1. Example¶
>>> import csv
>>>
>>> FILE = r'/tmp/myfile.csv'
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.4,3.9,1.3,0.4,setosa
... 5.9,3.0,5.1,1.8,virginica
... 6.0,3.4,4.5,1.6,versicolor"""
>>>
>>> with open(FILE, mode='w') as file:
... _ = file.write(DATA)
>>>
>>>
>>> with open(FILE) as file:
... result = csv.DictReader(file)
...
... for line in result:
... print(line)
{'sepal_length': '5.4', 'sepal_width': '3.9', 'petal_length': '1.3', 'petal_width': '0.4', 'species': 'setosa'}
{'sepal_length': '5.9', 'sepal_width': '3.0', 'petal_length': '5.1', 'petal_width': '1.8', 'species': 'virginica'}
{'sepal_length': '6.0', 'sepal_width': '3.4', 'petal_length': '4.5', 'petal_width': '1.6', 'species': 'versicolor'}
Read data from a CSV file using csv.DictReader(). When you pass custom column names, note that the first line (typically a header) will be treated like normal data. Therefore we skip it using header = file.readline():
sepal_length,sepal_width,petal_length,petal_width,species
5.4,3.9,1.3,0.4,setosa
5.9,3.0,5.1,1.8,virginica
6.0,3.4,4.5,1.6,versicolor
>>> import csv
>>>
>>> FILE = r'/tmp/myfile.csv'
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.4,3.9,1.3,0.4,setosa
... 5.9,3.0,5.1,1.8,virginica
... 6.0,3.4,4.5,1.6,versicolor"""
>>>
>>> with open(FILE, mode='w') as file:
... _ = file.write(DATA)
>>>
>>>
>>> FIELDNAMES = ['Sepal Length', 'Sepal Width',
... 'Petal Length', 'Petal Width', 'Species']
>>>
>>>
>>> with open(FILE) as file:
... result = csv.DictReader(file, fieldnames=FIELDNAMES, delimiter=',')
... header = file.readline() # skip the first line (old header)
...
... for line in result:
... print(line)
{'Sepal Length': '5.4', 'Sepal Width': '3.9', 'Petal Length': '1.3', 'Petal Width': '0.4', 'Species': 'setosa'}
{'Sepal Length': '5.9', 'Sepal Width': '3.0', 'Petal Length': '5.1', 'Petal Width': '1.8', 'Species': 'virginica'}
{'Sepal Length': '6.0', 'Sepal Width': '3.4', 'Petal Length': '4.5', 'Petal Width': '1.6', 'Species': 'versicolor'}
8.4.2. Use Case - 0x01¶
'sepal_length';'sepal_width';'petal_length';'petal_width';'species'
'5,4';'3,9';'1,3';'0,4';'setosa'
'5,9';'3,0';'5,1';'1,8';'virginica'
'6,0';'3,4';'4,5';'1,6';'versicolor'
>>> import csv
>>>
>>>
>>> FILE = r'/tmp/myfile.csv'
>>> DATA = """'sepal_length';'sepal_width';'petal_length';'petal_width';'species'
... '5,4';'3,9';'1,3';'0,4';'setosa'
... '5,9';'3,0';'5,1';'1,8';'virginica'
... '6,0';'3,4';'4,5';'1,6';'versicolor'"""
>>>
>>> with open(FILE, mode='w') as file:
... _ = file.write(DATA)
>>>
>>>
>>> def isnumeric(value):
... try:
... float(value)
... return True
... except ValueError:
... return False
>>>
>>>
>>> def clean(line):
... return {key: float(v) if isnumeric(v) else v
... for key, value in line.items()
... if (v := value.replace(',', '.'))}
>>>
>>>
>>> with open(FILE) as file:
... result = csv.DictReader(file, delimiter=';', quotechar="'")
...
... for line in result:
... print(clean(line))
{'sepal_length': 5.4, 'sepal_width': 3.9, 'petal_length': 1.3, 'petal_width': 0.4, 'species': 'setosa'}
{'sepal_length': 5.9, 'sepal_width': 3.0, 'petal_length': 5.1, 'petal_width': 1.8, 'species': 'virginica'}
{'sepal_length': 6.0, 'sepal_width': 3.4, 'petal_length': 4.5, 'petal_width': 1.6, 'species': 'versicolor'}
8.4.3. Assignments¶
"""
* Assignment: CSV DictReader Iris
* Complexity: easy
* Lines of code: 5 lines
* Time: 5 min
English:
1. Using `csv.DictReader` read the `FILE` content
2. Use explicit `encoding`, `delimiter` and `quotechar`
3. Replace column names with `FIELDNAMES`
4. Skip the first line (header)
5. Add rows to `result: list[dict]`
6. Run doctests - all must succeed
Polish:
1. Korzystając z `csv.DictReader` wczytaj zawartość pliku `FILE`
2. Podaj jawnie `encoding`, `delimiter` oraz `quotechar`
3. Podmień nazwy kolumn na `FIELDNAMES`
4. Pomiń pierwszą linię (nagłówek)
5. Dodaj wiersze do `result: list[dict]`
6. Uruchom doctesty - wszystkie muszą się powieść
Tests:
>>> import sys; sys.tracebacklimit = 0
>>> from os import remove
>>> remove(FILE)
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is dict for x in result), \
'All rows in `result` should be dict'
>>> result # doctest: +NORMALIZE_WHITESPACE
[{'Sepal Length': '5.8', 'Sepal Width': '2.7', 'Petal Length': '5.1',
'Petal Width': '1.9', 'Species': 'virginica'},
{'Sepal Length': '5.1', 'Sepal Width': '3.5', 'Petal Length': '1.4',
'Petal Width': '0.2', 'Species': 'setosa'},
{'Sepal Length': '5.7', 'Sepal Width': '2.8', 'Petal Length': '4.1',
'Petal Width': '1.3', 'Species': 'versicolor'}]
"""
import csv
# Raw CSV content of the exercise fixture: a header line and three rows.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""

# Column names that should replace the header stored in the file.
FIELDNAMES = ['Sepal Length', 'Sepal Width',
              'Petal Length', 'Petal Width', 'Species']

# Relative path of the fixture file created below.
FILE = r'_temporary.csv'

# Create the fixture. The encoding is explicit because the task requires
# reading the file with an explicit `encoding`; relying on the platform
# default here could make the two sides disagree.
with open(FILE, mode='w', encoding='utf-8') as file:
    file.write(DATA)

# Using `csv.DictReader` read the `FILE` content
# type: list[dict]
result = ...