Pandas khá giỏi trong việc xử lý dữ liệu. Đây là một ví dụ cách sử dụng nó:
import pandas as pd
# Read the CSV into a pandas data frame (df)
# With a df you can do many things
# most important: visualize data with Seaborn
df = pd.read_csv('filename.csv', delimiter=',')
# Or export it in many ways, e.g. a list of tuples
tuples = [tuple(x) for x in df.values]
# or export it as a list of dicts
dicts = df.to_dict().values()
Một lợi thế lớn là pandas tự động xử lý các hàng tiêu đề.
Nếu bạn chưa nghe nói về Seaborn, tôi khuyên bạn nên xem nó.
Xem thêm: Làm cách nào để đọc và viết các tệp CSV bằng Python?
Pandas #2
import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df()
# Convert
dicts = df.to_dict('records')
Nội dung của DF là:
country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
Nội dung của dicts là
[{'country': 'Germany', 'population': 82521653.0, 'population_time': Timestamp('2016-12-01 00:00:00'), 'EUR': True},
{'country': 'France', 'population': 66991000.0, 'population_time': Timestamp('2017-01-01 00:00:00'), 'EUR': True},
{'country': 'Indonesia', 'population': 255461700.0, 'population_time': Timestamp('2017-01-01 00:00:00'), 'EUR': False},
{'country': 'Ireland', 'population': 4761865.0, 'population_time': NaT, 'EUR': True},
{'country': 'Spain', 'population': 46549045.0, 'population_time': Timestamp('2017-06-01 00:00:00'), 'EUR': True},
{'country': 'Vatican', 'population': nan, 'population_time': NaT, 'EUR': True}]
Pandas #3
import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df()
# Convert
lists = [[row[col] for col in df.columns] for row in df.to_dict('records')]
Nội dung của lists là:
[['Germany', 82521653.0, Timestamp('2016-12-01 00:00:00'), True],
['France', 66991000.0, Timestamp('2017-01-01 00:00:00'), True],
['Indonesia', 255461700.0, Timestamp('2017-01-01 00:00:00'), False],
['Ireland', 4761865.0, NaT, True],
['Spain', 46549045.0, Timestamp('2017-06-01 00:00:00'), True],
['Vatican', nan, NaT, True]]
Trong bài viết này, chúng ta sẽ xem cách đọc các tệp CSV vào danh sách các danh sách trong Python.
Phương pháp 1: Sử dụng mô-đun csv
- Chúng ta có thể đọc các tệp CSV vào các cấu trúc dữ liệu khác nhau như danh sách, danh sách các bộ dữ liệu hoặc danh sách các từ điển.
- Chúng ta cũng có thể sử dụng các mô-đun khác như pandas (chủ yếu được dùng trong các ứng dụng ML), vốn hỗ trợ các trường hợp nhập nội dung CSV vào danh sách có hoặc không có tiêu đề.
Ví dụ 1:
Trong ví dụ này, chúng tôi đang đọc một tệp CSV và chuyển đổi chuỗi thành danh sách.
Python3
import
csv
with
import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
0import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
1import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
2import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
3import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
4import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
5import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
6import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
7import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
8 import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
9import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
6 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
1import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
8 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
3 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
4import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
6 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
6 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
Output:
[['Jan', 34, 360, 417], ['Feb', 31, 342, 391], ['Mar', 36, 406, 419], ['Apr', 34, 396, 461],
['May', 36, 420, 472], ['Jun', 43, 472, 535], ['Jul', 49, 548, 622], ['Aug', 50, 559, 606],
['Sep', 40, 463, 508], ['Oct', 35, 407, 461], ['Nov', 31, 362, 390], ['Dec', 33, 405, 432]]
Ví dụ 2:
Trong ví dụ này, chúng tôi đọc một tệp CSV và lặp qua từng dòng trong tệp CSV đã cho.
Python3
import
csv
with
import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
0import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
1[{'country': 'Germany', 'population': 82521653.0, 'population_time': Timestamp['2016-12-01 00:00:00'], 'EUR': True},
{'country': 'France', 'population': 66991000.0, 'population_time': Timestamp['2017-01-01 00:00:00'], 'EUR': True},
{'country': 'Indonesia', 'population': 255461700.0, 'population_time': Timestamp['2017-01-01 00:00:00'], 'EUR': False},
{'country': 'Ireland', 'population': 4761865.0, 'population_time': NaT, 'EUR': True},
{'country': 'Spain', 'population': 46549045.0, 'population_time': Timestamp['2017-06-01 00:00:00'], 'EUR': True},
{'country': 'Vatican', 'population': nan, 'population_time': NaT, 'EUR': True}]
3[{'country': 'Germany', 'population': 82521653.0, 'population_time': Timestamp['2016-12-01 00:00:00'], 'EUR': True},
{'country': 'France', 'population': 66991000.0, 'population_time': Timestamp['2017-01-01 00:00:00'], 'EUR': True},
{'country': 'Indonesia', 'population': 255461700.0, 'population_time': Timestamp['2017-01-01 00:00:00'], 'EUR': False},
{'country': 'Ireland', 'population': 4761865.0, 'population_time': NaT, 'EUR': True},
{'country': 'Spain', 'population': 46549045.0, 'population_time': Timestamp['2017-06-01 00:00:00'], 'EUR': True},
{'country': 'Vatican', 'population': nan, 'population_time': NaT, 'EUR': True}]
4import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
6import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
7import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
8 import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
9import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
6 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
1import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
8 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
3 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
4import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
lists = [[row[col] for col in df.columns] for row in df.to_dict['records']]
7 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
6import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
lists = [[row[col] for col in df.columns] for row in df.to_dict['records']]
9import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
lists = [[row[col] for col in df.columns] for row in df.to_dict['records']]
7 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
6[['Germany', 82521653.0, Timestamp['2016-12-01 00:00:00'], True],
['France', 66991000.0, Timestamp['2017-01-01 00:00:00'], True],
['Indonesia', 255461700.0, Timestamp['2017-01-01 00:00:00'], False],
['Ireland', 4761865.0, NaT, True],
['Spain', 46549045.0, Timestamp['2017-06-01 00:00:00'], True],
['Vatican', nan, NaT, True]]
2[['Germany', 82521653.0, Timestamp['2016-12-01 00:00:00'], True],
['France', 66991000.0, Timestamp['2017-01-01 00:00:00'], True],
['Indonesia', 255461700.0, Timestamp['2017-01-01 00:00:00'], False],
['Ireland', 4761865.0, NaT, True],
['Spain', 46549045.0, Timestamp['2017-06-01 00:00:00'], True],
['Vatican', nan, NaT, True]]
3[['Germany', 82521653.0, Timestamp['2016-12-01 00:00:00'], True],
['France', 66991000.0, Timestamp['2017-01-01 00:00:00'], True],
['Indonesia', 255461700.0, Timestamp['2017-01-01 00:00:00'], False],
['Ireland', 4761865.0, NaT, True],
['Spain', 46549045.0, Timestamp['2017-06-01 00:00:00'], True],
['Vatican', nan, NaT, True]]
4import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
lists = [[row[col] for col in df.columns] for row in df.to_dict['records']]
7 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
6[['Germany', 82521653.0, Timestamp['2016-12-01 00:00:00'], True],
['France', 66991000.0, Timestamp['2017-01-01 00:00:00'], True],
['Indonesia', 255461700.0, Timestamp['2017-01-01 00:00:00'], False],
['Ireland', 4761865.0, NaT, True],
['Spain', 46549045.0, Timestamp['2017-06-01 00:00:00'], True],
['Vatican', nan, NaT, True]]
2[['Germany', 82521653.0, Timestamp['2016-12-01 00:00:00'], True],
['France', 66991000.0, Timestamp['2017-01-01 00:00:00'], True],
['Indonesia', 255461700.0, Timestamp['2017-01-01 00:00:00'], False],
['Ireland', 4761865.0, NaT, True],
['Spain', 46549045.0, Timestamp['2017-06-01 00:00:00'], True],
['Vatican', nan, NaT, True]]
3[['Germany', 82521653.0, Timestamp['2016-12-01 00:00:00'], True],
['France', 66991000.0, Timestamp['2017-01-01 00:00:00'], True],
['Indonesia', 255461700.0, Timestamp['2017-01-01 00:00:00'], False],
['Ireland', 4761865.0, NaT, True],
['Spain', 46549045.0, Timestamp['2017-06-01 00:00:00'], True],
['Vatican', nan, NaT, True]]
9lists
0[['Germany', 82521653.0, Timestamp['2016-12-01 00:00:00'], True],
['France', 66991000.0, Timestamp['2017-01-01 00:00:00'], True],
['Indonesia', 255461700.0, Timestamp['2017-01-01 00:00:00'], False],
['Ireland', 4761865.0, NaT, True],
['Spain', 46549045.0, Timestamp['2017-06-01 00:00:00'], True],
['Vatican', nan, NaT, True]]
9lists
2lists
Output:
[['Jan', 34, 360, 417], ['Feb', 31, 342, 391], ['Mar', 36, 406, 419], ['Apr', 34, 396, 461],
['May', 36, 420, 472], ['Jun', 43, 472, 535], ['Jul', 49, 548, 622], ['Aug', 50, 559, 606],
['Sep', 40, 463, 508], ['Oct', 35, 407, 461], ['Nov', 31, 362, 390], ['Dec', 33, 405, 432]]
Lưu ý: Để biết thêm thông tin, hãy tham khảo bài "Đọc CSV vào danh sách bằng Pandas" (Read CSV Into List Using Pandas).
Python3
import
import
0
import
1
import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
8 import
3import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
6import
5import
6import
7import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
3import
9import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
3csv
1csv
2import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
6csv
4import
6csv
6import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
3csv
8import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
3with
0csv
2import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
6with
3import
6with
5import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
3with
7import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
3with
9import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
00import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
01import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
02import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
8 import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
04import
1import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
lists = [[row[col] for col in df.columns] for row in df.to_dict['records']]
1 country population population_time EUR
0 Germany 82521653.0 2016-12-01 True
1 France 66991000.0 2017-01-01 True
2 Indonesia 255461700.0 2017-01-01 False
3 Ireland 4761865.0 NaT True
4 Spain 46549045.0 2017-06-01 True
5 Vatican NaN NaT True
6import pandas as pd
# Get data - reading the CSV file
import mpu.pd
df = mpu.pd.example_df[]
# Convert
dicts = df.to_dict['records']
Output: