一、创建多层次索引
(1)隐式构造
- 最常见的方法是给DataFrame构造函数的index参数传递两个或更多的数组
# Import numpy and pandas
import numpy as np
import pandas as pd
# 6x6 table of random integer scores drawn from [0, 100).
data = np.random.randint(0, 100, size=(6, 6))

# Row labels: outer level is the class, inner level is the student.
class_level = ["1班"] * 3 + ["2班"] * 3
student_level = ["张三", "李四", "王五", "鲁班", "张三丰", "张无忌"]
index = [class_level, student_level]

# Column labels: outer level is the exam, inner level is the subject.
exam_level = ["期中"] * 3 + ["期末"] * 3
subject_level = ["语文", "数学", "英语"] * 2
columns = [exam_level, subject_level]

# Implicit construction: the nested lists are passed to the constructor
# directly, without building a pd.MultiIndex by hand.
df = pd.DataFrame(data=data, index=index, columns=columns)
df
- Series也可以创建多层索引
# Six random integer scores drawn from [0, 100), one per student.
data = np.random.randint(0, 100, size=6)

# Two parallel label lists: class (outer level) and student (inner level).
class_level = ["1班"] * 3 + ["2班"] * 3
student_level = ["张三", "李四", "王五", "鲁班", "张三丰", "张无忌"]
index = [class_level, student_level]

# The nested lists are passed to the Series constructor as-is.
s = pd.Series(data=data, index=index)
s
(2)显式构造pd.MultiIndex
- 使用数组
# 6x6 table of random integer scores drawn from [0, 100).
data = np.random.randint(0, 100, size=(6, 6))

# Row index built explicitly from one array per level
# (class labels first, then student names).
row_arrays = [
    ["1班"] * 3 + ["2班"] * 3,
    ["张三", "李四", "王五", "鲁班", "张三丰", "张无忌"],
]
index = pd.MultiIndex.from_arrays(row_arrays)

# Column labels stay as plain nested lists in this example:
# exam on the outer level, subject on the inner level.
columns = [
    ["期中"] * 3 + ["期末"] * 3,
    ["语文", "数学", "英语"] * 2,
]

df = pd.DataFrame(data=data, index=index, columns=columns)
df
- 使用tuple
# 6x6 table of random integer scores drawn from [0, 100).
data = np.random.randint(0, 100, size=(6, 6))

# Row index built explicitly from (class, student) pairs, one tuple per row.
class_level = ["1班"] * 3 + ["2班"] * 3
student_level = ["张三", "李四", "王五", "鲁班", "张三丰", "张无忌"]
row_pairs = list(zip(class_level, student_level))
index = pd.MultiIndex.from_tuples(row_pairs)

# Column labels stay as plain nested lists in this example:
# exam on the outer level, subject on the inner level.
columns = [
    ["期中"] * 3 + ["期末"] * 3,
    ["语文", "数学", "英语"] * 2,
]

df = pd.DataFrame(data=data, index=index, columns=columns)
df
- 使用product
笛卡尔积:{a,b} × {c,d} ==> (a,c),(a,d),(b,c),(b,d)
# 6x6 table of random integer scores drawn from [0, 100).
data = np.random.randint(0, 100, size=(6, 6))

# Row index built as the Cartesian product of the two level value lists:
# every class is paired with every student name (2 x 3 = 6 rows).
class_values = ["1班", "2班"]
student_values = ["张三", "李四", "王五"]
index = pd.MultiIndex.from_product([class_values, student_values])

# Column labels stay as plain nested lists in this example:
# exam on the outer level, subject on the inner level.
columns = [
    ["期中"] * 3 + ["期末"] * 3,
    ["语文", "数学", "英语"] * 2,
]

df = pd.DataFrame(data=data, index=index, columns=columns)
df
二、多层列索引
除了行索引index,列索引columns也能用同样的方法创建多层索引
- 使用数组
# 6x6 table of random integer scores drawn from [0, 100).
data = np.random.randint(0, 100, size=(6, 6))

# Row index: one array per level (class, then student).
row_arrays = [
    ["1班"] * 3 + ["2班"] * 3,
    ["张三", "李四", "王五", "鲁班", "张三丰", "张无忌"],
]
index = pd.MultiIndex.from_arrays(row_arrays)

# Column index built the same way: one array per level (exam, then subject).
column_arrays = [
    ["期中"] * 3 + ["期末"] * 3,
    ["语文", "数学", "英语"] * 2,
]
columns = pd.MultiIndex.from_arrays(column_arrays)

df = pd.DataFrame(data=data, index=index, columns=columns)
df
- 使用tuple
# 6x6 table of random integer scores drawn from [0, 100).
data = np.random.randint(0, 100, size=(6, 6))

# Row index: one (class, student) tuple per row.
class_level = ["1班"] * 3 + ["2班"] * 3
student_level = ["张三", "李四", "王五", "鲁班", "张三丰", "张无忌"]
row_pairs = list(zip(class_level, student_level))
index = pd.MultiIndex.from_tuples(row_pairs)

# Column index: one (exam, subject) tuple per column, exam varying slowest.
column_pairs = [
    (exam, subject)
    for exam in ("期中", "期末")
    for subject in ("语文", "数学", "英语")
]
columns = pd.MultiIndex.from_tuples(column_pairs)

df = pd.DataFrame(data=data, index=index, columns=columns)
df
- 使用product
# 6x6 table of random integer scores drawn from [0, 100).
data = np.random.randint(0, 100, size=(6, 6))

# Row index: Cartesian product of classes and student names (2 x 3 = 6 rows).
class_values = ["1班", "2班"]
student_values = ["张三", "李四", "王五"]
index = pd.MultiIndex.from_product([class_values, student_values])

# Column index: Cartesian product of exams and subjects (2 x 3 = 6 columns).
exam_values = ["期中", "期末"]
subject_values = ["语文", "数学", "英语"]
columns = pd.MultiIndex.from_product([exam_values, subject_values])

df = pd.DataFrame(data=data, index=index, columns=columns)
df
© 版权声明
文章版权归作者所有,未经允许请勿转载。如内容涉嫌侵权,请在本页底部进入<联系我们>进行举报投诉!
THE END














暂无评论内容