Python 每天一个小技巧 — Day 8:collections 模块妙用

今天是 Day 8,我们来学习 collections 模块的妙用。这个模块提供了许多强大的数据结构,能极大提升代码的效率和可读性。


深度解析:collections 模块

1.defaultdict:带默认值的字典

传统字典的问题:

# Baseline without collections: every update must first check whether the
# key already exists in the plain dict.
word_count = {}
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]

for word in words:
    if word in word_count:
        # Key seen before: bump its running count.
        word_count[word] += 1
    else:
        # First occurrence: the entry has to be created by hand.
        word_count[word] = 1

print(word_count)  # Output: {'apple': 3, 'banana': 2, 'cherry': 1}

使用 defaultdict:

from collections import defaultdict

# defaultdict(int) calls int() for a missing key, so new entries start at 0.
word_count = defaultdict(int)
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]

for word in words:
    word_count[word] += 1  # No membership check is needed.

# Convert to a plain dict so the printed form omits the defaultdict wrapper.
print(dict(word_count))  # Output: {'apple': 3, 'banana': 2, 'cherry': 1}

自定义默认值工厂:

# NOTE: this snippet relies on `defaultdict` imported in the previous snippet.
# list as the factory: a missing key starts out as an empty list.
list_dict = defaultdict(list)
list_dict['fruits'].append('apple')
list_dict['fruits'].append('banana')
print(list_dict)  # Output: defaultdict(<class 'list'>, {'fruits': ['apple', 'banana']})

# set as the factory: a missing key starts out as an empty set.
set_dict = defaultdict(set)
set_dict['colors'].add('red')
set_dict['colors'].add('blue')
set_dict['colors'].add('red')  # Duplicate; the set keeps a single 'red'.
# Sets are unordered, so the two colors may be printed in either order.
print(set_dict)  # Output: defaultdict(<class 'set'>, {'colors': {'red', 'blue'}})

2.Counter:计数器

基础计数:

from collections import Counter

words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
word_count = Counter(words)  # Tally every element of the iterable in one call.

print(word_count)  # Output: Counter({'apple': 3, 'banana': 2, 'cherry': 1})
print(word_count['apple'])  # Output: 3
print(word_count['orange'])  # Output: 0 (a missing key reads as 0, no KeyError)

常用操作:

c = Counter(['a', 'b', 'c', 'a', 'b', 'a'])

print(c.most_common(2))  # Output: [('a', 3), ('b', 2)]
print(sum(c.values()))   # Output: 6 (total number of counted items)

# update() adds the new observations to the existing counts in place.
c.update(['a', 'b', 'd'])
print(c)  # Output: Counter({'a': 4, 'b': 3, 'c': 1, 'd': 1})

# Arithmetic and set-like operators between two counters.
c1 = Counter(a=3, b=1)
c2 = Counter(a=1, b=2)
print(c1 + c2)   # Output: Counter({'a': 4, 'b': 3})
# Subtraction drops results that are zero or negative ('b' would be 1 - 2 = -1).
print(c1 - c2)   # Output: Counter({'a': 2})
print(c1 & c2)   # Output: Counter({'a': 1, 'b': 1}) (intersection: min(c1[x], c2[x]))
print(c1 | c2)   # Output: Counter({'a': 3, 'b': 2}) (union: max(c1[x], c2[x]))

3.namedtuple:命名元组

创建命名元组:

from collections import namedtuple

# Define a Point record type with two named fields.
Point = namedtuple('Point', ['x', 'y'])
p = Point(10, 20)

print(p.x, p.y)    # Output: 10 20
print(p[0], p[1])  # Output: 10 20 (positional indexing still works)

实用特性:

# Three equivalent ways to declare the fields; each line rebinds Person to a
# freshly created class with the same three fields.
Person = namedtuple('Person', 'name age city')  # Space-separated string
Person = namedtuple('Person', ['name', 'age', 'city'])  # List of names
Person = namedtuple('Person', 'name, age, city')  # Comma-separated string

# Create an instance.
person = Person('Alice', 25, 'New York')
print(f"{person.name}, {person.age}岁, 来自{person.city}")

# _asdict() converts the record to a dictionary.
print(person._asdict())  # Output: {'name': 'Alice', 'age': 25, 'city': 'New York'}

# _replace() returns a new instance with the given fields changed; the
# original tuple is immutable and is left untouched.
new_person = person._replace(age=26, city='Boston')
print(new_person)  # Output: Person(name='Alice', age=26, city='Boston')

4.deque:双端队列

from collections import deque

# Create a double-ended queue.
d = deque([1, 2, 3])

# Operations on both ends.
d.append(4)        # Add on the right: deque([1, 2, 3, 4])
d.appendleft(0)    # Add on the left: deque([0, 1, 2, 3, 4])
d.pop()            # Remove from the right, returns 4
d.popleft()        # Remove from the left, returns 0

print(d)  # Output: deque([1, 2, 3])

# Other useful methods.
d.extend([4, 5])       # Extend on the right: deque([1, 2, 3, 4, 5])
# extendleft() pushes items one at a time, so they end up in reverse order.
d.extendleft([0, -1])  # Extend on the left: deque([-1, 0, 1, 2, 3, 4, 5])
d.rotate(2)            # Rotate right by 2: deque([4, 5, -1, 0, 1, 2, 3])

5.OrderedDict:有序字典(Python 3.7+ 中普通dict已有序)

from collections import OrderedDict

# Keys are kept in insertion order.
od = OrderedDict()
od['z'] = 1
od['a'] = 2
od['c'] = 3

print(list(od.keys()))  # Output: ['z', 'a', 'c']

# Move an existing key to the end of the ordering.
od.move_to_end('a')
print(list(od.keys()))  # Output: ['z', 'c', 'a']

6. 实际应用场景

场景1:数据分组(defaultdict)

from collections import defaultdict

# Group the words by their first letter.
words = ["apple", "banana", "cherry", "date", "elderberry", "fig"]
grouped = defaultdict(list)

for word in words:
    # A first letter not seen before gets a fresh empty list automatically.
    grouped[word[0]].append(word)

print(dict(grouped))
# Output: {'a': ['apple'], 'b': ['banana'], 'c': ['cherry'], 'd': ['date'], 'e': ['elderberry'], 'f': ['fig']}

场景2:词频分析(Counter)

from collections import Counter
import re

text = "apple banana apple cherry banana apple date apple elderberry"
# \w+ matches each run of word characters. The backslash is required: the
# pattern r'w+' would only match literal "w" characters and find no words here.
words = re.findall(r'\w+', text.lower())

word_freq = Counter(words)
print("最常见单词:", word_freq.most_common(3))
# Output: 最常见单词: [('apple', 4), ('banana', 2), ('cherry', 1)]

# Character frequency, ignoring the spaces between the words.
char_freq = Counter(text.replace(' ', ''))
print("最常见字符:", char_freq.most_common(5))

场景3:数据处理(namedtuple)

# defaultdict is imported here as well so the snippet runs on its own; it is
# used below to collect salaries per department.
from collections import defaultdict, namedtuple

# Record layout for one employee.
Employee = namedtuple('Employee', ['id', 'name', 'department', 'salary'])

employees = [
    Employee(1, 'Alice', 'IT', 80000),
    Employee(2, 'Bob', 'HR', 65000),
    Employee(3, 'Charlie', 'IT', 90000)
]

# Collect every salary under its department name.
dept_salaries = defaultdict(list)
for emp in employees:
    dept_salaries[emp.department].append(emp.salary)

# Report the mean salary of each department.
for dept, salaries in dept_salaries.items():
    avg = sum(salaries) / len(salaries)
    print(f"{dept}部门平均薪资: ${avg:,.2f}")

7. 高级技巧

技巧1:嵌套 defaultdict

from collections import defaultdict

# Two-level mapping: a missing outer key creates an inner defaultdict(int),
# and a missing inner key starts at 0.
nested = defaultdict(lambda: defaultdict(int))

data = [('A', 'X', 1), ('A', 'Y', 2), ('B', 'X', 3), ('A', 'X', 4)]

for category, subcategory, value in data:
    # Accumulate the value under (category, subcategory).
    nested[category][subcategory] += value

print(dict(nested['A']))  # Output: {'X': 5, 'Y': 2}

技巧2:Counter 的高级统计

from collections import Counter

# Count how many times each element occurs.
data = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]
counter = Counter(data)

# Keep only the elements that occur more than once.
common = {item: count for item, count in counter.items() if count > 1}
print(common)  # Output: {2: 2, 3: 3, 4: 4}

# Express each count as a percentage of all observations.
total = sum(counter.values())
percentages = {item: count/total * 100 for item, count in counter.items()}
print(percentages)

今日练习

练习1:使用 defaultdict

# 统计句子中每个单词的长度分布
sentence = "the quick brown fox jumps over the lazy dog"
# 创建 {单词长度: [单词列表]} 的映射
# 在此编写你的代码

练习2:使用 Counter

# 分析文本,找出最常用的3个单词和最常用的3个字符
text = "Python is awesome and Python is powerful"
# 在此编写你的代码

练习3:使用 namedtuple

# 创建学生成绩记录系统
# 定义Student结构: name, math_score, english_score, science_score
# 计算每个学生的平均分和总分
# 在此编写你的代码

练习答案:

# Exercise 1 answer: map each word length to the words of that length.
from collections import defaultdict
sentence = "the quick brown fox jumps over the lazy dog"
length_map = defaultdict(list)
for word in sentence.split():
    # Repeated words (e.g. "the") are appended once per occurrence.
    length_map[len(word)].append(word)
print(dict(length_map))

# Exercise 2 answer: the three most frequent words and characters,
# compared case-insensitively and ignoring spaces.
from collections import Counter

text = "Python is awesome and Python is powerful"
lowered = text.lower()  # Lowercase once and reuse for both tallies.
words = lowered.split()
chars = [ch for ch in lowered if ch != ' ']

word_counter = Counter(words)
char_counter = Counter(chars)

print("常用单词:", word_counter.most_common(3))
print("常用字符:", char_counter.most_common(3))

# Exercise 3 answer: print each student's total and average score.
from collections import namedtuple

Student = namedtuple('Student', ['name', 'math', 'english', 'science'])

students = [
    Student(name='Alice', math=85, english=92, science=78),
    Student(name='Bob', math=76, english=88, science=90),
    Student(name='Charlie', math=92, english=95, science=89),
]

for student in students:
    # Gather the three subject scores so total and average share one source.
    scores = (student.math, student.english, student.science)
    total = sum(scores)
    average = total / len(scores)
    print(f"{student.name}: 总分{total}, 平均分{average:.1f}")

今日总结

  • defaultdict: 处理缺失键的字典,避免KeyError
  • Counter: 高效的计数工具,支持各种统计操作
  • namedtuple: 创建可读性强的数据结构
  • deque: 高效的双端队列操作
  • OrderedDict: 保持插入顺序的字典

选择指南:

  • 需要计数 → Counter
  • 需要处理缺失键 → defaultdict
  • 需要命名字段 → namedtuple
  • 需要高效队列操作 → deque

这些工具能让你的代码更简洁、高效和 Pythonic!明天我们将学习上下文管理器与 with 语句。

© 版权声明
THE END
如果内容对您有所帮助,就支持一下吧!
点赞0 分享
咖啡暖暖的的头像 - 鹿快
评论 共1条

请登录后发表评论