本文目录导读:
下面详细介绍几种常见的 Python 结果统计案例及其实现方法:
基础数据统计
1.1 统计列表元素出现次数
# 方法1:使用字典统计
def count_elements_basic(data):
    """Count how many times each element occurs in ``data``.

    Basic version that relies only on a plain ``dict``.

    Args:
        data: Iterable of hashable items.

    Returns:
        A dict mapping each distinct item to its number of occurrences,
        with keys in first-seen order.
    """
    count_dict = {}
    for item in data:
        # dict.get supplies 0 the first time an item is encountered.
        count_dict[item] = count_dict.get(item, 0) + 1
    return count_dict
# 方法2:使用collections.Counter
from collections import Counter
def count_elements_counter(data):
    """Count element occurrences in ``data`` using ``collections.Counter``.

    Args:
        data: Iterable of hashable items.

    Returns:
        A ``Counter`` mapping each distinct item to its number of occurrences.
    """
    return Counter(data)
# Example: both counting approaches applied to the same score list.
scores = [85, 92, 78, 92, 85, 90, 85, 92, 88, 90]
print(f"基础统计: {count_elements_basic(scores)}")
print(f"Counter统计: {count_elements_counter(scores)}")
# The second line shows: Counter({85: 3, 92: 3, 90: 2, 78: 1, 88: 1})
考试成绩统计案例
def exam_statistics(scores):
    """Summarise a collection of exam scores.

    Args:
        scores: Iterable of numeric scores (ints or floats).

    Returns:
        An empty dict when ``scores`` is empty. Otherwise a dict with:
        '总分' (sum), '平均分' (mean rounded to 2 decimals), '中位数' (median),
        '众数' (most frequent score; the first one seen wins ties),
        '最高分' (max), '最低分' (min), '分数段分布' (count per grade band)
        and '标准差' (population standard deviation rounded to 2 decimals).
    """
    from collections import Counter

    # Materialise once so generators and other one-shot iterables work too.
    scores = list(scores)
    n = len(scores)
    if n == 0:
        return {}

    total = sum(scores)
    average = total / n

    # Median: middle element, or the mean of the two middle elements.
    sorted_scores = sorted(scores)
    mid = n // 2
    if n % 2 == 0:
        median = (sorted_scores[mid - 1] + sorted_scores[mid]) / 2
    else:
        median = sorted_scores[mid]

    mode = Counter(scores).most_common(1)[0][0]

    # Half-open bands so fractional scores such as 89.5 or 69.5 always land
    # in exactly one band; for integer scores the result is unchanged.
    grade_distribution = {
        '优秀(90-100)': sum(1 for s in scores if 90 <= s <= 100),
        '良好(80-89)': sum(1 for s in scores if 80 <= s < 90),
        '中等(70-79)': sum(1 for s in scores if 70 <= s < 80),
        '及格(60-69)': sum(1 for s in scores if 60 <= s < 70),
        '不及格(<60)': sum(1 for s in scores if s < 60),
    }

    # Population variance (divides by n, not n - 1).
    variance = sum((x - average) ** 2 for x in scores) / n
    std_dev = variance ** 0.5

    return {
        '总分': total,
        '平均分': round(average, 2),
        '中位数': median,
        '众数': mode,
        '最高分': max(scores),
        '最低分': min(scores),
        '分数段分布': grade_distribution,
        '标准差': round(std_dev, 2),
    }
# Usage example: print every statistic for a sample class.
exam_scores = [78, 92, 85, 60, 95, 88, 72, 80, 90, 55, 85, 92]
result = exam_statistics(exam_scores)
for key, value in result.items():
    print(f"{key}: {value}")
文本分析统计
from collections import Counter
import re
def text_statistics(text):
    """Compute basic statistics for a piece of text.

    Args:
        text: The string to analyse.

    Returns:
        A dict with the keys:
        '总字符数' - number of characters, whitespace included;
        '总单词数' - number of whitespace-separated tokens;
        '总句子数' - number of non-blank segments delimited by '.', '!' or '?';
        '平均单词长度' - mean length of the extracted words (2 decimals),
        or 0 when there are none;
        '最常用单词' - up to 10 ``(word, count)`` pairs, most frequent first;
        '词汇量' - number of distinct lower-cased words;
        '字符频率' - the 5 most frequent alphabetic characters (lower-cased).
    """
    char_count = len(text)
    word_count = len(text.split())

    # re.split leaves an empty trailing piece when the text ends with a
    # terminator, so only pieces with visible content count as sentences.
    sentence_count = sum(
        1 for piece in re.split(r'[.!?]+', text) if piece.strip()
    )

    # Keep in-word apostrophes so "Python's" stays one token instead of
    # being split into "python" and a stray "s".
    words = re.findall(r"\b\w+(?:['’]\w+)*\b", text.lower())
    word_freq = Counter(words)
    common_words = word_freq.most_common(10)

    # Character frequencies over letters only: digits, punctuation and
    # whitespace are all skipped.
    char_freq = Counter(c.lower() for c in text if c.isalpha())

    if words:
        average_word_length = round(sum(len(w) for w in words) / len(words), 2)
    else:
        average_word_length = 0

    return {
        '总字符数': char_count,
        '总单词数': word_count,
        '总句子数': sentence_count,
        '平均单词长度': average_word_length,
        '最常用单词': common_words,
        '词汇量': len(word_freq),
        '字符频率': dict(char_freq.most_common(5)),
    }
# Example: analyse a short English paragraph.
sample_text = "Python is a powerful programming language. It is widely used in data science and machine learning. Python's simplicity makes it a great choice for beginners."
text_stats = text_statistics(sample_text)
for key, value in text_stats.items():
    print(f"{key}: {value}")
销售数据统计
def sales_statistics(sales_data):
    """Aggregate a list of sales records.

    Args:
        sales_data: Iterable of dicts shaped like
            ``{'product': 'A', 'amount': 100, 'date': '2024-01-01'}``.
            The first 7 characters of 'date' are used as the month key.

    Returns:
        A dict with '总订单数' (number of records), '总销售额' (sum of amounts),
        '平均订单金额' (mean amount, 2 decimals, 0 when empty),
        '产品统计' (per product: '销量', '销售额', '平均价格'),
        '月度统计' (revenue per 'YYYY-MM'), and '最畅销产品' /
        '销售额最高产品' (product names; ``None`` when there are no records,
        first-seen product wins ties).
    """
    # Materialise once so one-shot iterables can be traversed and counted.
    sales_data = list(sales_data)
    total_sales = len(sales_data)
    total_revenue = sum(item['amount'] for item in sales_data)

    product_stats = {}
    monthly_sales = {}
    for item in sales_data:
        stats = product_stats.setdefault(
            item['product'],
            {'销量': 0, '销售额': 0, '平均价格': 0},
        )
        stats['销量'] += 1
        stats['销售额'] += item['amount']

        month = item['date'][:7]  # 'YYYY-MM' prefix of the date string
        monthly_sales[month] = monthly_sales.get(month, 0) + item['amount']

    # Every product present has at least one sale, so the division is safe.
    for stats in product_stats.values():
        stats['平均价格'] = round(stats['销售额'] / stats['销量'], 2)

    # default=None keeps empty input from raising ValueError inside max().
    best_selling_product = max(
        product_stats,
        key=lambda name: product_stats[name]['销量'],
        default=None,
    )
    highest_revenue_product = max(
        product_stats,
        key=lambda name: product_stats[name]['销售额'],
        default=None,
    )

    return {
        '总订单数': total_sales,
        '总销售额': total_revenue,
        '平均订单金额': round(total_revenue / total_sales, 2) if total_sales else 0,
        '产品统计': product_stats,
        '月度统计': monthly_sales,
        '最畅销产品': best_selling_product,
        '销售额最高产品': highest_revenue_product,
    }
# Example: five orders across three products and three months.
sales_data = [
    {'product': 'A', 'amount': 100, 'date': '2024-01-15'},
    {'product': 'B', 'amount': 200, 'date': '2024-01-20'},
    {'product': 'A', 'amount': 150, 'date': '2024-02-10'},
    {'product': 'C', 'amount': 300, 'date': '2024-02-15'},
    {'product': 'B', 'amount': 180, 'date': '2024-03-05'}
]
sales_result = sales_statistics(sales_data)
for key, value in sales_result.items():
    print(f"\n{key}:")
    # Nested dicts are printed one entry per line; scalars on a single line.
    if isinstance(value, dict):
        for k, v in value.items():
            print(f" {k}: {v}")
    else:
        print(f" {value}")
实用工具函数
import numpy as np
import pandas as pd
def advanced_statistics(data):
    """Compute descriptive statistics with NumPy and pandas.

    Args:
        data: Non-empty sequence of numbers.

    Returns:
        A dict with two entries:
        'NumPy统计' - mean, median, standard deviation, variance, min, max
        and the 25th/75th percentiles as plain Python numbers. The standard
        deviation and variance use NumPy's default (population, ddof=0).
        'Pandas描述统计' - ``Series.describe()`` converted to a dict; note that
        its 'std' is the sample standard deviation (ddof=1), so it differs
        from the NumPy value.

    Raises:
        ValueError: If ``data`` is empty.
    """
    array = np.array(data)
    if array.size == 0:
        # Fail early with a clear message instead of NumPy warnings followed
        # by an opaque reduction error.
        raise ValueError("data must contain at least one value")

    # Convert NumPy scalars to built-in numbers so the result prints cleanly
    # (no `np.float64(...)` wrappers) and serialises without surprises.
    basic_stats = {
        '均值': float(np.mean(array)),
        '中位数': float(np.median(array)),
        '标准差': float(np.std(array)),
        '方差': float(np.var(array)),
        '最小值': np.min(array).item(),
        '最大值': np.max(array).item(),
        '25%分位数': float(np.percentile(array, 25)),
        '75%分位数': float(np.percentile(array, 75)),
    }

    pandas_stats = pd.Series(data).describe()

    return {
        'NumPy统计': basic_stats,
        'Pandas描述统计': pandas_stats.to_dict(),
    }
# Example: describe the integers 1 through 10.
data = list(range(1, 11))
print(advanced_statistics(data))
可视化统计结果
import matplotlib.pyplot as plt
def visualize_statistics(data, title="统计结果可视化"):
    """Show a bar chart and a pie chart of value frequencies in ``data``.

    Args:
        data: Iterable of hashable category values.
        title: Overall figure title.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    from collections import Counter

    # Tally occurrences; keys and values are read in the same order.
    counter = Counter(data)
    items = list(counter.keys())
    frequencies = list(counter.values())

    # One row, two panels: bar chart on the left, pie chart on the right.
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    axes[0].bar(items, frequencies, color='skyblue')
    axes[0].set_title('频数分布')
    axes[0].set_xlabel('类别')
    axes[0].set_ylabel('频数')

    axes[1].pie(frequencies, labels=items, autopct='%1.1f%%')
    axes[1].set_title('占比分布')

    plt.suptitle(title)
    plt.tight_layout()
    plt.show()
# Example: ten category labels to plot.
data = list("ABACBADCAB")
# visualize_statistics(data)  # uncomment to display the charts
这些案例涵盖了Python结果统计的常见应用场景,您可以根据具体需求选择合适的方法,也可以组合使用这些功能来构建更复杂的统计分析系统。
标签: 统计案例