Python筛选A股高价值股票并可视化展示

目的:从A股中筛选高价值股票(连续5年ROE大于15%),并对TOP10标的进行排序和可视化展示。


详细代码:

import baostock as bs
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from datetime import datetime
import time
import warnings
# Suppress every warning category for the rest of the process.
warnings.filterwarnings('ignore')

# Font fallback list for sans-serif text so that the Chinese chart labels used
# below can be rendered (the first font that is installed wins).
matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS', 'DejaVu Sans']
# Draw negative numbers with an ASCII hyphen instead of the Unicode minus sign,
# which CJK fonts such as SimHei frequently lack.
matplotlib.rcParams['axes.unicode_minus'] = False

class ROEAnalyzer:
    """Screen stocks whose ROE stayed above 15% for five consecutive years.

    Workflow (see ``run_analysis``): load the stock list from a CSV file,
    log in to baostock, query the yearly ROE of every stock, keep the stocks
    whose ROE exceeds the threshold in every year, then save the result to a
    CSV file and draw summary charts.

    The stock-list CSV must contain a ``股票代码`` column; a ``股票名称``
    column is used for display names when present.
    """

    # Fiscal years analysed when the caller does not supply its own list.
    DEFAULT_YEARS = (2020, 2021, 2022, 2023, 2024)
    # Minimum ROE as a fraction (0.15 == 15%); the comparison is strict.
    ROE_THRESHOLD = 0.15
    # (code prefix, board name) pairs. Order matters: 'sh.688' must be tested
    # before the more general 'sh.6'.
    SECTOR_PREFIXES = (
        ('sh.688', '科创板'),
        ('sh.6', '上证主板'),
        ('sz.0', '深证主板'),
        ('sz.3', '创业板'),
    )

    def __init__(self, stocklist_file='all_stocklist_simple.csv'):
        """Create an analyzer reading its stock universe from *stocklist_file*."""
        self.stocklist_file = stocklist_file
        self.stock_list = []
        self.results = []
        self.login_success = False
        # code -> name mapping, filled by load_stock_list() or lazily on the
        # first name lookup; None means "not loaded yet".
        self._name_cache = None

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------
    @classmethod
    def _classify_sector(cls, stock_code):
        """Return the board name for *stock_code*, or None if it matches none."""
        code = str(stock_code)
        for prefix, sector in cls.SECTOR_PREFIXES:
            if code.startswith(prefix):
                return sector
        return None

    @staticmethod
    def _build_name_map(df):
        """Build a ``{code: name}`` dict from a stock-list DataFrame.

        Returns an empty dict when either column is missing. Rows whose name
        is NaN are skipped; for duplicated codes the first row wins.
        """
        if '股票代码' not in df.columns or '股票名称' not in df.columns:
            return {}
        names = {}
        for code, name in zip(df['股票代码'], df['股票名称']):
            if pd.notna(name):
                names.setdefault(code, str(name))
        return names

    @staticmethod
    def _evaluate_roe(stock_code, stock_name, roe_data, years, threshold=0.15):
        """Apply the "every year above *threshold*" rule to fetched ROE data.

        Args:
            stock_code: Code stored in the result record.
            stock_name: Display name stored in the result record.
            roe_data: Mapping ``year -> ROE`` with ROE as a fraction.
            years: Years that must all be present and above *threshold*.
            threshold: Minimum ROE as a fraction (strictly greater than).

        Returns:
            The result dict (ROE values converted to percent, rounded to two
            decimals) when the stock qualifies, otherwise None. None is also
            returned for an empty *years* sequence.
        """
        if not years:
            return None

        roe_values = []
        for year in years:
            roe = roe_data.get(year)
            # "not roe > threshold" (rather than "roe <= threshold") also
            # rejects NaN, which compares False against everything.
            if roe is None or not roe > threshold:
                return None
            roe_values.append(roe)

        avg_roe = (sum(roe_values) / len(roe_values)) * 100
        return {
            '股票代码': stock_code,
            '股票名称': stock_name,
            '平均ROE(%)': round(avg_roe, 2),
            'ROE数据': {year: round(roe_data[year] * 100, 2) for year in years},
            'ROE值列表': [round(roe * 100, 2) for roe in roe_values],
            '年份列表': years
        }

    # ------------------------------------------------------------------
    # Data loading / baostock session
    # ------------------------------------------------------------------
    def load_stock_list(self):
        """Load the stock codes (and the name cache) from the CSV file.

        Returns:
            True on success, False if the file cannot be read or lacks the
            ``股票代码`` column.
        """
        print(f"正在加载股票列表: {self.stocklist_file}")

        try:
            df = pd.read_csv(self.stocklist_file, encoding='utf-8-sig')
            print(f"成功加载 {len(df)} 只股票")

            self.stock_list = df['股票代码'].tolist()
            # Cache the names now so later lookups never re-read the file.
            self._name_cache = self._build_name_map(df)
            return True

        except Exception as e:
            print(f"加载股票列表失败: {e}")
            return False

    def login_baostock(self):
        """Log in to baostock; return True on success, False otherwise."""
        try:
            lg = bs.login()
            if lg.error_code == '0':
                print("登录成功")
                self.login_success = True
                return True
            print(f"登录失败: {lg.error_msg}")
            return False
        except Exception as e:
            print(f"登录异常: {e}")
            return False

    def logout_baostock(self):
        """Log out of baostock if a session is currently open."""
        if self.login_success:
            bs.logout()
            # Reset the flag so a later screening run logs in again.
            self.login_success = False
            print("已登出")

    def get_roe_data(self, stock_code, years):
        """Fetch the ``roeAvg`` value of *stock_code* for each year in *years*.

        Queries ``bs.query_profit_data`` with ``quarter=4`` (the original
        author's note: annual-report data) and converts the value to float.
        Per that note, roeAvg is a fraction, e.g. 0.15 means 15%.

        Returns:
            Dict ``year -> ROE``. Years whose query fails, returns no rows or
            yields an unparsable value are simply absent.
        """
        roe_data = {}

        for year in years:
            try:
                rs = bs.query_profit_data(
                    code=stock_code,
                    year=year,
                    quarter=4
                )

                if rs.error_code == '0':
                    data = rs.get_data()
                    if not data.empty and 'roeAvg' in data.columns:
                        value = data.iloc[0]['roeAvg']
                        if pd.notna(value):
                            roe_data[year] = float(value)
            except Exception:
                # Deliberately best-effort: a failed year is treated as
                # missing data, which makes the stock fail the screen.
                pass

            # Throttle requests to avoid hammering the data service.
            time.sleep(0.03)

        return roe_data

    def analyze_stock_roe(self, stock_code, stock_name="未知", years=None):
        """Fetch and evaluate the ROE history of a single stock.

        Args:
            stock_code: baostock-style code such as ``'sh.600519'``.
            stock_name: Display name stored in the result.
            years: Years to check; defaults to ``DEFAULT_YEARS`` (2020-2024).

        Returns:
            A result dict (see ``_evaluate_roe``) if the ROE is above the
            threshold in every year, otherwise None.
        """
        years = list(self.DEFAULT_YEARS) if years is None else list(years)
        roe_data = self.get_roe_data(stock_code, years)
        return self._evaluate_roe(
            stock_code, stock_name, roe_data, years, threshold=self.ROE_THRESHOLD
        )

    def get_stock_name_from_code(self, stock_code):
        """Return the display name of *stock_code*, or ``"未知"`` if unknown.

        The stock-list CSV is consulted first (read at most once and cached);
        if the code is not found there, baostock's ``query_stock_basic`` is
        tried as a fallback.
        """
        cache = getattr(self, '_name_cache', None)
        if cache is None:
            try:
                cache = self._build_name_map(
                    pd.read_csv(self.stocklist_file, encoding='utf-8-sig')
                )
            except (OSError, ValueError):
                # Missing, empty or unparsable file: fall back to baostock.
                cache = {}
            self._name_cache = cache

        name = cache.get(stock_code)
        if name is not None:
            return name

        try:
            rs = bs.query_stock_basic(code=stock_code)
            if rs.error_code == '0':
                data = rs.get_data()
                if not data.empty and 'code_name' in data.columns:
                    return data.iloc[0]['code_name']
        except Exception:
            # Name lookup is cosmetic; never let it abort the screening.
            pass

        return "未知"

    # ------------------------------------------------------------------
    # Screening
    # ------------------------------------------------------------------
    def screen_high_roe_stocks(self):
        """Analyse every loaded stock and return the qualifying ones.

        Logs in to baostock first if necessary.

        Returns:
            List of result dicts sorted by average ROE, highest first; an
            empty list if the login fails.
        """
        print("\n开始筛选高ROE股票...")
        print("筛选条件: 连续5年(2020-2024)ROE>15%")
        print(f"分析股票数量: {len(self.stock_list)}")

        if not self.login_success and not self.login_baostock():
            return []

        results = []
        failed = 0
        total_stocks = len(self.stock_list)

        for i, stock_code in enumerate(self.stock_list):
            # Progress report every 50 stocks.
            if i % 50 == 0:
                print(f"已分析 {i}/{total_stocks} 只股票 ({i/total_stocks*100:.1f}%),已找到 {len(results)} 只符合条件的股票")

            try:
                stock_name = self.get_stock_name_from_code(stock_code)
                roe_info = self.analyze_stock_roe(stock_code, stock_name)
            except Exception:
                # One bad stock must not abort the whole run; count it so the
                # failure is at least visible in the summary below.
                failed += 1
                continue

            if roe_info:
                results.append(roe_info)

                # Announce every 10th hit.
                if len(results) % 10 == 0:
                    print(f"  找到第{len(results)}只: {stock_code} {stock_name}, 平均ROE: {roe_info['平均ROE(%)']}%")

        if failed:
            print(f"  有 {failed} 只股票分析时出错,已跳过")

        results.sort(key=lambda x: x['平均ROE(%)'], reverse=True)

        return results

    # ------------------------------------------------------------------
    # Output
    # ------------------------------------------------------------------
    def save_results_to_csv(self, results, filename=None):
        """Write *results* to a CSV file and print the top-10 table.

        One ``<year>年ROE(%)`` column is written for every year that occurs
        in the results' ``ROE数据`` (2020-2024 for the default screening).

        Args:
            results: Result dicts as produced by ``analyze_stock_roe``.
            filename: Target path; defaults to a timestamped file name in the
                current directory.

        Returns:
            The DataFrame that was written, or None if *results* is empty.
        """
        if not results:
            print("没有结果可以保存")
            return None

        # Derive the year columns from the data instead of hard-coding them.
        years = sorted({year for stock in results for year in stock['ROE数据']})

        data_list = []
        for stock in results:
            yearly = stock['ROE数据']
            row = {
                '股票代码': stock['股票代码'],
                '股票名称': stock['股票名称'],
                '平均ROE(%)': stock['平均ROE(%)'],
            }
            for year in years:
                row[f'{year}年ROE(%)'] = yearly.get(year, '')
            data_list.append(row)

        df = pd.DataFrame(data_list)

        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"high_roe_stocks_{timestamp}.csv"

        # utf-8-sig so spreadsheet applications detect the encoding.
        df.to_csv(filename, index=False, encoding='utf-8-sig')

        print(f"\n筛选结果已保存到: {filename}")
        print(f"共找到 {len(df)} 只符合条件的股票")

        print("\n前10只高ROE股票:")
        print("-" * 100)
        print(f"{'排名':<5} {'股票代码':<12} {'股票名称':<20} {'平均ROE(%)':<12} "
              + " ".join(f"{year:<8}" for year in years))
        print("-" * 100)

        for rank, stock in enumerate(results[:10], 1):
            yearly = stock['ROE数据']
            cells = []
            for year in years:
                value = yearly.get(year)
                # A missing year becomes a blank cell; applying a float format
                # to the '' placeholder would raise ValueError.
                if isinstance(value, (int, float)):
                    cells.append(f"{value:<8.2f}")
                else:
                    cells.append(f"{'':<8}")

            print(f"{rank:<5} {stock['股票代码']:<12} {str(stock['股票名称'])[:18]:<20} "
                  f"{stock['平均ROE(%)']:<12.2f} " + " ".join(cells))

        return df

    def visualize_top_stocks(self, results, top_n=10):
        """Draw a 2x2 summary figure, save it as PNG and show it.

        Panels: average ROE of the top stocks (bar), yearly ROE trend of the
        top 5 (line), distribution of average ROE over all results
        (histogram) and board distribution of all results (pie).

        Args:
            results: Result dicts sorted by average ROE, highest first.
            top_n: Number of stocks in the bar chart. If fewer results are
                available, at most 5 of them are shown.

        Returns:
            The file name of the saved chart, or None if there is nothing to
            plot.
        """
        if len(results) < top_n:
            print(f"只有 {len(results)} 只符合条件的股票,无法展示 {top_n} 只")
            top_n = min(len(results), 5)

        if top_n <= 0:
            print("没有股票可以可视化")
            return

        top_stocks = results[:top_n]

        fig, axes = plt.subplots(2, 2, figsize=(16, 12))

        # Panel 1: bar chart of the average ROE.
        ax1 = axes[0, 0]
        stock_labels = []
        avg_roes = []

        for stock in top_stocks:
            name = str(stock['股票名称'])
            if name != '未知':
                display_name = f"{name[:10]}\n({stock['股票代码']})"
            else:
                display_name = stock['股票代码']
            stock_labels.append(display_name)
            avg_roes.append(stock['平均ROE(%)'])

        bars = ax1.bar(range(len(stock_labels)), avg_roes, color='skyblue', edgecolor='black')
        ax1.set_xlabel('股票', fontsize=12)
        ax1.set_ylabel('平均ROE (%)', fontsize=12)
        ax1.set_title(f'连续5年ROE>15% Top {top_n} 股票 (2020-2024年)', fontsize=14, fontweight='bold')
        ax1.set_xticks(range(len(stock_labels)))
        ax1.set_xticklabels(stock_labels, rotation=45, ha='right', fontsize=10)
        ax1.axhline(y=15, color='red', linestyle='--', alpha=0.5, label='15%基准线')
        # Without a legend the baseline label above would never be displayed.
        ax1.legend()

        # Print the value on top of every bar.
        for bar, roe in zip(bars, avg_roes):
            height = bar.get_height()
            ax1.text(bar.get_x() + bar.get_width()/2., height + 0.5,
                    f'{roe:.1f}%', ha='center', va='bottom', fontsize=10)

        # Panel 2: yearly ROE trend, limited to the first 5 stocks.
        ax2 = axes[0, 1]
        trend_years = set()

        for stock in top_stocks[:5]:
            years = stock['年份列表']
            if len(stock['ROE值列表']) == len(years):
                ax2.plot(years, stock['ROE值列表'],
                        marker='o', linewidth=2,
                        label=f"{str(stock['股票名称'])[:10]} ({stock['股票代码']})")
                trend_years.update(years)

        ax2.set_xlabel('年份', fontsize=12)
        ax2.set_ylabel('ROE (%)', fontsize=12)
        ax2.set_title('Top 5 股票ROE年度趋势', fontsize=14, fontweight='bold')
        if trend_years:
            # Whole-year ticks only; the automatic locator may pick fractions.
            ax2.set_xticks(sorted(trend_years))
        # Draw the baseline before building the legend so its label is listed.
        ax2.axhline(y=15, color='red', linestyle='--', alpha=0.5, label='15%基准线')
        ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        ax2.grid(True, alpha=0.3)

        # Panel 3: histogram of the average ROE over all results.
        ax3 = axes[1, 0]
        all_roes = [stock['平均ROE(%)'] for stock in results]

        ax3.hist(all_roes, bins=20, color='lightgreen', edgecolor='black', alpha=0.7)
        ax3.set_xlabel('平均ROE (%)', fontsize=12)
        ax3.set_ylabel('股票数量', fontsize=12)
        ax3.set_title(f'全部 {len(results)} 只股票平均ROE分布', fontsize=14, fontweight='bold')
        ax3.axvline(x=15, color='red', linestyle='--', alpha=0.5, label='15%基准线')
        ax3.legend()

        # Panel 4: board distribution over all results.
        ax4 = axes[1, 1]

        # Pre-seeded so the pie slices keep a fixed board order.
        sector_counts = {
            '上证主板': 0,
            '科创板': 0,
            '深证主板': 0,
            '创业板': 0
        }

        for stock in results:
            sector = self._classify_sector(stock['股票代码'])
            if sector is not None:
                sector_counts[sector] += 1

        # Boards without any stock are left out of the pie.
        sectors = [k for k, v in sector_counts.items() if v > 0]
        counts = [v for v in sector_counts.values() if v > 0]
        colors = ['gold', 'lightcoral', 'lightskyblue', 'lightgreen']

        if sum(counts) > 0:
            ax4.pie(counts, labels=sectors, colors=colors[:len(sectors)],
                   autopct='%1.1f%%', startangle=90, shadow=True)
        else:
            ax4.text(0.5, 0.5, '无板块数据', ha='center', va='center', fontsize=14)
        ax4.set_title('高ROE股票板块分布', fontsize=14, fontweight='bold')

        plt.tight_layout()

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        chart_filename = f"high_roe_stocks_chart_{timestamp}.png"
        plt.savefig(chart_filename, dpi=300, bbox_inches='tight')

        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

        print(f"\n可视化图表已保存为: {chart_filename}")

        return chart_filename

    def _print_summary(self, results):
        """Print counts, ROE range/median and the board distribution."""
        print("\n" + "=" * 60)
        print("统计摘要:")
        print("=" * 60)
        print(f"总分析股票数: {len(self.stock_list)}")
        print(f"符合条件的股票数: {len(results)}")
        print(f"筛选通过率: {len(results)/len(self.stock_list)*100:.2f}%")

        roe_values = [stock['平均ROE(%)'] for stock in results]
        print(f"平均ROE范围: {min(roe_values):.1f}% - {max(roe_values):.1f}%")
        print(f"平均ROE中位数: {np.median(roe_values):.1f}%")

        print("\n板块分布:")
        sector_counts = {}
        for stock in results:
            sector = self._classify_sector(stock['股票代码'])
            if sector is not None:
                sector_counts[sector] = sector_counts.get(sector, 0) + 1

        for sector, count in sector_counts.items():
            print(f"  {sector}: {count}只 ({count/len(results)*100:.1f}%)")

    def _print_empty_result_diagnostics(self):
        """Explain an empty result and probe a few well-known stocks."""
        print("\n未找到符合条件的股票")
        print("可能的缘由:")
        print("1. 2024年年报数据可能尚未完全公布")
        print("2. 部分股票可能某些年份数据缺失")
        print("3. 筛选条件较为严格")

        print("\n测试几只股票的ROE数据...")
        test_codes = ['sh.600519', 'sz.000858', 'sz.000333']  # Moutai, Wuliangye, Midea

        for test_code in test_codes:
            test_name = self.get_stock_name_from_code(test_code)
            test_result = self.analyze_stock_roe(test_code, test_name)

            if test_result:
                print(f"\n{test_code} {test_name}:")
                print(f"  平均ROE: {test_result['平均ROE(%)']}%")
                for year, roe in test_result['ROE数据'].items():
                    print(f"  {year}年: {roe}%")
            else:
                print(f"\n{test_code}: 不符合条件或数据不全")

    def run_analysis(self):
        """Run the complete pipeline: load, log in, screen, save, plot, report.

        Returns None. The baostock session is always closed once the login
        succeeded, even if a later step raises.
        """
        print("=" * 80)
        print("A股连续5年ROE>15%股票筛选与分析程序")
        print("数据年份: 2020-2024年")
        print(f"股票列表: {self.stocklist_file}")
        print("=" * 80)

        if not self.load_stock_list():
            print("无法加载股票列表,程序退出")
            return

        if not self.login_baostock():
            print("登录失败,程序退出")
            return

        try:
            print(f"\n开始分析所有 {len(self.stock_list)} 只股票...")
            print("注意:这可能需要较长时间,请耐心等待...")

            start_time = time.time()
            results = self.screen_high_roe_stocks()
            end_time = time.time()

            print(f"\n筛选完成!耗时: {end_time-start_time:.1f}秒")

            if results:
                self.save_results_to_csv(results)
                self.visualize_top_stocks(results, top_n=10)
                self._print_summary(results)
            else:
                self._print_empty_result_diagnostics()

        finally:
            self.logout_baostock()


# Script entry point: print the usage banner, then run the full analysis.
if __name__ == "__main__":
    banner = "=" * 80
    intro_lines = (
        banner,
        "A股连续5年ROE>15%股票筛选程序",
        "说明:",
        "1. 使用2020-2024年数据",
        "2. 从all_stocklist_simple.csv读取所有股票",
        "3. 筛选连续5年ROE>15%的股票",
        "4. 保存结果并可视化前10名",
        banner,
    )
    for intro_line in intro_lines:
        print(intro_line)

    # Default constructor arguments: reads all_stocklist_simple.csv.
    analyzer = ROEAnalyzer()
    analyzer.run_analysis()

运行效果:

Python筛选A股高价值股票并可视化展示

Python筛选A股高价值股票并可视化展示

Python筛选A股高价值股票并可视化展示

总结:通过调用baostock接口,筛选出110家连续5年ROE大于15%的高价值股票,并将结果保存为CSV文件(观察仓);同时对TOP10标的(核心标的)进行可视化展示,并统计了入选股票的板块分布。

欢迎大家关注,点赞,收藏; 有兴趣交流的小伙伴可以私信沟通哦。

© 版权声明

相关文章

暂无评论

none
暂无评论...