当前位置: 首页 > news >正文

Auto-Coder使用GPT-4o完成:用TabPFN模型构建一个预测未来3天股价涨跌的分类任务

通过akshare库获取股票数据,并转换成TabPFN模型可以识别、处理的格式;写一个完整的预处理示例,并构建一个预测未来 3 天股价涨跌的分类任务

用TabPFN这个模型构建一个预测未来 3 天股价涨跌的分类任务,进行预测并输出准确率

代码

import os
import sys
import pandas as pd
import akshare as ak
# try:
#     import akshare as ak
# except ImportError:
#     print("请先运行: pip install akshare")
#     sys.exit(1)


# Chinese-to-English column-name mapping applied to the fetched frame so the
# rest of the script can rely on one set of column names.
_COLUMN_RENAMES = {
    "日期": "date",
    "开盘": "open",
    "收盘": "close",
    "最高": "high",
    "最低": "low",
    "成交量": "volume",
    "成交额": "amount",
    "振幅": "amplitude",
    "涨跌幅": "pct_chg",
    "涨跌额": "chg",
    "换手率": "turnover",
}

# Exchange prefixes that may precede the 6-digit code (e.g. "sz000001").
_EXCHANGE_PREFIXES = ("sh", "sz", "bj")


def _strip_exchange_prefix(stock_code):
    """Return *stock_code* without a leading exchange prefix such as "sz"."""
    if stock_code[:2].lower() in _EXCHANGE_PREFIXES:
        return stock_code[2:]
    return stock_code


def fetch_stock_data(stock_code="000001", start_date="20150101", end_date=None):
    """Fetch daily forward-adjusted (qfq) price history for one stock.

    ``ak.stock_zh_a_daily`` is tried first; if it raises, the function falls
    back to ``ak.stock_zh_a_hist``.

    Args:
        stock_code: Symbol passed to akshare, e.g. "sz000001".
        start_date: Earliest date to keep ("YYYYMMDD"); falsy means no bound.
        end_date: Latest date to keep ("YYYYMMDD"); falsy means no bound.

    Returns:
        A DataFrame with English column names, sorted by ``date`` when that
        column exists, and with a ``pct_chg`` column (percent) derived from
        ``close`` if the interface did not supply one. An empty DataFrame is
        returned when both interfaces fail or no rows come back.
    """
    print(f"DEBUG: akshare version: {ak.__version__}")
    print(f"DEBUG: fetch_stock_data params: symbol={stock_code}, start_date={start_date}, end_date={end_date}")
    try:
        # Preferred interface.
        df = ak.stock_zh_a_daily(symbol=stock_code, adjust="qfq")
        print("DEBUG: 使用 ak.stock_zh_a_daily 成功")
        print("DEBUG: Columns before rename:", df.columns.tolist())
        print("DEBUG: Head before rename:\n", df.head())
        # This interface takes no date range here, so filter locally.
        if "date" in df.columns:
            df["date"] = pd.to_datetime(df["date"])
            if start_date:
                df = df[df["date"] >= pd.to_datetime(start_date)]
            if end_date:
                df = df[df["date"] <= pd.to_datetime(end_date)]
            df = df.sort_values("date").reset_index(drop=True)
    except Exception as e:
        # Deliberate best-effort: any failure of the first interface triggers
        # the fallback rather than aborting.
        print("ERROR: ak.stock_zh_a_daily() 调用异常,尝试 fallback 原接口")
        print(f"Exception: {e}")
        # stock_zh_a_hist is documented to take the bare 6-digit code, so a
        # prefixed symbol like "sz000001" is normalised first. Date bounds are
        # only passed when set, so the library defaults apply instead of None.
        hist_kwargs = {
            "symbol": _strip_exchange_prefix(stock_code),
            "period": "daily",
            "adjust": "qfq",
        }
        if start_date:
            hist_kwargs["start_date"] = start_date
        if end_date:
            hist_kwargs["end_date"] = end_date
        try:
            df = ak.stock_zh_a_hist(**hist_kwargs)
            print("DEBUG: 使用 ak.stock_zh_a_hist 成功")
            print("DEBUG: Columns before rename:", df.columns.tolist())
            print("DEBUG: Head before rename:\n", df.head())
        except Exception as e2:
            print("ERROR: Exception occurred while fetching stock data by both interfaces!")
            print(f"Exception: {e2}")
            return pd.DataFrame()

    if df.empty:
        print("ERROR: Fetched DataFrame is empty! Check stock code, date range, or network/API issues.")
        return df

    # DataFrame.rename ignores mapping keys that are not present, so the full
    # mapping can be applied regardless of which interface answered.
    df = df.rename(columns=_COLUMN_RENAMES)
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"])
        df = df.sort_values("date").reset_index(drop=True)

    # Derive the percent change when the interface did not provide it.
    # pct_change() leaves the first row as NaN.
    if "pct_chg" not in df.columns and "close" in df.columns:
        df["pct_chg"] = df["close"].pct_change() * 100
    return df


def create_features_and_labels(df, n_past=5, n_future=3):
    """Build a lagged-feature table with a binary up/down label.

    Each sample is anchored at a position ``idx``. Its features are the
    ``close``, ``pct_chg`` and ``volume`` values of the ``n_past`` rows before
    ``idx``. Its label is 1 when the mean ``close`` of rows
    ``idx .. idx + n_future - 1`` is greater than the ``close`` of row
    ``idx - 1``, otherwise 0 (down or flat).

    Args:
        df: Frame with ``close``, ``pct_chg`` and ``volume`` columns in
            chronological order.
        n_past: Number of past rows used as features (>= 1).
        n_future: Number of future rows averaged for the label (>= 1).

    Returns:
        A DataFrame with ``3 * n_past`` feature columns named
        ``close_t-k`` / ``pct_chg_t-k`` / ``volume_t-k`` (k = n_past .. 1)
        followed by a ``label`` column. It has zero rows when ``df`` is too
        short to form a sample.

    Raises:
        ValueError: If ``n_past`` or ``n_future`` is smaller than 1.
        KeyError: If samples can be formed but a required column is missing.
    """
    if n_past < 1 or n_future < 1:
        raise ValueError("n_past and n_future must both be >= 1")

    lags = range(n_past, 0, -1)
    columns = (
        [f"close_t-{i}" for i in lags]
        + [f"pct_chg_t-{i}" for i in lags]
        + [f"volume_t-{i}" for i in lags]
    )

    # The last usable anchor is len(df) - n_future: its future window
    # [idx, idx + n_future) ends exactly at the final row, hence the "+ 1".
    anchors = range(n_past, len(df) - n_future + 1)
    if not anchors:
        # Nothing to build; return before touching columns so that an empty
        # frame without the expected columns still yields an empty result.
        empty = pd.DataFrame([], columns=columns)
        empty["label"] = []
        return empty

    # Look the columns up once instead of re-slicing the frame per sample.
    close = df["close"]
    pct_chg = df["pct_chg"]
    volume = df["volume"]

    rows = []
    labels = []
    for idx in anchors:
        past = slice(idx - n_past, idx)
        rows.append(
            close.iloc[past].tolist()
            + pct_chg.iloc[past].tolist()
            + volume.iloc[past].tolist()
        )
        future_close_mean = close.iloc[idx:idx + n_future].mean()
        curr_close = close.iloc[idx - 1]
        labels.append(1 if future_close_mean > curr_close else 0)

    feats_df = pd.DataFrame(rows, columns=columns)
    feats_df["label"] = labels
    return feats_df


def save_for_tabpfn(df, out_csv):
    """Write *df* to *out_csv* as CSV (header row, no index) and echo its head."""
    df.to_csv(out_csv, index=False)
    print(f"已保存至: {out_csv}")
    print(df.head())


def main():
    """Fetch one stock, build the feature/label table and save it as CSV."""
    # Print the first rows of the A-share code table to help judge which
    # symbol format to use; failure here is non-fatal.
    print("尝试拉取A股代码表,辅助symbol格式判断...")
    try:
        code_df = ak.stock_info_a_code_name()
        print("A股代码表前10条:")
        print(code_df.head(10))
        print("平安银行相关行:")
        print(code_df[code_df["code"].str.contains("000001")])
    except Exception as e:
        print(f"拉取A股代码表失败: {e}")

    stock_code = "sz000001"  # Ping An Bank
    start_date = "20150101"

    print("正在获取股票数据...")
    df = fetch_stock_data(stock_code, start_date)
    if df.empty:
        # Without data the later steps would only write an empty CSV.
        print("ERROR: No stock data fetched; skipping feature generation.")
        return

    print("正在生成特征与标签...")
    processed = create_features_and_labels(df, n_past=5, n_future=3)

    out_csv = f"{stock_code}_tabpfn.csv"
    print("正在保存为TabPFN格式...")
    save_for_tabpfn(processed, out_csv)
    print("预处理完成。")


if __name__ == "__main__":
    main()

预测

import os

# Temporary workaround for a /dev/null permission problem: point os.devnull
# at a regular file and make sure that file exists. Kept ahead of the
# remaining imports, matching the original statement order.
os.devnull = "/tmp/null"
if not os.path.exists("/tmp/null"):
    with open("/tmp/null", "w") as f:
        pass

import sys

import numpy as np
import pandas as pd

# Install tabpfn on the fly when it is missing.
try:
    from tabpfn import TabPFNClassifier
except ImportError:
    import subprocess

    subprocess.check_call([sys.executable, "-m", "pip", "install", "tabpfn"])
    from tabpfn import TabPFNClassifier

import torch

# Load the dataset. The CSV produced by the preprocessing script starts with
# a header row, so the default header handling is used; header=None would
# turn the column names into a data row and make every column a string.
DATA_PATH = "sz000001_tabpfn.csv"
data = pd.read_csv(DATA_PATH)
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Chronological split (first 80% train, last 20% test); no shuffling, so the
# test rows all come after the training rows.
split_idx = int(0.8 * len(X))
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

# Fit and predict with TabPFN. Use the GPU when one is available and fall
# back to the CPU otherwise, instead of hard-coding 'cuda'.
device = "cuda" if torch.cuda.is_available() else "cpu"
clf = TabPFNClassifier(device=device)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

accuracy = np.mean(y_pred == y_test)
print(f"Test Accuracy: {accuracy:.4f}")

最后生成的准确率:

Test Accuracy: 0.4812

这个准确率低于50%,反而可能证明程序是对的:短期涨跌本身接近随机,仅凭过去几天的量价特征得到接近50%的准确率是正常的;如果准确率异常地高,往往说明特征中泄漏了未来数据。

http://www.lqws.cn/news/188677.html

相关文章:

  • MySQL 性能调优入门 - 慢查询分析与索引优化基础
  • 行业案例 | ASOS 借助 Azure AI Foundry(国际版)为年轻时尚爱好者打造惊喜体验
  • [electron]预脚本不显示内联script
  • AIGC 基础篇 Python基础 01
  • [特殊字符]解决 “IDEA 登录失败。不支持早于 14.0 的 GitLab 版本” 问题的几种方法
  • Halcon透视矩阵
  • 深入了解JavaScript当中如何确定值的类型
  • 经典文献阅读之--PIVOT-R(原始驱动的航点感知世界模型用于机器人操作)
  • 蓝桥春晚魔术(欧拉定理+快速幂)
  • 今日行情明日机会——20250606
  • 车牌识别技术解决方案
  • 大模型在脑梗塞后遗症风险预测及治疗方案制定中的应用研究
  • 使用VTK还是OpenGL集成到qt程序里哪个好?
  • 电脑桌面太单调,用Python写一个桌面小宠物应用。
  • 计算机视觉与深度学习 | 基于MATLAB的相机标定
  • 【 *p取出内容 a得到地址】
  • 服务器健康摩尔斯电码:深度解读S0-S5状态指示灯
  • 循环神经网络(RNN)
  • 增量式网络爬虫通用模板
  • Numpy5——数组的扩充(相加、复制、广播)排序,形状调整
  • RabbitMQ 学习
  • android debug包和release包的区别
  • EDA断供危机下的冷思考:中国芯片设计软件的破局之道优雅草卓伊凡
  • 关于dropbear ssh服务
  • MySQL基本操作
  • 构建SDK-C Docker镜像
  • 服务器中日志分析的作用都有哪些
  • Spring整合MyBatis的两种方式
  • 二分算法
  • 【免杀】C2免杀技术(十六)反沙箱/反调试