《BUILDING MATH AGENTS WITH MULTI-TURN ITERATIVE PREFERENCE LEARNING》
SFT
conda create -n MDP python=3.10.9 -y
conda activate MDP
conda install nvidia/label/cuda-12.2.0::cuda-nvcc
pip3 install torch==2.1.2 torchvision torchaudio -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install /home/chenjh2/flash_attn-2.6.3+cu123torch2.1cxx11abiTRUE-cp310-cp310-linux_x86_64.whl
pip install huggingface-hub==0.24.7 --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple
git clone https://github.com/OpenAccess-AI-Collective/axolotl
cd axolotl
pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple
vim /src/axolotl/utils/bench.py
from pynvml import NVMLError
git clone https://github.com/lm-sys/FastChat.git
cd FastChat
pip install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install pydantic==2.6.3 pydantic-core==2.16.3 pydantic-settings==2.2.1 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install deepspeed -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install wandb -i https://pypi.tuna.tsinghua.edu.cn/simple
wandb login
901e4416d671cd0e712b05256590973d905828a7
huggingface-cli login
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
cd axolotl
CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" torchrun --nproc_per_node 8 --master_port 20001 -m axolotl.cli.train examples/gemma/qlora.yml
CUDA_VISIBLE_DEVICES="0" torchrun --nproc_per_node 1 --master_port 20001 -m axolotl.cli.train examples/gemma/qlora.yml
推理
M-DPO