查看进程使用时间
# Show when a process started (lstart) and how long it has run (etime).
# Usage: replace xxxx with the target PID.
# Fix: the original used `ps -efp xxxx` — `-e` selects ALL processes, so the
# `-p` filter was effectively ignored; select by PID only.
ps -p xxxx -o lstart,etime
在线检测ModelID
DS FAQ
挂代理
# Route HTTP and HTTPS traffic through the Baidu internal forward proxy.
export http_proxy="http://agent.baidu.com:8891" https_proxy="http://agent.baidu.com:8891"
curl AS
# Smoke-test the DeepSeek v3.2 deployment through the AS gateway
# (OpenAI-compatible /v2/chat/completions): streaming on, max_tokens=1,
# temperature=0 — a cheap liveness / model-ID check.
# SECURITY NOTE(review): the Authorization bearer token is hardcoded in this
# note file; rotate it and load it from an env var or secrets file instead.
curl http://10.178.29.70:80/v2/chat/completions -H "Content-Type: application/json" -H "Authorization: Bearer bce-v3/ALTAK-seG05tfjoB4V83dl1I60P/df902ea834c2c21559ac5feb3a5720dc175c9733" -d '{"model": "aiak_bzz2_deepseek_v32_rd_test2", "safety_level":"none", "stream_options": {"include_usage": true}, "max_tokens": 1, "temperature": 0 ,"stream": true, "messages": [{"content":"why we say 42 is the end answer?", "role": "user"}]}'
curl 机内
# Query the inference service directly on the machine (v1 completions,
# single token, deterministic) — bypasses the external gateway.
curl http://127.0.0.1:8999/v1/completions \
  -H 'Authorization: Bearer EMPTY' \
  -H 'Content-Type: application/json' \
  -d '{ "model": "deepseek_v32", "prompt": "今天几号?", "max_tokens": 1, "temperature": 0}'
get_models
# For every feddeployment, print its name and its MODEL_ID value:
# awk '{print $1}' takes the name column, sed '1d' drops the header row, then
# for each resource the YAML is dumped and `sed -n '/MODEL_ID/{n;p;}'` prints
# the line AFTER the MODEL_ID key (i.e. its "value: ..." line).
kubectl get feddeployment | awk '{print $1}' | sed '1d' | xargs -I {} sh -c "echo {}; kubectl get feddeployment -oyaml {} | sed -n '/MODEL_ID/{n;p;}' | grep 'value'"
# Same query, scoped to the pd-test namespace.
kubectl -n pd-test get feddeployment | awk '{ print $1 }' | sed '1d' | xargs -I {} sh -c "echo {}; kubectl -n pd-test get feddeployment -oyaml {} | sed -n '/MODEL_ID/{n;p;}' | grep 'value'"
docker 登录
# Log in to the internal Docker registry.
# Fix: `-p PASSWORD` exposes the password in `ps` output and shell history;
# feed it via stdin instead.
# SECURITY NOTE(review): the password is still stored in plaintext in this
# note file — rotate the credential and remove it from here.
printf '%s' 'uuap@Onmyown599' | docker login -u liyanzhen01 --password-stdin iregistry.baidu-int.com
pip 内网 install
# Install from the Baidu internal PyPI mirror. --trusted-host is required
# because the mirror is served over plain HTTP.
# Fix: the original note omitted the package argument entirely; replace
# PACKAGE_NAME with the real package(s) to install.
pip install -i http://mirrors.baidubce.com/pypi/simple/ --trusted-host=mirrors.baidubce.com PACKAGE_NAME
SGLANG 官方启动 PD 分离
# Official SGLang prefill/decode (PD) disaggregated deployment, all on one box.
# Prefill server on :12345; KV transfer over the mlx5_bond_0 InfiniBand device;
# radix cache disabled on the prefill side.
nohup python3 -m sglang.launch_server --model-path /ssd1/models/Qwen3-30B-A3B --disaggregation-mode prefill --port 12345 --disaggregation-ib-device mlx5_bond_0 --disable-radix-cache > /tmp/prefill_sglang.log 2>&1 &
# Decode server on :12346 with a YaRN rope-scaling override (factor 4.0 over a
# 32768 base) to extend the context length to 131072.
# NOTE(review): --base-gpu 1 presumably places this instance starting at GPU 1
# so it does not share a device with the prefill server — confirm the flag.
nohup python3 -m sglang.launch_server --model-path /ssd1/models/Qwen3-30B-A3B --disaggregation-mode decode --port 12346 --base-gpu 1 --disaggregation-ib-device mlx5_bond_0 --json-model-override-args '{"rope_scaling":{"rope_type":"yarn","factor":4.0,"original_max_position_embeddings":32768}}' --context-length 131072 > /tmp/decode_sglang.log 2>&1 &
# Mini load balancer fronting the prefill/decode pair; clients talk to :8081.
nohup python3 -m sglang.srt.disaggregation.mini_lb --prefill http://127.0.0.1:12345 --decode http://127.0.0.1:12346 --host 0.0.0.0 --port 8081 > /tmp/sglang.log 2>&1 &
macOS 的docker build
# Cross-build an amd64 image (needed when building from Apple-Silicon macOS)
# and push it straight to the internal registry.
docker buildx build \
  --platform linux/amd64 \
  --push \
  -f ./scripts/Dockerfile \
  -t iregistry.baidu-int.com/lyz/gpu-dev-env:latest \
  .
循环测试不同输入的 P 的性能
# Sweep input lengths (-if) to benchmark prefill performance at each size.
# Output for each run lands in ~/result_if_<len>.log.
# Fixes: quote variable expansions; indent the loop body for readability.
for ifv in 131070 98300 32700 65530 32000 16380 13310 10240 5120; do
  echo "Running with -if ${ifv} ..."
  aiakperf -r openai_chat \
    -a http://10.215.110.12:8000 \
    -M aiak_deepseek_h200_v3_rd_test1 \
    -m /ssd3/models/huggingface.co/deepseek-ai/DeepSeek-V3/DeepSeek-V3.2-Exp \
    -w 1 -n 100 \
    -if "$ifv" \
    -of 1 \
    -d chat \
    -D ~/DATASET/aiak_dataset/144k.jsonl \
    > ~/result_if_${ifv}.log 2>&1
done
占 GPU 算力避免被超发任务
# Keep all GPUs busy so the scheduler does not over-commit this machine.
# 1. Fetch the stress tool.
git clone https://github.com/wilicc/gpu-burn.git
cd gpu-burn || exit 1
# 2. Build it.
make
# 3. Burn for 10 minutes (600 s) on every visible GPU.
# Fix: the original ran `./gpu_burn` with no duration, which only runs the
# default 10 seconds — pass the intended 600.
./gpu_burn 600
导出冲突的DB二进制文件
# Export both sides of a conflicted binary file during a merge.
# Fix: the stages were swapped — index stage :2 is "ours" (current/local
# branch) and stage :3 is "theirs" (the branch being merged in).
git show :2:data.db > data_local.db
git show :3:data.db > data_remote.db
# Pin the toolchain to CUDA 12.9 when multiple CUDA versions are installed.
ls -ld /usr/local/cuda*
# Force the CUDA root to 12.9.
export CUDA_HOME="/usr/local/cuda-12.9"
# Put the 12.9 compiler first on PATH.
export PATH="${CUDA_HOME}/bin:${PATH}"
# Put the 12.9 libraries first on LD_LIBRARY_PATH.
# Fix: `${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}` leaves a trailing ':' when
# LD_LIBRARY_PATH is unset/empty; an empty entry makes the loader search the
# current directory. ${VAR:+:...} only appends the separator when needed.
export LD_LIBRARY_PATH="${CUDA_HOME}/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"