Convert safetensors to GGUF


  • install python 3.10
  • install pip 25.1
  • prepare DeepSeek LLM models
  • prepare dev envs to make llama.cpp happy
  • converting safetensors to gguf format
  • quantization

install python 3.10

sudo apt install software-properties-common

sudo add-apt-repository ppa:deadsnakes/ppa

sudo apt update

sudo apt-cache policy python3.10

sudo apt install python3.10-venv python3.10-dev

ls /usr/bin/python*

sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1

sudo update-alternatives --config python

python -V

sudo apt remove --purge python3.13   # optional — caution: purging the distro's default Python can break system tools that depend on it

install pip 25.1

sudo apt remove python3-pip

sudo apt install python3.10-distutils

curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py

python --version

python get-pip.py

export PATH=~/.local/bin:$PATH

pip3 --version

prepare DeepSeek LLM models

DeepSeek-R1-Distill-Qwen-7B (15 GiB)

DeepSeek-R1-Distill-Qwen-1.5B (3.55 GiB)

prepare dev envs

cd ~/llama.cpp/

pip3 install multidict

pip3 install -r requirements.txt

converting safetensors to gguf

python convert_hf_to_gguf.py ~/DeepSeek-R1-Distill-Qwen-1.5B

$ ls -lah ~/DeepSeek-R1-Distill-Qwen-1.5B/DeepSeek-R1-Distill-Qwen-1.5B-F16.gguf

-rw-rw-r-- 1 3.4G 4 28 12:24 ~/DeepSeek-R1-Distill-Qwen-1.5B/DeepSeek-R1-Distill-Qwen-1.5B-F16.gguf

quantize from F16 to Q8_0

./llama-quantize ~/DeepSeek-R1-Distill-Qwen-1.5B/DeepSeek-R1-Distill-Qwen-1.5B-F16.gguf ~/DeepSeek-R1-Distill-Qwen-1.5B/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf q8_0

$ ls -lah ~/DeepSeek-R1-Distill-Qwen-1.5B/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf

-rw-rw-r-- 1 1.8G 4 30 09:00 ~/DeepSeek-R1-Distill-Qwen-1.5B/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf