Deploying Models Locally with HuggingFace
Mr.Lee 2025-12-28 16:33:23 · AI · HuggingFace
This post walks through downloading and running a model locally with HuggingFace, using Gryphe/MythoMix-L2-13b as the example.
# Download the model
# Install the Git Xet plugin
❯ brew install git-xet
❯ git xet install
git-xet installed to global config!
# Clone the repository (requires your SSH public key to be registered on HuggingFace)
❯ git clone git@hf.co:Gryphe/MythoMix-L2-13b
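# If no SSH key is configured, public repos can also be cloned anonymously over
# HTTPS, using HuggingFace's standard repository URL layout:
❯ git clone https://huggingface.co/Gryphe/MythoMix-L2-13b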
# ============================================================
# Alternative: download with the hf CLI; models are cached under ~/.cache/huggingface/hub
❯ uvx hf download Gryphe/MythoMix-L2-13b
❯ uvx hf download LiquidAI/LFM2-2.6B-Exp
Downloading (incomplete total...): 0.00B [00:00, ?B/s]
Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.
Fetching 12 files: 100%|██████████████████████████████| 12/12 [07:54<00:00, 39.53s/it]
Download complete: 5.14GB [07:54, 2.93MB/s]
~/.cache/huggingface/hub/models--LiquidAI--LFM2-2.6B-Exp/snapshots/b4a1d375c7daaeb50bf495a950cc9604b5a5d005
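To clear the rate-limit warning above, authenticate before downloading. A minimal sketch, assuming you have already created an access token under huggingface.co/settings/tokens (hf_xxx below is a placeholder, not a real token):
# Either export the token for the current shell...
❯ export HF_TOKEN=hf_xxx
# ...or log in once and let the hf CLI store it
❯ uvx hf auth login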
# Use the model
# Create the project & add the required dependencies
❯ uv init local-llm-demo
❯ uv add transformers datasets evaluate accelerate
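After `uv add`, the dependencies are recorded in pyproject.toml, roughly as below; the version pins are illustrative and depend on when you run the command. Note that torch is not listed explicitly: it arrives transitively as a dependency of accelerate, which gives the pipelines a backend to run on.
[project]
name = "local-llm-demo"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = [
    "accelerate>=1.0",   # illustrative versions; uv records the actual ones
    "datasets>=3.0",
    "evaluate>=0.4",
    "transformers>=4.46",
]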
# Use the model (main.py)
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

def main():
    # test()
    test_llm("Qwen/Qwen3-4B-Instruct-2507")

def test_llm(model_name: str):
    # Load the weights and tokenizer from the local HF cache (downloads on first use)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
    result = generator("你好~")
    print(result)

def test():
    print("Hello from local-llm-demo!")
    # With no model specified, pipeline() falls back to a small default sentiment model
    classifier = pipeline("sentiment-analysis")
    result = classifier("I love this!")
    # print(result)
    model_name = "LiquidAI/LFM2-2.6B-Exp"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Split the text into tokens
    tokenized_text = tokenizer.tokenize("I love this!")
    # print(tokenized_text)
    # Encode the text (vectorize it into input IDs)
    encoding = tokenizer("We are very happy to show you the 🤗 Transformers library.")
    # print(encoding)
    # Batch-encode several texts, padded and truncated into PyTorch tensors
    pt_batch = tokenizer(
        ["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."],
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    print(pt_batch)

# Run with: uv run main.py
if __name__ == "__main__":
    main()
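For an instruct-tuned model like Qwen3-4B-Instruct-2507, passing a raw string works, but results are usually better when the model's chat template is applied. Recent transformers text-generation pipelines accept chat messages directly and apply the template themselves; a minimal sketch (the function name, prompt text, and max_new_tokens value are illustrative):
from transformers import pipeline

def chat_demo(model_name: str = "Qwen/Qwen3-4B-Instruct-2507"):
    # device_map="auto" places weights on GPU/MPS when available (needs accelerate)
    generator = pipeline("text-generation", model=model_name, device_map="auto")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Introduce HuggingFace Transformers in one sentence."},
    ]
    # The pipeline applies the model's chat template before generating
    result = generator(messages, max_new_tokens=128)
    # For chat input, generated_text is the message list including the assistant reply
    print(result[0]["generated_text"][-1]["content"])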
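Once a snapshot is sitting in ~/.cache/huggingface/hub (see the download step above), the script can run without touching the network; setting HF_HUB_OFFLINE=1 tells huggingface_hub to use only the local cache:
❯ HF_HUB_OFFLINE=1 uv run main.py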
# References
- https://huggingface.co/docs/transformers/zh/quicktour