Deploying Models Locally with HuggingFace
Mr.Lee 2025-12-28 16:33:23 · AI · HuggingFace
This post walks through downloading and running a model locally with HuggingFace, using Gryphe/MythoMix-L2-13b as the example.
# Download the model
# Install the Git Xet plugin
❯ brew install git-xet
❯ git xet install
git-xet installed to global config!
# Clone the repository (requires your SSH public key to be registered on HuggingFace)
❯ git clone git@hf.co:Gryphe/MythoMix-L2-13b
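# If no SSH key is configured, public repos can also be cloned anonymously over
# HTTPS, using HuggingFace's standard repository URL layout:
❯ git clone https://huggingface.co/Gryphe/MythoMix-L2-13b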
# ============================================================
# Alternative: download with the hf CLI; models are cached under ~/.cache/huggingface/hub
❯ uvx hf download Gryphe/MythoMix-L2-13b
❯ uvx hf download LiquidAI/LFM2-2.6B-Exp
Downloading (incomplete total...): 0.00B [00:00, ?B/s]
Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.
Fetching 12 files: 100%|██████████████████████████████| 12/12 [07:54<00:00, 39.53s/it]
Download complete: 5.14GB [07:54, 2.93MB/s]
~/.cache/huggingface/hub/models--LiquidAI--LFM2-2.6B-Exp/snapshots/b4a1d375c7daaeb50bf495a950cc9604b5a5d005
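To clear the rate-limit warning above, authenticate before downloading. A minimal sketch, assuming you have already created an access token under huggingface.co/settings/tokens (hf_xxx below is a placeholder, not a real token):
# Either export the token for the current shell...
❯ export HF_TOKEN=hf_xxx
# ...or log in once and let the hf CLI store it
❯ uvx hf auth login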
# Use the model
# Create the project & add the required dependencies
❯ uv init local-llm-demo
❯ uv add transformers datasets evaluate accelerate
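After `uv add`, the dependencies are recorded in pyproject.toml, roughly as below; the version pins are illustrative and depend on when you run the command. Note that torch is not listed explicitly: it arrives transitively as a dependency of accelerate, which gives the pipelines a backend to run on.
[project]
name = "local-llm-demo"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = [
    "accelerate>=1.0",   # illustrative versions; uv records the actual ones
    "datasets>=3.0",
    "evaluate>=0.4",
    "transformers>=4.46",
]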
# Use the model (main.py)
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

def main():
    # test()
    test_llm("Qwen/Qwen3-4B-Instruct-2507")

def test_llm(model_name: str):
    # Load the weights and tokenizer from the local HF cache (downloads on first use)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
    result = generator("你好~")
    print(result)

def test():
    print("Hello from local-llm-demo!")
    # With no model specified, pipeline() falls back to a small default sentiment model
    classifier = pipeline("sentiment-analysis")
    result = classifier("I love this!")
    # print(result)
    model_name = "LiquidAI/LFM2-2.6B-Exp"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Split the text into tokens
    tokenized_text = tokenizer.tokenize("I love this!")
    # print(tokenized_text)
    # Encode the text (vectorize it into input IDs)
    encoding = tokenizer("We are very happy to show you the 🤗 Transformers library.")
    # print(encoding)
    # Batch-encode several texts, padded and truncated into PyTorch tensors
    pt_batch = tokenizer(
        ["We are very happy to show you the 🤗 Transformers library.", "We hope you don't hate it."],
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    print(pt_batch)

# Run with: uv run main.py
if __name__ == "__main__":
    main()
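For an instruct-tuned model like Qwen3-4B-Instruct-2507, passing a raw string works, but results are usually better when the model's chat template is applied. Recent transformers text-generation pipelines accept chat messages directly and apply the template themselves; a minimal sketch (the function name, prompt text, and max_new_tokens value are illustrative):
from transformers import pipeline

def chat_demo(model_name: str = "Qwen/Qwen3-4B-Instruct-2507"):
    # device_map="auto" places weights on GPU/MPS when available (needs accelerate)
    generator = pipeline("text-generation", model=model_name, device_map="auto")
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Introduce HuggingFace Transformers in one sentence."},
    ]
    # The pipeline applies the model's chat template before generating
    result = generator(messages, max_new_tokens=128)
    # For chat input, generated_text is the message list including the assistant reply
    print(result[0]["generated_text"][-1]["content"])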
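Once a snapshot is sitting in ~/.cache/huggingface/hub (see the download step above), the script can run without touching the network; setting HF_HUB_OFFLINE=1 tells huggingface_hub to use only the local cache:
❯ HF_HUB_OFFLINE=1 uv run main.py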
# References
- https://huggingface.co/docs/transformers/zh/quicktour