Setup steps:
- Copy the local model: place the downloaded Baichuan2-7B-Chat under the models directory (a quick path check follows below).
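A quick sanity check that the copy landed where the loader built later in this post will look for it; the client resolves a local model as models/<model_name>:

```python
# Verify the local copy is at models/Baichuan2-7B-Chat, the path Baichuan_Client checks first.
import os

model_name = "Baichuan2-7B-Chat"
model_path = os.path.join("models", model_name)
print(model_path, "exists:", os.path.isdir(model_path))
if os.path.isdir(model_path):
    print(sorted(os.listdir(model_path))[:10])  # peek at the first few files
```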
- Modify modules\models\base_model.py: add a Baichuan member to class ModelType and a matching branch to the model-name dispatch.

```python
# In class ModelType, add the new member:
Baichuan = 16

# In the model-name dispatch, add:
elif "baichuan" in model_name_lower:
    model_type = ModelType.Baichuan
```
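For orientation, the two additions sit inside the ModelType enum and its name-matching logic in base_model.py. A rough sketch of the surrounding structure; the other members and branches are placeholders, only the two lines marked "new" are the actual change:

```python
# Rough sketch of modules\models\base_model.py after the edit (illustrative, not verbatim).
from enum import Enum

class ModelType(Enum):
    Unknown = -1
    # ... existing members ...
    Baichuan = 16                                  # new: enum member for Baichuan2

    @classmethod
    def get_type(cls, model_name: str):
        model_name_lower = model_name.lower()
        model_type = cls.Unknown
        # ... existing name checks ...
        if "baichuan" in model_name_lower:         # new: route any "baichuan*" model name here
            model_type = ModelType.Baichuan
        return model_type
```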
- Modify modules\models\models.py: add a ModelType.Baichuan branch to the get_model method.

```python
elif model_type == ModelType.Baichuan:
    from .Baichuan import Baichuan_Client
    model = Baichuan_Client(model_name, user_name=user_name)
```
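The new branch follows the same pattern as the existing clients in get_model: match the enum value, import the client lazily, and construct it. A simplified sketch of the dispatch; the real get_model takes more parameters, and only the Baichuan branch is the actual edit:

```python
# Simplified sketch of the dispatch in modules\models\models.py (illustrative, not verbatim).
def get_model(model_name, user_name=""):
    model_type = ModelType.get_type(model_name)
    model = None
    # ... existing branches for other model types ...
    if model_type == ModelType.Baichuan:                       # new branch
        from .Baichuan import Baichuan_Client                  # lazy import keeps the dependency optional
        model = Baichuan_Client(model_name, user_name=user_name)
    return model
```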
- Add a new file, modules\models\Baichuan.py:
```python
from modelscope import snapshot_download          # pulls the model from ModelScope when no local copy exists
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig
import logging
import colorama
import os
import torch
from datetime import datetime

from ..index_func import *
from ..presets import *
from ..utils import *
from ..presets import MODEL_METADATA
from .base_model import BaseLLMModel

# Module-level cache so the weights are loaded only once per process.
# (The CHATGLM_* names are kept from the ChatGLM client this file was adapted from.)
CHATGLM_TOKENIZER = None
CHATGLM_MODEL = None


class Baichuan_Client(BaseLLMModel):
    def __init__(self, model_name, user_name="") -> None:
        super().__init__(model_name=model_name, user=user_name)
        global CHATGLM_TOKENIZER, CHATGLM_MODEL
        print("__init__ Baichuan_Client")
        if CHATGLM_TOKENIZER is None or CHATGLM_MODEL is None:
            # Prefer a local copy under models/<model_name>; otherwise download from ModelScope.
            model_path = None
            if os.path.exists("models"):
                model_dirs = os.listdir("models")
                if model_name in model_dirs:
                    model_path = f"models/{model_name}"
            if model_path is not None:
                model_source = model_path
            else:
                model_source = snapshot_download(f"baichuan-inc/{model_name}", revision="v1.0.4")
            CHATGLM_TOKENIZER = AutoTokenizer.from_pretrained(
                model_source, device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
            quantified = False
            if "int4" in model_name:
                quantified = True  # int4-quantized checkpoint detected (flag is recorded but not used yet)
            model = AutoModelForCausalLM.from_pretrained(
                model_source, device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
            model.generation_config = GenerationConfig.from_pretrained(model_source)
            model = model.eval()
            CHATGLM_MODEL = model

    def _get_glm_style_input(self):
        # Split self.history into previous turns and the latest user message.
        print("_get_glm_style_input")
        print(f"the history is: {self.history}")
        history = [x["content"] for x in self.history]
        query = history.pop()
        print(f"the message is: {query}")
        return history, query

    def get_answer_at_once(self):
        # Non-streaming path: send the latest user message and return the full reply.
        print("get_answer_at_once")
        history, query = self._get_glm_style_input()
        messages = [{"role": "user", "content": query}]
        now = datetime.now()
        print("get_answer_at_once start" + "++++++++" + now.strftime("%Y-%m-%d %H:%M:%S"))
        response = CHATGLM_MODEL.chat(CHATGLM_TOKENIZER, messages)
        now = datetime.now()
        print("get_answer_at_once end" + "++++++++" + now.strftime("%Y-%m-%d %H:%M:%S"))
        print(f"the response is: {response}")
        return response, len(response)

    def get_answer_stream_iter(self):
        # Streaming path: iterate over the reply and yield progressively longer partial answers.
        history, query = self._get_glm_style_input()
        messages = [{"role": "user", "content": query}]
        result = ""
        now = datetime.now()
        print("get_answer_stream_iter start" + "++++++++" + now.strftime("%Y-%m-%d %H:%M:%S"))
        for response in CHATGLM_MODEL.chat(CHATGLM_TOKENIZER, messages):
            print(f"the response is: {response}")
            result += response
            yield result
        now = datetime.now()
        print("get_answer_stream_iter end" + "++++++++" + now.strftime("%Y-%m-%d %H:%M:%S"))
```
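To verify the model outside the WebUI first, here is a minimal standalone sketch that uses the same loading and chat calls as Baichuan.py above; the local path and the test message are placeholders:

```python
# Minimal smoke test, assuming Baichuan2-7B-Chat sits in ./models and the checkpoint's
# custom chat() method is available via trust_remote_code (same calls as Baichuan_Client).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

model_source = "models/Baichuan2-7B-Chat"  # adjust if the model lives elsewhere

tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_source, device_map="auto", trust_remote_code=True, torch_dtype=torch.float16)
model.generation_config = GenerationConfig.from_pretrained(model_source)
model = model.eval()

messages = [{"role": "user", "content": "你好"}]
response = model.chat(tokenizer, messages)  # same call Baichuan_Client.get_answer_at_once makes
print(response)
```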
- The answer-callback switch (streaming on or off) controls whether get_answer_at_once or get_answer_stream_iter is called; a simplified sketch of that dispatch follows.
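Roughly, the base class picks between the two methods depending on whether streaming output is enabled. A self-contained, simplified sketch of that dispatch; names and structure are illustrative, not the project's exact code:

```python
# Illustrative dispatch between one-shot and streaming answers (not verbatim from base_model.py).
class DemoClient:
    def get_answer_at_once(self):
        return "full answer", 11                      # (answer, token count)

    def get_answer_stream_iter(self):
        for partial in ("full", "full ans", "full answer"):
            yield partial                             # progressively longer partial answers

    def predict(self, stream=False):
        if stream:
            yield from self.get_answer_stream_iter()  # streaming path
        else:
            answer, _tokens = self.get_answer_at_once()
            yield answer                              # one-shot path


client = DemoClient()
print(list(client.predict(stream=True)))   # ['full', 'full ans', 'full answer']
print(list(client.predict(stream=False)))  # ['full answer']
```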
- Execution results