Today's post works through the GPT-2-related source code inside huggingface transformers. To run the code below, first install the library:
pip install transformers
The driver program is:
from transformers import pipeline
generator = pipeline('text-generation', model='gpt2')
generator("Hello, I'm a language model,", max_length=30, num_return_sequences=5)
The pipeline source first checks which deep-learning frameworks are available:
if is_tf_available():
    import tensorflow as tf
    print('is_tf_available()')  # debug print added by the blog author
    from ..models.auto.modeling_tf_auto import (
        TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        TF_MODEL_WITH_LM_HEAD_MAPPING,
        TFAutoModel,
        TFAutoModelForCausalLM,
        TFAutoModelForMaskedLM,
        TFAutoModelForQuestionAnswering,
        TFAutoModelForSeq2SeqLM,
        TFAutoModelForSequenceClassification,
        TFAutoModelForTokenClassification,
    )
if is_torch_available():
    import torch
    print('is torch available()')  # debug print added by the blog author
    from ..models.auto.modeling_auto import (
        MODEL_FOR_MASKED_LM_MAPPING,
        MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
        MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        AutoModel,
        AutoModelForCausalLM,
        AutoModelForMaskedLM,
        AutoModelForQuestionAnswering,
        AutoModelForSeq2SeqLM,
        AutoModelForSequenceClassification,
        AutoModelForTableQuestionAnswering,
        AutoModelForTokenClassification,
    )
Since TensorFlow is not installed on my machine, only is torch available() is printed.
Next, pipeline() runs:
targeted_task, task_options = check_task(task)
which returns:
targeted_task =
{'impl': <class 'transformers.pipelines.text_generation.TextGenerationPipeline'>, 'tf': None, 'pt': <class 'transformers.models.auto.modeling_auto.AutoModelForCausalLM'>, 'default': {'model': {'pt': 'gpt2', 'tf': 'gpt2'}}}
task_options = None
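As a side note, check_task is essentially a lookup into the SUPPORTED_TASKS registry in pipelines/__init__.py; here is a minimal sketch of its behavior, assuming SUPPORTED_TASKS is importable from transformers.pipelines (the real function also parses 'translation_XX_to_YY' style task names):

from transformers.pipelines import SUPPORTED_TASKS  # assumption: module-level registry

def check_task_sketch(task):
    # SUPPORTED_TASKS maps task names to dicts like the targeted_task shown above
    if task in SUPPORTED_TASKS:
        return SUPPORTED_TASKS[task], None  # (targeted_task, task_options)
    raise KeyError(f"Unknown task {task}, available tasks are {list(SUPPORTED_TASKS)}")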
Next comes the model resolution:
if model is None:
    model = get_default_model(targeted_task, framework, task_options)
which gives model = 'gpt2'. The framework is then determined:
framework = framework or get_framework(model)
which gives framework = 'pt'.
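How is 'pt' chosen? get_framework picks a framework based on what is installed; this is my own rough simplification (the real function may even instantiate the model with AutoModel/TFAutoModel to decide when both frameworks are present, but the net effect here is the same):

from transformers import is_tf_available, is_torch_available

def get_framework_sketch(model=None):
    # our case: only torch is installed, so 'pt' is returned
    if is_torch_available() and not is_tf_available():
        return "pt"
    if is_tf_available() and not is_torch_available():
        return "tf"
    return "pt"  # PyTorch ends up preferred when both frameworks are installed

Back in pipeline(), the task implementation class and model class are then looked up: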
task_class, model_class = targeted_task["impl"], targeted_task[framework]
so that:
model_class =
<class 'transformers.models.auto.modeling_auto.AutoModelForCausalLM'>
task_class =
<class 'transformers.pipelines.text_generation.TextGenerationPipeline'>
if tokenizer is None:
    if isinstance(model, str):
        tokenizer = model
        # e.g. in the question-answering walkthrough this is 'distilbert-base-cased-distilled-squad'
    elif isinstance(config, str):
        tokenizer = config
    else:
        # Impossible to guess what is the right tokenizer here
        raise Exception(
            "Impossible to guess which tokenizer to use. "
            "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer."
        )
This sets tokenizer = 'gpt2'. Next, that string is turned into an actual tokenizer object via from_pretrained:
if isinstance(tokenizer, (str, tuple)):
    if isinstance(tokenizer, tuple):
        # For tuple we have (tokenizer name, {kwargs})
        use_fast = tokenizer[1].pop("use_fast", use_fast)
        tokenizer = AutoTokenizer.from_pretrained(
            tokenizer[0], use_fast=use_fast, revision=revision, **tokenizer[1]
        )
    else:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer, revision=revision, use_fast=use_fast)
Since tokenizer is a plain string, the else branch is taken, producing:
tokenizer = PreTrainedTokenizerFast(name_or_path='gpt2', vocab_size=50257, model_max_len=1024, is_fast=True, padding_side='right', special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>'})
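To see what this fast tokenizer does to our prompt, it can be called directly:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2", use_fast=True)
enc = tok("Hello, I'm a language model,")
print(enc["input_ids"])              # BPE token ids for the prompt
print(tok.decode(enc["input_ids"]))  # decodes back to the original string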
Next the model card is fetched:
if isinstance(modelcard, str):
    modelcard = ModelCard.from_pretrained(modelcard, revision=revision)
which yields an (empty) model card:
modelcard =
{
  "caveats_and_recommendations": {},
  "ethical_considerations": {},
  "evaluation_data": {},
  "factors": {},
  "intended_use": {},
  "metrics": {},
  "model_details": {},
  "quantitative_analyses": {},
  "training_data": {}
}
Next comes the part that may need to instantiate the model itself:
if isinstance(model, str):
    # Handle transparent TF/PT model conversion
    model_kwargs = {}
    if framework == "pt" and model.endswith(".h5"):
        model_kwargs["from_tf"] = True
        logger.warning(
            "Model might be a TensorFlow model (ending with `.h5`) but TensorFlow is not available. "
            "Trying to load the model with PyTorch."
        )
    elif framework == "tf" and model.endswith(".bin"):
        model_kwargs["from_pt"] = True
        logger.warning(
            "Model might be a PyTorch model (ending with `.bin`) but PyTorch is not available. "
            "Trying to load the model with Tensorflow."
        )
    if model_class is None:
        raise ValueError(
            f"Pipeline using {framework} framework, but this framework is not supported by this pipeline."
        )
    # in the question-answering walkthrough, model = 'distilbert-base-cased-distilled-squad'
    model = model_class.from_pretrained(model, config=config, revision=revision, **model_kwargs)
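The from_tf / from_pt flags can also be used manually for cross-framework loading; a hypothetical example (the directory name is made up and would need to contain a tf_model.h5 checkpoint):

from transformers import AutoModelForCausalLM

# hypothetical local directory holding a TensorFlow GPT-2 checkpoint (tf_model.h5)
model = AutoModelForCausalLM.from_pretrained("./my_tf_gpt2", from_tf=True)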
After this step, model_class is:
model_class = <class 'transformers.models.auto.modeling_auto.AutoModelForCausalLM'>
and the call
model = model_class.from_pretrained(model, config=config, revision=revision, **model_kwargs)
returns the concrete model:
GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0): Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
      (1)-(11): eleven more Block modules, identical in structure to (0), omitted here
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)
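As a quick sanity check on this structure, the parameter count can be computed from the loaded model; for the small gpt2 checkpoint it should come out to roughly 124M:

# model is the GPT2LMHeadModel instance printed above
num_params = sum(p.numel() for p in model.parameters())
print(f"{num_params / 1e6:.1f}M parameters")  # roughly 124M for gpt2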
At this point we want to see how these layers were built, so we go back to the call that produced the model:
model = model_class.from_pretrained(model, config=config, revision=revision, **model_kwargs)
Here model_class = <class 'transformers.models.auto.modeling_auto.AutoModelForCausalLM'>, so we step into the AutoModelForCausalLM class in modeling_auto.py.
First, its docstring:
This is a generic model class that will be instantiated as one of the
model classes of the library
which tells us that this class is a factory for the library's causal (generative) language models.
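This "generic class" behavior is easy to verify: the object you get back is an instance of a concrete architecture, not of AutoModelForCausalLM itself:

from transformers import AutoModelForCausalLM

m = AutoModelForCausalLM.from_pretrained("gpt2")
print(type(m).__name__)  # GPT2LMHeadModel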
Inside its from_pretrained, AutoConfig.from_pretrained is called to resolve the config and collect the unused kwargs:
config, kwargs = AutoConfig.from_pretrained(pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs)
which gives:
config = GPT2Config {
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "gradient_checkpointing": false,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "resid_pdrop": 0.1,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "transformers_version": "4.2.2",
  "use_cache": true,
  "vocab_size": 50257
}
kwargs = {'revision': None}
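The return_unused_kwargs=True flag is what separates arguments the config consumes from leftovers such as revision; a small demonstration (the foo kwarg is made up):

from transformers import AutoConfig

# kwargs the config class does not recognize come back in the second return value
config, unused = AutoConfig.from_pretrained("gpt2", return_unused_kwargs=True, foo=1)
print(type(config).__name__)  # GPT2Config
print(unused)                 # {'foo': 1}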
The parameter resolution itself is handled by the AutoConfig class in configuration_auto.py; we skip that machinery for now and keep going:
if type(config) in MODEL_FOR_CAUSAL_LM_MAPPING.keys():
    return MODEL_FOR_CAUSAL_LM_MAPPING[type(config)].from_pretrained(
        pretrained_model_name_or_path, *model_args, config=config, **kwargs
    )
raise ValueError(
    "Unrecognized configuration class {} for this kind of AutoModel: {}.\n"
    "Model type should be one of {}.".format(
        config.__class__, cls.__name__, ", ".join(c.__name__ for c in MODEL_FOR_CAUSAL_LM_MAPPING.keys())
    )
)
Here type(config) = <class 'transformers.models.gpt2.configuration_gpt2.GPT2Config'>, and the relevant entry in the MODEL_FOR_CAUSAL_LM_MAPPING dictionary (which also contains many other config-to-model pairs) is:
MODEL_FOR_CAUSAL_LM_MAPPING = OrderedDict(
    [
        (GPT2Config, GPT2LMHeadModel),
        # ... entries for the other causal LM architectures
    ]
)
So the call
return MODEL_FOR_CAUSAL_LM_MAPPING[type(config)].from_pretrained(
    pretrained_model_name_or_path, *model_args, config=config, **kwargs
)
resolves MODEL_FOR_CAUSAL_LM_MAPPING[type(config)] to GPT2LMHeadModel, and from here we continue into GPT2LMHeadModel.from_pretrained to follow the actual loading.
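Putting the whole walkthrough together: for 'gpt2', the auto-class dispatch is equivalent to wiring the concrete classes up by hand; a minimal sketch of that equivalence:

from transformers import GPT2LMHeadModel, GPT2TokenizerFast, TextGenerationPipeline

# load the concrete model and tokenizer directly, then build the pipeline object
model = GPT2LMHeadModel.from_pretrained("gpt2")
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
print(generator("Hello, I'm a language model,", max_length=30)[0]["generated_text"])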
Reposted from: https://blog.csdn.net/znevegiveup1/article/details/115785197