# Define the model used for training
import torch
import torch.nn as nn
from transformers import AutoConfig, AutoModel


class LLMModel(nn.Module):
    def __init__(self, cfg, config_path=None, pretrained=False):
        super().__init__()
        self.cfg = cfg
        if config_path is None:
            self.config = AutoConfig.from_pretrained(cfg.model, output_hidden_states=True)
            # Turn off all dropout so the extracted features are deterministic
            self.config.hidden_dropout = 0.
            self.config.hidden_dropout_prob = 0.
            self.config.attention_dropout = 0.
            self.config.attention_probs_dropout_prob = 0.
            self.config.add_pooling_layer = False
        else:
            self.config = torch.load(config_path)
        if pretrained:
            self.model = AutoModel.from_pretrained(cfg.model, config=self.config)
        else:
            self.model = AutoModel.from_config(self.config)

        # tokenizer is assumed to be defined elsewhere (it may have had special tokens added)
        self.model.resize_token_embeddings(len(tokenizer))
        if self.cfg.gradient_checkpointing:
            # With gradient checkpointing the model does not keep all intermediate
            # activations from the forward pass; they are recomputed during backward,
            # which reduces memory usage
            self.model.gradient_checkpointing_enable()
        self.fc = nn.Linear(self.config.hidden_size, self.cfg.num_labels)
        self._init_weights(self.fc)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def feature(self, input_ids, attention_mask, token_type_ids):
        outputs = self.model(input_ids=input_ids,
                             attention_mask=attention_mask,
                             token_type_ids=token_type_ids)
        # last_hidden_state: final-layer hidden states of every token,
        # shape [batch_size, seq_len, hidden_size]
        last_hidden_states = outputs[0]
        feature = last_hidden_states[:, 0, :]  # hidden state of the [CLS] token
        return feature

    def forward(self, input_ids, attention_mask, token_type_ids):
        # attention_mask keeps the attention mechanism from attending to padding tokens
        feature = self.feature(input_ids, attention_mask, token_type_ids)
        output = self.fc(feature)
        return output.squeeze(-1)
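For context, here is a minimal sketch of how this class would typically be instantiated and called. Everything below (the CFG class, the bert-base-uncased checkpoint, and the example input) is an assumption for illustration and is not defined in the code above:

    # Hypothetical usage sketch -- CFG, the checkpoint name, and the inputs are
    # assumptions for illustration only
    from transformers import AutoTokenizer

    class CFG:
        model = "bert-base-uncased"   # assumed backbone checkpoint
        gradient_checkpointing = True
        num_labels = 1                # e.g. a single regression target

    # LLMModel.__init__ expects a module-level `tokenizer`, so create it first
    tokenizer = AutoTokenizer.from_pretrained(CFG.model)

    model = LLMModel(CFG, config_path=None, pretrained=True)

    enc = tokenizer(["an example sentence"], padding="max_length",
                    max_length=32, truncation=True, return_tensors="pt")
    out = model(enc["input_ids"], enc["attention_mask"], enc["token_type_ids"])
    print(out.shape)  # with num_labels=1: torch.Size([1]) after squeeze(-1)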
In particular, looking at the feature and forward methods:

    def feature(self, input_ids, attention_mask, token_type_ids):
        outputs = self.model(input_ids=input_ids,
                             attention_mask=attention_mask,
                             token_type_ids=token_type_ids)
        # final-layer hidden states, shape [batch_size, seq_len, hidden_size]
        last_hidden_states = outputs[0]
        feature = last_hidden_states[:, 0, :]  # [CLS] token hidden state
        return feature

    def forward(self, input_ids, attention_mask, token_type_ids):
        # attention_mask keeps attention away from padding tokens
        feature = self.feature(input_ids, attention_mask, token_type_ids)
        output = self.fc(feature)
        return output.squeeze(-1)
What shape of tensor does the feature function ultimately return? It is then used as the input to the final fully connected output layer.
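Tracing the shapes through the code (continuing the hypothetical snippet above, where batch_size=1, max_length=32 and bert-base's hidden_size=768): outputs[0] has shape [batch_size, seq_len, hidden_size], so slicing [:, 0, :] keeps only the [CLS] position, and feature returns a [batch_size, hidden_size] tensor, which is what the fc layer consumes:

    # Shape walk-through, reusing `model` and `enc` from the sketch above (assumed sizes)
    feat = model.feature(enc["input_ids"], enc["attention_mask"], enc["token_type_ids"])
    print(feat.shape)                # torch.Size([1, 768]) -> [batch_size, hidden_size]
    logits = model.fc(feat)
    print(logits.shape)              # torch.Size([1, 1])   -> [batch_size, num_labels]
    print(logits.squeeze(-1).shape)  # torch.Size([1])      -> what forward() returns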