---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[41], line 4
1 print('Training Start!')
2 print('=' * 100)
----> 4 train(model,
5 device,
6 train_dataloader,
7 valid_dataloader,
8 CFG.epochs,
9 loss_fn,
10 optimizer,
11 metric)
13 del model,train_dataloader, valid_dataloader
14 gc.collect()
Cell In[40], line 25, in train(model, device, train_dataloader, valid_dataloader, epochs, loss_fn, optimizer, metric)
23 train_attention_mask = batch['attention_mask'].to(device)
24 train_token_type_ids = batch['token_type_ids'].to(device)
---> 25 train_labels = batch['labels'].squeeze().to(device).long()#label真实值long()转化成一维张量
27 # You can refer to the class "TweetsModel" for understand
28 # what would be logits
29 logits = model(train_input_ids, train_attention_mask,train_token_type_ids).to(device)
KeyError: 'labels'
报错信息如上,代码如下print('Training Start!')
print('=' * 100)
train(model,
device,
train_dataloader,
valid_dataloader,
CFG.epochs,
loss_fn,
optimizer,
metric)
del model,train_dataloader, valid_dataloader
gc.collect()
import gc,os
from tqdm.auto import tqdm # visualizing tool for progress
# They will be used to pick the best model.pt given to the valid loss
best_model_epoch, valid_loss_values = [],[]
valid_loss_min = [1] # arbitrary loss I set here
def train(model,device,train_dataloader,valid_dataloader,epochs,loss_fn,optimizer,metric):
for epoch in range(epochs):
gc.collect() # memory cleaning垃圾回收机制,减少占用内存
model.train()
train_loss = 0
train_step = 0
pbar = tqdm(train_dataloader, total=len(train_dataloader))#tqdm参数是一个iterable
for batch in pbar: # you can also write like "for batch in tqdm(train_dataloader"
optimizer.zero_grad() # initialize
train_step += 1
train_input_ids = batch['input_ids'].to(device)#batch是一个字典
train_attention_mask = batch['attention_mask'].to(device)
train_token_type_ids = batch['token_type_ids'].to(device)
train_labels = batch['labels'].squeeze().to(device).long()#label真实值long()转化成一维张量
# You can refer to the class "TweetsModel" for understand
# what would be logits
logits = model(train_input_ids, train_attention_mask,train_token_type_ids).to(device)
predictions = torch.argmax(logits, dim=1) # get an index from larger one
detached_predictions = predictions.detach().cpu().numpy()
loss = loss_fn(logits, train_labels)
loss.backward()
optimizer.step()
model.zero_grad()
train_loss += loss.detach().cpu().numpy().item()
pbar.set_postfix({'train_loss':train_loss/train_step})#设置进度条显示信息
pbar.close()
with torch.no_grad():
model.eval()
valid_loss = 0
valid_step = 0
total_valid_score = 0
y_pred = [] # for getting f1_score that is a metric of the competition
y_true = []
pbar = tqdm(valid_dataloader)
for batch,labels in pbar:
valid_step += 1
valid_input_ids = batch['input_ids'].to(device)
valid_attention_mask = batch['attention_mask'].to(device)
valid_token_type_ids = batch['token_type_ids'].to(device)
valid_labels = batch['labels'].squeeze().to(device).long()
logits = model(valid_input_ids, valid_attention_mask).to(device)
predictions = torch.argmax(logits, dim=1)
detached_predictions = predictions.detach().cpu().numpy()
loss = loss_fn(logits, valid_labels)
valid_loss += loss.detach().cpu().numpy().item()
y_pred.extend(predictions.cpu().numpy())
y_true.extend(valid_labels.cpu().numpy())
valid_loss /= valid_step
f1 = f1_score(y_true,y_pred)
print(f'Epoch [{epoch+1}/{epochs}] Score: {f1}')
print(f'Epoch [{epoch+1}/{epochs}] Valid_loss: {valid_loss}')
if valid_loss < min(valid_loss_min):
print('model improved!')
else:
print('model not improved')
torch.save(model.state_dict(), f'epoch:{epoch+1}_model.pt')#state_dict 是一个字典对象,包含了模型的所有可学习参数(如权重和偏置)及其当前值
print('save checkpoint!')
valid_loss_min.append(valid_loss)
print(f'valid_loss_min:{min(valid_loss_min)}')
best_model_epoch.append(f'/kaggle/working/epoch:{epoch+1}_model.pt')
valid_loss_values.append(valid_loss)
print('='*100)
select_best_model() # refer to below function
print('Train/Valid Completed!!')
del train_dataloader, valid_dataloader # memory cleaning
gc.collect()
def select_best_model():
best_model = best_model_epoch[np.array(valid_loss_values).argmin()]
os.rename(best_model, best_model.split('.pt')[0] + '_best.pt')#重命名文件
|