|

楼主 |
发表于 2024-11-3 13:23:45
|
显示全部楼层
- import gc,os
- from tqdm.auto import tqdm # visualizing tool for progress
- # They will be used to pick the best model.pt given to the valid loss
- best_model_epoch, valid_loss_values = [],[]
- valid_loss_min = [1] # arbitrary loss I set here
- def train(model,device,train_dataloader,valid_dataloader,epochs,loss_fn,optimizer,metric):
- for epoch in range(epochs):
- gc.collect() # memory cleaning垃圾回收机制,减少占用内存
- model.train()
- train_loss = 0
- train_step = 0
- pbar = tqdm(train_dataloader, total=len(train_dataloader))#tqdm参数是一个iterable
- for batch, labels in pbar: # you can also write like "for batch in tqdm(train_dataloader"
- optimizer.zero_grad() # initialize
- train_step += 1
-
- train_input_ids = batch['input_ids'].to(device)#batch是一个字典
- train_attention_mask = batch['attention_mask'].to(device)
- train_token_type_ids = batch['token_type_ids'].to(device)
- train_labels = labels.squeeze().to(device).long()#label真实值long()转化成一维张量
-
- # You can refer to the class "TweetsModel" for understand
- # what would be logits
- logits = model(train_input_ids, train_attention_mask,train_token_type_ids).to(device)
- predictions = torch.argmax(logits, dim=1) # get an index from larger one
- detached_predictions = predictions.detach().cpu().numpy()
-
- loss = loss_fn(logits, train_labels)
- loss.backward()
- optimizer.step()
- model.zero_grad()
- train_loss += loss.detach().cpu().numpy().item()
- pbar.set_postfix({'train_loss':train_loss/train_step})#设置进度条显示信息
- pbar.close()
- with torch.no_grad():
- model.eval()
- valid_loss = 0
- valid_step = 0
- total_valid_score = 0
- y_pred = [] # for getting f1_score that is a metric of the competition
- y_true = []
- pbar = tqdm(valid_dataloader)
- for batch,labels in pbar:
- valid_step += 1
- valid_input_ids = batch['input_ids'].to(device)
- valid_attention_mask = batch['attention_mask'].to(device)
- valid_token_type_ids = batch['token_type_ids'].to(device)
- valid_labels = labels.squeeze().to(device).long()
- logits = model(valid_input_ids, valid_attention_mask).to(device)
- predictions = torch.argmax(logits, dim=1)
- detached_predictions = predictions.detach().cpu().numpy()
-
- loss = loss_fn(logits, valid_labels)
- valid_loss += loss.detach().cpu().numpy().item()
- y_pred.extend(predictions.cpu().numpy())
- y_true.extend(valid_labels.cpu().numpy())
- valid_loss /= valid_step
- f1 = f1_score(y_true,y_pred)
- print(f'Epoch [{epoch+1}/{epochs}] Score: {f1}')
- print(f'Epoch [{epoch+1}/{epochs}] Valid_loss: {valid_loss}')
- if valid_loss < min(valid_loss_min):
- print('model improved!')
- else:
- print('model not improved')
-
- torch.save(model.state_dict(), f'epoch:{epoch+1}_model.pt')#state_dict 是一个字典对象,包含了模型的所有可学习参数(如权重和偏置)及其当前值
- print('save checkpoint!')
- valid_loss_min.append(valid_loss)
- print(f'valid_loss_min:{min(valid_loss_min)}')
- best_model_epoch.append(f'/kaggle/working/epoch:{epoch+1}_model.pt')
- valid_loss_values.append(valid_loss)
- print('='*100)
- select_best_model() # refer to below function
- print('Train/Valid Completed!!')
- del train_dataloader, valid_dataloader # memory cleaning
- gc.collect()
- def select_best_model():
- best_model = best_model_epoch[np.array(valid_loss_values).argmin()]
- os.rename(best_model, best_model.split('.pt')[0] + '_best.pt')#重命名文件
复制代码
上述代码train_dataloader要求是什么形状的?batch是一个字典
有如下报错- ---------------------------------------------------------------------------
- ValueError Traceback (most recent call last)
- Cell In[128], line 4
- 1 print('Training Start!')
- 2 print('=' * 100)
- ----> 4 train(model,
- 5 device,
- 6 train_dataloader,
- 7 valid_dataloader,
- 8 CFG.epochs,
- 9 loss_fn,
- 10 optimizer,
- 11 metric)
- 13 del model,train_dataloader, valid_dataloader
- 14 gc.collect()
- Cell In[127], line 17, in train(model, device, train_dataloader, valid_dataloader, epochs, loss_fn, optimizer, metric)
- 14 train_step = 0
- 15 pbar = tqdm(train_dataloader, total=len(train_dataloader))#tqdm参数是一个iterable
- ---> 17 for batch, labels in pbar: # you can also write like "for batch in tqdm(train_dataloader"
- 18 optimizer.zero_grad() # initialize
- 19 train_step += 1
- ValueError: too many values to unpack (expected 2)
复制代码 |
|