Named Entity Recognition — the bert_lstm_crf model
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
import os
os.chdir('/content/drive/MyDrive/chinese task/CLUENER2020')
# Install dependencies
!pip install transformers datasets pytorch-crf seqeval
import os
import json
import logging
import numpy as np
import pandas as pd
import config

from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
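The local config module imported above is not shown in this post; it supplies config.label2id and config.roberta_model used below. A minimal sketch of what such a config.py could contain (the tag order, the S- tags and the checkpoint name are assumptions inferred from the CLUENER label set and the 1024-dim hidden size used later):

# config.py — hypothetical sketch, not the author's actual file
ENTITY_TYPES = ["address", "book", "company", "game", "government",
                "movie", "name", "organization", "position", "scene"]
# BIOS tagging scheme: "O" plus B-/I-/S- tags for each entity type (31 labels in total)
labels = (["O"]
          + [f"B-{t}" for t in ENTITY_TYPES]
          + [f"I-{t}" for t in ENTITY_TYPES]
          + [f"S-{t}" for t in ENTITY_TYPES])
label2id = {label: i for i, label in enumerate(labels)}
id2label = {i: label for label, i in label2id.items()}
# Path or hub name of the pretrained Chinese RoBERTa checkpoint (assumed to be a "large" model)
roberta_model = "hfl/chinese-roberta-wwm-ext-large"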
# Load the preprocessed .npz datasets
# Without allow_pickle=True this raises "Object arrays cannot be loaded when allow_pickle=False"; newer NumPy versions default to False.
train_data=np.load('./data/train.npz',allow_pickle=True)
val_data=np.load('./data/dev.npz',allow_pickle=True)
test_data=np.load('./data/test.npz',allow_pickle=True)

test_data.files
['words', 'labels']
# Convert to DataFrame format
import pandas as pd
# Shuffle the training set (sample with frac=1.0 returns all rows in random order)
train_df=pd.concat([pd.DataFrame(train_data['words'],columns=['words']),
pd.DataFrame(train_data['labels'],columns=['labels'])],axis=1).sample(frac=1.0)
# The validation and test sets are not shuffled
val_df=pd.concat([pd.DataFrame(val_data['words'],columns=['words']),
pd.DataFrame(val_data['labels'],columns=['labels'])],axis=1)  # predictions are compared against these labels later, so do not shuffle

test_df=pd.concat([pd.DataFrame(test_data['words'],columns=['words']),
pd.DataFrame(test_data['labels'],columns=['labels'])],axis=1)
# Small-sample smoke test (uncomment to use)
#train_df=train_df.iloc[:1000]
#val_df=val_df.iloc[:500]


# Convert the BIOS labels of the train/val sets to integer indices; words and labels are already aligned at this point
def trans(labels):
    labels = list(labels)
    nums = []
    for label in labels:
        nums.append(config.label2id[label])
    return nums
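For example, assuming the label2id mapping sketched earlier, a two-character person name followed by plain text converts roughly like this (the exact ids are whatever config.label2id assigns):

print(trans(['B-name', 'I-name', 'O']))  # e.g. [7, 17, 0]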

train_df['labels']=train_df['labels'].map(lambda x: trans(x))
val_df['labels']=val_df['labels'].map(lambda x: trans(x))

test_df['labels']=test_df['labels'].map(lambda x: trans(x))
val_df
words labels
0 [彭, 小, 军, 认, 为, ,, 国, 内, 银, 行, 现, 在, 走, 的, 是, ... [7, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
1 [温, 格, 的, 球, 队, 终, 于, 又, 踢, 了, 一, 场, 经, 典, 的, ... [7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
2 [突, 袭, 黑, 暗, 雅, 典, 娜, 》, 中, R, i, d, d, i, c, ... [4, 14, 14, 14, 14, 14, 14, 14, 0, 7, 17, 17, ...
3 [郑, 阿, 姨, 就, 赶, 到, 文, 汇, 路, 排, 队, 拿, 钱, ,, 希, ... [0, 0, 0, 0, 0, 0, 1, 11, 11, 0, 0, 0, 0, 0, 0...
4 [我, 想, 站, 在, 雪, 山, 脚, 下, 你, 会, 被, 那, 巍, 峨, 的, ... [0, 0, 0, 0, 10, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0...
... ... ...
1338 [在, 这, 个, 非, 常, 喜, 庆, 的, 日, 子, 里, ,, 我, 们, 首, ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1339 [姜, 哲, 中, :, 公, 共, 之, 敌, 1, -, 1, 》, 、, 《, 神, ... [6, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16...
1340 [目, 前, ,, 日, 本, 松, 山, 海, 上, 保, 安, 部, 正, 在, 就, ... [0, 0, 0, 5, 15, 15, 15, 15, 15, 15, 15, 15, 0...
1341 [也, 就, 是, 说, 英, 国, 人, 在, 世, 博, 会, 上, 的, 英, 国, ... [0, 0, 0, 0, 0, 0, 0, 0, 10, 20, 20, 0, 0, 0, ...
1342 [另, 外, 意, 大, 利, 的, P, l, a, y, G, e, n, e, r, ... [0, 0, 0, 0, 0, 0, 2, 12, 12, 12, 12, 12, 12, ...

1343 rows × 2 columns

Load the pandas data into 🤗 datasets for tokenization; this makes it convenient to pad the labels directly afterwards.

If the labels are left unprocessed, their lengths differ when loaded into the DataLoader; handling that with a collate function is cumbersome.
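For reference, the collate-function route dismissed above would look roughly like this (a sketch only, assuming each raw sample keeps its unpadded label list):

import torch
from torch.nn.utils.rnn import pad_sequence

def collate_with_label_padding(batch):
    # Dynamically pad each batch's label lists to that batch's maximum length
    # instead of pre-padding every sample to the global maximum.
    labels = [torch.tensor(item['labels']) for item in batch]
    padded_labels = pad_sequence(labels, batch_first=True, padding_value=-1)
    # input_ids / attention_mask / token_type_ids would need the same treatment
    return padded_labels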

from datasets import Dataset
from transformers import AutoTokenizer
# Be sure to use AutoTokenizer here; with BertTokenizer it complains that there is no word_ids method. (word_ids ended up unused anyway.)
trains_ds=Dataset.from_pandas(train_df)
val_ds=Dataset.from_pandas(val_df)
test_ds=Dataset.from_pandas(test_df)

tokenizer=AutoTokenizer.from_pretrained(config.roberta_model,do_lower_case=True)

#tokenized_inputs=tokenizer(trains_ds["words"],padding=True,truncation=True,is_split_into_words=True)  # why does this direct call fail?
tokenized_trains_ds=trains_ds.map(lambda examples:tokenizer(examples['words'],is_split_into_words=True,truncation=True,padding=True),batched=True)
tokenized_val_ds=val_ds.map(lambda examples:tokenizer(examples['words'],is_split_into_words=True,truncation=True,padding=True),batched=True)
tokenized_test_ds=test_ds.map(lambda examples:tokenizer(examples['words'],is_split_into_words=True,truncation=True,padding=True),batched=True)
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
# Results computed on the tokenized datasets cannot be written back into the Dataset objects, so they are written into the pandas DataFrames instead.
# Pad the labels to the same length as input_ids (the longest sentence has 52 tokens, so in practice everything is padded to 52)

def padding(data, pad):
    pad_labels = []
    for ds in data:
        labels = ds['labels']
        mask = ds['attention_mask']
        label_ids = [pad]  # placeholder for the [CLS] token

        pad_length = len(mask)
        label_length = len(labels)

        label_ids = label_ids + labels + [pad]*(pad_length - label_length - 1)
        pad_labels.append(label_ids)
    return pad_labels
#tokenized_trains_ds["pad_labels"]=pad_labels# Column 2 named labels expected length 10748 but got length 1000
"""
train_df['mask_labels']=padding(tokenized_trains_ds,-100)
val_df['mask_labels']=padding(tokenized_val_ds,-100)
test_df['mask_labels']=padding(tokenized_test_ds,-100)"""

train_df['pad_labels']=padding(tokenized_trains_ds,-1)
val_df['pad_labels']=padding(tokenized_val_ds,-1)
test_df['pad_labels']=padding(tokenized_test_ds,-1)
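A quick sanity check of the alignment (illustrative only): every padded label row should be exactly as long as its attention_mask, with -1 at the [CLS] slot that the tokenizer added.

sample = tokenized_val_ds[0]
padded = padding([sample], -1)[0]
assert len(padded) == len(sample['attention_mask'])
assert padded[0] == -1  # the [CLS] position carries no BIOS label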
val_df
words labels pad_labels
0 [彭, 小, 军, 认, 为, ,, 国, 内, 银, 行, 现, 在, 走, 的, 是, ... [7, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... [-1, 7, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1 [温, 格, 的, 球, 队, 终, 于, 又, 踢, 了, 一, 场, 经, 典, 的, ... [7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... [-1, 7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
2 [突, 袭, 黑, 暗, 雅, 典, 娜, 》, 中, R, i, d, d, i, c, ... [4, 14, 14, 14, 14, 14, 14, 14, 0, 7, 17, 17, ... [-1, 4, 14, 14, 14, 14, 14, 14, 14, 0, 7, 17, ...
3 [郑, 阿, 姨, 就, 赶, 到, 文, 汇, 路, 排, 队, 拿, 钱, ,, 希, ... [0, 0, 0, 0, 0, 0, 1, 11, 11, 0, 0, 0, 0, 0, 0... [-1, 0, 0, 0, 0, 0, 0, 1, 11, 11, 0, 0, 0, 0, ...
4 [我, 想, 站, 在, 雪, 山, 脚, 下, 你, 会, 被, 那, 巍, 峨, 的, ... [0, 0, 0, 0, 10, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0... [-1, 0, 0, 0, 0, 10, 20, 0, 0, 0, 0, 0, 0, 0, ...
... ... ... ...
1338 [在, 这, 个, 非, 常, 喜, 庆, 的, 日, 子, 里, ,, 我, 们, 首, ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
1339 [姜, 哲, 中, :, 公, 共, 之, 敌, 1, -, 1, 》, 、, 《, 神, ... [6, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16... [-1, 6, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16...
1340 [目, 前, ,, 日, 本, 松, 山, 海, 上, 保, 安, 部, 正, 在, 就, ... [0, 0, 0, 5, 15, 15, 15, 15, 15, 15, 15, 15, 0... [-1, 0, 0, 0, 5, 15, 15, 15, 15, 15, 15, 15, 1...
1341 [也, 就, 是, 说, 英, 国, 人, 在, 世, 博, 会, 上, 的, 英, 国, ... [0, 0, 0, 0, 0, 0, 0, 0, 10, 20, 20, 0, 0, 0, ... [-1, 0, 0, 0, 0, 0, 0, 0, 0, 10, 20, 20, 0, 0,...
1342 [另, 外, 意, 大, 利, 的, P, l, a, y, G, e, n, e, r, ... [0, 0, 0, 0, 0, 0, 2, 12, 12, 12, 12, 12, 12, ... [-1, 0, 0, 0, 0, 0, 0, 2, 12, 12, 12, 12, 12, ...

1343 rows × 3 columns

batch_size=16

# Assemble the train/validation/test splits
from sklearn.model_selection import train_test_split
from datasets import Dataset
from torch.nn.utils.rnn import pad_sequence


train_data,train_label,val_data,val_label=train_df['words'].iloc[:],train_df['pad_labels'].iloc[:],val_df['words'].iloc[:],val_df['pad_labels'].iloc[:]

test_data,test_label=(test_df['words'].iloc[:],test_df['pad_labels'].iloc[:])

# stratify=train_df['label'].iloc[:] raised: "The least populated class in y has only 1 member, which is too few.
# The minimum number of groups for any class cannot be less than 2." Presumably there are too few samples per class for stratified splitting.

# Data preprocessing

tokenizer=AutoTokenizer.from_pretrained(config.roberta_model,do_lower_case=True)
train_encoding=tokenizer(list(train_data),is_split_into_words=True,truncation=True,padding=True,return_tensors='pt')  # training split
val_encoding=tokenizer(list(val_data),is_split_into_words=True,truncation=True,padding=True,return_tensors='pt')  # validation split
test_encoding=tokenizer(list(test_data),is_split_into_words=True,truncation=True,padding=True,return_tensors='pt')  # test set
# Load into DataLoaders
# Dataset class

from torch.utils.data import Dataset, DataLoader,TensorDataset
import torch
class XFeiDataset(Dataset):
    def __init__(self, encodings, pad_labels):
        self.encodings = encodings
        self.pad_labels = pad_labels

    # Fetch a single sample
    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['pad_labels'] = torch.tensor(self.pad_labels[idx])
        item['mask'] = (item['pad_labels'] != -1)
        return item

    def __len__(self):
        return len(self.pad_labels)

#def collate_fn

train_dataset=XFeiDataset(train_encoding,list(train_label))
val_dataset=XFeiDataset(val_encoding,list(val_label))
test_dataset=XFeiDataset(test_encoding,list(test_label))


from torch.utils.data import Dataset,DataLoader,TensorDataset

train_loader=DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
val_loader=DataLoader(val_dataset,batch_size=batch_size,shuffle=False)
test_loader=DataLoader(test_dataset,batch_size=batch_size,shuffle=False)  # never shuffle the test data (learned that the hard way)
#for i in val_loader:
#    print(i)  # each batch is a 5-item dict: input_ids, attention_mask, token_type_ids, plus pad_labels and mask
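One batch can be inspected to confirm what the loaders yield (shapes below are illustrative; the sequence length depends on the tokenizer output):

batch = next(iter(val_loader))
print({k: tuple(v.shape) for k, v in batch.items()})
# e.g. {'input_ids': (16, 54), 'attention_mask': (16, 54), 'token_type_ids': (16, 54),
#       'pad_labels': (16, 54), 'mask': (16, 54)}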
from transformers import BertModel
from torch.nn.utils.rnn import pad_sequence
# Build the BERT-based model
from transformers import BertConfig
import torch.nn as nn
from torch.nn import LSTM
from torch.nn import functional as F
from torchcrf import CRF

num_labels=31
dropout=0.1
# dropout=0.1: after epoch 1, precision 0.68 | recall 0.72 | f1 0.70 | acc 0.93
# dropout=0.2 gives a larger training loss: after epoch 1, precision 0.50 | recall 0.60 | f1 0.54 | acc 0.90

class Bert_LSTM(nn.Module):
    def __init__(self):
        super(Bert_LSTM, self).__init__()
        self.num_labels = num_labels
        self.dropout = nn.Dropout(dropout)
        self.bert = BertModel.from_pretrained(config.roberta_model)
        for param in self.bert.parameters():
            param.requires_grad = True  # fine-tune all BERT parameters
        self.classifier = nn.Linear(1024, self.num_labels)
        self.crf = CRF(num_labels, batch_first=True)

        self.bilstm = nn.LSTM(
            input_size=1024,
            hidden_size=512,
            batch_first=True,
            num_layers=2,
            dropout=0.5,
            bidirectional=True)

    def forward(self, batch_seqs, batch_seq_masks, batch_seq_segments, pad_labels, mask):

        output = self.bert(input_ids=batch_seqs, attention_mask=batch_seq_masks, token_type_ids=batch_seq_segments)
        # pooler_output = output.pooler_output
        last_hidden_state = output.last_hidden_state
        last_hidden_state = self.dropout(last_hidden_state)
        # Only output.last_hidden_state works here; unpacking as
        # sequence_output, pooler_output = self.bert(**kwargs) fails with
        # "str object has no attribute ..." (likely a transformers version issue).
        # When in doubt, print the output object to inspect its fields.

        lstm_output, (hn, cn) = self.bilstm(last_hidden_state)
        # lstm_output holds the hidden state of every time step; hn/cn are the last hidden/cell states
        lstm_output = self.dropout(lstm_output)

        # Emission scores for each token
        logits = self.classifier(lstm_output)
        # Drop the [CLS] position: the first mask value must not be False, otherwise the CRF raises an error
        logits, pad_labels, mask = logits[:, 1:, :], pad_labels[:, 1:], mask[:, 1:]
        loss = self.crf(logits, pad_labels, mask) * (-1)

        return logits, loss
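The CRF layer comes from the pytorch-crf package. As a standalone reminder of its interface (made-up shapes, unrelated to the model above): the forward call returns the log-likelihood of the given tag sequence, which is negated to get a loss, and decode returns the best tag path for each sentence.

import torch
from torchcrf import CRF

demo_crf = CRF(num_tags=5, batch_first=True)
emissions = torch.randn(2, 7, 5)                # (batch, seq_len, num_tags)
tags = torch.randint(0, 5, (2, 7))
demo_mask = torch.ones(2, 7, dtype=torch.bool)  # the first timestep must be unmasked
nll = -demo_crf(emissions, tags, mask=demo_mask)          # negative log-likelihood, summed over the batch
best_paths = demo_crf.decode(emissions, mask=demo_mask)   # list of 2 best tag sequences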
# Instantiate the model (optionally load a saved checkpoint)
model=Bert_LSTM()
#model.load_state_dict(torch.load("/content/drive/MyDrive/chinese task/CLUENER2020/model/bert_lstm_crf_model"))
device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Optimizer setup
epoch=10
lr=3e-5

from transformers import AdamW,get_scheduler

train_steps_per_epoch=len(train_loader)
num_training_steps=train_steps_per_epoch*epoch

# Collect the parameters of each sub-module
bert_parameters=list(model.bert.named_parameters())
lstm_parameters=list(model.bilstm.named_parameters())
classifier_parameters=list(model.classifier.named_parameters())
no_decay=['bias','LayerNorm.weight']

# Separate learning rates for BERT, the LSTM and the linear classifier; the latter two use 3x BERT's rate
optimizer_grouped_parameters = [
    {'params': [p for n, p in bert_parameters if not any(nd in n for nd in no_decay)],
     'lr': lr, 'weight_decay': 0.01},
    {'params': [p for n, p in bert_parameters if any(nd in n for nd in no_decay)],
     'lr': lr, 'weight_decay': 0.0},
    {'params': [p for n, p in lstm_parameters if not any(nd in n for nd in no_decay)],
     'lr': lr*3, 'weight_decay': 0.01},
    {'params': [p for n, p in lstm_parameters if any(nd in n for nd in no_decay)],
     'lr': lr*3, 'weight_decay': 0.0},
    {'params': [p for n, p in classifier_parameters if not any(nd in n for nd in no_decay)],
     'lr': lr*3, 'weight_decay': 0.01},
    {'params': [p for n, p in classifier_parameters if any(nd in n for nd in no_decay)],
     'lr': lr*3, 'weight_decay': 0.0}]

optimizer=AdamW(optimizer_grouped_parameters,lr=lr,eps=1e-8)
# Linearly decaying learning rate schedule
lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps)
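A quick check that the grouping worked (illustrative): the optimizer should now hold six parameter groups, with the LSTM and classifier groups at three times BERT's learning rate.

for i, group in enumerate(optimizer.param_groups):
    print(i, group['lr'], group['weight_decay'])
# expected: groups 0-1 at 3e-5, groups 2-5 at 9e-5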
# Pad the crf.decode predictions with 0 and convert them to a tensor
def pad_result(data, pad_labels):
    pad_pred = []
    max_len = pad_labels.shape[1]
    for pred in data:
        pad_length = max_len - len(pred)

        label_ids = pred + [0]*pad_length
        pad_pred.append(label_ids)

    return pad_pred
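For instance (made-up inputs): crf.decode returns variable-length lists, and pad_result right-pads them with 0 so they can be stacked into a single tensor.

fake_labels = torch.zeros(2, 5)  # stands in for pad_labels[:, 1:]
print(pad_result([[7, 17, 17], [3, 13, 0, 0, 0]], fake_labels))
# [[7, 17, 17, 0, 0], [3, 13, 0, 0, 0]]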

# pred=torch.tensor(pred) must stay outside the function, otherwise it fails because tensors have no append

# Evaluation metric
from datasets import load_metric
metric=load_metric("seqeval")
import numpy as np

label_list= [label for label,id in list(config.label2id.items())]

def compute_metrics(y_pred, y_true):
    predictions, labels = y_pred, y_true

    # Drop positions at special/padding tokens (label == -1) so they are not compared
    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -1]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -1]
        for prediction, label in zip(predictions, labels)
    ]

    results = metric.compute(predictions=true_predictions, references=true_labels)
    return results
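seqeval computes entity-level metrics from lists of label strings, one list per sentence. A tiny illustration with hypothetical tags:

example_preds = [["B-name", "I-name", "O", "B-address"]]
example_refs  = [["B-name", "I-name", "O", "O"]]
print(metric.compute(predictions=example_preds, references=example_refs))
# per-type and overall precision/recall/f1, computed at the entity level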

# Training and evaluation loop
import time
import numpy as np
from sklearn.metrics import f1_score,precision_score,recall_score,accuracy_score
from torch.nn import functional as F
from torchcrf import CRF
# Progress bar
from tqdm.auto import tqdm

num_training_steps=train_steps_per_epoch*epoch

progress_bar=tqdm(range(num_training_steps))

def train_and_eval(epoch):
    best_f1 = 0  # best validation f1 so far; only the best checkpoint is saved
    for i in range(epoch):
        # ---- training ----
        start = time.time()
        model.train()
        print("***** Running training epoch {} *****".format(i+1))
        train_loss_sum = 0.0
        for idx, batch in enumerate(train_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            token_type_ids = batch['token_type_ids'].to(device)
            pad_labels = batch['pad_labels'].to(device)
            mask = batch['mask'].to(device)

            # Forward pass: compute logits and loss
            logits, loss = model(input_ids, attention_mask, token_type_ids, pad_labels, mask)
            loss.backward()

            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)

            train_loss_sum += loss.item()
            if (idx+1) % (len(train_loader)//5) == 0:  # print only five times per epoch
                print("Epoch {:03d} | Step {:04d}/{:04d} | Loss {:.4f} | Time {:.4f} | Learning rate = {} \n".format(
                    i+1, idx+1, len(train_loader), train_loss_sum/(idx+1), time.time()-start, optimizer.state_dict()['param_groups'][0]['lr']))

        # ---- validation ----
        model.eval()
        y_pred, y_true = [], []
        total_eval_loss = 0
        total_eval_accuracy, total, acc = 0, 0, 0

        for batch in val_loader:
            with torch.no_grad():  # no gradients needed in this block

                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                token_type_ids = batch['token_type_ids'].to(device)
                pad_labels = batch['pad_labels'].to(device)
                mask = batch['mask'].to(device)
                logits, loss = model(input_ids, attention_mask, token_type_ids, pad_labels, mask)  # already excludes the first ([CLS]) position

                total_eval_loss += loss.item()

                pad_labels, mask = pad_labels[:, 1:].to(device), mask[:, 1:].to(device)
                pred = model.crf.decode(logits, mask)
                pred = torch.tensor(pad_result(pred, pad_labels)).to(device)  # pad predictions to batch_size x (max_length-1), then convert to a tensor
                acc += (pred[mask] == pad_labels[mask]).sum().item()  # count accuracy only on unmasked tokens; including masked positions inflates it
                total += mask.sum().item()
                total_eval_accuracy = acc/total

                y_pred.extend(pred.cpu().numpy().tolist())  # append every batch's results to the overall lists
                y_true.extend(pad_labels.cpu().numpy().tolist())
        results = compute_metrics(y_pred, y_true)
        f1 = results["overall_f1"]

        if f1 > best_f1:
            best_f1 = f1
            torch.save(model.state_dict(), "./bert_lstm_crf/blf_model")

        print("precision {:.2f}|recall {:.2f}|f1 {:.4f}|acc {:.2f}".format(results["overall_precision"], results["overall_recall"], results["overall_f1"], results["overall_accuracy"]))
        print("Average val loss:%.2f" % (total_eval_loss), "sklearn_acc:%.2f" % (total_eval_accuracy))
        print("time costed={}s \n".format(round(time.time()-start, 5)))
        print("-------------------------------")

Compare this against BERT's plain token-classification head.

train_and_eval(epoch)
"""
预测值不包括实际值会报错
UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))"""
***** Running training epoch 1 *****


/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:13: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  del sys.path[0]


Epoch 001 | Step 0134/0672 | Loss 850.4533 | Time 160.0524 | Learning rate = 2.9401785714285713e-05 

Epoch 001 | Step 0268/0672 | Loss 755.6008 | Time 321.1079 | Learning rate = 2.880357142857143e-05 

Epoch 001 | Step 0402/0672 | Loss 693.5408 | Time 482.2713 | Learning rate = 2.8205357142857143e-05 

Epoch 001 | Step 0536/0672 | Loss 626.1115 | Time 643.2802 | Learning rate = 2.7607142857142855e-05 

Epoch 001 | Step 0670/0672 | Loss 554.6280 | Time 804.1686 | Learning rate = 2.7008928571428574e-05 

precision 0.55|recall 0.65|f1 0.59|acc 0.90
Average val loss:16898.68 sklearn_acc:0.90
time costed=847.54825s 

-------------------------------
***** Running training epoch 2 *****
Epoch 002 | Step 0134/0672 | Loss 189.6032 | Time 161.3824 | Learning rate = 2.6401785714285714e-05 

Epoch 002 | Step 0268/0672 | Loss 174.3528 | Time 321.8987 | Learning rate = 2.580357142857143e-05 

Epoch 002 | Step 0402/0672 | Loss 166.6937 | Time 482.3897 | Learning rate = 2.5205357142857145e-05 

Epoch 002 | Step 0536/0672 | Loss 160.9221 | Time 643.1670 | Learning rate = 2.4607142857142857e-05 

Epoch 002 | Step 0670/0672 | Loss 155.2287 | Time 803.8956 | Learning rate = 2.4008928571428572e-05 

precision 0.70|recall 0.78|f1 0.73|acc 0.94
Average val loss:10865.32 sklearn_acc:0.94
time costed=846.85732s 

-------------------------------
***** Running training epoch 3 *****
Epoch 003 | Step 0134/0672 | Loss 104.7770 | Time 161.5656 | Learning rate = 2.3401785714285716e-05 

Epoch 003 | Step 0268/0672 | Loss 104.1795 | Time 322.5888 | Learning rate = 2.2803571428571428e-05 

Epoch 003 | Step 0402/0672 | Loss 102.1001 | Time 483.3302 | Learning rate = 2.2205357142857143e-05 

Epoch 003 | Step 0536/0672 | Loss 100.6387 | Time 644.2039 | Learning rate = 2.1607142857142858e-05 

Epoch 003 | Step 0670/0672 | Loss 100.2379 | Time 804.9622 | Learning rate = 2.100892857142857e-05 

precision 0.71|recall 0.79|f1 0.75|acc 0.94
Average val loss:10455.55 sklearn_acc:0.94
time costed=847.52307s 

-------------------------------
***** Running training epoch 4 *****
Epoch 004 | Step 0134/0672 | Loss 65.6758 | Time 161.4387 | Learning rate = 2.0401785714285714e-05 

Epoch 004 | Step 0268/0672 | Loss 70.1131 | Time 321.9805 | Learning rate = 1.980357142857143e-05 

Epoch 004 | Step 0402/0672 | Loss 74.5657 | Time 482.5696 | Learning rate = 1.920535714285714e-05 

Epoch 004 | Step 0536/0672 | Loss 77.0551 | Time 643.0697 | Learning rate = 1.860714285714286e-05 

Epoch 004 | Step 0670/0672 | Loss 77.2134 | Time 803.5144 | Learning rate = 1.800892857142857e-05 

precision 0.72|recall 0.79|f1 0.75|acc 0.94
Average val loss:10545.26 sklearn_acc:0.94
time costed=846.01025s 

-------------------------------
***** Running training epoch 5 *****
Epoch 005 | Step 0134/0672 | Loss 62.8429 | Time 161.6965 | Learning rate = 1.7401785714285716e-05 

Epoch 005 | Step 0268/0672 | Loss 61.3453 | Time 322.8604 | Learning rate = 1.680357142857143e-05 

Epoch 005 | Step 0402/0672 | Loss 59.3090 | Time 484.0892 | Learning rate = 1.6205357142857143e-05 

Epoch 005 | Step 0536/0672 | Loss 58.5966 | Time 645.2598 | Learning rate = 1.5607142857142858e-05 

Epoch 005 | Step 0670/0672 | Loss 57.8091 | Time 806.5770 | Learning rate = 1.5008928571428572e-05 

precision 0.73|recall 0.81|f1 0.76|acc 0.94
Average val loss:11918.01 sklearn_acc:0.94
time costed=849.09279s 

-------------------------------
***** Running training epoch 6 *****
Epoch 006 | Step 0134/0672 | Loss 40.8853 | Time 161.7673 | Learning rate = 1.4401785714285716e-05 

Epoch 006 | Step 0268/0672 | Loss 40.4048 | Time 322.7583 | Learning rate = 1.3803571428571427e-05 

Epoch 006 | Step 0402/0672 | Loss 40.0783 | Time 483.8099 | Learning rate = 1.3205357142857143e-05 

Epoch 006 | Step 0536/0672 | Loss 39.7375 | Time 644.7180 | Learning rate = 1.2607142857142858e-05 

Epoch 006 | Step 0670/0672 | Loss 40.0972 | Time 805.9105 | Learning rate = 1.2008928571428573e-05 

precision 0.74|recall 0.81|f1 0.78|acc 0.94
Average val loss:11576.13 sklearn_acc:0.94
time costed=848.65729s 

-------------------------------
***** Running training epoch 7 *****
Epoch 007 | Step 0134/0672 | Loss 28.1865 | Time 161.6997 | Learning rate = 1.1401785714285714e-05 

Epoch 007 | Step 0268/0672 | Loss 29.4536 | Time 322.6295 | Learning rate = 1.0803571428571429e-05 

Epoch 007 | Step 0402/0672 | Loss 29.7340 | Time 483.5822 | Learning rate = 1.0205357142857144e-05 

Epoch 007 | Step 0536/0672 | Loss 30.0218 | Time 644.7393 | Learning rate = 9.607142857142856e-06 

Epoch 007 | Step 0670/0672 | Loss 29.1320 | Time 805.7908 | Learning rate = 9.008928571428571e-06 

precision 0.75|recall 0.80|f1 0.78|acc 0.94
Average val loss:13548.30 sklearn_acc:0.94
time costed=848.7225s 

-------------------------------
***** Running training epoch 8 *****
Epoch 008 | Step 0134/0672 | Loss 19.5674 | Time 162.0373 | Learning rate = 8.401785714285715e-06 

Epoch 008 | Step 0268/0672 | Loss 20.5346 | Time 323.2465 | Learning rate = 7.803571428571429e-06 

Epoch 008 | Step 0402/0672 | Loss 20.5707 | Time 484.4724 | Learning rate = 7.205357142857143e-06 

Epoch 008 | Step 0536/0672 | Loss 21.3377 | Time 645.7742 | Learning rate = 6.607142857142857e-06 

Epoch 008 | Step 0670/0672 | Loss 21.3365 | Time 806.9678 | Learning rate = 6.008928571428572e-06 

precision 0.75|recall 0.80|f1 0.78|acc 0.94
Average val loss:14625.68 sklearn_acc:0.94
time costed=850.00101s 

-------------------------------
***** Running training epoch 9 *****
Epoch 009 | Step 0134/0672 | Loss 15.8689 | Time 162.2782 | Learning rate = 5.4017857142857145e-06 

Epoch 009 | Step 0268/0672 | Loss 15.7494 | Time 323.9999 | Learning rate = 4.803571428571428e-06 

Epoch 009 | Step 0402/0672 | Loss 16.1067 | Time 485.5929 | Learning rate = 4.205357142857143e-06 

Epoch 009 | Step 0536/0672 | Loss 15.9825 | Time 647.0968 | Learning rate = 3.6071428571428573e-06 

Epoch 009 | Step 0670/0672 | Loss 15.6784 | Time 808.7016 | Learning rate = 3.0089285714285717e-06 

precision 0.74|recall 0.81|f1 0.77|acc 0.94
Average val loss:15238.61 sklearn_acc:0.94
time costed=851.86068s 

-------------------------------
***** Running training epoch 10 *****
Epoch 010 | Step 0134/0672 | Loss 12.5352 | Time 162.6241 | Learning rate = 2.401785714285714e-06 

Epoch 010 | Step 0268/0672 | Loss 12.0666 | Time 327.8486 | Learning rate = 1.8035714285714286e-06 

Epoch 010 | Step 0402/0672 | Loss 12.0815 | Time 492.9378 | Learning rate = 1.205357142857143e-06 

Epoch 010 | Step 0536/0672 | Loss 12.2230 | Time 658.1718 | Learning rate = 6.071428571428572e-07 

Epoch 010 | Step 0670/0672 | Loss 11.8025 | Time 823.4604 | Learning rate = 8.92857142857143e-09 

precision 0.75|recall 0.80|f1 0.77|acc 0.94
Average val loss:15777.25 sklearn_acc:0.94
time costed=868.35004s 

-------------------------------





#torch.save(model.state_dict(),"./bert_lstm_crf/finall_blf_model")
model.load_state_dict(torch.load("./bert_lstm_crf/blf_model"))
model.to(device)
# Prediction function
def predict(model, data_loader):  # note: naming this parameter `data` caused errors when predicting with a reloaded checkpoint
    model.eval()
    y_pred, y_true, predictions = [], [], []  # padded predictions, true labels, and un-padded prediction lists
    for batch in data_loader:
        with torch.no_grad():  # no gradients needed in this block

            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            token_type_ids = batch['token_type_ids'].to(device)
            pad_labels = batch['pad_labels'].to(device)  # labels padded with -1, used to compute logits and loss
            mask = batch['mask'].to(device)  # mask matrix, used for logits/loss and for CRF decoding
            logits, loss = model(input_ids, attention_mask, token_type_ids, pad_labels, mask)

            pad_labels, mask = pad_labels[:, 1:].to(device), mask[:, 1:].to(device)
            prediction = model.crf.decode(logits, mask)  # decoded predictions for the real tokens only

            # The predictions are plain lists; pad them before adding to the overall list used by compute_metrics
            pad_pred = torch.tensor(pad_result(prediction, pad_labels)).to(device)
            y_pred.extend(pad_pred.cpu().numpy().tolist())
            y_true.extend(pad_labels.cpu().numpy().tolist())  # pad_labels hold the true labels

            predictions.extend(prediction)  # un-padded predictions, used for the final submission

    return y_pred, y_true, predictions
# Predict on the validation set and inspect per-entity-type metrics
y_pred,y_true,predictions=predict(model,val_loader)
results=compute_metrics(y_pred,y_true)
# Sort the results for inspection
result_df=pd.DataFrame(results)
result_df.stack().unstack(0).sort_values(by=['f1'])
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:13: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  del sys.path[0]
precision recall f1 number
address 0.555288 0.619303 0.585551 373.000000
scene 0.697115 0.693780 0.695444 209.000000
overall_precision 0.747256 0.747256 0.747256 0.747256
organization 0.721805 0.784741 0.751958 367.000000
overall_f1 0.771725 0.771725 0.771725 0.771725
government 0.739130 0.825911 0.780115 247.000000
position 0.774554 0.801386 0.787741 433.000000
book 0.775000 0.805195 0.789809 154.000000
company 0.759124 0.825397 0.790875 378.000000
overall_recall 0.797852 0.797852 0.797852 0.797852
movie 0.807947 0.807947 0.807947 151.000000
game 0.791925 0.864407 0.826580 295.000000
name 0.865031 0.909677 0.886792 465.000000
overall_accuracy 0.938997 0.938997 0.938997 0.938997
# Show the validation set's true labels next to the predictions
val_df['preds']=pd.Series(predictions)
val_df.to_csv('./bert_lstm_crf/val_1221.csv')
val_df=val_df.drop(["pad_labels"],axis=1)
val_df
words labels preds
0 [彭, 小, 军, 认, 为, ,, 国, 内, 银, 行, 现, 在, 走, 的, 是, ... [7, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... [7, 17, 17, 0, 0, 0, 3, 13, 13, 13, 0, 0, 0, 0...
1 [温, 格, 的, 球, 队, 终, 于, 又, 踢, 了, 一, 场, 经, 典, 的, ... [7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... [7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
2 [突, 袭, 黑, 暗, 雅, 典, 娜, 》, 中, R, i, d, d, i, c, ... [4, 14, 14, 14, 14, 14, 14, 14, 0, 7, 17, 17, ... [6, 16, 16, 16, 16, 16, 16, 16, 0, 7, 17, 17, ...
3 [郑, 阿, 姨, 就, 赶, 到, 文, 汇, 路, 排, 队, 拿, 钱, ,, 希, ... [0, 0, 0, 0, 0, 0, 1, 11, 11, 0, 0, 0, 0, 0, 0... [0, 0, 0, 0, 0, 0, 1, 11, 11, 0, 0, 0, 0, 0, 0...
4 [我, 想, 站, 在, 雪, 山, 脚, 下, 你, 会, 被, 那, 巍, 峨, 的, ... [0, 0, 0, 0, 10, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
... ... ... ...
1338 [在, 这, 个, 非, 常, 喜, 庆, 的, 日, 子, 里, ,, 我, 们, 首, ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
1339 [姜, 哲, 中, :, 公, 共, 之, 敌, 1, -, 1, 》, 、, 《, 神, ... [6, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16... [6, 17, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16...
1340 [目, 前, ,, 日, 本, 松, 山, 海, 上, 保, 安, 部, 正, 在, 就, ... [0, 0, 0, 5, 15, 15, 15, 15, 15, 15, 15, 15, 0... [0, 0, 0, 5, 15, 15, 15, 15, 15, 15, 15, 15, 0...
1341 [也, 就, 是, 说, 英, 国, 人, 在, 世, 博, 会, 上, 的, 英, 国, ... [0, 0, 0, 0, 0, 0, 0, 0, 10, 20, 20, 0, 0, 0, ... [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 20...
1342 [另, 外, 意, 大, 利, 的, P, l, a, y, G, e, n, e, r, ... [0, 0, 0, 0, 0, 0, 2, 12, 12, 12, 12, 12, 12, ... [0, 0, 1, 11, 11, 0, 2, 12, 12, 12, 12, 12, 12...

1343 rows × 3 columns

Having compared the model's validation-set predictions with the original labels above, generate predictions for the test set.

y_pred,y_true,predictions=predict(model,test_loader)
pd.DataFrame({'label':predictions}).to_csv('./bert_lstm_crf/submit1222.csv',index=None)
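The submission above stores raw id sequences. If label strings are preferred for inspection, the ids can be mapped back through the inverse of config.label2id (a hypothetical post-processing step, not part of the original notebook):

id2label = {v: k for k, v in config.label2id.items()}
pred_tags = [[id2label[i] for i in seq] for seq in predictions]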
Author: zhxnlp
Link: https://zhxnlp.github.io/2021/11/15/CLUENER 细粒度命名实体识别/bert_lstm_crf/
Copyright: Unless otherwise stated, all posts on this blog are licensed under CC BY-NC-SA 4.0. Please credit zhxnlpのBlog when reposting.