from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
import os
# Load the preprocessed npz dataset
The data is loaded from the npz file into pandas, with the string labels replaced by numeric ids so they can be fed to the model.
# Convert to DataFrame format
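A minimal sketch of this step, assuming the npz file stores object arrays of the character and label sequences; the file path and key names are placeholders:

```python
import numpy as np
import pandas as pd

# placeholder path and keys; adjust to the actual preprocessed file
data = np.load("/content/drive/MyDrive/cluener.npz", allow_pickle=True)
df = pd.DataFrame({"words": data["words"].tolist(),
                   "labels0": data["labels0"].tolist()})
df
```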
| | words | labels0 |
|---|---|---|
| 0 | [彭, 小, 军, 认, 为, ,, 国, 内, 银, 行, 现, 在, 走, 的, 是, ... | [7, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... |
| 1 | [温, 格, 的, 球, 队, 终, 于, 又, 踢, 了, 一, 场, 经, 典, 的, ... | [7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... |
| 2 | [突, 袭, 黑, 暗, 雅, 典, 娜, 》, 中, R, i, d, d, i, c, ... | [4, 14, 14, 14, 14, 14, 14, 14, 0, 7, 17, 17, ... |
| 3 | [郑, 阿, 姨, 就, 赶, 到, 文, 汇, 路, 排, 队, 拿, 钱, ,, 希, ... | [0, 0, 0, 0, 0, 0, 1, 11, 11, 0, 0, 0, 0, 0, 0... |
| 4 | [我, 想, 站, 在, 雪, 山, 脚, 下, 你, 会, 被, 那, 巍, 峨, 的, ... | [0, 0, 0, 0, 10, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0... |
| ... | ... | ... |
| 1338 | [在, 这, 个, 非, 常, 喜, 庆, 的, 日, 子, 里, ,, 我, 们, 首, ... | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... |
| 1339 | [姜, 哲, 中, :, 公, 共, 之, 敌, 1, -, 1, 》, 、, 《, 神, ... | [6, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16... |
| 1340 | [目, 前, ,, 日, 本, 松, 山, 海, 上, 保, 安, 部, 正, 在, 就, ... | [0, 0, 0, 5, 15, 15, 15, 15, 15, 15, 15, 15, 0... |
| 1341 | [也, 就, 是, 说, 英, 国, 人, 在, 世, 博, 会, 上, 的, 英, 国, ... | [0, 0, 0, 0, 0, 0, 0, 0, 10, 20, 20, 0, 0, 0, ... |
| 1342 | [另, 外, 意, 大, 利, 的, P, l, a, y, G, e, n, e, r, ... | [0, 0, 0, 0, 0, 0, 2, 12, 12, 12, 12, 12, 12, ... |

1343 rows × 2 columns
word_ids maps each subtoken position to the index of the word it came from, with special tokens mapped to None. With this list we can align the subtokens with the original words and their labels, filling the label at the [CLS] and [SEP] positions with -100. -100 is the default ignore_index of PyTorch's cross-entropy loss, so those positions are excluded from the loss.
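A sketch of that alignment, following the standard HuggingFace token-classification recipe; the function name and the use of the words/labels0 columns are assumptions, and a fast tokenizer loaded earlier is assumed to be in scope:

```python
def tokenize_and_align_labels(examples):
    tokenized = tokenizer(examples["words"], truncation=True, is_split_into_words=True)
    all_labels = []
    for i, word_labels in enumerate(examples["labels0"]):
        word_ids = tokenized.word_ids(batch_index=i)  # subtoken -> word index, None for specials
        aligned = [
            -100 if word_id is None else word_labels[word_id]  # -100 at [CLS]/[SEP]
            for word_id in word_ids
        ]
        all_labels.append(aligned)
    tokenized["labels"] = all_labels
    return tokenized
```

Applied with `Dataset.from_pandas(df).map(tokenize_and_align_labels, batched=True)`, this would produce a tokenized dataset like the `tokenized_val_ds` used for prediction below.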
""" |
from datasets import Dataset
# Load the model
A walk-through of the BertForTokenClassification task head (explanatory code, no need to run it).
When computing the loss, the head ignores the padding positions (i.e. it only counts positions where attention_mask == 1). The steps are:
sequence_output = outputs[0]  # first element of the BERT output: the hidden vector of each token
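The remaining steps, paraphrasing the head's forward method as it appears in older transformers releases; like the line above, this is explanatory and not meant to run standalone:

```python
sequence_output = self.dropout(sequence_output)
logits = self.classifier(sequence_output)            # (batch, seq_len, num_labels)
if labels is not None:
    loss_fct = CrossEntropyLoss()
    if attention_mask is not None:
        active_loss = attention_mask.view(-1) == 1   # True only for real tokens
        active_logits = logits.view(-1, self.num_labels)
        # padded positions are overwritten with ignore_index (-100),
        # so they contribute nothing to the loss
        active_labels = torch.where(
            active_loss, labels.view(-1),
            torch.tensor(loss_fct.ignore_index).type_as(labels),
        )
        loss = loss_fct(active_logits, active_labels)
    else:
        loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
```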
label2id = {
    "O": 0,
    # B- tags take ids 1-10 and I- tags ids 11-20, inferred from the
    # label ids visible in the DataFrame above
    "B-address": 1, "B-book": 2, "B-company": 3, "B-game": 4, "B-government": 5,
    "B-movie": 6, "B-name": 7, "B-organization": 8, "B-position": 9, "B-scene": 10,
    "I-address": 11, "I-book": 12, "I-company": 13, "I-game": 14, "I-government": 15,
    "I-movie": 16, "I-name": 17, "I-organization": 18, "I-position": 19, "I-scene": 20,
}
id2label = {v: k for k, v in label2id.items()}  # reverse map used when decoding predictions
Set up the seqeval evaluation metric. A few points to note (a sketch follows the import below):
- take the index of the highest-probability class as the prediction
- convert the numeric ids back to BIOS-format labels, since seqeval also reports per-category metrics on top of the overall ones; with bare numeric labels it emits a warning at run time (but still runs)
- ignore the positions labeled -100, i.e. the special tokens
- combined with the token-classification head above, this excludes both the padding and the special-token positions from the loss and metric computation
from datasets import load_metric
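A sketch of a compute_metrics function implementing the points above, assuming the id2label mapping defined earlier:

```python
import numpy as np

metric = load_metric("seqeval")

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)      # highest-probability class
    # drop -100 positions (special tokens) and map ids back to BIOS labels
    true_predictions = [
        [id2label[pred] for pred, lab in zip(prediction, label) if lab != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [id2label[lab] for pred, lab in zip(prediction, label) if lab != -100]
        for prediction, label in zip(predictions, labels)
    ]
    results = metric.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }
```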
batch_size = 32
# Train the model
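The training call itself is not shown above. Below is a minimal sketch consistent with the logs that follow (output_dir `bert_softmax`, batch size 32, 8 epochs, one evaluation and checkpoint per epoch); the checkpoint name `bert-base-chinese` and the variable `tokenized_train_ds` are assumptions:

```python
from transformers import (AutoTokenizer, BertForTokenClassification,
                          DataCollatorForTokenClassification, Trainer, TrainingArguments)

model_name = "bert-base-chinese"  # assumption: any Chinese BERT checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = BertForTokenClassification.from_pretrained(
    model_name,
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
)

args = TrainingArguments(
    output_dir="bert_softmax",           # matches the checkpoint paths in the logs
    evaluation_strategy="epoch",         # the logs show one evaluation per epoch
    save_strategy="epoch",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=8,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=tokenized_train_ds,    # hypothetical name for the tokenized train split
    eval_dataset=tokenized_val_ds,
    data_collator=DataCollatorForTokenClassification(tokenizer),
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()
```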
The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: words, labels0, __index_level_0__.
***** Running training *****
Num examples = 10748
Num Epochs = 8
Instantaneous batch size per device = 32
Total train batch size (w. parallel, distributed & accumulation) = 32
Gradient Accumulation steps = 1
Total optimization steps = 2688
[2688/2688 1:34:02, Epoch 8/8]
(A per-epoch metrics table rendered here with columns Epoch | Training Loss | Validation Loss | Precision | Recall | F1 | Accuracy; its values did not survive the export.)
The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: words, labels0.
***** Running Evaluation *****
Num examples = 1343
Batch size = 32
Saving model checkpoint to bert_softmax/checkpoint-336
Configuration saved in bert_softmax/checkpoint-336/config.json
Model weights saved in bert_softmax/checkpoint-336/pytorch_model.bin
tokenizer config file saved in bert_softmax/checkpoint-336/tokenizer_config.json
Special tokens file saved in bert_softmax/checkpoint-336/special_tokens_map.json
(The same evaluation and checkpoint-saving block repeats once per epoch, for checkpoints 672, 1008, 1344, 1680, 2016, 2352, and 2688.)
Training completed. Do not forget to share your model on huggingface.co/models =)
TrainOutput(global_step=2688, training_loss=0.09796489925966376, metrics={'train_runtime': 5645.1208, 'train_samples_per_second': 15.232, 'train_steps_per_second': 0.476, 'total_flos': 8072824637823936.0, 'train_loss': 0.09796489925966376, 'epoch': 8.0})
To get per-category precision/recall/F1, we can feed the predictions back into the same evaluation function:
# Evaluate
import torch
predictions, labels, metrics = trainer.predict(tokenized_val_ds)  # Trainer.predict returns (predictions, label_ids, metrics)
# Sort the per-category results for inspection
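A sketch of how the table below can be produced, assuming the metric and id2label objects from earlier. seqeval returns a nested dict for each entity type plus scalar overall_* entries, which we flatten into a DataFrame and sort by f1:

```python
import numpy as np
import pandas as pd

pred_ids = np.argmax(predictions, axis=2)
true_predictions = [
    [id2label[pred] for pred, lab in zip(p_row, l_row) if lab != -100]
    for p_row, l_row in zip(pred_ids, labels)
]
true_labels = [
    [id2label[lab] for pred, lab in zip(p_row, l_row) if lab != -100]
    for p_row, l_row in zip(pred_ids, labels)
]
results = metric.compute(predictions=true_predictions, references=true_labels)

# per-type entries are dicts; overall_* entries are scalars, so broadcast them
rows = {k: (v if isinstance(v, dict) else
            {"precision": v, "recall": v, "f1": v, "number": v})
        for k, v in results.items()}
pd.DataFrame(rows).T.sort_values("f1")
```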
| | precision | recall | f1 | number |
|---|---|---|---|---|
| address | 0.556627 | 0.619303 | 0.586294 | 373.000000 |
| scene | 0.684211 | 0.746411 | 0.713959 | 209.000000 |
| overall_precision | 0.741374 | 0.741374 | 0.741374 | 0.741374 |
| organization | 0.713592 | 0.801090 | 0.754814 | 367.000000 |
| book | 0.743902 | 0.792208 | 0.767296 | 154.000000 |
| overall_f1 | 0.771585 | 0.771585 | 0.771585 | 0.771585 |
| position | 0.753813 | 0.799076 | 0.775785 | 433.000000 |
| company | 0.752427 | 0.820106 | 0.784810 | 378.000000 |
| government | 0.738516 | 0.846154 | 0.788679 | 247.000000 |
| overall_recall | 0.804362 | 0.804362 | 0.804362 | 0.804362 |
| game | 0.808050 | 0.884746 | 0.844660 | 295.000000 |
| movie | 0.858108 | 0.841060 | 0.849498 | 151.000000 |
| name | 0.848671 | 0.892473 | 0.870021 | 465.000000 |
| overall_accuracy | 0.941882 | 0.941882 | 0.941882 | 0.941882 |
# Predict on the validation set and compare with the labels
The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: words, labels0.
***** Running Prediction *****
Num examples = 1343
Batch size = 32
[42/42 31:38]
| | words | labels0 | preds |
|---|---|---|---|
| 0 | [彭, 小, 军, 认, 为, ,, 国, 内, 银, 行, 现, 在, 走, 的, 是, ... | [7, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... | [0, 7, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0... |
| 1 | [温, 格, 的, 球, 队, 终, 于, 又, 踢, 了, 一, 场, 经, 典, 的, ... | [7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... | [0, 7, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... |
| 2 | [突, 袭, 黑, 暗, 雅, 典, 娜, 》, 中, R, i, d, d, i, c, ... | [4, 14, 14, 14, 14, 14, 14, 14, 0, 7, 17, 17, ... | [0, 4, 14, 14, 14, 14, 14, 14, 14, 0, 7, 17, 1... |
| 3 | [郑, 阿, 姨, 就, 赶, 到, 文, 汇, 路, 排, 队, 拿, 钱, ,, 希, ... | [0, 0, 0, 0, 0, 0, 1, 11, 11, 0, 0, 0, 0, 0, 0... | [0, 0, 0, 0, 0, 0, 0, 1, 11, 11, 0, 0, 0, 0, 0... |
| 4 | [我, 想, 站, 在, 雪, 山, 脚, 下, 你, 会, 被, 那, 巍, 峨, 的, ... | [0, 0, 0, 0, 10, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0... | [0, 0, 0, 0, 0, 10, 20, 0, 0, 0, 0, 0, 0, 0, 0... |
| ... | ... | ... | ... |
| 1338 | [在, 这, 个, 非, 常, 喜, 庆, 的, 日, 子, 里, ,, 我, 们, 首, ... | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... |
| 1339 | [姜, 哲, 中, :, 公, 共, 之, 敌, 1, -, 1, 》, 、, 《, 神, ... | [6, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16... | [0, 7, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16,... |
| 1340 | [目, 前, ,, 日, 本, 松, 山, 海, 上, 保, 安, 部, 正, 在, 就, ... | [0, 0, 0, 5, 15, 15, 15, 15, 15, 15, 15, 15, 0... | [0, 0, 0, 0, 5, 15, 15, 15, 15, 15, 15, 15, 15... |
| 1341 | [也, 就, 是, 说, 英, 国, 人, 在, 世, 博, 会, 上, 的, 英, 国, ... | [0, 0, 0, 0, 0, 0, 0, 0, 10, 20, 20, 0, 0, 0, ... | [0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 18, 18, 0, 0, 0... |
| 1342 | [另, 外, 意, 大, 利, 的, P, l, a, y, G, e, n, e, r, ... | [0, 0, 0, 0, 0, 0, 2, 12, 12, 12, 12, 12, 12, ... | [0, 0, 0, 0, 0, 0, 0, 2, 12, 12, 12, 12, 12, 1... |

1343 rows × 3 columns
# Inspect the raw prediction output (this part does not need to be run).
[[ 0 8 0 ... 0 0 0]
[ 0 5 15 ... 15 15 0]
[ 0 0 0 ... 0 14 0]
...
[ 0 0 0 ... 12 0 12]
[ 0 0 0 ... 16 0 0]
[ 0 8 18 ... 0 0 0]]
0 [29, 12, 1, 1, 42, 1, 1, 42, 1, 1, 23, 14, 4, ...
1 [28, 13, 17, 17, 1, 1, 17, 13, 17, 7, 13, 14, ...
2 [3, 3, 18, 6, 6, 6, 6, 5, 6, 6, 6, 4, 16, 16, ...
3 [5, 13, 22, 45, 39, 45, 10, 10, 24, 40, 10, 14...
4 [32, 20, 14, 20, 14, 20, 14, 14, 41, 33, 20, 2...
...
1340 [28, 43, 12, 24, 3, 31, 4, 31, 31, 24, 43, 32,...
1341 [22, 7, 33, 3, 10, 10, 46, 33, 10, 10, 23, 8, ...
1342 [26, 39, 18, 18, 45, 40, 18, 14, 18, 3, 3, 44,...
1343 [2, 23, 46, 46, 46, 40, 46, 40, 40, 10, 46, 24...
1344 [32, 10, 41, 33, 41, 34, 41, 41, 1, 41, 33, 13...
Length: 1345, dtype: object
# Predict with the trainer and save the results
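A minimal sketch of this final step; the output path is a placeholder and df is assumed to be the validation DataFrame built earlier:

```python
import numpy as np

predictions, label_ids, _ = trainer.predict(tokenized_val_ds)
pred_ids = np.argmax(predictions, axis=2)

# keep the rows as-is; they still include the [CLS]/[SEP]/padding positions
df["preds"] = list(pred_ids)
df.to_pickle("bert_softmax/val_preds.pkl")   # placeholder path
```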