1116 lines
85 KiB
Plaintext
1116 lines
85 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"'C:\\\\Users\\\\Monoid\\\\anaconda3\\\\envs\\\\nn\\\\python.exe'"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 1,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import sys\n",
|
||
|
"sys.executable"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"파이썬 환경 확인.\n",
|
||
|
"envs\\\\nn\\\\python.exe 으로 끝나기를 기대합니다"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from preprocessing import readPreporcssedDataAll\n",
|
||
|
"import torch\n",
|
||
|
"from torch.utils.data import Dataset, DataLoader\n",
|
||
|
"from dataset import make_collate_fn, DatasetArray\n",
|
||
|
"from transformers import BertTokenizer\n",
|
||
|
"import torch.nn as nn\n",
|
||
|
"from read_data import TagIdConverter"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"TAGS_PATH = \"eng_tags.json\"\n",
|
||
|
"DATASET_PATH = \"engpre\""
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"변수 설정"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"tagIdConverter = TagIdConverter(TAGS_PATH)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"PRETAINED_MODEL_NAME = 'bert-base-multilingual-cased'\n",
|
||
|
"tokenizer = BertTokenizer.from_pretrained(PRETAINED_MODEL_NAME)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Tokenizer 로딩"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from transformers import BertModel"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']\n",
|
||
|
"- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
||
|
"- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"PRETAINED_MODEL_NAME = 'bert-base-multilingual-cased'\n",
|
||
|
"bert = BertModel.from_pretrained(PRETAINED_MODEL_NAME)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"버트 로딩"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"class MyModel(nn.Module):\n",
"    \"\"\"Per-token classification head on top of a BERT encoder.\n",
"\n",
"    forward() returns raw per-token scores (logits); softmax is NOT applied\n",
"    there. nn.CrossEntropyLoss expects unnormalized scores as input.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, output_feat: int, bert):\n",
"        \"\"\"\n",
"        output_feat: number of classes scored for every token.\n",
"        bert: encoder module whose output exposes 'last_hidden_state'.\n",
"        \"\"\"\n",
"        super().__init__()\n",
"        self.bert = bert\n",
"        # Take the width from the encoder itself; fall back to 768 (the value\n",
"        # that used to be hard-coded) when the encoder carries no config.\n",
"        hidden_size = getattr(getattr(bert, 'config', None), 'hidden_size', 768)\n",
"        self.dropout = nn.Dropout(p=0.1)\n",
"        # [batch_size, word_size, hidden_size] -> [batch_size, word_size, output_feat]\n",
"        self.lin = nn.Linear(hidden_size, output_feat)\n",
"        # Softmax over dimension 2 (counting from 0). Kept as an attribute but\n",
"        # never called in forward().\n",
"        self.softmax = nn.Softmax(2)\n",
"\n",
"    def forward(self, **kargs):\n",
"        \"\"\"Forward **kargs to the encoder and return per-token logits.\"\"\"\n",
"        emb = self.bert(**kargs)\n",
"        e = self.dropout(emb['last_hidden_state'])\n",
"        w = self.lin(e)\n",
"        return w"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 9,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"MyModel(\n",
|
||
|
" (bert): BertModel(\n",
|
||
|
" (embeddings): BertEmbeddings(\n",
|
||
|
" (word_embeddings): Embedding(119547, 768, padding_idx=0)\n",
|
||
|
" (position_embeddings): Embedding(512, 768)\n",
|
||
|
" (token_type_embeddings): Embedding(2, 768)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (encoder): BertEncoder(\n",
|
||
|
" (layer): ModuleList(\n",
|
||
|
" (0): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (1): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (2): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (3): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (4): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (5): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (6): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (7): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (8): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (9): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (10): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (11): BertLayer(\n",
|
||
|
" (attention): BertAttention(\n",
|
||
|
" (self): BertSelfAttention(\n",
|
||
|
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" (output): BertSelfOutput(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (intermediate): BertIntermediate(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
|
||
|
" )\n",
|
||
|
" (output): BertOutput(\n",
|
||
|
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
|
||
|
" (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (pooler): BertPooler(\n",
|
||
|
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
|
||
|
" (activation): Tanh()\n",
|
||
|
" )\n",
|
||
|
" )\n",
|
||
|
" (dropout): Dropout(p=0.1, inplace=False)\n",
|
||
|
" (lin): Linear(in_features=768, out_features=10, bias=True)\n",
|
||
|
" (softmax): Softmax(dim=2)\n",
|
||
|
")\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"model = MyModel(tagIdConverter.size,bert)\n",
|
||
|
"model.cuda()\n",
|
||
|
"bert.cuda()\n",
|
||
|
"print(model)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"`tagIdConverter.size` 만큼의 종류가 있음"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"bert current device : cuda:0\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"print(\"bert current device :\",bert.device)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 11,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"device(type='cuda')"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 11,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"device = torch.device(\"cuda\")\n",
|
||
|
"device"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 12,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"datasetTrain, datasetDev, datasetTest = readPreporcssedDataAll(DATASET_PATH)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 15,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"my_collate_fn = make_collate_fn(tokenizer, tagIdConverter)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 16,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"BATCH_SIZE = 4\n",
"\n",
"\n",
"def make_loader(examples, shuffle: bool) -> DataLoader:\n",
"    \"\"\"Wrap one data split in a DataLoader that batches with my_collate_fn.\"\"\"\n",
"    return DataLoader(\n",
"        DatasetArray(examples),\n",
"        batch_size=BATCH_SIZE,\n",
"        shuffle=shuffle,\n",
"        collate_fn=my_collate_fn,\n",
"    )\n",
"\n",
"\n",
"# Only the training split is shuffled. Dev/test keep dataset order so that\n",
"# per-batch evaluation results refer to the same examples on every run.\n",
"train_loader = make_loader(datasetTrain, shuffle=True)\n",
"dev_loader = make_loader(datasetDev, shuffle=False)\n",
"test_loader = make_loader(datasetTest, shuffle=False)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 18,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from tqdm import tqdm"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 19,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"100%|████████████████████████████████████████████████████████████████████████| 14041/14041 [00:00<00:00, 468033.78it/s]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"212812/272841 = 0.7799854127495501\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"total_l = 0\n",
|
||
|
"total_o = 0\n",
|
||
|
"\n",
|
||
|
"for item in tqdm(datasetTrain):\n",
|
||
|
" entities = item[\"entity\"]\n",
|
||
|
" l = len(entities)\n",
|
||
|
" o = sum(map(lambda x: 1 if x == \"O\" else 0,entities))\n",
|
||
|
" total_l += l\n",
|
||
|
" total_o += o\n",
|
||
|
"\n",
|
||
|
"print(f\"{total_o}/{total_l} = {total_o/total_l}\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"O 토큰이 전체의 약 78%를 차지함 (212812/272841)."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 20,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from torch.optim import AdamW"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 21,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"optimizer = AdamW(model.parameters(), lr=1.0e-5)\n",
|
||
|
"CELoss = nn.CrossEntropyLoss(ignore_index=tagIdConverter.pad_id)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 22,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from groupby_index import groupby_index"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 23,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"epoch 0 start:\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Epoch 0: 100%|████████████████████████████████████| 3511/3511 [02:21<00:00, 24.82minibatch/s, accuracy=0.954, loss=1.2]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"epoch 1 start:\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Epoch 1: 100%|██████████████████████████████████| 3511/3511 [02:20<00:00, 24.97minibatch/s, accuracy=0.986, loss=0.288]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"epoch 2 start:\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Epoch 2: 100%|██████████████████████████████████| 3511/3511 [02:22<00:00, 24.65minibatch/s, accuracy=0.995, loss=0.192]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"epoch 3 start:\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Epoch 3: 100%|███████████████████████████████████| 3511/3511 [02:25<00:00, 24.20minibatch/s, accuracy=0.99, loss=0.313]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"epoch 4 start:\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Epoch 4: 100%|██████████████████████████████████| 3511/3511 [02:23<00:00, 24.47minibatch/s, accuracy=0.988, loss=0.345]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"TRAIN_EPOCH = 5\n",
|
||
|
"\n",
|
||
|
"result = []\n",
|
||
|
"iteration = 0\n",
|
||
|
"\n",
|
||
|
"t = []\n",
|
||
|
"\n",
|
||
|
"model.zero_grad()\n",
|
||
|
"\n",
|
||
|
"for epoch in range(TRAIN_EPOCH):\n",
|
||
|
" model.train()\n",
|
||
|
" print(f\"epoch {epoch} start:\")\n",
|
||
|
" with tqdm(train_loader, unit=\"minibatch\") as tepoch:\n",
|
||
|
" tepoch.set_description(f\"Epoch {epoch}\")\n",
|
||
|
" \n",
|
||
|
" for batch in groupby_index(tepoch,8):\n",
|
||
|
" corrects = 0\n",
|
||
|
" totals = 0\n",
|
||
|
" losses = 0\n",
|
||
|
" \n",
|
||
|
" optimizer.zero_grad()\n",
|
||
|
" for mini_i,mini_l in batch:\n",
|
||
|
" batch_inputs = {k: v.cuda(device) for k, v in list(mini_i.items())}\n",
|
||
|
" batch_labels = mini_l.cuda(device)\n",
|
||
|
" attention_mask = batch_inputs[\"attention_mask\"]\n",
|
||
|
" \n",
|
||
|
" output = model(**batch_inputs)\n",
|
||
|
" loss = CELoss(output.view(-1, output.size(-1)), batch_labels.view(-1))\n",
|
||
|
" \n",
|
||
|
" prediction = output.view(-1, output.size(-1)).argmax(dim=-1)\n",
|
||
|
" corrects += ((prediction == batch_labels.view(-1)) * attention_mask.view(-1)).sum().item()\n",
|
||
|
" totals += attention_mask.view(-1).sum().item()\n",
|
||
|
" losses += loss.item()\n",
|
||
|
" loss.backward()\n",
|
||
|
"\n",
|
||
|
" optimizer.step()\n",
|
||
|
" accuracy = corrects / totals\n",
|
||
|
" result.append({\"iter\":iteration,\"loss\":losses,\"accuracy\":accuracy})\n",
|
||
|
" tepoch.set_postfix(loss=losses, accuracy= accuracy)\n",
|
||
|
" iteration += 1"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 42,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"del batch_inputs\n",
|
||
|
"del batch_labels\n",
|
||
|
"del loss\n",
|
||
|
"del optimizer"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 43,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"gpu allocated : 1355 MB\n",
|
||
|
"gpu reserved : 1470MB\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"torch.cuda.empty_cache()\n",
|
||
|
"print(f\"gpu allocated : {torch.cuda.memory_allocated() // 1024**2} MB\")\n",
|
||
|
"print(f\"gpu reserved : {torch.cuda.memory_reserved() // 1024 ** 2}MB\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 24,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"%matplotlib inline"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 25,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import numpy as np\n",
|
||
|
"import matplotlib.pyplot as plt"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 26,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAA8bUlEQVR4nO3dd5hU1fnA8e/LLktZqiKCgAoIiooiIqJGg0YUiYnYIpiosSEqsUQssQGW2CJWFFEQ9BchFlQCWAgWUHrvyIIIC0ivC+zu7L6/P87MTtnpW5l9P89zn7nl3HPP3J2dd845954rqooxxhiTjGoVXQBjjDGHLgsixhhjkmZBxBhjTNIsiBhjjEmaBRFjjDFJS6/oAoRTrVo1rVWrVkUXwxhjDhn79+9XVS33ikGlDCK1atUiJyenoothjDGHDBE5UBHHteYsY4wxSbMgYowxJmkWRIwxxiQtZp+IiIwALgW2qOrJ3nX/AY73JmkA7FLVDmH2XQvsBQoAj6p2KpVSG2OMqRTi6VgfCbwOvOdboarX+OZF5EVgd5T9z1fVbckW0BhjTOUVszlLVacAO8JtExEB/gSMLuVyGWOMKWUiMkJEtojIkgjbRUReFZEsEVkkIh1j5VnSPpFzgc2quirCdgW+FpG5ItInWkYi0kdE5ojIHI/HU8JiGWOMCWMk0D3K9kuANt6pD/BmrAxLep9Ib6LXQs5R1Y0i0hiYJCIrvDWbYlR1GDAMIDMzM6nx6Z/8/kk6N+vMxcddnMzuld+kSdCqFbRuDYWFsGIFbNoEv/tdyfNetQrWrfPn5XtEgEhwuk2bYPp0uOIKKCiAvDyoVQu++gratoWWLf3pZs2Cyy6DffsgM9PltX+/Sx+Y7+LFsHMnnHeeW96wAebNgz/8weWfkeG2Z2ZC9epu36+/dueiSROoU8edj5Ur3Tm59FK3nJHhXrduhZ9+gjVroGlT6NYNqnl/P33zjcujRQv46CM4+2yYMwd69YJ0779HYSF4PC6/ggIYNgwuuQSOPhpGjIC//AVq1oQDB9w+1av7z+F778FVV7kyi7j3Di7tzp3w3/9Cnz6wezd8/jlcf71Ll5Pj9q9Tx3+e3nwTXnkF/vpXd8xhw1w+2dnuXDz5JPz4I6SlwfLlcPXV7j3VrQudOkGzZu691akD7du7czB+vDvuKafAnXfCwIFw8CD06OH+Hjt2wIwZsHChO0crV0K/fvD9966c/fu79zBnjjsfGRnw29/Cu+/Crl3uczJ2LEyb5tJfdx00aODKM2QIfPstNG4MXbrAli0wYAA895wr45//DDNnunOxY4crkwg0bAiLFrl169bBE0+4c9C0qfs7HXGEO1d5edCokTvnCxa4srz0kvt8LV3qjpmX5z6zr77q3veYMe7z+Le/wfPPu3Kcey4MGgRZWVC7NuzZ485tz57ufU6YAI8+Cscc48779u2ubCNHwosvuvTXXefWf/SRy//aa915ql4dhg6FJUvgpJNg71544IES/jNHpqpTROTYKEkuA95T94yQGSLSQESaquqmaJnGnIBjgSUh69KBzUDzOPMYCPSPJ23t2rU1GbWfrq39v+qf1L5latUq1by8xPa59lrVzEw3v3ev6u7dqu5rpfi0bp1/vwMH3Lrf/MbNd+umOnSo6rx5qtu3hz/Wxo3+vHx8yxMnqv76q+rw4apdu/rXn3GGf37yZP98QYHb//jj3fKnn7rXZ55RnT3bzR92mCvf2rXBx7r9dtUePfzLw4YVf6+PPhq8D6j+8Y+Rz0206bvvYqe55Rb//HnnBW+rVi34vIFqy5aqP/+sevfd/m2NGwfvV7Nm5ON98YXq9df7lw8eVP3gA9WZM5N7jxUxBZ4zmxKfkgTkAnMCpj6q8X2fB2wbD/wmYHky0Clc2qI00TZGOyiuSvR9lH0ygboB89OA7vEcL9kgkvl0pv79y78ntW9C9u1THTzY/4Xp4/G4bYGys91pvvde/7
rvvlO96SbVwsLw+e/f7/9A/fRT/B++996LneaSS9zrPfeorl6tet11xdNcdFHF/yPZ5KYrr6z4MthUvlOSgBzV5L7PA7ZNCBNETo+aXxwHHA1sAvKBbOBm7/qRQN+QtEcBE73zrYCF3mkp8Eg8b1A1+SBS95919d4v742dMBm7d6v26qV6wgmq7du7U/fpp8FpbrvNrV+7VvWJJ1TXrw/+cGzY4NK1aOGWv/rK1RBCNW3q32fgwIr/UNtk06EyHXVU6eQTWnsszylJpRRE3gJ6ByyvBJpGzS+eg5b3lGwQqfdMPb37i7uT2jemcH/sMWNUP/rIBY0NG4pvP/LI4uvGj1c97rjiHxrf/JNPVtyH16ZDa/rrXyu+DLGmv/zFNVOW5TE6dFBdsUJ1wQL3vzR4cPE0gc2k8Uyqqt98419+7TX//BlnqB5xRPx5tW2revjh8aV94okSfEWVShD5PfAFIEAXYFbM/OI5aHlPyQaRBs820Lsm3pXUvqqq+q9/qS5dGn5buD94t25l+89hU/lMZ55Z8WWIZxo9WvXBB/3LkT6Xvikjw+1z882x887J8c8HfmGCalpaYuWcN8/f3+XTu7d/+333ub6tkpyLvDzV3Nzg/sBACxeqvvSSP/24ceHzCdc/1b27P5+xY/19ibfeqvrWW/5tdepEL+PVV/vTHjyoWqNG+HS/+517/c1vEvq6Kv4VFTuIhGtZAvr6WpW8wWMIsBpYHKs/RL2fxEo3JRtEGj7bUPtN6JfUvpqf705HvXpueckS1Z073fzrr5fsA29T8tPJJ0feNmlS7P1nzIidprAw8rZdu9xro0aqtWu7+TZtVB97rHjawC/K0On77/3zoR304aaCguDlgQPdZzGwrKrhj9+9u3tt186lCe3ofvVV1SuuUL3wwuC8fF9yqqpHH+3mFy1yZRk/3i2/9prqaaf597vjDv/8DTe4L8RImjRx6UaNcvn69rviisjnIbB/MPRvFg9f+gULwufz97+rrlnj+hNvuSX+fFVV9+xx3xFDhrhWh88/D877r38tvs8nnwSnOXDArS8oKN7HmqB4ayKlPZX7AeOZkg0ihz93uN454c6k9i36JZae7pZBtWFD1R9+iP0PX9mmdu3KNv9ITQPxfDkmMtWsqbp1q3/Zd5ECqP7zn+7v9OyzwfusWeNe+wdcpRftgoM2bVyakSP96z780D+/f7/qL7+obtumunmzu9DBZ+tW/9Vw4P/chJtmzXKvHTu6dDt3uvMY7sKG0Lx69Qr+gglMs2+fq2n07u2Wd+92P4heftlfG/B9ufn6CyZMiJzX3r1ufvNm99n3KSxUnTvX/yWbkeH2y81Vbd7czUeqFQSerzvvdL/K58/3H3vIkPDNuIMGBQfN4cNdmf7xj+jHCeTbN/B44H4kZma6QFCaJkzwN9+98ELx7bm5/jJceWWpHtqCSMCUbBBp9HwjvX387Ynv+OWX/j9sRoZbV5ZfwmU9rV1bNpdZTpvmmiL27Am//fnn3cUGM2e6ZgDf+gED/PP9+8d/PN+vtMBlcMHK43HbQpspVFWXLXNfpIF8gaFDB9Ubb/Sn9wUjVfeFtWWLm2/Z0m0PzSecwGNv2aL6wAPF34vv8mZfEPHZtat4Debyy4Pzfe65yMeL18GDLsBed52bL0leqq7J6uGH3Tm76iqXx7Zt8e8/bZr/2MuWuXVr1qg+/rgW+4L1/b3eeSfxcq5Z465CXLfOf7wuXRLPJ1E//hi+ZhFYwxw8uFQPaUEkYEo2iDR+obH2/W/fxHcM/AeuWbP4ukNh8jXHBX4hrF2r2qpVYvn07Bl5m09eXvD6Cy5wry++WPzcbtrkXn3t7PPmuWaojz8Of4wlS1Rr1XLBKvTvU1BQvLnB9wPgkkui/419zQg9e7pf1OGCSKD1613giceAAe5XcqB4g4hq8aB8003BefgCZmDeDRvGV7ZYkg0igXJyVOfMSWyfX391xx05Mnj9Rx9psSCyapUL/okEqX
BWr3YBNPR8ljffOZ87t5SzrZggUimfbJgsQSjUwsR2+uWXkEzE3Slb0e68093RG2/a9DB/ymOOcXdwp6f779D2eeQ
|
||
|
"text/plain": [
|
||
|
"<Figure size 432x288 with 2 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {
|
||
|
"needs_background": "light"
|
||
|
},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Training curves: loss in green on the left axis, accuracy in red on the right axis.\n",
"iters = [item[\"iter\"] for item in result]\n",
"fig, ax1 = plt.subplots()\n",
"ax1.plot(iters,[item[\"loss\"] for item in result],'g')\n",
"# Label the primary axes: twinx() hides the x-axis of the twin it creates, so an x label\n",
"# set through pyplot after that call lands on the hidden axis and is never drawn.\n",
"ax1.set_xlabel(\"iter\")\n",
"ax1.set_ylabel(\"loss\", color='g')\n",
"ax2 = ax1.twinx()\n",
"ax2.plot(iters,[item[\"accuracy\"] for item in result],'r')\n",
"ax2.set_ylabel(\"accuracy\", color='r')\n",
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 27,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"100%|█████████████████████████████████████████████████████████████████████████████| 864/864 [00:10<00:00, 83.69batch/s]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Run the model over test_loader without gradient tracking and keep one record per batch\n",
"# in collect_list (loss, accuracy, batch size, predictions, labels, attention mask).\n",
"model.eval()\n",
"collect_list = []\n",
"with torch.no_grad():\n",
"    with tqdm(test_loader, unit=\"batch\") as tepoch:\n",
"        for batch_i,batch_l in tepoch:\n",
"            batch_inputs = {k: v.cuda(device) for k, v in batch_i.items()}\n",
"            batch_labels = batch_l.cuda(device)\n",
"            output = model(**batch_inputs)\n",
"            loss = CELoss(output.view(-1, output.size(-1)), batch_labels.view(-1))\n",
"\n",
"            # Count hits only where attention_mask is set, so the numerator covers exactly\n",
"            # the positions that the denominator counts.\n",
"            mask = batch_inputs[\"attention_mask\"].view(-1).bool()\n",
"            prediction = output.view(-1, output.size(-1)).argmax(dim=-1)\n",
"            correct = (prediction == batch_labels.view(-1))[mask].sum().item()\n",
"            token_count = mask.sum().item()\n",
"            # Stored as a plain Python float rather than a device tensor;\n",
"            # 0.0 when the batch has no unmasked position.\n",
"            accuracy = correct / token_count if token_count else 0.0\n",
"\n",
"            collect_list.append({\"loss\":loss.item(),\"accuracy\":accuracy, \"batch_size\":batch_labels.size(0),\n",
"                                 \"predict\":output.argmax(dim=-1).cpu(),\n",
"                                 \"actual\":batch_labels.cpu(),\n",
"                                 \"attention_mask\":batch_inputs[\"attention_mask\"].cpu()})"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 31,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"def getConfusionMatrix(predict,actual,attention_mask):\n",
"    \"\"\"Count (predicted, actual) tag pairs for one batch.\n",
"\n",
"    Returns a (tagIdConverter.size x tagIdConverter.size) long tensor whose entry\n",
"    [p, a] is the sum of attention_mask over positions predicted as p with label a,\n",
"    so positions whose mask value is 0 add nothing.\n",
"    The three arguments are expected to be tensors of the same shape.\n",
"    \"\"\"\n",
"    ret = torch.zeros((tagIdConverter.size,tagIdConverter.size),dtype=torch.long)\n",
"    weights = attention_mask.reshape(-1).to(torch.long)\n",
"    # accumulate=True adds the weights of repeated (p, a) pairs together, which replaces\n",
"    # the element-by-element Python loop with a single tensor operation.\n",
"    ret.index_put_((predict.reshape(-1), actual.reshape(-1)), weights, accumulate=True)\n",
"    return ret"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 33,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"average_loss : 0.13179491325355058, average_accuracy : 0.9739284515380859, size :3453\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Fold the per-batch records into totals; loss and accuracy are weighted by batch size,\n",
"# and the per-batch confusion matrices are summed into one.\n",
"total_loss = 0\n",
"total_accuracy = 0\n",
"total_size = 0\n",
"num_tags = tagIdConverter.size\n",
"confusion = torch.zeros((num_tags, num_tags), dtype=torch.long)\n",
"\n",
"for item in collect_list:\n",
"    batch_size = item[\"batch_size\"]\n",
"    total_loss += batch_size * item[\"loss\"]\n",
"    total_accuracy += batch_size * item[\"accuracy\"]\n",
"    total_size += batch_size\n",
"    batch_confusion = getConfusionMatrix(item[\"predict\"], item[\"actual\"], item[\"attention_mask\"])\n",
"    confusion += batch_confusion\n",
"average_loss = total_loss / total_size\n",
"average_accuracy = total_accuracy / total_size\n",
"print(f\"average_loss : {average_loss}, average_accuracy : {average_accuracy}, size :{total_size}\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 34,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"tensor([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n",
|
||
|
" [ 0, 1566, 19, 78, 19, 1, 3, 1, 0, 22],\n",
|
||
|
" [ 0, 30, 599, 53, 3, 1, 11, 1, 1, 76],\n",
|
||
|
" [ 0, 48, 32, 1479, 21, 2, 1, 3, 0, 53],\n",
|
||
|
" [ 0, 7, 14, 17, 1557, 0, 6, 1, 2, 27],\n",
|
||
|
" [ 0, 1, 0, 0, 0, 1510, 47, 101, 26, 45],\n",
|
||
|
" [ 0, 2, 8, 0, 0, 39, 583, 54, 4, 298],\n",
|
||
|
" [ 0, 7, 2, 9, 0, 39, 63, 2560, 32, 122],\n",
|
||
|
" [ 0, 0, 0, 1, 8, 11, 38, 29, 3355, 33],\n",
|
||
|
" [ 0, 7, 28, 24, 9, 25, 86, 45, 15, 55141]])"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 34,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"confusion"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 35,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# Drop the last tag id from BOTH axes. A chained slice `m[0:n][0:n]` applies the second\n",
"# slice to the rows again, so it never trims the columns; a single 2-D index does.\n",
"# NOTE(review): anything computed from this trimmed matrix ignores confusions with the\n",
"# dropped tag -- confirm that is intended before reading metrics off it.\n",
"confusion = confusion[0:(tagIdConverter.size - 1), 0:(tagIdConverter.size - 1)]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 36,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAUkAAAEICAYAAADSjgZhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAABO9UlEQVR4nO2ddVxW1x/H319AsKdOVAQ7QDFo7Nmzu2fNmv5Wrjvc5ow5NzvmQqezpptdsxuxZ7eiGBhTQKXO74/nAVGJB3gu5Xn7ui/uc+653++5l8cvpz+ilEKj0Wg0CWOT0QXQaDSazIwOkhqNRpMEOkhqNBpNEuggqdFoNEmgg6RGo9EkgQ6SGo1GkwQ6SGYDxMSvInJbRALSYKeuiJywZtkyChEpKSKhImKb0WXRZG1Ez5PM+ohIXWAu4KqUCsvo8hiNiJwHBiil/snosmiyP7ommT0oBZx/FgKkJYiIXUaXQZN90EEynRGREiKyWERuiMhNEZloTrcRkU9F5IKIXBeRWSLynPlaaRFRItJHRC6KSIiIfGK+1h+YAdQ0Ny+HiUhfEdn2hF8lIuXN5y1E5KiI3BORyyLyrjm9vogExbunkohsEpE7InJERNrEu/abiEwSkRVmO7tFpFwizxxb/pdF5JK5W2CwiPiKyCGz/Ynx8pcTkQ3m9xMiInNEpID52u9ASWCZ+Xnfj2e/v4hcBDbES7MTkUIiEiQirc028orIaRHpndbfp+YZQCmlj3Q6AFvgIPADkAfICdQxX+sHnAbKAnmBxcDv5mulAQX8BOQCqgMPgUrm632BbfH8PPbZnKaA8ubzYKCu+bwg4GU+rw8Emc9zmMvzMWAPNATuYWrSA/wG3AL8ADtgDjAvkeeOLf9U8zM3BR4AfwNFAGfgOvCCOX95oAngADgCW4Af49k7DzROwP4s83vNFS/NzpynKXDV7O8n4M+M/j7oI2scuiaZvvgBxYH3lFJhSqkHSqnYGt9LwFil1FmlVCjwEdDtiabjMKXUfaXUQUzBtnoqyxEJVBaR/Eqp20qpfQnkqYEpWI9USkUopTYAy4Hu8fIsVkoFKKWiMAVJj2T8fm1+5rVAGDBXKXVdKXUZ2Ap4AiilTiul1imlHiqlbgBjgRcseK4vze/1/pMXzD4XAuuBlsArFtjTaHSQTGdKABfMQeVJigMX4n2+gKmGVjRe2tV45+GYglhq6Ai0AC6IyGYRqZlIeS4ppWKeKJNzGspzLd75/QQ+5wUQkSIiMs/cFXAXmA0UTsY2wKVkrk8HqgC/KqVuWmBPo9FBMp25BJRMZGDhCqYBmFhKAlE8HkgsJQzIHftBRIrFv6iU2qOUaoup6fk3sCCR8pQQkfjfkZLA5VSUJ6WMwNRUrqaUyg/0BCTe9cSmZCQ6VcM8FWgapib5kNj+WY0mOXSQTF8CMPUHjhSRPCKSU0Rqm6/NBd4SkTIikhf4FpifSK0zOQ4C7iLiISI5gS9jL4iIvYi8JCLPKaUigbtAdAI2dmMKtu+LSA4RqQ+0BualojwpJR8QCtwREWfgvSeuX8PUd5sSPjb/7AeMAWbpOZQaS9BBMh1RSkVjCjTlgYtAENDVfPkX4HdMgxTnMA1svJ5KPyeBr4B/gFPAtiey9ALOm5uygzHV1J60EQG0AZoDIcBkoLdS6nhqypRChgFewH/ACkyDWPEZAXxqHhV/NzljIuINvI2p/NHAKEy1zg+tWmpNtkRPJtdoNJok0DVJjUajSQIdJDUajSYJdJDUaDSaJNBBUqPRaJLAkI0AChcurEqVKm2EaY1Gkw5cuHCekJAQST5n4tjmL6VU1FOLnxJE3b+xRinVLC3+jMKQIFmqVGm27w40wrRGo0kHavv7pNmGinqAg1s3i/I+2D/BkhVVGYLeUkqj0RiDAJKmymimQAdJjUZjHJL1hz10kNRoNMaRDWqSGRrm165ZTTV3V9zdyvPd6JHafgb40Pazt/308pEwAja2lh2ZGS
M2qfTy8lb3I1WSR+iDKFWmbFl19MQZ9V/YQ1W1ajW17+CRZO+z9Mjq9rPDM2j7Wfc75OXlrdIaByRPUZXT/32LDiAwozfXzXSb7u4JCKBcufKUKVsWe3t7OnftxvJlS7T9dPSh7Wdv++nlI3HE1Ny25MjEZFiQvHLlMi4uJeI+Ozu7cPmy9bYqzOr208OHtp+97aeXjyQRG8uOTIxFpRORZiJywiyeZJXtpRLafUis+Bclq9tPDx/afva2n14+kuRZqEmaNyadhGlfwcpAdxGpnFbHzs4uBAU92m3/8uUgihcvnlaz2cZ+evjQ9rO3/fTykTjyzNQk/YDTZoGqCEw7U7dNq2MfX19Onz7F+XPniIiIYOH8ebRs1Sb5G58R++nhQ9vP3vbTy0eiCNlidNuSeZLOPC6wFAT4P5lJRAYBgwBKlCyZvGM7O34YN5HWLV8kOjqaPn37Udnd3bJSW0BWt58ePrT97G0/vXwkjmT6WqIlJLszuYh0Bl5USg0wf+4F+CmlEpUW8Pb2UXrttkaTdant78PevYFp6iy0yeesHHwGW5T3wabP9yql0r5g3AAsqUkGYZJCjcUFk5KeRqPRJI6QLWqSljzBHqCCWcXPHugGLDW2WBqNJluQDUa3k61JKqWiROQ1YA1gC/yilDpieMk0Gk0WRzL9oIwlWLTBhVJqJbDS4LJoNJrsRjZobutdgDQajTFkgaa0JeggqdFojCMb1CSz/hNoNJrMi5UGbkQkp4gEiMhBETkiIsPM6YVEZJ2InDL/LBjvno/MS6lPiMiL8dK9ReSw+dp4SWadpg6SGo3GIKy6LPEh0FApVR3wAJqJSA3gQ2C9UqoCsN78GfPS6W6AO9AMmGxeYg0wBdPClwrmI0kBMh0kNRqNMVhxWaIyEWr+mMN8KExLpGea02cC7cznbYF5SqmHSqlzwGnAT0ScgPxKqZ3KtJJmVrx7EkQHSY1GYxApqkkWFpHAeMegp6yJ2IrIAeA6sE4ptRsoqpQKBjD/LGLOntByamfzEZRAeqJkyYGbYWtPGGr/8yYVDbUPEB2T9HLQtGJrY+yoYrput6VJkOSWFKfJtrUMWf49CUluWaJSKhrwEJECwF8iUiUpzwmZSCI9UXRNUqPRGIcBW6Uppe4AmzD1JV4zN6Ex/7xuzpbYcuog8/mT6Ymig6RGozEO641uO5prkIhILqAxcBzTEuk+5mx9gFhtiqVANxFxEJEymAZoAsxN8nsiUsM8qt073j0JkiWb2xqNJgsgVt0qzQmYaR6htgEWKKWWi8hOYIGI9AcuAp0BlFJHRGQBcBSIAl41N9cBhgC/AbmAVeYjUXSQ1Gg0hiE21gmSSqlDgGcC6TeBRoncMxwYnkB6IJBUf+ZjZEnd7RU/fsz4HrWY8b/WcWlb50xgYu96/PJaO355rR1n9myOu3b93AlmvdOVGUNa8fP/WhMV8RCA6MgIVo3/jGkDX2T6K805vn1Nsr5fGdiPUs5F8fGoGpd26OBB6tetha9nNTq2a8Pdu3ctfpYnOXnyBLX8vOKO4o4FmDRhHIcOHqBBvVrU8vOiXi0/AvcEpMq+0eV/zNeAfpQsXgRvD4u/jyniwYMH1Knph59Xdbyqu/P1sC+s7sPoZ7C2JnbQpUs0a9IQz6qV8a5ehUkTxgHw8Yfv4VGlEn5e1enaqQN37txJs6/kEEwDfJYcmZkMC5LR0dEMfeNVlixbxf5DR1k4by7Hjh616N6qjdvT5aufnkr3bduHfhP/pt/Evynn+wIAMdFRLBvzHi++OowBU5bTY+QsbGxNFegd86eSp8DzvPLTGgZOWUHJKn7J+u7Vuy9/L3+8dv6/wQP5evgI9uw/RJt27fjh++8seo6EqFjRlR0B+9gRsI+tO/eQK3duWrdpx2cff8BHn3zGjoB9fPL5l3z2cer02Iwu/2O++vRlyfLVVrGVEA4ODqxet4GAfQfZHXiAtWtWs3vXLqv6MPIZ0vJ/IDFs7ewYMXoM+w
8fZdO2nUybMpljR4/SsFETAg8cJmDfQSpUqMCYUSOs9BRJICk4MjFZUne7ZBVfcuZ7zqK85/Ztp0hpV4qWdQMgV/6
|
||
|
"text/plain": [
|
||
|
"<Figure size 432x288 with 2 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {
|
||
|
"needs_background": "light"
|
||
|
},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import itertools\n",
"\n",
"# Heat map of the confusion matrix with the count written in every cell.\n",
"plt.title(\"confusion matrix\")\n",
"plt.imshow(confusion,cmap='Blues')\n",
"# getConfusionMatrix indexes rows by predicted tag and columns by actual tag.\n",
"plt.xlabel(\"actual\")\n",
"plt.ylabel(\"predicted\")\n",
"\n",
"plt.colorbar()\n",
"# Cells above half of the maximum are drawn dark by the colormap, so switch to white text there.\n",
"threshold = confusion.max() / 2\n",
"for i,j in itertools.product(range(confusion.shape[0]),range(confusion.shape[1])):\n",
"    plt.text(j,i,\"{:}\".format(confusion[i,j]),horizontalalignment=\"center\",verticalalignment=\"center\",\n",
"             color=\"white\" if confusion[i,j] > threshold else \"black\")\n",
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 37,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"def getF1Score(confusion,c):\n",
"    \"\"\"Return the F1 score of class `c` read from a confusion matrix.\n",
"\n",
"    Row `c` supplies the false positives and column `c` the false negatives, which\n",
"    matches a matrix indexed as [predicted, actual].\n",
"    Returns 0.0 (instead of NaN) when the class has no true positives, false\n",
"    positives or false negatives at all.\n",
"    \"\"\"\n",
"    TP = confusion[c,c]\n",
"    FP = confusion[c].sum() - TP\n",
"    FN = confusion[:,c].sum() - TP\n",
"\n",
"    # 2PR/(P+R) with P = TP/(TP+FP) and R = TP/(TP+FN) reduces to 2TP/(2TP+FP+FN),\n",
"    # which leaves a single denominator to guard against zero.\n",
"    denominator = 2*TP + FP + FN\n",
"    if denominator == 0:\n",
"        return 0.0\n",
"    return 2*TP / denominator"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 39,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"class 0 f1 score : nan\n",
|
||
|
"class 1 f1 score : 0.9293768405914307\n",
|
||
|
"class 2 f1 score : 0.8267770409584045\n",
|
||
|
"class 3 f1 score : 0.9029303789138794\n",
|
||
|
"class 4 f1 score : 0.9614078402519226\n",
|
||
|
"class 5 f1 score : 0.9060906171798706\n",
|
||
|
"class 6 f1 score : 0.6701149344444275\n",
|
||
|
"class 7 f1 score : 0.9169055223464966\n",
|
||
|
"class 8 f1 score : 0.9731689691543579\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Print one F1 line per tag id, skipping the last id.\n",
"for class_id in range(tagIdConverter.size - 1):\n",
"    print(f\"class {class_id} f1 score : {getF1Score(confusion, class_id)}\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 40,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import collections"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 41,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"20744it [00:00, 170034.81it/s]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"token \t count frequency%\n",
|
||
|
"O(9) \t 314867 77.841%\n",
|
||
|
"I-PER(8) \t 21118 5.221%\n",
|
||
|
"I-ORG(7) \t 16329 4.037%\n",
|
||
|
"I-LOC(5) \t 10922 2.700%\n",
|
||
|
"B-LOC(1) \t 10645 2.632%\n",
|
||
|
"B-PER(4) \t 10059 2.487%\n",
|
||
|
"B-ORG(3) \t 9322 2.305%\n",
|
||
|
"I-MISC(6) \t 6176 1.527%\n",
|
||
|
"B-MISC(2) \t 5062 1.251%\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Tally how often each entity tag occurs across the train, dev and test sets,\n",
"# then print the tags from most to least frequent with their share of all tags.\n",
"counter = collections.Counter()\n",
"total_l = 0\n",
"\n",
"for item in tqdm(itertools.chain(datasetTrain,datasetDev,datasetTest)):\n",
"    entities = item[\"entity\"]\n",
"    counter.update(entities)  # adds one count per tag occurrence in this item\n",
"    total_l += len(entities)\n",
"print(f\"{'token':<12}\\t{'count':>12} {'frequency%':>12}\")\n",
"for token,count in counter.most_common():\n",
"    tid = tagIdConverter.convert_tokens_to_ids([token])[0]\n",
"    print(f\"{f'{token}({tid})':<12}\\t{count:>12}{count*100/total_l:>12.3f}%\")"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.7.11"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|