feat: count tag frequency
This commit is contained in:
parent
d2256b0ee9
commit
e080077a53
22
count_o.py
22
count_o.py
@ -1,22 +0,0 @@
|
||||
from dataset import readPreporcssedDataAll
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
"""
|
||||
count outside tokens(O tokens)
|
||||
"""
|
||||
def count_outside_tags(samples):
    """Count "O" tags and all tags across *samples*.

    Args:
        samples: Iterable of mappings. Each mapping's ``"entity"`` value
            must support ``len()`` and iteration; its elements are compared
            against the string ``"O"``.

    Returns:
        A ``(outside_count, total_count)`` tuple of ints.
    """
    outside_count = 0
    total_count = 0
    for sample in samples:
        tags = sample["entity"]
        total_count += len(tags)
        outside_count += sum(1 for tag in tags if tag == "O")
    return outside_count, total_count


def format_outside_ratio(outside_count, total_count):
    """Return the report line ``"<outside>/<total> = <ratio>"``.

    The ratio is reported as ``nan`` when ``total_count`` is zero, so an
    empty dataset produces a report instead of a ``ZeroDivisionError``.
    """
    ratio = outside_count / total_count if total_count else float("nan")
    return f"{outside_count}/{total_count} = {ratio}"


if __name__ == "__main__":
    # Only the first element returned by the loader is used here.
    train, _, _ = readPreporcssedDataAll()

    # tqdm only wraps the iterable to show progress while counting.
    total_o, total_l = count_outside_tags(tqdm(train))

    print(format_outside_ratio(total_o, total_l))
|
23
count_tag_freq.py
Normal file
23
count_tag_freq.py
Normal file
@ -0,0 +1,23 @@
|
||||
from dataset import readPreporcssedDataAll
|
||||
from read_data import TagIdConverter
|
||||
from tqdm import tqdm
|
||||
from collections import Counter
|
||||
|
||||
"""
|
||||
get frequency of tokens
|
||||
"""
|
||||
def count_tag_frequencies(samples):
    """Tally how often each tag occurs across *samples*.

    Args:
        samples: Iterable of mappings. Each mapping's ``"entity"`` value is
            iterated and every element is counted once per occurrence
            (presumably a list of tag strings; verify against the loader).

    Returns:
        A ``(counter, total)`` tuple: a ``Counter`` keyed by tag, and the
        total number of tags counted.
    """
    counter = Counter()
    for sample in samples:
        counter.update(sample["entity"])
    # The total is derived from the counter so the two cannot drift apart.
    return counter, sum(counter.values())


def format_tag_row(token, tid, count, total):
    """Return one table row: ``token(id)``, its count and its share in percent.

    ``total`` must be non-zero; callers only format rows for tags that were
    counted at least once, which implies a positive total.
    """
    return f"{f'{token}({tid})':<12}\t{count:>12}{count*100/total:>12.3f}%"


if __name__ == "__main__":
    # Only the first element returned by the loader is used here.
    train, _, _ = readPreporcssedDataAll()
    tagIdConverter = TagIdConverter()

    # tqdm only wraps the iterable to show progress while counting.
    counter, total_l = count_tag_frequencies(tqdm(train))

    print(f"{'token':<12}\t{'count':>12} {'frequency%':>12}")
    # Rows are printed from the most frequent tag to the least frequent.
    for token, count in counter.most_common():
        # The converter takes a list of tokens; wrap the single tag and unwrap the result.
        tid = tagIdConverter.convert_tokens_to_ids([token])[0]
        print(format_tag_row(token, tid, count, total_l))
|
Loading…
Reference in New Issue
Block a user