feat: add eng_tag
This commit is contained in:
parent
883f39d645
commit
609174b089
42
eng_tags.json
Normal file
42
eng_tags.json
Normal file
@ -0,0 +1,42 @@
|
||||
[
|
||||
{
|
||||
"name": "[PAD]",
|
||||
"index": 0
|
||||
},
|
||||
{
|
||||
"name": "B-LOC",
|
||||
"index": 1
|
||||
},
|
||||
{
|
||||
"name": "B-MISC",
|
||||
"index": 2
|
||||
},
|
||||
{
|
||||
"name": "B-ORG",
|
||||
"index": 3
|
||||
},
|
||||
{
|
||||
"name": "B-PER",
|
||||
"index": 4
|
||||
},
|
||||
{
|
||||
"name": "I-LOC",
|
||||
"index": 5
|
||||
},
|
||||
{
|
||||
"name": "I-MISC",
|
||||
"index": 6
|
||||
},
|
||||
{
|
||||
"name": "I-ORG",
|
||||
"index": 7
|
||||
},
|
||||
{
|
||||
"name": "I-PER",
|
||||
"index": 8
|
||||
},
|
||||
{
|
||||
"name": "O",
|
||||
"index": 9
|
||||
}
|
||||
]
|
@ -77,9 +77,7 @@ def readKoreanDataAll():
|
||||
|
||||
def readEnglishDataAll():
|
||||
with open(f"{EnglishBase}/valid.txt", encoding="utf-8") as fp:
|
||||
print("a")
|
||||
dev = readEnglishData(fp)
|
||||
print("b")
|
||||
with open(f"{EnglishBase}/test.txt", encoding="utf-8") as fp:
|
||||
test = readEnglishData(fp)
|
||||
with open(f"{EnglishBase}/train.txt", encoding="utf-8") as fp:
|
||||
@ -173,8 +171,6 @@ extracts and stores tags set from the given data.
|
||||
"""
|
||||
if __name__ == "__main__":
|
||||
from tqdm import tqdm
|
||||
t = TagIdConverter()
|
||||
|
||||
train, dev, test = readEnglishDataAll()
|
||||
vocab = set()
|
||||
def getTags(lst: List[Sentence]):
|
||||
@ -205,5 +201,5 @@ if __name__ == "__main__":
|
||||
tags.append({"name":v,"index":i})
|
||||
i += 1
|
||||
print(tags)
|
||||
#with open("tags.json","w",encoding="utf-8") as fp:
|
||||
# json.dump(tags,fp,ensure_ascii=False, indent=2)
|
||||
with open("eng_tags.json","w",encoding="utf-8") as fp:
|
||||
json.dump(tags,fp,ensure_ascii=False, indent=2)
|
Loading…
Reference in New Issue
Block a user