feat: add eng_tag
This commit is contained in:
parent
883f39d645
commit
609174b089
42
eng_tags.json
Normal file
42
eng_tags.json
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "[PAD]",
|
||||||
|
"index": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "B-LOC",
|
||||||
|
"index": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "B-MISC",
|
||||||
|
"index": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "B-ORG",
|
||||||
|
"index": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "B-PER",
|
||||||
|
"index": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "I-LOC",
|
||||||
|
"index": 5
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "I-MISC",
|
||||||
|
"index": 6
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "I-ORG",
|
||||||
|
"index": 7
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "I-PER",
|
||||||
|
"index": 8
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "O",
|
||||||
|
"index": 9
|
||||||
|
}
|
||||||
|
]
|
@ -77,9 +77,7 @@ def readKoreanDataAll():
|
|||||||
|
|
||||||
def readEnglishDataAll():
|
def readEnglishDataAll():
|
||||||
with open(f"{EnglishBase}/valid.txt", encoding="utf-8") as fp:
|
with open(f"{EnglishBase}/valid.txt", encoding="utf-8") as fp:
|
||||||
print("a")
|
|
||||||
dev = readEnglishData(fp)
|
dev = readEnglishData(fp)
|
||||||
print("b")
|
|
||||||
with open(f"{EnglishBase}/test.txt", encoding="utf-8") as fp:
|
with open(f"{EnglishBase}/test.txt", encoding="utf-8") as fp:
|
||||||
test = readEnglishData(fp)
|
test = readEnglishData(fp)
|
||||||
with open(f"{EnglishBase}/train.txt", encoding="utf-8") as fp:
|
with open(f"{EnglishBase}/train.txt", encoding="utf-8") as fp:
|
||||||
@ -173,8 +171,6 @@ extracts and stores tags set from the given data.
|
|||||||
"""
|
"""
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
t = TagIdConverter()
|
|
||||||
|
|
||||||
train, dev, test = readEnglishDataAll()
|
train, dev, test = readEnglishDataAll()
|
||||||
vocab = set()
|
vocab = set()
|
||||||
def getTags(lst: List[Sentence]):
|
def getTags(lst: List[Sentence]):
|
||||||
@ -205,5 +201,5 @@ if __name__ == "__main__":
|
|||||||
tags.append({"name":v,"index":i})
|
tags.append({"name":v,"index":i})
|
||||||
i += 1
|
i += 1
|
||||||
print(tags)
|
print(tags)
|
||||||
#with open("tags.json","w",encoding="utf-8") as fp:
|
with open("eng_tags.json","w",encoding="utf-8") as fp:
|
||||||
# json.dump(tags,fp,ensure_ascii=False, indent=2)
|
json.dump(tags,fp,ensure_ascii=False, indent=2)
|
Loading…
Reference in New Issue
Block a user