This commit is contained in:
语帆 2024-02-22 16:08:51 +08:00
parent 7f0a06946f
commit 733073d269

View File

@@ -153,15 +153,18 @@ class AutoModel:
# build tokenizer # build tokenizer
tokenizer = kwargs.get("tokenizer", None) tokenizer = kwargs.get("tokenizer", None)
pdb.set_trace()
if tokenizer is not None: if tokenizer is not None:
tokenizer_class = tables.tokenizer_classes.get(tokenizer) tokenizer_class = tables.tokenizer_classes.get(tokenizer)
pdb.set_trace()
tokenizer = tokenizer_class(**kwargs["tokenizer_conf"]) tokenizer = tokenizer_class(**kwargs["tokenizer_conf"])
pdb.set_trace()
kwargs["tokenizer"] = tokenizer kwargs["tokenizer"] = tokenizer
kwargs["token_list"] = tokenizer.token_list kwargs["token_list"] = tokenizer.token_list
vocab_size = len(tokenizer.token_list) vocab_size = len(tokenizer.token_list)
else: else:
vocab_size = -1 vocab_size = -1
pdb.set_trace()
# build frontend # build frontend
frontend = kwargs.get("frontend", None) frontend = kwargs.get("frontend", None)
if frontend is not None: if frontend is not None: