SquadTensorizerForKD.Config
Component: SquadTensorizerForKD
- class SquadTensorizerForKD.Config [source]
  Bases: SquadTensorizer.Config
All Attributes (including base classes)
- is_input: bool = True
- column: str = 'text'
- tokenizer: Tokenizer.Config = Tokenizer.Config(split_regex='\\W+')
- add_bos_token: bool = False
- add_eos_token: bool = False
- use_eos_token_for_bos: bool = False
- max_seq_len: Optional[int] = None
- vocab: VocabConfig = VocabConfig()
- vocab_file_delimiter: str = ' '
- doc_column: str = 'doc'
- ques_column: str = 'question'
- answers_column: str = 'answers'
- answer_starts_column: str = 'answer_starts'
- max_ques_seq_len: int = 64
- max_doc_seq_len: int = 256
- start_logits_column: str = 'start_logits'
- end_logits_column: str = 'end_logits'
- has_answer_logits_column: str = 'has_answer_logits'
- pad_mask_column: str = 'pad_mask'
- segment_labels_column: str = 'segment_labels'
Default JSON
{
"is_input": true,
"column": "text",
"tokenizer": {
"Tokenizer": {
"split_regex": "\\W+",
"lowercase": true,
"use_byte_offsets": false
}
},
"add_bos_token": false,
"add_eos_token": false,
"use_eos_token_for_bos": false,
"max_seq_len": null,
"vocab": {
"build_from_data": true,
"size_from_data": 0,
"min_counts": 0,
"vocab_files": []
},
"vocab_file_delimiter": " ",
"doc_column": "doc",
"ques_column": "question",
"answers_column": "answers",
"answer_starts_column": "answer_starts",
"max_ques_seq_len": 64,
"max_doc_seq_len": 256,
"start_logits_column": "start_logits",
"end_logits_column": "end_logits",
"has_answer_logits_column": "has_answer_logits",
"pad_mask_column": "pad_mask",
"segment_labels_column": "segment_labels"
}