ByteModelInput¶
class pytext.models.doc_model.ByteModelInput
Bases:
ModelInput
All Attributes (including base classes)
- tokens: TokenTensorizer.Config = TokenTensorizer.Config()
- dense: Optional[FloatListTensorizer.Config] = None
- labels: LabelTensorizer.Config = LabelTensorizer.Config()
- token_bytes: ByteTokenTensorizer.Config = ByteTokenTensorizer.Config()
Default JSON
{
"tokens": {
"is_input": true,
"column": "text",
"tokenizer": {
"Tokenizer": {
"split_regex": "\\s+",
"lowercase": true,
"use_byte_offsets": false
}
},
"add_bos_token": false,
"add_eos_token": false,
"use_eos_token_for_bos": false,
"max_seq_len": null,
"vocab": {
"build_from_data": true,
"size_from_data": 0,
"min_counts": 0,
"vocab_files": []
},
"vocab_file_delimiter": " "
},
"dense": null,
"labels": {
"LabelTensorizer": {
"is_input": false,
"column": "label",
"allow_unknown": false,
"pad_in_vocab": false,
"label_vocab": null,
"label_vocab_file": null,
"add_labels": null
}
},
"token_bytes": {
"is_input": true,
"column": "text",
"tokenizer": {
"Tokenizer": {
"split_regex": "\\s+",
"lowercase": true,
"use_byte_offsets": false
}
},
"max_seq_len": null,
"max_byte_len": 15,
"offset_for_non_padding": 0,
"add_bos_token": false,
"add_eos_token": false,
"use_eos_token_for_bos": false
}
}