BertPairwiseModelInput
class pytext.models.bert_classification_models.BertPairwiseModelInput
Bases:
EncoderPairwiseModelInput
All Attributes (including base classes)
- tokens1: BERTTensorizerBase.Config = BERTTensorizer.Config(columns=['text1'], max_seq_len=128)
- tokens2: BERTTensorizerBase.Config = BERTTensorizer.Config(columns=['text2'], max_seq_len=128)
- labels: LabelTensorizer.Config = LabelTensorizer.Config()
- num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(names=['tokens1', 'tokens2'], indexes=[2, 2])
Default JSON
{
"tokens1": {
"BERTTensorizer": {
"is_input": true,
"columns": [
"text1"
],
"tokenizer": {
"WordPieceTokenizer": {
"basic_tokenizer": {
"split_regex": "\\s+",
"lowercase": true,
"use_byte_offsets": false
},
"wordpiece_vocab_path": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/vocab.txt"
}
},
"base_tokenizer": null,
"vocab_file": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/vocab.txt",
"max_seq_len": 128
}
},
"tokens2": {
"BERTTensorizer": {
"is_input": true,
"columns": [
"text2"
],
"tokenizer": {
"WordPieceTokenizer": {
"basic_tokenizer": {
"split_regex": "\\s+",
"lowercase": true,
"use_byte_offsets": false
},
"wordpiece_vocab_path": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/vocab.txt"
}
},
"base_tokenizer": null,
"vocab_file": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/vocab.txt",
"max_seq_len": 128
}
},
"labels": {
"LabelTensorizer": {
"is_input": false,
"column": "label",
"allow_unknown": false,
"pad_in_vocab": false,
"label_vocab": null,
"label_vocab_file": null,
"add_labels": null
}
},
"num_tokens": {
"is_input": false,
"names": [
"tokens1",
"tokens2"
],
"indexes": [
2,
2
]
}
}