BertPairwiseModelInput

class pytext.models.bert_classification_models.BertPairwiseModelInput

Bases: EncoderPairwiseModelInput

All Attributes (including base classes)

tokens1: BERTTensorizerBase.Config = BERTTensorizer.Config(columns=['text1'], max_seq_len=128)
tokens2: BERTTensorizerBase.Config = BERTTensorizer.Config(columns=['text2'], max_seq_len=128)
labels: LabelTensorizer.Config = LabelTensorizer.Config()
num_tokens: NtokensTensorizer.Config = NtokensTensorizer.Config(names=['tokens1', 'tokens2'], indexes=[2, 2])
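
A minimal sketch of overriding these defaults in Python, assuming the class is importable at the path shown above (in some PyText versions, ModelInput configs are nested under the model's Config instead); the column names and max_seq_len value are illustrative:

import pytext
from pytext.data.bert_tensorizer import BERTTensorizer
from pytext.models.bert_classification_models import BertPairwiseModelInput

# Override only the fields that differ from the defaults listed above;
# unspecified attributes (labels, num_tokens) keep their default configs.
model_input = BertPairwiseModelInput(
    tokens1=BERTTensorizer.Config(columns=["text1"], max_seq_len=256),
    tokens2=BERTTensorizer.Config(columns=["text2"], max_seq_len=256),
)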

Default JSON

{
    "tokens1": {
        "BERTTensorizer": {
            "is_input": true,
            "columns": [
                "text1"
            ],
            "tokenizer": {
                "WordPieceTokenizer": {
                    "basic_tokenizer": {
                        "split_regex": "\\s+",
                        "lowercase": true,
                        "use_byte_offsets": false
                    },
                    "wordpiece_vocab_path": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/vocab.txt"
                }
            },
            "base_tokenizer": null,
            "vocab_file": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/vocab.txt",
            "max_seq_len": 128
        }
    },
    "tokens2": {
        "BERTTensorizer": {
            "is_input": true,
            "columns": [
                "text2"
            ],
            "tokenizer": {
                "WordPieceTokenizer": {
                    "basic_tokenizer": {
                        "split_regex": "\\s+",
                        "lowercase": true,
                        "use_byte_offsets": false
                    },
                    "wordpiece_vocab_path": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/vocab.txt"
                }
            },
            "base_tokenizer": null,
            "vocab_file": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/vocab.txt",
            "max_seq_len": 128
        }
    },
    "labels": {
        "LabelTensorizer": {
            "is_input": false,
            "column": "label",
            "allow_unknown": false,
            "pad_in_vocab": false,
            "label_vocab": null,
            "label_vocab_file": null,
            "add_labels": null
        }
    },
    "num_tokens": {
        "is_input": false,
        "names": [
            "tokens1",
            "tokens2"
        ],
        "indexes": [
            2,
            2
        ]
    }
}
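
A sketch of loading the Default JSON above into a typed config object, assuming pytext.config.serialize.config_from_json, which deserializes the component-union form (e.g. the "BERTTensorizer" wrapper keys) shown in the block; the filename is hypothetical:

import json
from pytext.config.serialize import config_from_json
from pytext.models.bert_classification_models import BertPairwiseModelInput

# default_json holds the "Default JSON" block above, saved to a file.
with open("bert_pairwise_model_input.json") as f:
    default_json = json.load(f)

model_input = config_from_json(BertPairwiseModelInput, default_json)
print(model_input.tokens1.max_seq_len)  # 128, per the defaults above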