ModelInput

class pytext.models.joint_model.ModelInput

Bases: ModelInput

All Attributes (including base classes)

tokens: TokenTensorizer.Config = TokenTensorizer.Config()
word_labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config(allow_unknown=True)
doc_labels: LabelTensorizer.Config = LabelTensorizer.Config(allow_unknown=True)
doc_weight: Optional[FloatTensorizer.Config] = None
word_weight: Optional[FloatTensorizer.Config] = None

Default JSON

{
    "tokens": {
        "is_input": true,
        "column": "text",
        "tokenizer": {
            "Tokenizer": {
                "split_regex": "\\s+",
                "lowercase": true,
                "use_byte_offsets": false
            }
        },
        "add_bos_token": false,
        "add_eos_token": false,
        "use_eos_token_for_bos": false,
        "max_seq_len": null,
        "vocab": {
            "build_from_data": true,
            "size_from_data": 0,
            "min_counts": 0,
            "vocab_files": []
        },
        "vocab_file_delimiter": " "
    },
    "word_labels": {
        "is_input": false,
        "slot_column": "slots",
        "text_column": "text",
        "tokenizer": {
            "Tokenizer": {
                "split_regex": "\\s+",
                "lowercase": true,
                "use_byte_offsets": false
            }
        },
        "allow_unknown": true
    },
    "doc_labels": {
        "LabelTensorizer": {
            "is_input": false,
            "column": "label",
            "allow_unknown": true,
            "pad_in_vocab": false,
            "label_vocab": null,
            "label_vocab_file": null,
            "add_labels": null
        }
    },
    "doc_weight": null,
    "word_weight": null
}