ModelInput

class pytext.models.seq_models.seq2seq_model.ModelInput

Bases: ModelInput

All Attributes (including base classes)

src_seq_tokens: TokenTensorizer.Config = TokenTensorizer.Config()
trg_seq_tokens: TokenTensorizer.Config = TokenTensorizer.Config()
dict_feat: Optional[GazetteerTensorizer.Config] = None
contextual_token_embedding: Optional[ByteTokenTensorizer.Config] = None

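Both required fields default to reading the same "text" column, so for a typical parallel-data setup you override the column names. A minimal sketch in Python of doing that, assuming the tensorizer configs are importable from pytext.data.tensorizers, the nested config is reachable as Seq2SeqModel.Config.ModelInput, and the column names shown are placeholders for your dataset:

from pytext.data.tensorizers import TokenTensorizer
from pytext.models.seq_models.seq2seq_model import Seq2SeqModel

# Sketch: point source and target tokens at separate data columns and add
# BOS/EOS markers to the target side (a common seq2seq convention).
# "source_sequence" / "target_sequence" are placeholder column names.
model_input = Seq2SeqModel.Config.ModelInput(
    src_seq_tokens=TokenTensorizer.Config(column="source_sequence"),
    trg_seq_tokens=TokenTensorizer.Config(
        column="target_sequence",
        add_bos_token=True,
        add_eos_token=True,
    ),
)
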
Default JSON

{
    "src_seq_tokens": {
        "is_input": true,
        "column": "text",
        "tokenizer": {
            "Tokenizer": {
                "split_regex": "\\s+",
                "lowercase": true,
                "use_byte_offsets": false
            }
        },
        "add_bos_token": false,
        "add_eos_token": false,
        "use_eos_token_for_bos": false,
        "max_seq_len": null,
        "vocab": {
            "build_from_data": true,
            "size_from_data": 0,
            "min_counts": 0,
            "vocab_files": []
        },
        "vocab_file_delimiter": " "
    },
    "trg_seq_tokens": {
        "is_input": true,
        "column": "text",
        "tokenizer": {
            "Tokenizer": {
                "split_regex": "\\s+",
                "lowercase": true,
                "use_byte_offsets": false
            }
        },
        "add_bos_token": false,
        "add_eos_token": false,
        "use_eos_token_for_bos": false,
        "max_seq_len": null,
        "vocab": {
            "build_from_data": true,
            "size_from_data": 0,
            "min_counts": 0,
            "vocab_files": []
        },
        "vocab_file_delimiter": " "
    },
    "dict_feat": null,
    "contextual_token_embedding": null
}
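
The two optional fields are disabled (null) by default. If the data includes gazetteer (dictionary) features or byte-level contextual token input, they can be switched on by supplying the corresponding tensorizer configs. A sketch under the assumption that GazetteerTensorizer and ByteTokenTensorizer are importable from pytext.data.tensorizers and that their default column settings match your data:

from pytext.data.tensorizers import ByteTokenTensorizer, GazetteerTensorizer
from pytext.models.seq_models.seq2seq_model import Seq2SeqModel

# Sketch: enable the two optional inputs with their default column settings.
model_input = Seq2SeqModel.Config.ModelInput(
    dict_feat=GazetteerTensorizer.Config(),                   # gazetteer features
    contextual_token_embedding=ByteTokenTensorizer.Config(),  # byte-level token input
)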