SparseTransformerSentenceEncoder.Config¶

Component: SparseTransformerSentenceEncoder

class SparseTransformerSentenceEncoder.Config[source]

Bases: TransformerSentenceEncoder.Config, ConfigBase

All Attributes (including base classes)

load_path: Optional[str] = None
save_path: Optional[str] = None
freeze: bool = False
shared_module_key: Optional[str] = None
output_dropout: float = 0.4
embedding_dim: int = 768
pooling: PoolingMethod = <PoolingMethod.CLS_TOKEN: 'cls_token'>
export: bool = False
projection_dim: int = 0
normalize_output_rep: bool = False
dropout: float = 0.1
attention_dropout: float = 0.1
activation_dropout: float = 0.1
ffn_embedding_dim: int = 3072
num_encoder_layers: int = 6
num_attention_heads: int = 8
num_segments: int = 2
use_position_embeddings: bool = True
offset_positions_by_padding: bool = True
apply_bert_init: bool = True
encoder_normalize_before: bool = True
activation_fn: str = 'relu'
max_seq_len: int = 128
multilingual: bool = False
freeze_embeddings: bool = False
n_trans_layers_to_freeze: int = 0
use_torchscript: bool = False
use_bias_finetuning: bool = False
project_representation: bool = False
is_bidirectional: bool = True
stride: int = 32
expressivity: int = 8

Default JSON

{
    "load_path": null,
    "save_path": null,
    "freeze": false,
    "shared_module_key": null,
    "output_dropout": 0.4,
    "embedding_dim": 768,
    "pooling": "cls_token",
    "export": false,
    "projection_dim": 0,
    "normalize_output_rep": false,
    "dropout": 0.1,
    "attention_dropout": 0.1,
    "activation_dropout": 0.1,
    "ffn_embedding_dim": 3072,
    "num_encoder_layers": 6,
    "num_attention_heads": 8,
    "num_segments": 2,
    "use_position_embeddings": true,
    "offset_positions_by_padding": true,
    "apply_bert_init": true,
    "encoder_normalize_before": true,
    "activation_fn": "relu",
    "max_seq_len": 128,
    "multilingual": false,
    "freeze_embeddings": false,
    "n_trans_layers_to_freeze": 0,
    "use_torchscript": false,
    "use_bias_finetuning": false,
    "project_representation": false,
    "is_bidirectional": true,
    "stride": 32,
    "expressivity": 8
}