RoBERTaEncoder.Config

Component: RoBERTaEncoder

class RoBERTaEncoder.Config

Bases: RoBERTaEncoderBase.Config

All Attributes (including base classes)

load_path: Optional[str] = None
save_path: Optional[str] = None
freeze: bool = False
shared_module_key: Optional[str] = None
output_dropout: float = 0.4
embedding_dim: int = 768
pooling: PoolingMethod = PoolingMethod.CLS_TOKEN
export: bool = False
projection_dim: int = 0
normalize_output_rep: bool = False
vocab_size: int = 50265
num_encoder_layers: int = 12
num_attention_heads: int = 12
model_path: str = 'manifold://pytext_training/tree/static/models/roberta_base_torch.pt'
is_finetuned: bool = False
max_seq_len: int = 514
use_bias_finetuning: bool = False
use_linformer_encoder: bool = False
linformer_compressed_ratio: int = 4
linformer_quantize: bool = False
export_encoder: bool = False
variable_size_embedding: bool = True
use_selfie_encoder: bool = False
transformer_layer_to_keep: Optional[int] = None
attention_heads_to_keep_per_layer: Optional[int] = None
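
The config can also be built programmatically. Below is a minimal sketch, assuming PyText is installed and that RoBERTaEncoder is importable from pytext.models.roberta (the import path may differ across versions). Any field not passed keeps the default listed above; the local checkpoint path is a placeholder.

from pytext.models.roberta import RoBERTaEncoder

# Build an encoder config, overriding a few fields; anything not passed
# keeps the default value listed in the attribute table above.
encoder_config = RoBERTaEncoder.Config(
    embedding_dim=768,
    num_encoder_layers=12,
    num_attention_heads=12,
    output_dropout=0.1,  # lower than the 0.4 default
    max_seq_len=256,
    model_path="/path/to/roberta_base_torch.pt",  # placeholder local checkpoint
)

print(encoder_config.output_dropout)  # 0.1
print(encoder_config.vocab_size)      # 50265 (default retained)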

Default JSON

{
    "load_path": null,
    "save_path": null,
    "freeze": false,
    "shared_module_key": null,
    "output_dropout": 0.4,
    "embedding_dim": 768,
    "pooling": "cls_token",
    "export": false,
    "projection_dim": 0,
    "normalize_output_rep": false,
    "vocab_size": 50265,
    "num_encoder_layers": 12,
    "num_attention_heads": 12,
    "model_path": "manifold://pytext_training/tree/static/models/roberta_base_torch.pt",
    "is_finetuned": false,
    "max_seq_len": 514,
    "use_bias_finetuning": false,
    "use_linformer_encoder": false,
    "linformer_compressed_ratio": 4,
    "linformer_quantize": false,
    "export_encoder": false,
    "variable_size_embedding": true,
    "use_selfie_encoder": false,
    "transformer_layer_to_keep": null,
    "attention_heads_to_keep_per_layer": null
}
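
When this component is embedded in a larger task config, only the fields you want to change from the defaults need to be listed; unspecified fields fall back to the values in the Default JSON above. A hypothetical override fragment for the encoder section is shown below (the surrounding task and model nesting depends on your setup, and the checkpoint path is a placeholder):

{
    "output_dropout": 0.1,
    "max_seq_len": 256,
    "model_path": "/path/to/roberta_base_torch.pt"
}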