Source code for pytext.models.representations.huggingface_electra_sentence_encoder

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import os
from typing import List, Tuple

import torch
from pytext.config import ConfigBase
from pytext.models.representations.transformer_sentence_encoder_base import (
    TransformerSentenceEncoderBase,
)
from pytext.utils.file_io import PathManager
from pytext.utils.usage import log_class_usage
from transformers.modeling_electra import ElectraConfig, ElectraModel


class HuggingFaceElectraSentenceEncoder(TransformerSentenceEncoderBase):
    """
    Generate sentence representation using the open source HuggingFace Electra
    model. This class implements loading the model weights from a pre-trained
    model file.
    """

    class Config(TransformerSentenceEncoderBase.Config, ConfigBase):
        electra_cpt_dir: str = (
            "/mnt/vol/nlp_technologies/electra/electra-base-discriminator"
        )
        load_weights: bool = True

    def __init__(
        self, config: Config, output_encoded_layers: bool, *args, **kwargs
    ) -> None:
        super().__init__(config, output_encoded_layers=output_encoded_layers)
        # Load config
        config_file = os.path.join(config.electra_cpt_dir, "config.json")
        electra_config = ElectraConfig.from_json_file(config_file)
        print("Electra model config {}".format(electra_config))
        # Instantiate model.
        model = ElectraModel(electra_config)
        weights_path = os.path.join(config.electra_cpt_dir, "pytorch_model.bin")
        # load pre-trained weights if weights_path exists
        if config.load_weights and PathManager.isfile(weights_path):
            state_dict = torch.load(weights_path)

            missing_keys: List[str] = []
            unexpected_keys: List[str] = []
            error_msgs: List[str] = []
            # copy state_dict so _load_from_state_dict can modify it
            metadata = getattr(state_dict, "_metadata", None)
            state_dict = state_dict.copy()
            if metadata is not None:
                state_dict._metadata = metadata

            def load(module, prefix=""):
                local_metadata = (
                    {} if metadata is None else metadata.get(prefix[:-1], {})
                )
                module._load_from_state_dict(
                    state_dict,
                    prefix,
                    local_metadata,
                    True,
                    missing_keys,
                    unexpected_keys,
                    error_msgs,
                )
                for name, child in module._modules.items():
                    if child is not None:
                        load(child, prefix + name + ".")

            load(model, prefix="" if hasattr(model, "electra") else "electra.")

            if len(missing_keys) > 0:
                print(
                    "Weights of {} not initialized from pretrained model: {}".format(
                        model.__class__.__name__, missing_keys
                    )
                )
            if len(unexpected_keys) > 0:
                print(
                    "Weights from pretrained model not used in {}: {}".format(
                        model.__class__.__name__, unexpected_keys
                    )
                )

        self.electra = model
        log_class_usage(__class__)

    def _encoder(self, input_tuple: Tuple[torch.Tensor, ...]):
        tokens, pad_mask, segment_labels, _ = input_tuple
        last_encoder_layer, encoded_layers = self.electra(
            tokens,
            attention_mask=pad_mask,
            token_type_ids=segment_labels,
            output_hidden_states=True,
        )
        pooled_output = last_encoder_layer[:, 0]
        return encoded_layers, pooled_output

    def _embedding(self):
        # used to tie weights in MaskedLM model
        return self.electra.embeddings.word_embeddings
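A minimal usage sketch, not part of the module above: it assumes a local Electra checkpoint directory containing config.json and pytorch_model.bin, and it calls the private _encoder method directly only to illustrate the (tokens, pad_mask, segment_labels, positions) tuple it unpacks. The checkpoint path, vocabulary size, and tensor shapes below are placeholders.

# Hypothetical usage sketch; the checkpoint directory and batch tensors are placeholders.
import torch

from pytext.models.representations.huggingface_electra_sentence_encoder import (
    HuggingFaceElectraSentenceEncoder,
)

config = HuggingFaceElectraSentenceEncoder.Config(
    electra_cpt_dir="/path/to/electra-base-discriminator",  # placeholder path
    load_weights=True,
)
encoder = HuggingFaceElectraSentenceEncoder(config, output_encoded_layers=True)

batch_size, seq_len = 2, 16
tokens = torch.randint(0, 30522, (batch_size, seq_len))           # placeholder token ids
pad_mask = torch.ones(batch_size, seq_len, dtype=torch.long)      # 1 = real token, 0 = padding
segment_labels = torch.zeros(batch_size, seq_len, dtype=torch.long)
positions = torch.arange(seq_len).repeat(batch_size, 1)           # fourth element is ignored by _encoder

encoded_layers, pooled = encoder._encoder((tokens, pad_mask, segment_labels, positions))
print(pooled.shape)  # (batch_size, hidden_size): representation of the first ([CLS]) token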