Source code for pytext.models.r3f_models

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

from contextlib import AbstractContextManager
from enum import Enum
from typing import Dict

import torch
import torch.nn.functional as F
from pytext.common.constants import Stage
from pytext.config import ConfigBase
from pytext.utils.precision import maybe_float


class R3FNoiseType(Enum):
    UNIFORM = "uniform"
    NORMAL = "normal"

def build_noise_sampler(noise_type: R3FNoiseType, eps: float):
    """
    Given a `noise_type` (`R3FNoiseType`), builds a `torch.distributions`
    object that generates noise bounded by the passed-in `eps` (`float`).
    """
    if noise_type == R3FNoiseType.UNIFORM:
        return torch.distributions.uniform.Uniform(low=-eps, high=eps)
    elif noise_type == R3FNoiseType.NORMAL:
        return torch.distributions.normal.Normal(loc=0.0, scale=eps)
    else:
        raise Exception(f"Unknown noise type: {noise_type}")

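# A minimal sketch (hypothetical usage, not part of the module) of the sampler
# returned by `build_noise_sampler`: uniform noise is bounded by `eps`, and
# normal noise has standard deviation `eps`:
#
#     >>> sampler = build_noise_sampler(R3FNoiseType.UNIFORM, eps=1e-5)
#     >>> noise = sampler.sample(sample_shape=torch.Size([2, 4]))
#     >>> noise.shape
#     torch.Size([2, 4])
#     >>> bool(noise.abs().max() <= 1e-5)
#     True
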
def compute_symmetric_kl(noised_logits, input_logits):
    """
    Computes a symmetric KL loss by taking the KL divergence in both
    directions between the softmax distributions of the input logits
    and the noised logits, and summing the two terms
    """
    return F.kl_div(
        F.log_softmax(noised_logits, dim=-1, dtype=torch.float32),
        F.softmax(input_logits, dim=-1, dtype=torch.float32),
        reduction="sum",
    ) + F.kl_div(
        F.log_softmax(input_logits, dim=-1, dtype=torch.float32),
        F.softmax(noised_logits, dim=-1, dtype=torch.float32),
        reduction="sum",
    )  # / noised_logits.size(0)

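# In other words, this returns KL(P || Q) + KL(Q || P) with a "sum" reduction,
# where P and Q are the softmax distributions of the two logit tensors. A
# hypothetical example (not part of the module):
#
#     >>> a = torch.tensor([[1.0, 2.0, 3.0]])
#     >>> b = torch.tensor([[1.1, 2.1, 2.9]])
#     >>> loss = compute_symmetric_kl(a, b)
#     >>> loss.shape  # "sum" reduction yields a scalar
#     torch.Size([])
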
class R3FConfigOptions(ConfigBase):
    """
    Configuration options for models using R3F
    """

    # for MTL purposes, allow a different lambda per loss
    r3f_lambda_by_loss: Dict[str, float] = {}
    r3f_default_lambda: float = 0.5
    eps: float = 1e-5
    noise_type: R3FNoiseType = R3FNoiseType.UNIFORM

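# As an example (hypothetical values, and assuming `ConfigBase` supports
# keyword construction), a config using normal noise and a dedicated lambda
# for the "label" loss might look like:
#
#     >>> opts = R3FConfigOptions(
#     ...     r3f_lambda_by_loss={"label": 1.0},
#     ...     r3f_default_lambda=0.5,
#     ...     eps=1e-5,
#     ...     noise_type=R3FNoiseType.NORMAL,
#     ... )
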
class R3FNoiseContextManager(AbstractContextManager):
    """
    Context manager that adds a forward hook to the embedding module to
    insert noise into the model, detaching the embedding output during
    this pass
    """

    def __init__(self, context):
        self.encoder_hook = None
        self.decoder_hook = None
        self.context = context
        self.hook = self.context.get_embedding_module().register_forward_hook(
            self._hook_implementation
        )

    def __enter__(self):
        return self.context

    def __exit__(self, type, value, traceback):
        self.hook.remove()
        self.hook = None

    def _hook_implementation(self, module, input, output):
        # detach the embedding output so gradients don't flow through the
        # noised pass, then add sampled noise matching its dtype and device
        noise = self.context.noise_sampler.sample(sample_shape=output.shape).to(output)
        return output.clone().detach() + noise

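# A minimal sketch (hypothetical, not part of the module) of what the hook does
# to an embedding output inside the managed pass; `_Ctx` stands in for an
# `R3FPyTextMixin` instance:
#
#     >>> embedding = torch.nn.Embedding(10, 4)
#     >>> class _Ctx:
#     ...     noise_sampler = build_noise_sampler(R3FNoiseType.NORMAL, 1e-5)
#     ...     def get_embedding_module(self):
#     ...         return embedding
#     >>> with R3FNoiseContextManager(_Ctx()):
#     ...     out = embedding(torch.tensor([[1, 2]]))
#     >>> out.requires_grad  # detached inside the noised pass
#     False
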
class R3FPyTextMixin(object):
    """
    Mixin class for applying the R3F method. To apply R3F to any model,
    inherit this class and implement the abstract functions.

    For more details: https://arxiv.org/abs/2008.03156
    """

    def __init__(self, config: R3FConfigOptions):
        self.r3f_lambda_by_loss = config.r3f_lambda_by_loss
        self.r3f_default_lambda = config.r3f_default_lambda
        self.r3f_eps = config.eps
        self.noise_sampler = build_noise_sampler(config.noise_type, self.r3f_eps)

    def get_embedding_module(self, *args, **kwargs):
        """
        Returns the embedding module that is used for the R3F loss; in
        particular, noise will be injected into this module's output.
        """
        raise NotImplementedError()

    def forward_with_noise(self, *args, **kwargs):
        """
        Runs the model's original forward with noise injected into the
        embedding module's output
        """
        with R3FNoiseContextManager(self):
            return self.original_forward(*args, **kwargs)

    def original_forward(self, *args, **kwargs):
        """
        Runs the traditional forward of this model
        """
        raise NotImplementedError()

    def get_sample_size(self, model_inputs, targets):
        """
        Gets the sample size of the batch, which is used as a regularization
        factor for the R3F loss term
        """
        raise NotImplementedError()

    def get_r3f_model_output(self, model_output):
        """
        Extracts the output from the model.forward() call that is used
        for the R3F loss term
        """
        return model_output

    def forward(self, *args, use_r3f: bool = False, **kwargs):
        if use_r3f:
            # forward with the normal model
            model_output = self.original_forward(*args, **kwargs)
            # compute noised model outputs
            noise_model_outputs = self.forward_with_noise(*args, **kwargs)
            return model_output, noise_model_outputs
        else:
            return self.original_forward(*args, **kwargs)

    def get_r3f_loss_terms(
        self, model_outputs, noise_model_outputs, sample_size: int
    ) -> torch.Tensor:
        """
        Computes the auxiliary loss for R3F; in particular, computes a
        symmetric KL divergence between the outputs from the clean input
        embedding and the noised input embedding.
        """
        label_symm_kl = compute_symmetric_kl(
            self.get_r3f_model_output(noise_model_outputs),
            self.get_r3f_model_output(model_outputs),
        )
        label_symm_kl = label_symm_kl  # * sample_size
        return (
            self.r3f_lambda_by_loss.get("label", self.r3f_default_lambda)
            * label_symm_kl
        )

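    # For clarity: the full objective assembled in `train_batch` below is
    #
    #     loss = task_loss + lambda * symmetric_KL(clean_logits, noised_logits)
    #
    # where `lambda` is looked up in `r3f_lambda_by_loss` under the "label"
    # key and falls back to `r3f_default_lambda`.
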
    @classmethod
    def train_batch(cls, model, batch, state=None):
        """
        Runs training over a batch with the R3F method: the R3F forward
        passes run only during training, not during eval or test.
        """
        # Forward pass through the network.
        model_inputs = model.arrange_model_inputs(batch)
        model_context = model.arrange_model_context(batch)
        targets = model.arrange_targets(batch)
        sample_size = model.get_sample_size(model_inputs=model_inputs, targets=targets)

        r3f_loss_term = torch.tensor(0)
        if state and state.stage == Stage.TRAIN:
            # during training run both the clean and noised R3F forward calls
            model_outputs, noise_model_outputs = model(*model_inputs, use_r3f=True)
            r3f_loss_term = model.get_r3f_loss_terms(
                model_outputs, noise_model_outputs, sample_size=sample_size
            )
        else:
            # during eval and test don't run the R3F forward
            model_outputs = model(*model_inputs, use_r3f=False)

        # Add stage to context.
        if state:
            if model_context is None:
                model_context = {"stage": state.stage, "epoch": state.epoch}
            else:
                model_context["stage"] = state.stage
                model_context["epoch"] = state.epoch

        # Compute loss and predictions.
        loss = maybe_float(model.get_loss(model_outputs, targets, model_context))
        # add the R3F auxiliary loss term
        loss = loss + r3f_loss_term.to(loss.device)
        predictions, scores = model.get_pred(model_outputs, context=model_context)

        # Pack results and return them.
        metric_data = (predictions, targets, scores, loss, model_inputs)
        return loss, metric_data
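
# A minimal sketch (hypothetical, not part of the module) of wiring the mixin
# into a model; `SimpleClassifier` and its layers are illustrative assumptions.
# The mixin is listed first so its `forward` takes precedence in the MRO:
#
#     >>> import torch.nn as nn
#     >>> class SimpleClassifier(R3FPyTextMixin, nn.Module):
#     ...     def __init__(self, config: R3FConfigOptions):
#     ...         nn.Module.__init__(self)
#     ...         R3FPyTextMixin.__init__(self, config)
#     ...         self.embedding = nn.Embedding(100, 16)
#     ...         self.classifier = nn.Linear(16, 3)
#     ...     def get_embedding_module(self, *args, **kwargs):
#     ...         return self.embedding
#     ...     def original_forward(self, tokens):
#     ...         return self.classifier(self.embedding(tokens).mean(dim=1))
#     >>> model = SimpleClassifier(R3FConfigOptions())
#     >>> clean, noised = model(torch.randint(0, 100, (2, 5)), use_r3f=True)
#     >>> r3f_loss = model.get_r3f_loss_terms(clean, noised, sample_size=2)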