Source code for pytext.optimizer.activations

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

import math

import torch
import torch.nn as nn
from pytext.config.module_config import Activation


class GeLU(nn.Module):
    """
    Implements Gaussian Error Linear Units (GELUs).

    Reference:
        Gaussian Error Linear Units (GELUs). Dan Hendrycks, Kevin Gimpel.
        Technical Report, 2017. https://arxiv.org/pdf/1606.08415.pdf
    """

    def forward(self, x):
        if torch.onnx.is_in_onnx_export():
            # ONNX -> Caffe2 conversion will create an intermediate blob for
            # each intermediate math output, which is very memory inefficient.
            # We use the Gelu operator directly to reduce the memory footprint
            # in the exported model.
            return torch.ops._caffe2.Gelu(x, True)
        else:
            return (
                0.5
                * x
                * (
                    # Note: x * x * x is used instead of torch.pow(x, 3) due to
                    # issues with ONNX compatibility:
                    # https://github.com/pytorch/pytorch/issues/18475
                    1
                    + torch.tanh(
                        math.sqrt(2 / math.pi) * (x + 0.044715 * (x * x * x))
                    )
                )
            )
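

# Illustrative check (a sketch, not part of the original module): the eager
# branch above computes the tanh approximation of GELU,
#     0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x ** 3))),
# which closely tracks PyTorch's exact, erf-based GELU. The input shape and
# tolerance below are assumptions chosen for demonstration.
def _check_gelu_tanh_approximation():
    gelu = GeLU()
    x = torch.randn(4, 16)
    y_approx = gelu(x)
    y_exact = nn.functional.gelu(x)  # exact erf-based GELU
    # The tanh approximation typically agrees to within ~1e-3 absolute error.
    assert torch.allclose(y_approx, y_exact, atol=1e-3)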


def get_activation(name):
    if name == Activation.RELU:
        return nn.ReLU()
    elif name == Activation.LEAKYRELU:
        return nn.LeakyReLU()
    elif name == Activation.TANH:
        return nn.Tanh()
    elif name == Activation.GELU:
        return GeLU()
    elif name == Activation.GLU:
        return nn.GLU(dim=1)
    else:
        raise RuntimeError(f"{name} is not supported")
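

# Usage sketch (assumed calling code, not part of this module): get_activation
# maps an Activation enum value from pytext.config.module_config onto the
# corresponding nn.Module, so model components can stay agnostic to the choice
# of non-linearity. The input shape is an assumption for illustration only.
def _example_get_activation_usage():
    act = get_activation(Activation.GELU)  # returns the GeLU module defined above
    x = torch.randn(2, 8)
    y = act(x)
    assert y.shape == x.shape  # GELU is elementwise, so the shape is preserved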