Source code for pytext.models.joint_model

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import Optional, Union

import pytext.utils.cuda as cuda_util
import torch
from pytext.common.constants import SpecialTokens
from pytext.data.tensorizers import (
    FloatTensorizer,
    LabelTensorizer,
    SlotLabelTensorizer,
    TokenTensorizer,
)
from pytext.exporters.exporter import ModelExporter
from pytext.models.embeddings import WordEmbedding
from pytext.models.model import Model
from pytext.models.module import create_module
from pytext.models.representations.pass_through import PassThroughRepresentation
from pytext.utils.usage import log_class_usage

from .decoders import IntentSlotModelDecoder
from .embeddings import EmbeddingList
from .output_layers.intent_slot_output_layer import IntentSlotOutputLayer
from .representations.bilstm_doc_slot_attention import BiLSTMDocSlotAttention
from .representations.jointcnn_rep import (
    JointCNNRepresentation,
    SharedCNNRepresentation,
)


class IntentSlotModel(Model):
    """
    A joint intent-slot model. This is framed as a model that performs document
    classification and word tagging tasks, where the embedding and text
    representation layers are shared for both tasks.

    The supported representation layers are based on bidirectional LSTM or CNN.

    It can be instantiated just like any other :class:`~Model`.

    This is in the new data handling design involving tensorizers; that is the
    difference between this and JointModel.
    """

    __EXPANSIBLE__ = True
    class Config(Model.Config):
        class ModelInput(Model.Config.ModelInput):
            tokens: TokenTensorizer.Config = TokenTensorizer.Config()
            word_labels: SlotLabelTensorizer.Config = SlotLabelTensorizer.Config(
                allow_unknown=True
            )
            doc_labels: LabelTensorizer.Config = LabelTensorizer.Config(
                allow_unknown=True
            )
            doc_weight: Optional[FloatTensorizer.Config] = None
            word_weight: Optional[FloatTensorizer.Config] = None

        inputs: ModelInput = ModelInput()
        word_embedding: WordEmbedding.Config = WordEmbedding.Config()
        representation: Union[
            BiLSTMDocSlotAttention.Config,
            JointCNNRepresentation.Config,
            SharedCNNRepresentation.Config,
            PassThroughRepresentation.Config,
        ] = BiLSTMDocSlotAttention.Config()
        output_layer: IntentSlotOutputLayer.Config = IntentSlotOutputLayer.Config()
        decoder: IntentSlotModelDecoder.Config = IntentSlotModelDecoder.Config()
        default_doc_loss_weight: float = 0.2
        default_word_loss_weight: float = 0.5
    def __init__(
        self, default_doc_loss_weight, default_word_loss_weight, *args, **kwargs
    ):
        super().__init__(*args, **kwargs)
        # The CRF module has parameters, but its forward function is not called in
        # the model's forward function because of an ONNX compatibility issue. This
        # does not work with DDP, so find_unused_parameters is set to False as a
        # workaround; it can be removed once DDP supports parameters that are not
        # used in the model's forward function.
        self.find_unused_parameters = False
        self.default_doc_loss_weight = default_doc_loss_weight
        self.default_word_loss_weight = default_word_loss_weight
        log_class_usage(__class__)
    @classmethod
    def create_embedding(cls, config, tensorizers):
        vocab = tensorizers["tokens"].vocab
        word_embedding = WordEmbedding(
            len(vocab),
            config.word_embedding.embed_dim,
            None,
            None,
            vocab.idx[SpecialTokens.UNK],
            [],
        )
        return EmbeddingList([word_embedding], concat=True)
    @classmethod
    def from_config(cls, config, tensorizers):
        word_labels = tensorizers["word_labels"].vocab
        doc_labels = tensorizers["doc_labels"].vocab

        embedding = cls.create_embedding(config, tensorizers)
        representation = create_module(
            config.representation, embed_dim=embedding.embedding_dim
        )
        decoder = create_module(
            config.decoder,
            in_dim_doc=representation.doc_representation_dim,
            in_dim_word=representation.word_representation_dim,
            out_dim_doc=len(doc_labels),
            out_dim_word=len(word_labels),
        )
        output_layer = create_module(
            config.output_layer, doc_labels=doc_labels, word_labels=word_labels
        )
        return cls(
            config.default_doc_loss_weight,
            config.default_word_loss_weight,
            embedding,
            representation,
            decoder,
            output_layer,
        )
    def arrange_model_inputs(self, tensor_dict):
        tokens, seq_lens, _ = tensor_dict["tokens"]
        return (tokens, seq_lens)
    def arrange_targets(self, tensor_dict):
        intent_tensor = tensor_dict["doc_labels"]
        slot_tensor = tensor_dict["word_labels"]
        return intent_tensor, slot_tensor
    def vocab_to_export(self, tensorizers):
        return {"tokens_vals": list(tensorizers["tokens"].vocab)}
    def get_export_input_names(self, tensorizers):
        return ["tokens_vals", "tokens_lens"]
    def get_export_output_names(self, tensorizers):
        return ["doc_scores", "word_scores"]
    def arrange_model_context(self, tensor_dict):
        context = self.get_weights_context(tensor_dict)
        context["seq_lens"] = tensor_dict["tokens"][1]
        return context
    def get_weights_context(self, tensor_dict):
        batch_size = tensor_dict["doc_labels"].size()[0]
        return {
            "doc_weight": tensor_dict.get(
                "doc_weight",
                cuda_util.tensor(
                    [self.default_doc_loss_weight] * batch_size, dtype=torch.float
                ),
            ),
            "word_weight": tensor_dict.get(
                "word_weight",
                cuda_util.tensor(
                    [self.default_word_loss_weight] * batch_size, dtype=torch.float
                ),
            ),
        }
    def caffe2_export(self, tensorizers, tensor_dict, path, export_onnx_path=None):
        exporter = ModelExporter(
            ModelExporter.Config(),
            self.get_export_input_names(tensorizers),
            self.arrange_model_inputs(tensor_dict),
            self.vocab_to_export(tensorizers),
            self.get_export_output_names(tensorizers),
        )
        return exporter.export_to_caffe2(self, path, export_onnx_path=export_onnx_path)
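

# The sketch below is illustrative and not part of the original module. It assumes
# the tensorizers have already been initialized over the training data and that the
# data pipeline has produced a batched `tensor_dict`; the names `tensorizers` and
# `tensor_dict` are hypothetical placeholders. It shows how the model can be built
# from its config and how a batch is split into inputs, targets, and context.
#
#     model = IntentSlotModel.from_config(IntentSlotModel.Config(), tensorizers)
#
#     inputs = model.arrange_model_inputs(tensor_dict)    # (tokens, seq_lens)
#     targets = model.arrange_targets(tensor_dict)        # (doc_labels, word_labels)
#     context = model.arrange_model_context(tensor_dict)  # loss weights + seq_lens
#
#     logits = model(*inputs)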