Source code for pytext.models.seq_models.contextual_intent_slot

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import Optional

from pytext.data.tensorizers import SeqTokenTensorizer
from pytext.models.embeddings import EmbeddingList, WordEmbedding
from pytext.models.joint_model import IntentSlotModel
from pytext.models.module import create_module
from pytext.models.representations.contextual_intent_slot_rep import (
    ContextualIntentSlotRepresentation,
)


[docs]class ContextualIntentSlotModel(IntentSlotModel):
    """
    Joint Model for Intent classification and slot tagging with inputs of contextual
    information (sequence of utterances) and dictionary feature of the last utterance.

    Training data should include:
    doc_label (string): intent classification label of either the sequence of \
    utterances or just the last sentence
    word_label (string): slot tagging label of the last utterance in the format\
    of start_idx:end_idx:slot_label, multiple slots are separated by a comma
    text (list of string): sequence of utterances for training
    dict_feat (dict): a dict of features that contains the feature of each word\
    in the last utterance

    Following is an example of raw columns from training data:

    ==========  =======================================================================
    doc_label   reply-where
    word_label  10:20:restaurant_name
    text        ["dinner at 6?", "wanna try Tomi Sushi?"]
    dict_feat   {"tokenFeatList": [{"tokenIdx": 2, "features": {"poi:eatery": 0.66}},
                                   {"tokenIdx": 3, "features": {"poi:eatery": 0.66}}]}
    ==========  =======================================================================

    """

[docs]    class Config(IntentSlotModel.Config):
        class ModelInput(IntentSlotModel.Config.ModelInput):

            seq_tokens: Optional[
                SeqTokenTensorizer.Config
            ] = SeqTokenTensorizer.Config()

        inputs: ModelInput = ModelInput()
        seq_embedding: Optional[WordEmbedding.Config] = WordEmbedding.Config()
        representation: ContextualIntentSlotRepresentation.Config = ContextualIntentSlotRepresentation.Config()

[docs]    @classmethod
    def create_embedding(cls, config, tensorizers):
        word_emb = create_module(
            config.word_embedding,
            tensorizer=tensorizers["tokens"],
            init_from_saved_state=config.init_from_saved_state,
        )

        seq_emb_tensorizer = tensorizers["seq_tokens"]
        seq_emb = create_module(config.seq_embedding, tensorizer=seq_emb_tensorizer)
        return EmbeddingList(
            [EmbeddingList([word_emb], concat=True), seq_emb], concat=False
        )

[docs]    def vocab_to_export(self, tensorizers):
        return {
            "tokens_vals": list(tensorizers["tokens"].vocab),
            "seq_tokens_vals": list(tensorizers["seq_tokens"].vocab),
        }

[docs]    def get_export_input_names(self, tensorizers):
        return ["tokens_vals", "seq_tokens_vals", "tokens_lens", "seq_tokens_lens"]

[docs]    def arrange_model_inputs(self, tensor_dict):
        tokens, seq_lens, _ = tensor_dict["tokens"]
        arranged_inputs = [tokens]
        seq_emb_inputs, seq_word_lens = tensor_dict.get("seq_tokens")
        arranged_inputs.append(seq_emb_inputs)
        arranged_inputs.append(seq_lens)
        arranged_inputs.append(seq_word_lens)
        return tuple(arranged_inputs)