Source code for pytext.models.seq_models.contextual_intent_slot

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import Optional

from pytext.data.tensorizers import SeqTokenTensorizer
from pytext.models.embeddings import EmbeddingList, WordEmbedding
from pytext.models.joint_model import IntentSlotModel
from pytext.models.module import create_module
from pytext.models.representations.contextual_intent_slot_rep import (
    ContextualIntentSlotRepresentation,
)


class ContextualIntentSlotModel(IntentSlotModel):
    """
    Joint model for intent classification and slot tagging that consumes
    contextual information (a sequence of utterances) together with the
    dictionary features of the last utterance.

    Training data should include:

    * doc_label (string): intent classification label of either the sequence
      of utterances or just the last sentence
    * word_label (string): slot tagging label of the last utterance in the
      format of start_idx:end_idx:slot_label, multiple slots are separated by
      a comma
    * text (list of string): sequence of utterances for training
    * dict_feat (dict): a dict of features that contains the feature of each
      word in the last utterance

    Following is an example of raw columns from training data:

    ==========  =======================================================================
    doc_label   reply-where
    word_label  10:20:restaurant_name
    text        ["dinner at 6?", "wanna try Tomi Sushi?"]
    dict_feat   {"tokenFeatList": [{"tokenIdx": 2, "features": {"poi:eatery": 0.66}},
                                   {"tokenIdx": 3, "features": {"poi:eatery": 0.66}}]}
    ==========  =======================================================================

    """

    class Config(IntentSlotModel.Config):
        class ModelInput(IntentSlotModel.Config.ModelInput):
            # Tensorizer config for the utterance-sequence input; looked up
            # under the "seq_tokens" key by the methods of the model below.
            seq_tokens: Optional[
                SeqTokenTensorizer.Config
            ] = SeqTokenTensorizer.Config()

        inputs: ModelInput = ModelInput()
        # Embedding config applied to the "seq_tokens" tensorizer in
        # create_embedding.
        seq_embedding: Optional[WordEmbedding.Config] = WordEmbedding.Config()
        representation: ContextualIntentSlotRepresentation.Config = (
            ContextualIntentSlotRepresentation.Config()
        )

    @classmethod
    def create_embedding(cls, config, tensorizers):
        """
        Build the embedding module for this model.

        Args:
            config: model config providing ``word_embedding``,
                ``seq_embedding`` and ``init_from_saved_state``.
            tensorizers: mapping that must contain the ``"tokens"`` and
                ``"seq_tokens"`` tensorizers.

        Returns:
            A non-concatenating ``EmbeddingList`` with two members: a
            concatenating ``EmbeddingList`` wrapping the word embedding,
            followed by the sequence embedding.
        """
        token_embedding = create_module(
            config.word_embedding,
            tensorizer=tensorizers["tokens"],
            init_from_saved_state=config.init_from_saved_state,
        )
        sequence_embedding = create_module(
            config.seq_embedding, tensorizer=tensorizers["seq_tokens"]
        )
        # The word embedding is wrapped in its own concatenating list, while
        # the outer list keeps the two members separate (concat=False).
        word_level = EmbeddingList([token_embedding], concat=True)
        return EmbeddingList([word_level, sequence_embedding], concat=False)

    def vocab_to_export(self, tensorizers):
        """
        Return the vocabularies to export, keyed by export input name.

        Args:
            tensorizers: mapping that must contain the ``"tokens"`` and
                ``"seq_tokens"`` tensorizers, each exposing a ``vocab``.

        Returns:
            dict with ``"tokens_vals"`` and ``"seq_tokens_vals"`` entries,
            each a list built from the matching tensorizer's vocab.
        """
        token_vocab = list(tensorizers["tokens"].vocab)
        seq_token_vocab = list(tensorizers["seq_tokens"].vocab)
        return {"tokens_vals": token_vocab, "seq_tokens_vals": seq_token_vocab}

    def get_export_input_names(self, tensorizers):
        """
        Return the ordered list of input names used when exporting the model.

        ``tensorizers`` is accepted for interface compatibility and is not
        read here.
        """
        export_names = [
            "tokens_vals",
            "seq_tokens_vals",
            "tokens_lens",
            "seq_tokens_lens",
        ]
        return export_names

    def arrange_model_inputs(self, tensor_dict):
        """
        Order the tensors of a batch into the tuple fed to the model.

        Args:
            tensor_dict: mapping whose ``"tokens"`` and ``"seq_tokens"``
                entries each unpack into three items.

        Returns:
            ``(tokens, seq_tokens, seq_lens, seq_word_lens)`` where ``tokens``
            and ``seq_lens`` are the first two items of the ``"tokens"`` entry
            and ``seq_tokens`` / ``seq_word_lens`` are the first and third
            items of the ``"seq_tokens"`` entry. The remaining item of each
            entry is discarded.
        """
        word_tokens, seq_lens, _ = tensor_dict["tokens"]
        context_tokens, _, seq_word_lens = tensor_dict.get("seq_tokens")
        return (word_tokens, context_tokens, seq_lens, seq_word_lens)