Source code for pytext.models.disjoint_multitask_model

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from typing import List

import torch
import torch.nn as nn
from pytext.common.constants import BatchContext
from pytext.models.model import Model


class DisjointMultitaskModel(Model):
    """
    Wrapper model to train multiple PyText models that share parameters.
    Designed to be used for multi-tasking when the tasks have disjoint datasets.

    Modules which have the same shared_module_key and type share parameters.
    Only need to configure the first such module in full in each case.

    Args:
        models: Mapping from task name to the sub-task model. It is wrapped
            in an ``nn.ModuleDict`` and must contain at least one entry.
        loss_weights: Per-task loss weights, indexed by the same task names
            as ``models``.

    Attributes:
        models: ``nn.ModuleDict`` holding the sub-task models.
        loss_weights: The per-task weights passed to the constructor.
        current_model: Sub-model the input batch is currently routed to.
        current_loss_weight: Weight multiplied into the current model's loss.

    Raises:
        ValueError: If ``models`` is empty.
    """

    def __init__(self, models, loss_weights) -> None:
        models = nn.ModuleDict(models)
        if len(models) == 0:
            # Fail with a clear message instead of leaking StopIteration from
            # next() out of the constructor.
            raise ValueError(
                "DisjointMultitaskModel requires at least one sub-task model"
            )
        super().__init__(None, None, None, None)
        self.models = models
        first_name, first_model = next(iter(models.items()))
        # make this a list to prevent registering in state_dict
        self._current_model = [first_model]
        self.loss_weights = loss_weights
        # Keep the default weight in step with the default model so that
        # get_loss() is usable before the first contextualize() call.
        self.current_loss_weight = loss_weights[first_name]

    def contextualize(self, context):
        """Select the sub-model and loss weight for the task named in ``context``.

        Args:
            context: Mapping that contains ``BatchContext.TASK_NAME``; its
                value is used as the key into both ``models`` and
                ``loss_weights``.
        """
        task_name = context[BatchContext.TASK_NAME]
        # Resolve both lookups before mutating state so a missing entry cannot
        # leave the current model and the current weight out of step.
        model = self.models[task_name]
        loss_weight = self.loss_weights[task_name]
        self._current_model[0] = model
        self.current_loss_weight = loss_weight

    @property
    def current_model(self):
        """The sub-model that calls are currently delegated to."""
        return self._current_model[0]

    def get_loss(self, logits, targets, context):
        """Return the current model's loss multiplied by the current loss weight."""
        return self.current_loss_weight * self.current_model.get_loss(
            logits, targets, context
        )

    def get_pred(self, logits, targets=None, context=None, *args):
        """Delegate prediction to the current model, forwarding all arguments."""
        return self.current_model.get_pred(logits, targets, context, *args)

    def forward(self, *inputs) -> List[torch.Tensor]:
        """Run the current model's ``forward`` on ``inputs``."""
        return self.current_model.forward(*inputs)

    def save_modules(self, base_path, suffix=""):
        """Call ``save_modules`` on every sub-model.

        Each sub-model receives ``base_path`` and a suffix made of ``-``, the
        task name, and the caller's ``suffix``.
        """
        for name, model in self.models.items():
            model.save_modules(base_path, f"-{name}{suffix}")
class NewDisjointMultitaskModel(DisjointMultitaskModel):
    """Multitask wrapper whose ``arrange_*`` hooks forward to the current sub-model."""

    def arrange_model_inputs(self, tensor_dict):
        """Select the task from ``tensor_dict``, then arrange inputs with its model.

        ``tensor_dict`` is passed to ``contextualize`` as the context, so the
        model chosen here is also the one used by later delegated calls.
        """
        self.contextualize(tensor_dict)
        active_model = self.current_model
        return active_model.arrange_model_inputs(tensor_dict)

    def arrange_targets(self, tensor_dict):
        """Return the current model's ``arrange_targets(tensor_dict)``."""
        active_model = self.current_model
        return active_model.arrange_targets(tensor_dict)

    def arrange_model_context(self, tensor_dict):
        """Return the current model's ``arrange_model_context(tensor_dict)``."""
        active_model = self.current_model
        return active_model.arrange_model_context(tensor_dict)

    def caffe2_export(self, tensorizers, tensor_dict, path, export_onnx_path=None):
        """Do nothing and return ``None``; all arguments are ignored."""
        return None