Source code for pytext

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import json
import logging
import uuid
from functools import lru_cache
from typing import Callable, Mapping, Optional

import numpy as np
from caffe2.python import workspace
from caffe2.python.predictor import predictor_exporter
from pytext.data.sources.data_source import DataSource
from pytext.task import load
from pytext.task.new_task import NewTask
from pytext.utils.file_io import PathManager, register_http_url_handler
from pytext.workflow import _set_cuda

from .builtin_task import register_builtin_tasks
from .config import PyTextConfig, pytext_config_from_json
from .utils.onnx import CAFFE2_DB_TYPE, convert_caffe2_blob_name


register_builtin_tasks()
register_http_url_handler()


Predictor = Callable[[Mapping[str, str]], Mapping[str, np.array]]


def _predict(workspace_id, predict_net, model, tensorizers, input):
    workspace.SwitchWorkspace(workspace_id)
    tensor_dict = {
        name: tensorizer.prepare_input(input)
        for name, tensorizer in tensorizers.items()
    }
    model_inputs = model.arrange_caffe2_model_inputs(tensor_dict)
    model_input_names = model.get_export_input_names(tensorizers)
    vocab_to_export = model.vocab_to_export(tensorizers)
    for blob_name, model_input in zip(model_input_names, model_inputs):
        converted_blob_name = blob_name
        dtype = np.float32
        if blob_name in vocab_to_export:
            converted_blob_name = convert_caffe2_blob_name(blob_name)
            dtype = str

        workspace.blobs[converted_blob_name] = np.array([model_input], dtype=dtype)
    workspace.RunNet(predict_net)
    return {
        str(blob): workspace.blobs[blob][0] for blob in predict_net.external_outputs
    }


[docs]def load_config(filename: str) -> PyTextConfig: """ Load a PyText configuration file from a file path. See pytext.config.pytext_config for more info on configs. """ with PathManager.open(filename) as file: config_json = json.loads(file.read()) if "config" not in config_json: return pytext_config_from_json(config_json) return pytext_config_from_json(config_json["config"])
[docs]def create_predictor( config: PyTextConfig, model_file: Optional[str] = None, db_type: str = CAFFE2_DB_TYPE, task: Optional[NewTask] = None, cache_size: int = 0, ) -> Predictor: """ Create a simple prediction API from a training config and an exported caffe2 model file. This model file should be created by calling export on a trained model snapshot. """ workspace_id = str(uuid.uuid4()) workspace.SwitchWorkspace(workspace_id, True) predict_net = predictor_exporter.prepare_prediction_net( filename=model_file or PathManager.get_local_path(config.export_caffe2_path), db_type=db_type, ) new_task = task or NewTask.from_config(config.task) input_tensorizers = { name: tensorizer for name, tensorizer in new_task.data.tensorizers.items() if tensorizer.is_input } def predict_fn(input): return _predict( workspace_id, predict_net, new_task.model, input_tensorizers, input ) if cache_size < 0: return lru_cache(maxsize=None)(predict_fn) elif cache_size > 0: return lru_cache(maxsize=cache_size)(predict_fn) else: return predict_fn
[docs]def batch_predict_caffe2_model( pytext_model_file: str, caffe2_model_file: str, db_type: str = CAFFE2_DB_TYPE, data_source: Optional[DataSource] = None, use_cuda=False, task: Optional[NewTask] = None, train_config: Optional[PyTextConfig] = None, cache_size: int = 0, ): """ Gets predictions from caffe2 model from a batch of examples. Args: pytext_model_file: Path to pytext model file (required if task and training config is not specified) caffe2_model_file: Path to caffe2 model file db_type: DB type to use for caffe2 data_source: Data source for test examples use_cuda: Whether to turn on cuda processing task: The pytext task object train_config: The pytext training config cache_size: The LRU cache size to use for prediction. 0 = no cache, -1 = boundless cache, [1, inf) = size of cache """ logging.info(f"Loading data processing config from {pytext_model_file}") _set_cuda(use_cuda) if task is None or train_config is None: task, train_config, _ = load(pytext_model_file) data_source = data_source or task.data.data_source logging.info(f"Loading Caffe2 model: {caffe2_model_file}") predictor = create_predictor( train_config, PathManager.get_local_path(caffe2_model_file), db_type, task, cache_size, ) logging.info(f"Model loaded, start testing") predictions = [predictor(example) for example in data_source.test] return predictions