NewBertPairClassificationTask.Config

Component: NewBertPairClassificationTask

class NewBertPairClassificationTask.Config[source]

Bases: DocumentClassificationTask.Config

All Attributes (including base classes)

data: Data.Config = Data.Config()
trainer: TaskTrainer.Config = TaskTrainer.Config()
use_elastic: Optional[bool] = None
model: NewBertModel.Config = NewBertModel.Config(inputs=BertModelInput(tokens=BERTTensorizer.Config(columns=['text1', 'text2'], max_seq_len=128)))
metric_reporter: ClassificationMetricReporter.Config = ClassificationMetricReporter.Config(text_column_names=['text1', 'text2'])

Default JSON

{
    "data": {
        "Data": {
            "source": {
                "TSVDataSource": {
                    "column_mapping": {},
                    "train_filename": null,
                    "test_filename": null,
                    "eval_filename": null,
                    "field_names": null,
                    "delimiter": "\t",
                    "quoted": false,
                    "drop_incomplete_rows": false
                }
            },
            "batcher": {
                "PoolingBatcher": {
                    "train_batch_size": 16,
                    "eval_batch_size": 16,
                    "test_batch_size": 16,
                    "pool_num_batches": 1000,
                    "num_shuffled_pools": 1
                }
            },
            "sort_key": null,
            "in_memory": true
        }
    },
    "trainer": {
        "TaskTrainer": {
            "epochs": 10,
            "early_stop_after": 0,
            "max_clip_norm": null,
            "report_train_metrics": true,
            "target_time_limit_seconds": null,
            "do_eval": true,
            "load_best_model_after_train": true,
            "num_samples_to_log_progress": 1000,
            "num_accumulated_batches": 1,
            "num_batches_per_epoch": null,
            "optimizer": {
                "Adam": {
                    "lr": 0.001,
                    "weight_decay": 1e-05,
                    "eps": 1e-08
                }
            },
            "scheduler": null,
            "sparsifier": null,
            "fp16_args": {
                "FP16OptimizerFairseq": {
                    "init_loss_scale": 128,
                    "scale_window": null,
                    "scale_tolerance": 0.0,
                    "threshold_loss_scale": null,
                    "min_loss_scale": 0.0001
                }
            },
            "privacy_engine": null,
            "use_tensorboard": false
        }
    },
    "use_elastic": null,
    "model": {
        "inputs": {
            "tokens": {
                "BERTTensorizer": {
                    "is_input": true,
                    "columns": [
                        "text1",
                        "text2"
                    ],
                    "tokenizer": {
                        "WordPieceTokenizer": {
                            "basic_tokenizer": {
                                "split_regex": "\\s+",
                                "lowercase": true,
                                "use_byte_offsets": false
                            },
                            "wordpiece_vocab_path": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/vocab.txt"
                        }
                    },
                    "base_tokenizer": null,
                    "vocab_file": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/vocab.txt",
                    "max_seq_len": 128
                }
            },
            "dense": null,
            "labels": {
                "LabelTensorizer": {
                    "is_input": false,
                    "column": "label",
                    "allow_unknown": false,
                    "pad_in_vocab": false,
                    "label_vocab": null,
                    "label_vocab_file": null,
                    "add_labels": null
                }
            },
            "num_tokens": {
                "is_input": false,
                "names": [
                    "tokens"
                ],
                "indexes": [
                    2
                ]
            }
        },
        "encoder": {
            "HuggingFaceBertSentenceEncoder": {
                "load_path": null,
                "save_path": null,
                "freeze": false,
                "shared_module_key": null,
                "output_dropout": 0.4,
                "embedding_dim": 768,
                "pooling": "cls_token",
                "export": false,
                "projection_dim": 0,
                "normalize_output_rep": false,
                "bert_cpt_dir": "manifold://nlp_technologies/tree/huggingface-models/bert-base-uncased/",
                "load_weights": true
            }
        },
        "decoder": {
            "load_path": null,
            "save_path": null,
            "freeze": false,
            "shared_module_key": null,
            "hidden_dims": [],
            "out_dim": null,
            "layer_norm": false,
            "dropout": 0.0,
            "bias": true,
            "activation": "relu",
            "temperature": 1.0,
            "spectral_normalization": false
        },
        "output_layer": {
            "load_path": null,
            "save_path": null,
            "freeze": false,
            "shared_module_key": null,
            "loss": {
                "CrossEntropyLoss": {}
            },
            "label_weights": null
        }
    },
    "metric_reporter": {
        "ClassificationMetricReporter": {
            "output_path": "/tmp/test_out.txt",
            "pep_format": false,
            "student_column_names": [],
            "log_gradient": false,
            "model_select_metric": "accuracy",
            "target_label": null,
            "text_column_names": [
                "text1",
                "text2"
            ],
            "additional_column_names": [],
            "recall_at_precision_thresholds": [
                0.2,
                0.4,
                0.6,
                0.8,
                0.9
            ],
            "is_memory_efficient": false
        }
    }
}