mirror of
https://github.com/tinygrad/tinygrad.git
synced 2026-06-24 02:14:17 +00:00
update mlperf bert scripts (#6755)
Removed DISABLE_DROPOUT=1. Updated BS to 54, which works on tinyboxes with dropout enabled. The accuracy method now uses BERT's own sparse_categorical_crossentropy, which accepts a Tensor as ignore_index.
This commit is contained in:
parent
717b394391
commit
396c96357b
8 changed files with 16 additions and 34 deletions
|
|
@ -2,14 +2,11 @@
|
|||
|
||||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=6
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=54 EVAL_BS=6
|
||||
|
||||
export BEAM=4
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
echo "TODO: DISABLING DROPOUT - UNSET FOR REAL SUBMISSION RUN"
|
||||
export DISABLE_DROPOUT=1 # TODO: Unset flag for real submission run.
|
||||
|
||||
export BENCHMARK=10 DEBUG=2
|
||||
|
||||
python3 examples/mlperf/model_train.py
|
||||
|
|
|
|||
|
|
@ -2,14 +2,11 @@
|
|||
|
||||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=6
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=54 EVAL_BS=6
|
||||
|
||||
export BEAM=4
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
echo "TODO: DISABLING DROPOUT - UNSET FOR REAL SUBMISSION RUN"
|
||||
export DISABLE_DROPOUT=1 # TODO: Unset flag for real submission run.
|
||||
|
||||
export WANDB=1
|
||||
|
||||
python3 examples/mlperf/model_train.py
|
||||
|
|
@ -3,14 +3,11 @@
|
|||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export SUBMISSION_PLATFORM="tinybox_green"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=66 EVAL_BS=6
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=54 EVAL_BS=6
|
||||
|
||||
export BEAM=4
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
echo "TODO: DISABLING DROPOUT - UNSET FOR REAL SUBMISSION RUN"
|
||||
export DISABLE_DROPOUT=1 # TODO: Unset flag for real submission run.
|
||||
|
||||
# pip install -e ".[mlperf]"
|
||||
export LOGMLPERF=1
|
||||
|
||||
|
|
|
|||
|
|
@ -2,14 +2,11 @@
|
|||
|
||||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=84 EVAL_BS=6
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=54 EVAL_BS=6
|
||||
|
||||
export BEAM=4
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
echo "TODO: DISABLING DROPOUT - UNSET FOR REAL SUBMISSION RUN"
|
||||
export DISABLE_DROPOUT=1 # TODO: Unset flag for real submission run.
|
||||
|
||||
export BENCHMARK=10 DEBUG=2
|
||||
|
||||
python3 examples/mlperf/model_train.py
|
||||
|
|
|
|||
|
|
@ -2,14 +2,11 @@
|
|||
|
||||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=84 EVAL_BS=6
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=54 EVAL_BS=6
|
||||
|
||||
export BEAM=4
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
echo "TODO: DISABLING DROPOUT - UNSET FOR REAL SUBMISSION RUN"
|
||||
export DISABLE_DROPOUT=1 # TODO: Unset flag for real submission run.
|
||||
|
||||
export WANDB=1
|
||||
|
||||
python3 examples/mlperf/model_train.py
|
||||
|
|
@ -3,14 +3,11 @@
|
|||
export PYTHONPATH="."
|
||||
export MODEL="bert"
|
||||
export SUBMISSION_PLATFORM="tinybox_red"
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=84 EVAL_BS=6
|
||||
export DEFAULT_FLOAT="HALF" GPUS=6 BS=54 EVAL_BS=6
|
||||
|
||||
export BEAM=4
|
||||
export BASEDIR="/raid/datasets/wiki"
|
||||
|
||||
echo "TODO: DISABLING DROPOUT - UNSET FOR REAL SUBMISSION RUN"
|
||||
export DISABLE_DROPOUT=1 # TODO: Unset flag for real submission run.
|
||||
|
||||
# pip install -e ".[mlperf]"
|
||||
export LOGMLPERF=1
|
||||
|
||||
|
|
|
|||
|
|
@ -49,15 +49,15 @@ class BertForPretraining:
|
|||
output = self.bert(input_ids, attention_mask, token_type_ids)
|
||||
return self.cls(output, masked_lm_positions)
|
||||
|
||||
def loss(self, prediction_logits:Tensor, seq_relationship_logits:Tensor, masked_lm_ids:Tensor, masked_lm_weights:Tensor, next_sentence_labels:Tensor):
|
||||
# Reference has residual on denominator: https://github.com/mlcommons/training/blob/master/language_model/tensorflow/bert/run_pretraining.py#L315
|
||||
def sparse_categorical_crossentropy(predictions:Tensor, labels:Tensor, ignore_index=-1):
|
||||
log_probs, loss_mask = predictions.log_softmax(), (labels != ignore_index)
|
||||
y_counter = Tensor.arange(predictions.shape[-1], requires_grad=False, device=predictions.device).unsqueeze(0).expand(labels.numel(), predictions.shape[-1])
|
||||
y = ((y_counter == labels.flatten().reshape(-1, 1)) * loss_mask.reshape(-1, 1)).reshape(*labels.shape, predictions.shape[-1])
|
||||
return -((log_probs * y).sum()) / (loss_mask.sum() + 1e-5) # Small constant to avoid division by zero
|
||||
# Reference has residual on denominator: https://github.com/mlcommons/training/blob/master/language_model/tensorflow/bert/run_pretraining.py#L315
|
||||
def sparse_categorical_crossentropy(self, predictions:Tensor, labels:Tensor, ignore_index=-1):
|
||||
log_probs, loss_mask = predictions.log_softmax(), (labels != ignore_index)
|
||||
y_counter = Tensor.arange(predictions.shape[-1], requires_grad=False, device=predictions.device).unsqueeze(0).expand(labels.numel(), predictions.shape[-1])
|
||||
y = ((y_counter == labels.flatten().reshape(-1, 1)) * loss_mask.reshape(-1, 1)).reshape(*labels.shape, predictions.shape[-1])
|
||||
return -((log_probs * y).sum()) / (loss_mask.sum() + 1e-5) # Small constant to avoid division by zero
|
||||
|
||||
masked_lm_loss = sparse_categorical_crossentropy(prediction_logits, masked_lm_ids, ignore_index=masked_lm_weights)
|
||||
def loss(self, prediction_logits:Tensor, seq_relationship_logits:Tensor, masked_lm_ids:Tensor, masked_lm_weights:Tensor, next_sentence_labels:Tensor):
|
||||
masked_lm_loss = self.sparse_categorical_crossentropy(prediction_logits, masked_lm_ids, ignore_index=masked_lm_weights)
|
||||
next_sentence_loss = seq_relationship_logits.binary_crossentropy_logits(next_sentence_labels)
|
||||
return masked_lm_loss + next_sentence_loss
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ class BertForPretraining:
|
|||
valid = masked_lm_ids != 0
|
||||
masked_lm_predictions = prediction_logits.log_softmax().argmax(-1)
|
||||
masked_lm_accuracy = (masked_lm_predictions == masked_lm_ids) * valid
|
||||
masked_lm_loss = prediction_logits.sparse_categorical_crossentropy(masked_lm_ids, ignore_index=masked_lm_weights)
|
||||
masked_lm_loss = self.sparse_categorical_crossentropy(prediction_logits, masked_lm_ids, ignore_index=masked_lm_weights)
|
||||
|
||||
seq_relationship_predictions = seq_relationship_logits.log_softmax().argmax(-1)
|
||||
seq_relationship_accuracy = (seq_relationship_predictions == next_sentence_labels)
|
||||
|
|
|
|||
|
|
@ -3141,7 +3141,7 @@ class Tensor:
|
|||
"""
|
||||
return (self.maximum(0) - Y * self + (1 + self.abs().neg().exp()).log())._do_reduction(reduction)
|
||||
|
||||
def sparse_categorical_crossentropy(self, Y:Tensor, ignore_index=-1, label_smoothing=0.0, reduction:ReductionStr="mean") -> Tensor:
|
||||
def sparse_categorical_crossentropy(self, Y:Tensor, ignore_index:int=-1, label_smoothing=0.0, reduction:ReductionStr="mean") -> Tensor:
|
||||
"""
|
||||
Computes the sparse categorical cross-entropy loss between `self` and `Y`.
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue