Module mimir.attacks.min_k
Min-K% Prob attack, from "Detecting Pretraining Data from Large Language Models" (Shi et al., 2023): https://arxiv.org/pdf/2310.16789.pdf
Classes
class MinKProbAttack (config: ExperimentConfig, model: Model)
-
Expand source code
class MinKProbAttack(Attack):
    """Min-K% Prob attack (https://arxiv.org/pdf/2310.16789.pdf).

    Scores a document by the negated mean of its lowest-valued fraction ``k``
    of n-gram scores, where each n-gram score is the mean of the per-token
    values inside a sliding window.
    """

    def __init__(self, config: ExperimentConfig, model: Model):
        # This attack is constructed without a reference model.
        super().__init__(config, model, ref_model=None)

    @ch.no_grad()
    def _attack(self, document, probs, tokens=None, **kwargs):
        """Compute the Min-K% Prob score for one document.

        Args:
            document: Passed to ``self.target_model.get_probabilities`` when
                ``probs`` is None; otherwise unused.
            probs: Precomputed per-token values for the document, or None to
                have them computed from the target model.
                NOTE(review): presumably token log-probabilities -- confirm
                against ``get_probabilities``.
            tokens: Optional value forwarded as ``tokens=`` to
                ``get_probabilities``.
            **kwargs: Attack hyper-parameters:
                ``k`` (float, default 0.2): fraction of n-grams to keep.
                ``window`` (int, default 1): n-gram length.
                ``stride`` (int, default 1): step between window starts.

        Returns:
            The negated mean of the lowest ``k`` fraction of n-gram scores.
            At least one n-gram is always kept, so a short document no longer
            yields NaN merely because ``len(ngrams) * k`` truncates to zero.
            NaN is returned only when no window fits at all (the sequence is
            shorter than ``window``, or empty).
        """
        # Hyper-params specific to min-k attack
        k: float = kwargs.get("k", 0.2)
        window: int = kwargs.get("window", 1)
        stride: int = kwargs.get("stride", 1)

        all_prob = (
            probs
            if probs is not None
            else self.target_model.get_probabilities(document, tokens=tokens)
        )

        # One score per n-gram: the mean over each full window, taken every
        # `stride` positions. Partial windows at the tail are not scored.
        ngram_probs = [
            np.mean(all_prob[start : start + window])
            for start in range(0, len(all_prob) - window + 1, stride)
        ]

        if not ngram_probs:
            # Nothing to score. Keep the previous NaN result, but return it
            # explicitly instead of via np.mean([]) and its RuntimeWarning.
            return np.nan

        # int() truncates towards zero, so without the floor of 1 any document
        # with fewer than 1/k n-grams would select nothing and score NaN.
        num_selected = max(1, int(len(ngram_probs) * k))
        min_k_probs = sorted(ngram_probs)[:num_selected]

        return -np.mean(min_k_probs)
Ancestors
Inherited members