Module mimir.attacks.all_attacks
Enum class for attacks. Also contains the base attack class.
Classes
class AllAttacks (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration of the available attacks; each member's value is the attack's string identifier.
Expand source code
class AllAttacks(str, Enum): LOSS = "loss" # Done REFERENCE_BASED = "ref" # Done ZLIB = "zlib" # Done MIN_K = "min_k" # Done MIN_K_PLUS_PLUS = "min_k++" # Done NEIGHBOR = "ne" # Done GRADNORM = "gradnorm" # Done RECALL = "recall" # QUANTILE = "quantile" # Uncomment when tested implementation is available
Ancestors
- builtins.str
- enum.Enum
Class variables
var GRADNORM
var LOSS
var MIN_K
var MIN_K_PLUS_PLUS
var NEIGHBOR
var RECALL
var REFERENCE_BASED
var ZLIB
class Attack (config, target_model: Model, ref_model: Model = None, is_blackbox: bool = True)
-
Expand source code
class Attack:
    """
    Base class for attacks.

    Stores the configuration and models, takes care of one-time loading and
    unloading, and routes ``attack()`` calls to the ``_attack()`` hook that
    subclasses are expected to override.
    """

    def __init__(self, config, target_model: Model, ref_model: Model = None, is_blackbox: bool = True):
        """
        Store the configuration and models used by the attack.

        Args:
            config: Configuration object; ``attack()`` reads its
                ``pretokenized`` attribute.
            target_model: Target model. This base class only stores it.
            ref_model: Optional reference model. When given, it is loaded and
                unloaded together with the attack.
            is_blackbox: Stored as ``self.is_blackbox``; not read by this
                base class.
        """
        self.config = config
        self.target_model = target_model
        self.ref_model = ref_model
        self.is_loaded = False
        self.is_blackbox = is_blackbox

    def load(self):
        """
        Run any one-time, attack-specific preparation.

        The base implementation loads the reference model (if one was given)
        and marks the attack as loaded.
        """
        if self.ref_model is not None:
            self.ref_model.load()
        self.is_loaded = True

    def unload(self):
        """
        Undo ``load()``: unload the reference model (if any) and mark the
        attack as not loaded.
        """
        if self.ref_model is not None:
            self.ref_model.unload()
        self.is_loaded = False

    def _attack(self, document, probs, tokens=None, **kwargs):
        """
        Actual logic for the attack. Must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        # The hook subclasses override is _attack(); attack() itself is
        # already implemented below, so the message names the right method.
        raise NotImplementedError("Attack subclasses must implement _attack()")

    def attack(self, document, probs, **kwargs):
        """
        Score a document using the attack's scoring function.

        Loads the attack on first use, then calls ``self._attack``.

        Args:
            document: The sample to score. When ``config.pretokenized`` is
                set, this is forwarded to ``_attack`` as ``tokens`` instead.
            probs: Forwarded to ``_attack`` unchanged.
            **kwargs: Forwarded to ``_attack`` unchanged. When
                ``config.pretokenized`` is set, must contain
                ``detokenized_sample``, which is then passed to ``_attack``
                as the document (and also remains in ``kwargs``).

        Returns:
            Whatever ``_attack`` returns.

        Raises:
            ValueError: If ``config.pretokenized`` is set and
                ``detokenized_sample`` is missing or ``None``.
        """
        # Load attack if not loaded yet
        if not self.is_loaded:
            self.load()
            # Set the flag here as well, so a load() override that does not
            # set it itself is still only invoked once.
            self.is_loaded = True

        if not self.config.pretokenized:
            return self._attack(document, probs=probs, **kwargs)

        detokenized_sample = kwargs.get("detokenized_sample")
        if detokenized_sample is None:
            raise ValueError("detokenized_sample must be provided")

        # Pretokenized input: `document` holds the tokens, and the
        # detokenized text takes the document slot.
        return self._attack(detokenized_sample, tokens=document, probs=probs, **kwargs)
Subclasses
- GradNormAttack
- LOSSAttack
- MinKProbAttack
- MinKPlusPlusAttack
- NeighborhoodAttack
- QuantileAttack
- ReCaLLAttack
- ReferenceAttack
- ZLIBAttack
Methods
def attack(self, document, probs, **kwargs)
-
Score a document using the attack's scoring function. Calls self._attack
def load(self)
-
Any one-time, attack-specific preparation steps.
def unload(self)