
"""LinGreedy: Linear Contextual Bandit with Epsilon-Greedy Exploration."""


from gobrec.mabs.lin_mabs import Lin
import numpy as np
import torch


class LinGreedy(Lin):
    r"""LinGreedy: Linear Contextual Bandit with Epsilon-Greedy Exploration [1]_.

    This class implements a linear MAB algorithm that uses ridge regression to
    estimate the expected rewards for each arm. Then, with probability
    :math:`\epsilon`, the generated scores are random (exploration), and with
    probability :math:`1 - \epsilon`, the generated scores are the expected
    rewards (exploitation).

    Attributes
    ----------
    epsilon : float
        Probability of choosing a random action (exploration). Value should be
        in [0, 1]. 1 means always explore, 0 means always exploit.
    l2_lambda : float
        Regularization parameter for ridge regression.
    device : str
        Device to use for computations ('cpu' or 'cuda').
    items_per_batch : int
        Number of items to process in each batch when updating the model. More
        items per batch means more memory usage but faster computation.

    References
    ----------
    .. [1] John Langford and Tong Zhang. The epoch-greedy algorithm for
       contextual multi-armed bandits. In Proceedings of the 20th
       International Conference on Neural Information Processing Systems,
       NIPS'07, pages 817-824, Red Hook, NY, USA, 2007. Curran Associates
       Inc. doi: 10.5555/2981562.2981665.

    Examples
    --------
    An example using LinGreedy with :math:`\epsilon = 1`, which means always
    explore.

    >>> import numpy as np
    >>> from gobrec.mabs.lin_mabs import LinGreedy
    >>> contexts = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1],
    ...                      [1, 0, 0], [0, 1, 0], [0, 0, 1],
    ...                      [1, 0, 0], [0, 1, 0], [0, 0, 1]])
    >>> decisions = np.array(['a', 'a', 'a',
    ...                       'b', 'b', 'b',
    ...                       'c', 'c', 'c'])
    >>> rewards = np.array([10, 0 , 1 ,
    ...                     1 , 10, 0 ,
    ...                     0 , 1 , 10])
    >>> lin_greedy_mab = LinGreedy(seed=42, epsilon=1)
    >>> lin_greedy_mab.fit(contexts, decisions, rewards)
    >>> lin_greedy_mab.predict(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))
    tensor([[0.6974, 0.0942, 0.9756],
            [0.7611, 0.7861, 0.1281],
            [0.4504, 0.3708, 0.9268]], dtype=torch.float64)
    >>> lin_greedy_mab.predict(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))
    tensor([[0.2272, 0.5546, 0.0638],
            [0.8276, 0.6317, 0.7581],
            [0.3545, 0.9707, 0.8931]], dtype=torch.float64)

    An example using LinGreedy with :math:`\epsilon = 0`, which means always
    exploit.

    >>> import numpy as np
    >>> from gobrec.mabs.lin_mabs import LinGreedy
    >>> contexts = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1],
    ...                      [1, 0, 0], [0, 1, 0], [0, 0, 1],
    ...                      [1, 0, 0], [0, 1, 0], [0, 0, 1]])
    >>> decisions = np.array(['a', 'a', 'a',
    ...                       'b', 'b', 'b',
    ...                       'c', 'c', 'c'])
    >>> rewards = np.array([10, 0 , 1 ,
    ...                     1 , 10, 0 ,
    ...                     0 , 1 , 10])
    >>> lin_greedy_mab = LinGreedy(seed=42, epsilon=0)
    >>> lin_greedy_mab.fit(contexts, decisions, rewards)
    >>> lin_greedy_mab.predict(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))
    tensor([[5.0000, 0.5000, 0.0000],
            [0.0000, 5.0000, 0.5000],
            [0.5000, 0.0000, 5.0000]], dtype=torch.float64)
    >>> lin_greedy_mab.predict(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]))
    tensor([[5.0000, 0.5000, 0.0000],
            [0.0000, 5.0000, 0.5000],
            [0.5000, 0.0000, 5.0000]], dtype=torch.float64)
    """

    def __init__(self, seed: int = None, epsilon: float = 0.1,
                 l2_lambda: float = 1.0, use_gpu: bool = False,
                 items_per_batch: int = 10_000):
        """Initialize the LinGreedy algorithm.

        Parameters
        ----------
        seed : int, optional
            Random seed for reproducibility. Default is None.
        epsilon : float, optional
            Probability of choosing a random action (exploration). Value
            should be in [0, 1]. 1 means always explore, 0 means always
            exploit. Default is 0.1.
        l2_lambda : float, optional
            Regularization parameter for ridge regression. Default is 1.0.
        use_gpu : bool, optional
            Whether to use GPU for computations if available. Default is
            False.
        items_per_batch : int, optional
            Number of items to process in each batch when updating the model.
            More items per batch means more memory usage but faster
            computation. Default is 10,000.
        """
        super().__init__(seed, l2_lambda, use_gpu, items_per_batch)
        self.epsilon = epsilon
    def predict(self, contexts: np.ndarray):
        """Predict the expected rewards for each arm given the contexts.

        Parameters
        ----------
        contexts : np.ndarray
            A 2D array where each row represents the context features for
            which predictions are to be made.

        Returns
        -------
        expected_rewards : torch.Tensor
            A 2D tensor of shape (n_samples, n_arms) where each element is
            the expected reward for the corresponding context-arm pair. The
            encoded item IDs are used here; to recover the original item IDs,
            use the `label_encoder.inverse_transform` method.
        """
        x = torch.tensor(contexts, device=self.device, dtype=torch.double)
        scores = torch.empty((contexts.shape[0], self.num_arms),
                             device=self.device, dtype=torch.double)
        # Decide per sample whether to explore (random scores) or exploit.
        random_mask = self.rng.random(contexts.shape[0]) < self.epsilon
        random_indexes = random_mask.nonzero()[0]
        not_random_indexes = (~random_mask).nonzero()[0]
        # Exploration: assign uniform random scores to the sampled rows.
        scores[random_mask] = torch.tensor(
            self.rng.random((len(random_indexes), self.num_arms)),
            device=self.device, dtype=torch.double)
        # Exploitation: expected rewards x @ beta.T, computed in batches to
        # bound peak memory usage.
        for start in range(0, len(not_random_indexes), self.items_per_batch):
            end = min(start + self.items_per_batch, len(not_random_indexes))
            batch_indexes = not_random_indexes[start:end]
            scores[batch_indexes] = torch.einsum(
                'bd,ad->ba', x[batch_indexes], self.beta)
        return scores
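

# ---------------------------------------------------------------------------
# The helpers below are NOT part of the library: they are a minimal,
# self-contained NumPy sketch of the two ingredients `predict` relies on.
# The closed-form ridge solution is an assumption about what `Lin.fit`
# computes per arm (this section does not show it), and the names
# `_ridge_beta` and `_epsilon_greedy_scores` are hypothetical, kept here
# only for illustration.
# ---------------------------------------------------------------------------
def _ridge_beta(X: np.ndarray, y: np.ndarray,
                l2_lambda: float = 1.0) -> np.ndarray:
    """Assumed per-arm estimate: beta = (X^T X + lambda * I)^{-1} X^T y."""
    d = X.shape[1]
    return np.linalg.solve(X.T @ X + l2_lambda * np.eye(d), X.T @ y)


def _epsilon_greedy_scores(x: np.ndarray, betas: np.ndarray, epsilon: float,
                           rng: np.random.Generator) -> np.ndarray:
    """Random scores with probability epsilon, x @ betas.T otherwise."""
    scores = x @ betas.T                        # (n_samples, n_arms)
    explore = rng.random(x.shape[0]) < epsilon  # per-sample exploration mask
    scores[explore] = rng.random((explore.sum(), betas.shape[0]))
    return scores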