PolyAI-pheme

Build error

App Files Files Community

PolyAI-pheme / modules /masking_logic.py

taras-sereda

minimal set of files to run inference; pheme-small checkpoint

96ee597 9 months ago

raw

history blame contribute delete

No virus

3.36 kB

	"""Masking and sampling logic adapted from MaskGIT original paper:
	https://github.com/google-research/maskgit

	Copyright PolyAI Limited.
	"""
	from dataclasses import dataclass

	import numpy as np
	import torch
	import torch.nn.functional as F


	@dataclass
	class State:
	    """Holds decoding state data.

	    Instances are normally created by ``state_init``. The dataclass performs
	    no validation or conversion of the values it receives.
	    """

	    # The position of the decoding loop in the length dimension
	    # (``state_init`` seeds it with ``start_iter``, 0 by default).
	    cur_index: int
	    # The current sequences; ``state_init`` stores ``init_indices`` here
	    # unchanged.
	    cur_seqs: torch.Tensor
	    # ``state_init`` stores ``init_indices`` tiled ``num_iter`` times along a
	    # newly inserted dim 1 -- presumably one slot per decoding iteration
	    # (TODO confirm against the decoding loop).
	    final_seqs: torch.Tensor


	def state_init(init_indices, num_iter, start_iter=0):
	    """Build the initial ``State`` for a decoding run.

	    Args:
	        init_indices: Tensor of starting indices. It is stored unchanged as
	            the current sequences. Assumed to be 2-D (batch, length) --
	            TODO confirm against callers.
	        num_iter: Repeat count used when tiling ``init_indices`` along the
	            newly inserted dim 1.
	        start_iter: Initial value of the decoding loop position.

	    Returns:
	        A ``State`` whose ``cur_index`` is ``start_iter``, whose ``cur_seqs``
	        is ``init_indices`` and whose ``final_seqs`` is the tiled copy.
	    """
	    # Insert an axis at dim 1 and repeat the indices along it.
	    tiled_indices = torch.tile(init_indices.unsqueeze(1), (1, num_iter, 1))
	    return State(
	        cur_index=start_iter,
	        cur_seqs=init_indices,
	        final_seqs=tiled_indices,
	    )


	def schedule(ratio, method="cosine"):
	    """Map a progress ``ratio`` to a mask ratio.

	    Args:
	        ratio: Progress value fed to the schedule; presumably in [0, 1]
	            (not checked here).
	        method: Name of the schedule:
	            * ``"uniform"``: ``1 - ratio``.
	            * any name containing ``"pow"`` (e.g. ``"pow2"``): the text left
	              after removing ``"pow"`` is parsed as a float exponent and the
	              result is ``1 - ratio ** exponent``.
	            * ``"cosine"``: ``cos(ratio * pi / 2)``.

	    Returns:
	        The mask ratio clipped to the range [1e-6, 1].

	    Raises:
	        ValueError: If ``method`` is not one of the supported schedules, or
	            if a ``"pow"`` method does not carry a parsable exponent.
	    """
	    if method == "uniform":
	        mask_ratio = 1.0 - ratio
	    elif "pow" in method:
	        exponent = float(method.replace("pow", ""))
	        mask_ratio = 1.0 - ratio**exponent
	    elif method == "cosine":
	        mask_ratio = np.cos(ratio * (np.pi / 2))
	    else:
	        # Without this branch an unknown name surfaced later as an
	        # UnboundLocalError on ``mask_ratio``, hiding the real cause.
	        raise ValueError(
	            f"Unknown schedule method {method!r}; expected 'uniform', "
	            "'cosine' or 'pow<exponent>'."
	        )

	    # Keep the ratio strictly positive and at most 1.
	    return np.clip(mask_ratio, 1e-6, 1.0)


	def mask_by_random_topk(mask_len, probs, temperature=1.0):
	    """Select low-confidence positions to mask, using noisy confidences.

	    The confidence of each position is ``log(probs)`` plus Gumbel noise
	    scaled by ``temperature``. Positions whose confidence is strictly below
	    a per-row cut-off are marked for masking.

	    Args:
	        mask_len: Integer-valued tensor (cast to long) used as the index into
	            the ascending-sorted confidences to pick the cut-off. Presumably
	            shaped (..., 1) so the cut-off broadcasts over the last dim --
	            TODO confirm against callers.
	        probs: Tensor of probabilities; the last dim is the one sorted over.
	        temperature: Scale applied to the Gumbel noise.

	    Returns:
	        Boolean tensor that is True where the noisy confidence is below the
	        cut-off.
	    """
	    gumbel = gumbel_noise_like(probs)
	    noisy_confidence = probs.log() + temperature * gumbel
	    # Ascending sort: entry ``mask_len`` is the threshold value for that row.
	    ascending = torch.sort(noisy_confidence, dim=-1).values
	    threshold = ascending.take_along_dim(mask_len.long(), dim=-1)
	    # Everything strictly less confident than the threshold gets masked.
	    return noisy_confidence < threshold


	def gumbel_noise_like(t):
	    """Return Gumbel noise with the same shape, dtype and device as ``t``.

	    Uniform samples are drawn from the range (1e-20, 1) and transformed with
	    ``-log(-log(u))``. The lower bound keeps the inner logarithm finite.
	    """
	    uniform = torch.zeros_like(t).uniform_(1e-20, 1)
	    inner = uniform.log().neg()
	    return inner.log().neg()


	def sample_from_logits(
	    logits,
	    sample: bool = True,
	    temperature: float = 1.0,
	    top_k: int = None,
	    top_p: float = None,
	    return_probs: bool = False
	):
	    """Pick one token per position from ``logits`` over the last dim.

	    Top-k and top-p filtering are applied to the raw logits first; the
	    temperature is applied only afterwards, when the probabilities are
	    computed. The caller's ``logits`` tensor is never modified.

	    Args:
	        logits: Tensor whose last dim indexes the candidate tokens.
	        sample: If True, draw from the filtered distribution with
	            ``multinomial``; otherwise take the argmax of the filtered logits.
	        temperature: Divisor applied to the logits before the softmax when it
	            is greater than 0. For values <= 0 the softmax is taken without
	            scaling.
	        top_k: If given, keep only the ``top_k`` largest logits per position
	            (ties with the k-th value are kept). Values larger than the size
	            of the last dim are clamped to it.
	        top_p: If given and below 1.0, keep the smallest set of highest
	            probability tokens whose cumulative probability exceeds
	            ``top_p`` (the first token crossing the threshold is kept).
	        return_probs: If True, also return the probability of each chosen
	            token.

	    Returns:
	        ``token`` with shape ``logits.shape[:-1]``, or the tuple
	        ``(token, token_probs)`` when ``return_probs`` is True.
	    """
	    out_shape = logits.shape[:-1]
	    neg_inf = -float("inf")

	    # Apply top_k sampling. ``masked_fill`` returns a new tensor, so the
	    # caller's logits are left untouched.
	    if top_k is not None:
	        k = min(top_k, logits.size(-1))
	        kth_best = logits.topk(k).values[..., -1:]
	        logits = logits.masked_fill(logits < kth_best, neg_inf)

	    # Apply top_p (nucleus) sampling
	    if top_p is not None and top_p < 1.0:
	        sorted_logits, sorted_indices = logits.sort(descending=True)
	        cumulative_probs = sorted_logits.softmax(dim=-1).cumsum(dim=-1)

	        sorted_remove = cumulative_probs > top_p
	        # Right shift the removal mask to keep the 1st token over threshold
	        sorted_remove = F.pad(sorted_remove, (1, 0), value=False)[..., :-1]

	        # Map the removal mask from sorted order back to token order
	        remove = sorted_remove.scatter(-1, sorted_indices, sorted_remove)
	        logits = logits.masked_fill(remove, neg_inf)

	    # Normalize the (filtered) logits into probabilities
	    if temperature > 0:
	        probs = F.softmax(logits / temperature, dim=-1)
	    else:
	        probs = logits.softmax(dim=-1)

	    if sample:
	        flat_probs = probs.reshape(-1, probs.size(-1))
	        # ``reshape(out_shape)`` also handles 1-D logits, where the output
	        # shape is empty and a star-unpacked ``view`` would fail.
	        token = flat_probs.multinomial(1).squeeze(1).reshape(out_shape)
	    else:
	        token = logits.argmax(-1)

	    if not return_probs:
	        return token

	    token_probs = probs.take_along_dim(
	        token.unsqueeze(-1), dim=-1).squeeze(-1)
	    return token, token_probs