-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcombinatorics.py
More file actions
120 lines (97 loc) · 3.39 KB
/
combinatorics.py
File metadata and controls
120 lines (97 loc) · 3.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
from functools import lru_cache
import random
import torch
from utils.tokenization_utils import tokenize
from typing import Iterable, List, Tuple
from collections import deque
@lru_cache(maxsize=None)
def partitions_of_n_with_up_to_k(n: int, k: int) -> list[list[int]]:
if n == 0:
return [[]]
if n < 0:
return []
ways = []
for num in range(1, k + 1):
sub_ways = partitions_of_n_with_up_to_k(n - num, k)
for way in sub_ways:
ways.append([num] + way)
return ways
@lru_cache(maxsize=None)
def _counts_1_k(n: int, k: int) -> List[int]:
"""
Return the list [C_0, C_1, ..., C_n] where C_r is the number of
ordered compositions of r into parts in {1, ..., k}.
Recurrence: C_0=1 and C_r = sum_{i=1..min(k,r)} C_{r-i}.
Uses an O(n) sliding-window implementation.
"""
if n < 0:
raise ValueError("n must be non-negative")
if k < 1:
raise ValueError("k must be at least 1")
C = [1] # C_0
window = deque([1]) # last up to k terms: starts with C_0
window_sum = 1
for r in range(1, n + 1):
c_r = window_sum
C.append(c_r)
window.append(c_r)
window_sum += c_r
if len(window) > k:
window_sum -= window.popleft()
return C
def sample_1_k_composition(n: int, k: int, rng=random.random) -> Tuple[int, ...]:
"""
Return a uniform random ordered composition of n into parts in {1, ..., k}.
"""
if n < 0:
raise ValueError("n must be non-negative")
if n == 0:
return tuple()
if k < 1:
raise ValueError("k must be at least 1")
C = _counts_1_k(n, k) # C[r] = number of compositions of r
r = n
out = []
while r > 0:
m = min(k, r)
# Draw i in {1..m} with probability C[r-i]/C[r]
threshold = rng() * C[r]
acc = 0
chosen = None
for i in range(1, m + 1):
acc += C[r - i]
if threshold < acc:
chosen = i
break
out.append(chosen)
r -= chosen
return tuple(out)
def tokenize_number_by_random_signature(number: int | str,
signatures_dict: dict[int, list[list[int]]] | None = None,
up_to_k_digits_token: int | None = 3 # 3 as llama "default"
) -> list[int]:
number = str(number)
if signatures_dict is None:
signature = sample_1_k_composition(len(number), k=up_to_k_digits_token)
else:
signature = random.choice(signatures_dict[len(number)])
assert len(number) == sum(signature), f"{number} -- {signature}"
tokenization = []
for i in signature:
tokenization.append(
tokenize(number[:i])[0]
)
number = number[i:]
return tokenization
def sample_k_pairs_of_numbers(acceptable_digits, k, weight_factor=None):
if weight_factor is None:
weight_factor = 1
weights = [weight_factor ** (d - min(acceptable_digits)) for d in acceptable_digits]
digit_counts = random.choices(acceptable_digits, weights=weights, k=2*k)
nums = []
for n in digit_counts:
low = 10 ** (n - 1)
high = 10 ** n - 1
nums.append(torch.randint(low=low, high=high+1, size=(1,)).item())
xs, ys = nums[:k], nums[k:]
return list(zip(xs, ys))