# pyproject.toml
[build-system]
# setuptools 61.0 is the first release that reads [project] metadata from
# pyproject.toml. "wheel" is intentionally not listed: the setuptools backend
# requests it by itself when a wheel is actually being built.
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "tokenization"
version = "0.1.0"
description = "Research codebase exploring tokenization inefficiencies in language models"
readme = "README.md"
# The pinned torch==2.8.0 and transformers==4.53.2 releases both require
# Python 3.9 or newer, so advertising 3.8 only defers the failure to
# dependency resolution.
# NOTE(review): [tool.ruff] targets py311; raise this bound further if the
# code relies on 3.10+/3.11-only syntax -- confirm with the maintainers.
requires-python = ">=3.9"
dependencies = [
    # Core dependencies
    "torch==2.8.0",
    "transformers==4.53.2",
    "datasets",
    "huggingface-hub",
    "accelerate",
    "peft",
    "bitsandbytes",
    # Visualization and analysis
    "jupyter",
    "ipywidgets",
    "plotille",
    "distinctipy",
    # Statistical modeling
    "statsmodels>=0.14.0",
    "patsy>=0.5.0",
    "scipy",
    "pandas",
    "numpy",
    "pygam",
    # Utilities
    "pyyaml",
    "brotli",
    "tqdm",
    "colorama",
    "openai",
]
[project.optional-dependencies]
# Extra "dev": tooling needed only while working on the code base.
dev = ["ruff"]
[tool.setuptools.packages.find]
# Package discovery starts at the repository root and applies no name filter.
include = ["*"]
where = ["."]
[tool.ruff]
# Python version target
target-version = "py311"
# Line length
line-length = 120
# Include only minimal_example for now
include = ["minimal_example/**/*.py"]
# Extra paths to skip, added on top of Ruff's built-in exclusions.
# A plain `exclude` would replace the built-in list instead of extending it.
extend-exclude = [
    ".git",
    "__pycache__",
    ".venv",
    "venv",
    "build",
    "dist",
    "*.egg-info",
]
[tool.ruff.lint]
# Rule families that are switched on
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "UP",  # pyupgrade
    "B",   # flake8-bugbear
    "SIM", # flake8-simplify
]
# Individual rules that are switched off
ignore = [
    "E402", # module import not at top of file
    "E501", # line too long (left to the formatter)
]
[tool.ruff.lint.per-file-ignores]
# Unused imports (F401) are tolerated in __init__.py files.
"__init__.py" = ["F401"]
[tool.ruff.format]
# Indentation is done with spaces
indent-style = "space"
# String literals use double quotes
quote-style = "double"
# Magic trailing commas stay in effect (skipping them is turned off)
skip-magic-trailing-comma = false