languagemodels.config
Global model and inference configuration
This module manages the global configuration object shared between other modules in the package. It implements a dictionary with data validation on the keys and values.
Note that this module provides access to many implementation details that are not expected to be used by average users. Specific models that have never been the default for the package may be removed at any time.
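Most callers interact with this module through the shared config object it exports. A minimal sketch of typical use (values taken from the defaults defined in the source below):

import languagemodels.config as lm_config

# Reads return validated, normalized values
print(lm_config.config["max_ram"])      # 0.48 (gigabytes, the default)

# Writes are validated on assignment; RAM units are normalized to GB floats
lm_config.config["max_ram"] = "4gb"     # stored as 4.0
lm_config.config["max_tokens"] = "300"  # coerced to the int 300

Each setting can also be supplied through a LANGUAGEMODELS_* environment variable (for example LANGUAGEMODELS_MAX_RAM=4gb) set before the configuration is created.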
1"""Global model and inference configuration 2 3This module manages the global configuration object shared between other 4modules in the package. It implements a dictionary with data validation 5on the keys and values. 6 7Note that this module provides access to many implementation details 8that are not expected to be used by average users. Specific models that 9have never been the default for the package may be removed at any time. 10""" 11 12import re 13import os 14from collections import namedtuple 15from huggingface_hub import hf_hub_download 16import json 17 18ConfigItem = namedtuple("ConfigItem", "initfn default") 19 20 21class ModelFilterException(Exception): 22 pass 23 24 25# Model list 26# This list is sorted in priority order, with the best models first 27# The best model that fits in the memory bounds and matches the model filter 28# will be selected 29models = [ 30 { 31 "name": "openchat-3.5-0106", 32 "tuning": "instruct", 33 "datasets": ["mistral", "openorca", "flan"], 34 "params": 7e9, 35 "quantization": "int8", 36 "backend": "ct2", 37 "architecture": "decoder-only-transformer", 38 "license": "apache-2.0", 39 "prompt_fmt": ( 40 "GPT4 Correct User: {instruction}<|end_of_turn|>" "GPT4 Correct Assistant:" 41 ), 42 }, 43 { 44 "name": "Llama-3.1-8B-Instruct", 45 "tuning": "instruct", 46 "revision": "d02fc85", 47 "datasets": ["llama3"], 48 "params": 8e9, 49 "quantization": "int8", 50 "backend": "ct2", 51 "architecture": "decoder-only-transformer", 52 "license": "llama3", 53 "prompt_fmt": ( 54 "<|start_header_id|>user<|end_header_id|>\n\n" 55 "{instruction}<|eot_id|>" 56 "<|start_header_id|>assistant<|end_header_id|>\n\n" 57 ), 58 }, 59 { 60 "name": "Meta-Llama-3-8B-Instruct", 61 "tuning": "instruct", 62 "datasets": ["llama3"], 63 "params": 8e9, 64 "quantization": "int8", 65 "backend": "ct2", 66 "architecture": "decoder-only-transformer", 67 "license": "llama3", 68 "prompt_fmt": ( 69 "<|start_header_id|>user<|end_header_id|>\n\n" 70 "{instruction}<|eot_id|>" 71 "<|start_header_id|>assistant<|end_header_id|>\n\n" 72 ), 73 }, 74 { 75 "name": "openchat-3.5-1210", 76 "tuning": "instruct", 77 "datasets": ["mistral", "openorca", "flan"], 78 "params": 7e9, 79 "quantization": "int8", 80 "backend": "ct2", 81 "architecture": "decoder-only-transformer", 82 "license": "apache-2.0", 83 "prompt_fmt": ( 84 "GPT4 Correct User: {instruction}<|end_of_turn|>" "GPT4 Correct Assistant:" 85 ), 86 }, 87 { 88 "name": "WizardLM-2-7B", 89 "tuning": "instruct", 90 "datasets": ["mistral", "wizardlm"], 91 "params": 7e9, 92 "quantization": "int8", 93 "backend": "ct2", 94 "architecture": "decoder-only-transformer", 95 "license": "apache-2.0", 96 "prompt_fmt": "USER: {instruction} ASSISTANT:", 97 }, 98 { 99 "name": "neural-chat-7b-v3-1", 100 "tuning": "instruct", 101 "datasets": ["mistral", "slimorca"], 102 "params": 7e9, 103 "quantization": "int8", 104 "backend": "ct2", 105 "architecture": "decoder-only-transformer", 106 "license": "apache-2.0", 107 "prompt_fmt": ( 108 "### System:\n" 109 "Be helpful\n" 110 "### User:\n{instruction}\n" 111 "### Assistant:\n" 112 ), 113 }, 114 { 115 "name": "Mistral-7B-Instruct-v0.2", 116 "tuning": "instruct", 117 "datasets": ["mistral"], 118 "params": 7e9, 119 "quantization": "int8", 120 "backend": "ct2", 121 "architecture": "decoder-only-transformer", 122 "license": "apache-2.0", 123 "prompt_fmt": "<s>[INST] {instruction} [/INST]", 124 }, 125 { 126 "name": "flan-alpaca-gpt4-xl", 127 "tuning": "instruct", 128 "datasets": ["c4", "flan", "gpt4-alpaca"], 129 "params": 3e9, 130 
"quantization": "int8", 131 "backend": "ct2", 132 "architecture": "encoder-decoder-transformer", 133 "license": "apache-2.0", 134 }, 135 { 136 "name": "flan-alpaca-xl", 137 "tuning": "instruct", 138 "datasets": ["c4", "flan", "alpaca"], 139 "params": 3e9, 140 "quantization": "int8", 141 "backend": "ct2", 142 "architecture": "encoder-decoder-transformer", 143 "license": "apache-2.0", 144 }, 145 { 146 "name": "flan-t5-xl", 147 "tuning": "instruct", 148 "datasets": ["c4", "flan"], 149 "params": 3e9, 150 "quantization": "int8", 151 "backend": "ct2", 152 "architecture": "encoder-decoder-transformer", 153 "license": "apache-2.0", 154 }, 155 { 156 "name": "Llama-3.2-3B-Instruct", 157 "tuning": "instruct", 158 "revision": "5da4ba8", 159 "datasets": ["llama3"], 160 "params": 1e9, 161 "quantization": "int8", 162 "backend": "ct2", 163 "architecture": "decoder-only-transformer", 164 "license": "llama3.2", 165 "repetition_penalty": 1.1, 166 "prompt_fmt": ( 167 "<|start_header_id|>user<|end_header_id|>\n\n" 168 "{instruction}<|eot_id|>" 169 "<|start_header_id|>assistant<|end_header_id|>\n\n" 170 ), 171 }, 172 { 173 "name": "fastchat-t5-3b-v1.0", 174 "tuning": "instruct", 175 "datasets": ["c4", "flan", "sharegpt"], 176 "params": 3e9, 177 "quantization": "int8", 178 "backend": "ct2", 179 "architecture": "encoder-decoder-transformer", 180 "license": "apache-2.0", 181 }, 182 { 183 "name": "LaMini-Flan-T5-783M", 184 "tuning": "instruct", 185 "revision": "e5e20a1", 186 "datasets": ["c4", "flan", "lamini"], 187 "params": 783e6, 188 "quantization": "int8", 189 "backend": "ct2", 190 "architecture": "encoder-decoder-transformer", 191 "license": "cc-by-nc-4.0", 192 }, 193 { 194 "name": "flan-t5-large", 195 "tuning": "instruct", 196 "datasets": ["c4", "flan"], 197 "params": 783e6, 198 "quantization": "int8", 199 "backend": "ct2", 200 "architecture": "encoder-decoder-transformer", 201 "license": "apache-2.0", 202 }, 203 { 204 "name": "Llama-3.2-1B-Instruct", 205 "tuning": "instruct", 206 "revision": "6e3e3a1", 207 "datasets": ["llama3"], 208 "params": 1e9, 209 "quantization": "int8", 210 "backend": "ct2", 211 "architecture": "decoder-only-transformer", 212 "license": "llama3.2", 213 "repetition_penalty": 1.1, 214 "prompt_fmt": ( 215 "<|start_header_id|>user<|end_header_id|>\n\n" 216 "{instruction}<|eot_id|>" 217 "<|start_header_id|>assistant<|end_header_id|>\n\n" 218 ), 219 }, 220 { 221 "name": "LaMini-Flan-T5-248M", 222 "tuning": "instruct", 223 "revision": "96cfe99", 224 "datasets": ["c4", "flan", "lamini"], 225 "params": 248e6, 226 "quantization": "int8", 227 "backend": "ct2", 228 "architecture": "encoder-decoder-transformer", 229 "license": "cc-by-nc-4.0", 230 }, 231 { 232 "name": "flan-t5-base", 233 "tuning": "instruct", 234 "datasets": ["c4", "flan"], 235 "params": 248e6, 236 "quantization": "int8", 237 "backend": "ct2", 238 "architecture": "encoder-decoder-transformer", 239 "license": "apache-2.0", 240 }, 241 { 242 "name": "flan-alpaca-base", 243 "tuning": "instruct", 244 "datasets": ["c4", "flan", "alpaca"], 245 "params": 248e6, 246 "quantization": "int8", 247 "backend": "ct2", 248 "architecture": "encoder-decoder-transformer", 249 "license": "apache-2.0", 250 }, 251 { 252 "name": "dialogstudio-t5-base-v1.0", 253 "tuning": "instruct", 254 "datasets": ["c4", "flan", "dialogstudio"], 255 "params": 248e6, 256 "quantization": "int8", 257 "backend": "ct2", 258 "architecture": "encoder-decoder-transformer", 259 "license": "apache-2.0", 260 "prompt_fmt": ("Instruction: Be helpful. 
<USER> {instruction}"), 261 }, 262 { 263 "name": "LaMini-Flan-T5-77M", 264 "tuning": "instruct", 265 "datasets": ["c4", "flan", "lamini"], 266 "params": 77e6, 267 "backend": "ct2", 268 "quantization": "int8", 269 "architecture": "encoder-decoder-transformer", 270 "license": "cc-by-nc-4.0", 271 }, 272 { 273 "name": "flan-t5-small", 274 "tuning": "instruct", 275 "datasets": ["c4", "flan"], 276 "params": 77e6, 277 "quantization": "int8", 278 "backend": "ct2", 279 "architecture": "encoder-decoder-transformer", 280 "license": "apache-2.0", 281 }, 282 { 283 "name": "Phi-3-mini-4k-instruct-20240701", 284 "tuning": "instruct", 285 "datasets": ["phi-3"], 286 "params": 3.8e9, 287 "quantization": "int8", 288 "backend": "ct2", 289 "architecture": "decoder-only-transformer", 290 "license": "mit", 291 "prompt_fmt": "<|user|>\n{instruction}<|end|>\n<|assistant|>", 292 "repetition_penalty": 1.1, 293 }, 294 { 295 "name": "Phi-3-mini-4k-instruct", 296 "tuning": "instruct", 297 "datasets": ["phi-3"], 298 "params": 3.8e9, 299 "quantization": "int8", 300 "backend": "ct2", 301 "architecture": "decoder-only-transformer", 302 "license": "mit", 303 "prompt_fmt": "<|user|>\n{instruction}<|end|>\n<|assistant|>", 304 "repetition_penalty": 1.1, 305 }, 306 { 307 "name": "phi-2", 308 "tuning": "instruct", 309 "datasets": ["phi-2"], 310 "params": 2.7e9, 311 "quantization": "int8", 312 "backend": "ct2", 313 "architecture": "decoder-only-transformer", 314 "license": "microsoft-research-license", 315 "prompt_fmt": "Instruct: {instruction}\nOutput:", 316 }, 317 { 318 "name": "gemma-2b-it", 319 "tuning": "instruct", 320 "datasets": ["gemma"], 321 "params": 2.5e9, 322 "quantization": "int8", 323 "backend": "ct2", 324 "architecture": "decoder-only-transformer", 325 "license": "gemma-terms-of-use", 326 "prompt_fmt": "<bos><start_of_turn>user\n" 327 "{instruction}<end_of_turn>\n" 328 "<start_of_turn>model", 329 }, 330 { 331 "name": "h2o-danube3-4b-chat", 332 "tuning": "instruct", 333 "datasets": [], 334 "params": 4.0e9, 335 "quantization": "int8", 336 "backend": "ct2", 337 "architecture": "decoder-only-transformer", 338 "license": "apache-2.0", 339 "prompt_fmt": "<|prompt|>{instruction}</s><|answer|>", 340 }, 341 { 342 "name": "h2o-danube2-1.8b-chat", 343 "tuning": "instruct", 344 "datasets": [], 345 "params": 1.8e9, 346 "quantization": "int8", 347 "backend": "ct2", 348 "architecture": "decoder-only-transformer", 349 "license": "other", 350 "prompt_fmt": "<|prompt|>{instruction}</s><|answer|>", 351 }, 352 { 353 "name": "h2o-danube-1.8b-chat", 354 "tuning": "instruct", 355 "datasets": [], 356 "params": 1.8e9, 357 "quantization": "int8", 358 "backend": "ct2", 359 "architecture": "decoder-only-transformer", 360 "license": "other", 361 "prompt_fmt": "<|prompt|>{instruction}</s><|answer|>", 362 }, 363 { 364 "name": "Falcon3-3B-Instruct", 365 "tuning": "instruct", 366 "languages": ["en", "fr", "es", "pt"], 367 "revision": "b183d4d", 368 "datasets": [], 369 "params": 3.23e9, 370 "quantization": "int8", 371 "backend": "ct2", 372 "context_length": 8192, 373 "repetition_penalty": 1.1, 374 "architecture": "decoder-only-transformer", 375 "license": "falcon", 376 "prompt_fmt": ( 377 "<|system|>\nAnswer concisely.\n<|user|>\n{instruction}\n<|assistant|>\n" 378 ), 379 }, 380 { 381 "name": "phi-1_5", 382 "tuning": "instruct", 383 "datasets": ["phi-1_5"], 384 "params": 1.4e9, 385 "quantization": "int8", 386 "backend": "ct2", 387 "architecture": "decoder-only-transformer", 388 "license": "other", 389 "prompt_fmt": "{instruction}\n\nAnswer:", 390 
}, 391 { 392 "name": "h2o-danube3-500m-chat", 393 "tuning": "instruct", 394 "datasets": [], 395 "params": 0.5e9, 396 "quantization": "int8", 397 "backend": "ct2", 398 "architecture": "decoder-only-transformer", 399 "license": "apache-2.0", 400 "prompt_fmt": "<|prompt|>{instruction}</s><|answer|>", 401 }, 402 { 403 "name": "SmolLM2-1.7B-Instruct", 404 "tuning": "instruct", 405 "revision": "83b1658", 406 "datasets": [], 407 "params": 1.7e9, 408 "quantization": "int8", 409 "backend": "ct2", 410 "context_length": 2048, 411 "repetition_penalty": 1.0, 412 "architecture": "decoder-only-transformer", 413 "license": "apache-2.0", 414 "prompt_fmt": ( 415 "<|im_start|>system\nAnswer concisely.<|im_end|>\n" 416 "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n" 417 ), 418 }, 419 { 420 "name": "SmolLM-1.7B-Instruct", 421 "tuning": "instruct", 422 "revision": "dc3dfe2", 423 "datasets": [], 424 "params": 1.7e9, 425 "quantization": "int8", 426 "backend": "ct2", 427 "context_length": 2048, 428 "repetition_penalty": 1.1, 429 "architecture": "decoder-only-transformer", 430 "license": "apache-2.0", 431 "prompt_fmt": ( 432 "<|im_start|>system\nAnswer concisely.<|im_end|>\n" 433 "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n" 434 ), 435 }, 436 { 437 "name": "Falcon3-1B-Instruct", 438 "tuning": "instruct", 439 "languages": ["en", "fr", "es", "pt"], 440 "revision": "74391aa", 441 "datasets": [], 442 "params": 1.7e9, 443 "quantization": "int8", 444 "backend": "ct2", 445 "context_length": 8192, 446 "repetition_penalty": 1.1, 447 "architecture": "decoder-only-transformer", 448 "license": "falcon", 449 "prompt_fmt": ( 450 "<|system|>\nAnswer concisely.\n<|user|>\n{instruction}\n<|assistant|>\n" 451 ), 452 }, 453 { 454 "name": "Qwen2.5-1.5B-Instruct", 455 "tuning": "instruct", 456 "languages": [ 457 "zh", 458 "en", 459 "fr", 460 "es", 461 "pt", 462 "de", 463 "it", 464 "ru", 465 "ja", 466 "ko", 467 "vi", 468 "th", 469 "ar", 470 ], 471 "revision": "5de22ab", 472 "datasets": [], 473 "params": 1.5e9, 474 "quantization": "int8", 475 "backend": "ct2", 476 "context_length": 32 * 1024, 477 "repetition_penalty": 1.1, 478 "architecture": "decoder-only-transformer", 479 "license": "apache-2.0", 480 "prompt_fmt": ( 481 "<|im_start|>system\nAnswer concisely.<|im_end|>\n" 482 "<|im_start|>user\n{instruction}<|im_end|>\n" 483 "<|im_start|>assistant\n" 484 ), 485 }, 486 { 487 "name": "Qwen2.5-0.5B-Instruct", 488 "tuning": "instruct", 489 "languages": [ 490 "zh", 491 "en", 492 "fr", 493 "es", 494 "pt", 495 "de", 496 "it", 497 "ru", 498 "ja", 499 "ko", 500 "vi", 501 "th", 502 "ar", 503 ], 504 "revision": "554ffe5", 505 "datasets": [], 506 "params": 0.5e9, 507 "quantization": "int8", 508 "backend": "ct2", 509 "context_length": 32 * 1024, 510 "repetition_penalty": 1.1, 511 "architecture": "decoder-only-transformer", 512 "license": "apache-2.0", 513 "prompt_fmt": ( 514 "<|im_start|>system\nAnswer concisely.<|im_end|>\n" 515 "<|im_start|>user\n{instruction}<|im_end|>\n" 516 "<|im_start|>assistant\n" 517 ), 518 }, 519 { 520 "name": "SmolLM2-360M-Instruct", 521 "tuning": "instruct", 522 "revision": "ed9c4fe", 523 "datasets": [], 524 "params": 360e6, 525 "quantization": "int8", 526 "backend": "ct2", 527 "context_length": 2048, 528 "repetition_penalty": 1.0, 529 "architecture": "decoder-only-transformer", 530 "license": "apache-2.0", 531 "prompt_fmt": ( 532 "<|im_start|>system\nAnswer concisely.<|im_end|>\n" 533 "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n" 534 ), 535 }, 536 { 537 
"name": "SmolLM-360M-Instruct", 538 "tuning": "instruct", 539 "revision": "0b0e861", 540 "datasets": [], 541 "params": 360e6, 542 "quantization": "int8", 543 "backend": "ct2", 544 "context_length": 2048, 545 "repetition_penalty": 1.1, 546 "architecture": "decoder-only-transformer", 547 "license": "apache-2.0", 548 "prompt_fmt": ( 549 "<|im_start|>system\nAnswer concisely.<|im_end|>\n" 550 "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n" 551 ), 552 }, 553 { 554 "name": "SmolLM2-135M-Instruct", 555 "tuning": "instruct", 556 "revision": "e52a3dc", 557 "datasets": [], 558 "params": 135e6, 559 "quantization": "int8", 560 "backend": "ct2", 561 "context_length": 2048, 562 "repetition_penalty": 1.0, 563 "architecture": "decoder-only-transformer", 564 "license": "apache-2.0", 565 "prompt_fmt": ( 566 "<|im_start|>system\nAnswer concisely.<|im_end|>\n" 567 "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n" 568 ), 569 }, 570 { 571 "name": "SmolLM-135M-Instruct", 572 "tuning": "instruct", 573 "revision": "90046ba", 574 "datasets": [], 575 "params": 135e6, 576 "quantization": "int8", 577 "backend": "ct2", 578 "context_length": 2048, 579 "repetition_penalty": 1.3, 580 "architecture": "decoder-only-transformer", 581 "license": "apache-2.0", 582 "prompt_fmt": ( 583 "<|im_start|>system\nAnswer concisely.<|im_end|>\n" 584 "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n" 585 ), 586 }, 587 { 588 "name": "LaMini-GPT-774M", 589 "tuning": "instruct", 590 "datasets": ["webtext", "lamini"], 591 "params": 774e6, 592 "quantization": "int8", 593 "backend": "ct2", 594 "architecture": "decoder-only-transformer", 595 "license": "mit", 596 "prompt_fmt": ( 597 "Below is an instruction that describes a task.\n" 598 "Write a response that completes the request.\n\n" 599 "### Instruction:\n{instruction}\n\n### Response:" 600 ), 601 }, 602 { 603 "name": "LaMini-GPT-124M", 604 "tuning": "instruct", 605 "datasets": ["webtext", "lamini"], 606 "params": 124e6, 607 "quantization": "int8", 608 "backend": "ct2", 609 "architecture": "decoder-only-transformer", 610 "license": "mit", 611 "prompt_fmt": ( 612 "Below is an instruction that describes a task.\n" 613 "Write a response that completes the request.\n\n" 614 "### Instruction:\n{instruction}\n\n### Response:" 615 ), 616 }, 617 { 618 "name": "TinyLlama-1.1B-Chat-v1.0", 619 "tuning": "instruct", 620 "datasets": ["slimpajama", "starcoderdata"], 621 "params": 1.1e9, 622 "quantization": "int8", 623 "backend": "ct2", 624 "architecture": "decoder-only-transformer", 625 "license": "mit", 626 "prompt_fmt": ("<|user|>{instruction}<|assistant|>"), 627 }, 628 { 629 "name": "codet5p-770m-py", 630 "tuning": "code", 631 "datasets": ["github-code"], 632 "params": 770e6, 633 "quantization": "int8", 634 "backend": "ct2", 635 "architecture": "encoder-decoder-transformer", 636 "license": "bsd-3-clause", 637 }, 638 { 639 "name": "codet5p-220m-py", 640 "tuning": "code", 641 "datasets": ["github-code"], 642 "params": 220e6, 643 "quantization": "int8", 644 "backend": "ct2", 645 "architecture": "encoder-decoder-transformer", 646 "license": "bsd-3-clause", 647 }, 648 { 649 "name": "all-MiniLM-L6-v2", 650 "tuning": "embedding", 651 "revision": "28efeb4", 652 "params": 22e6, 653 "quantization": "int8", 654 "backend": "ct2", 655 "architecture": "encoder-only-transformer", 656 "license": "apache-2.0", 657 }, 658 { 659 "name": "gte-tiny", 660 "tuning": "embedding", 661 "params": 22e6, 662 "quantization": "int8", 663 "backend": "ct2", 664 "architecture": 
"encoder-only-transformer", 665 "license": "mit", 666 }, 667 { 668 "name": "gte-small", 669 "tuning": "embedding", 670 "params": 33e6, 671 "quantization": "int8", 672 "backend": "ct2", 673 "architecture": "encoder-only-transformer", 674 "license": "mit", 675 }, 676 { 677 "name": "GIST-small-Embedding-v0", 678 "tuning": "embedding", 679 "params": 33e6, 680 "quantization": "int8", 681 "backend": "ct2", 682 "architecture": "encoder-only-transformer", 683 "license": "mit", 684 }, 685 { 686 "name": "bge-small-en", 687 "tuning": "embedding", 688 "query_prefix": "Represent this sentence for searching relevant passages: ", 689 "params": 33e6, 690 "quantization": "int8", 691 "backend": "ct2", 692 "architecture": "encoder-only-transformer", 693 "license": "mit", 694 }, 695 { 696 "name": "e5-small-v2", 697 "tuning": "embedding", 698 "params": 33e6, 699 "quantization": "int8", 700 "backend": "ct2", 701 "architecture": "encoder-only-transformer", 702 "license": "mit", 703 }, 704 { 705 "name": "granite-embedding-125m-english", 706 "tuning": "embedding", 707 "params": 30e6, 708 "quantization": "int8", 709 "backend": "ct2", 710 "architecture": "encoder-only-transformer", 711 "license": "apache-2.0", 712 }, 713 { 714 "name": "granite-embedding-107m-multilingual", 715 "tuning": "embedding", 716 "params": 30e6, 717 "quantization": "int8", 718 "backend": "ct2", 719 "architecture": "encoder-only-transformer", 720 "license": "apache-2.0", 721 }, 722 { 723 "name": "granite-embedding-30m-english", 724 "tuning": "embedding", 725 "params": 30e6, 726 "quantization": "int8", 727 "backend": "ct2", 728 "architecture": "encoder-only-transformer", 729 "license": "apache-2.0", 730 }, 731 { 732 "name": "multilingual-e5-small", 733 "tuning": "embedding", 734 "params": 120e6, 735 "quantization": "int8", 736 "backend": "ct2", 737 "architecture": "encoder-only-transformer", 738 "license": "mit", 739 }, 740] 741 742 743class Config(dict): 744 """ 745 Store configuration information for the package. 746 747 This is a dictionary that provides data basic data validation. 748 749 Only appropriate keys and values are allowed to be set. 750 751 >>> c = Config({'max_ram': '4gb'}) 752 >>> c 753 {...'max_ram': 4.0...} 754 755 >>> c = Config({'instruct_model': 'flan-t5-small'}) 756 >>> c 757 {...'instruct_model': 'flan-t5-small'...} 758 759 >>> c = Config({'model_license': 'apache|mit|bsd'}) 760 >>> c 761 {...'model_license': re.compile('apache|mit|bsd')...} 762 763 >>> c = Config({'instruct_model': 'flan-t5-bad'}) 764 Traceback (most recent call last): 765 ... 766 KeyError: 'flan-t5-bad' 767 768 >>> c = Config({'bad_value': 1}) 769 Traceback (most recent call last): 770 ... 771 KeyError: 'bad_value' 772 773 >>> c = Config() 774 >>> c.update({'bad_value': 1}) 775 Traceback (most recent call last): 776 ... 
777 KeyError: 'bad_value' 778 779 """ 780 781 model_names = {m["name"]: m for m in models} 782 783 def __init__(self, config={}): 784 # Defaults are loaded first 785 for key in Config.schema: 786 self[key] = self.schema[key].default 787 788 # Environment variables override defaults 789 for key in Config.schema: 790 value = os.environ.get(f"LANGUAGEMODELS_{key.upper()}") 791 if value: 792 self[key] = value 793 794 # Any values passed in the config dict override environment vars 795 for key in config.keys(): 796 self[key] = config[key] 797 798 def __setitem__(self, key, value): 799 super().__setitem__(key, Config.schema[key].initfn(value)) 800 801 # Auto-adjust instruct_model when filters change 802 if key == "max_ram" or key == "model_license": 803 found = set() 804 for model in models: 805 if model["quantization"] == "int8": 806 memsize = model["params"] / 1e9 807 elif model["quantization"] == "q3_k_m": 808 memsize = model["params"] * 0.48 / 1e9 809 elif model["quantization"] == "q4_k_m": 810 memsize = model["params"] * 0.59 / 1e9 811 812 sizefit = memsize < self["max_ram"] 813 814 if "model_license" in self: 815 licensematch = self["model_license"].match(model["license"]) 816 else: 817 licensematch = True 818 819 if model["tuning"] not in found and sizefit and licensematch: 820 self[model["tuning"] + "_model"] = model["name"] 821 found.add(model["tuning"]) 822 823 if len(found) < 3: 824 raise ModelFilterException("Unable to find models to match filters") 825 826 def update(self, other): 827 for key in other: 828 self[key] = other[key] 829 830 def use_hf_model(self, hf_path, revision, model_type="instruct"): 831 """Load and use a model from Huggingface 832 833 :param hf_path: Path for the model e.g. "org/model" 834 :param revision: The model git revision to load 835 :param model_type: Model type to load 836 """ 837 838 assert "ct2" in hf_path.lower() 839 assert "int8" in hf_path.lower() 840 841 # We defer importing jinja2 until this point as it is only needed 842 # for interpolating hf model chat templates and does not need 843 # to be installed unless this method is used 844 from jinja2 import Environment, BaseLoader 845 846 tok_config = hf_hub_download( 847 hf_path, "tokenizer_config.json", revision=revision 848 ) 849 850 with open(tok_config) as f: 851 chat_template = json.load(f)["chat_template"] 852 853 env = Environment(loader=BaseLoader()) 854 855 template = env.from_string(chat_template) 856 857 prompt_fmt = template.render( 858 messages=[{"role": "user", "content": "{instruction}"}], 859 add_generation_prompt=True, 860 ) 861 862 model = { 863 "name": hf_path, 864 "backend": "ct2", 865 "quantization": "int8", 866 "architecture": "decoder-only-transformer", 867 "max_tokens": 2048, 868 "params": 0, 869 "prompt_fmt": prompt_fmt, 870 } 871 872 models.insert(0, model) 873 self.model_names[model["name"]] = model 874 self[f"{model_type}_model"] = model["name"] 875 876 @staticmethod 877 def validate_model(model_name): 878 return Config.model_names[model_name]["name"] 879 880 @staticmethod 881 def validate_device(device): 882 assert device in ["auto", "cpu"] 883 884 return device 885 886 @staticmethod 887 def convert_to_gb(space): 888 """Convert max RAM string to int 889 890 Output will be in gigabytes 891 892 If not specified, input is assumed to be in gigabytes 893 894 >>> Config.convert_to_gb("512") 895 512.0 896 897 >>> Config.convert_to_gb(".5") 898 0.5 899 900 >>> Config.convert_to_gb("4G") 901 4.0 902 903 >>> Config.convert_to_gb("256mb") 904 0.25 905 906 >>> 
Config.convert_to_gb("256M") 907 0.25 908 909 >>> Config.convert_to_gb("small") 910 0.2 911 912 >>> Config.convert_to_gb("base") 913 0.48 914 915 >>> Config.convert_to_gb("large") 916 1.0 917 918 >>> Config.convert_to_gb("xl") 919 4.0 920 921 >>> Config.convert_to_gb("xxl") 922 16.0 923 """ 924 925 if isinstance(space, int) or isinstance(space, float): 926 return float(space) 927 928 size_names = { 929 "small": 0.2, 930 "base": 0.48, 931 "large": 1.0, 932 "xl": 4.0, 933 "xxl": 16.0, 934 } 935 936 if space.lower().strip() in size_names: 937 return size_names[space.lower().strip()] 938 939 multipliers = { 940 "g": 1.0, 941 "m": 2**-10, 942 } 943 944 space = space.lower() 945 space = space.rstrip("b") 946 947 if space[-1] in multipliers: 948 return float(space[:-1]) * multipliers[space[-1]] 949 else: 950 return float(space) 951 952 953Config.schema = { 954 "max_ram": ConfigItem(Config.convert_to_gb, 0.48), 955 "max_tokens": ConfigItem(int, 200), 956 "echo": ConfigItem(int, False), 957 "device": ConfigItem(Config.validate_device, "cpu"), 958 "model_license": ConfigItem(re.compile, ".*"), 959 "instruct_model": ConfigItem(Config.validate_model, "LaMini-Flan-T5-248M"), 960 "embedding_model": ConfigItem(Config.validate_model, "all-MiniLM-L6-v2"), 961 "code_model": ConfigItem(Config.validate_model, "codet5p-220m-py"), 962 "max_prompt_length": ConfigItem(int, 50_000), 963} 964 965config = Config() 966 967if "COLAB_GPU" in os.environ: 968 if len(os.environ["COLAB_GPU"]) > 0: 969 # We have a Colab GPU, so default to using it 970 config["device"] = "auto"
ConfigItem(initfn, default): a named tuple pairing a configuration key's initialization/validation function with its default value.
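For illustration, the schema at the bottom of the source pairs each key with one of these tuples; a minimal sketch:

from collections import namedtuple

ConfigItem = namedtuple("ConfigItem", "initfn default")

# max_tokens is coerced with int() and defaults to 200 in the schema
item = ConfigItem(initfn=int, default=200)
assert item.initfn("300") == 300
assert item.default == 200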
ModelFilterException: raised when no available model satisfies the active filters (max_ram and model_license).
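For example, a memory bound below the footprint of every bundled model leaves the filter unsatisfiable:

from languagemodels.config import Config, ModelFilterException

try:
    # 0.01 GB is smaller than even the smallest int8 model (~0.022 GB),
    # so no instruct, embedding, or code model can be selected
    Config({"max_ram": 0.01})
except ModelFilterException as e:
    print(e)  # Unable to find models to match filters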
Config(dict): Store configuration information for the package.
This is a dictionary that provides basic data validation.
Only appropriate keys and values are allowed to be set.
>>> c = Config({'max_ram': '4gb'})
>>> c
{...'max_ram': 4.0...}
>>> c = Config({'instruct_model': 'flan-t5-small'})
>>> c
{...'instruct_model': 'flan-t5-small'...}
>>> c = Config({'model_license': 'apache|mit|bsd'})
>>> c
{...'model_license': re.compile('apache|mit|bsd')...}
>>> c = Config({'instruct_model': 'flan-t5-bad'})
Traceback (most recent call last):
...
KeyError: 'flan-t5-bad'
>>> c = Config({'bad_value': 1})
Traceback (most recent call last):
...
KeyError: 'bad_value'
>>> c = Config()
>>> c.update({'bad_value': 1})
Traceback (most recent call last):
...
KeyError: 'bad_value'
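Settings resolve in three layers: schema defaults, then LANGUAGEMODELS_* environment variables, then the dict passed to the constructor. A brief sketch of the precedence:

import os
from languagemodels.config import Config

os.environ["LANGUAGEMODELS_MAX_TOKENS"] = "400"

c = Config()                     # env var overrides the default of 200
assert c["max_tokens"] == 400

c = Config({"max_tokens": 100})  # constructor values win over env vars
assert c["max_tokens"] == 100

del os.environ["LANGUAGEMODELS_MAX_TOKENS"]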
update(other): Update the configuration from the mapping other. Every key is assigned through the validating setter, so values are normalized and unknown keys raise KeyError.
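For example:

from languagemodels.config import Config

c = Config()
c.update({"max_ram": "1gb", "max_tokens": 300})
assert c["max_ram"] == 1.0    # "1gb" normalized to a float in GB
assert c["max_tokens"] == 300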
use_hf_model(hf_path, revision, model_type='instruct'): Load and use a model from the Hugging Face Hub. Only int8 CTranslate2 exports are supported: the repository path must contain both "ct2" and "int8".
Parameters
- hf_path: Path for the model, e.g. "org/model"
- revision: The model git revision to load
- model_type: Model type to load (defaults to "instruct")
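A hedged sketch of a call; the repository id below is hypothetical, and the method downloads the repo's tokenizer_config.json to build a prompt format from its chat template:

import languagemodels.config as lm_config

# Hypothetical int8 CTranslate2 export published on the Hugging Face Hub
lm_config.config.use_hf_model(
    "example-org/my-model-ct2-int8",  # hypothetical repo id
    revision="main",
    model_type="instruct",
)

# The model is inserted at the head of the model list and selected
print(lm_config.config["instruct_model"])  # example-org/my-model-ct2-int8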
convert_to_gb(space) (static method): Convert a max RAM value to a float number of gigabytes.
If no unit is given, the input is assumed to already be in gigabytes.
>>> Config.convert_to_gb("512")
512.0
>>> Config.convert_to_gb(".5")
0.5
>>> Config.convert_to_gb("4G")
4.0
>>> Config.convert_to_gb("256mb")
0.25
>>> Config.convert_to_gb("256M")
0.25
>>> Config.convert_to_gb("small")
0.2
>>> Config.convert_to_gb("base")
0.48
>>> Config.convert_to_gb("large")
1.0
>>> Config.convert_to_gb("xl")
4.0
>>> Config.convert_to_gb("xxl")
16.0
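Note that the "m" suffix uses a binary multiplier (2**-10), which is why "256mb" converts to 0.25 rather than 0.256:

from languagemodels.config import Config

assert Config.convert_to_gb("256mb") == 256 * 2**-10  # 0.25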