languagemodels.config

Global model and inference configuration

This module manages the global configuration object shared between other modules in the package. It implements a dictionary with data validation on the keys and values.

Note that this module provides access to many implementation details that are not expected to be used by average users. Specific models that have never been the default for the package may be removed at any time.
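The shared configuration instance is created when this module is imported, and individual keys can be overridden at runtime. A minimal sketch of typical use:

>>> from languagemodels.config import config
>>> config["max_ram"] = "512mb"
>>> config["max_ram"]
0.5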

  1"""Global model and inference configuration
  2
  3This module manages the global configuration object shared between other
  4modules in the package. It implements a dictionary with data validation
  5on the keys and values.
  6
  7Note that this module provides access to many implementation details
  8that are not expected to be used by average users. Specific models that
  9have never been the default for the package may be removed at any time.
 10"""
 11
 12import re
 13import os
 14from collections import namedtuple
 15from huggingface_hub import hf_hub_download
 16import json
 17
 18ConfigItem = namedtuple("ConfigItem", "initfn default")
 19
 20
 21class ModelFilterException(Exception):
 22    pass
 23
 24
 25# Model list
 26# This list is sorted in priority order, with the best models first
 27# The best model that fits in the memory bounds and matches the model filter
 28# will be selected
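# For example, with the default max_ram of 0.48GB and no license filter,
# the first instruct model below that fits is LaMini-Flan-T5-248M
# (roughly 0.248GB at int8), matching the schema default at the bottom
# of this file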
models = [
    {
        "name": "openchat-3.5-0106",
        "tuning": "instruct",
        "datasets": ["mistral", "openorca", "flan"],
        "params": 7e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "GPT4 Correct User: {instruction}<|end_of_turn|>" "GPT4 Correct Assistant:"
        ),
    },
    {
        "name": "Llama-3.1-8B-Instruct",
        "tuning": "instruct",
        "revision": "d02fc85",
        "datasets": ["llama3"],
        "params": 8e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "llama3",
        "prompt_fmt": (
            "<|start_header_id|>user<|end_header_id|>\n\n"
            "{instruction}<|eot_id|>"
            "<|start_header_id|>assistant<|end_header_id|>\n\n"
        ),
    },
    {
        "name": "Meta-Llama-3-8B-Instruct",
        "tuning": "instruct",
        "datasets": ["llama3"],
        "params": 8e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "llama3",
        "prompt_fmt": (
            "<|start_header_id|>user<|end_header_id|>\n\n"
            "{instruction}<|eot_id|>"
            "<|start_header_id|>assistant<|end_header_id|>\n\n"
        ),
    },
    {
        "name": "openchat-3.5-1210",
        "tuning": "instruct",
        "datasets": ["mistral", "openorca", "flan"],
        "params": 7e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "GPT4 Correct User: {instruction}<|end_of_turn|>" "GPT4 Correct Assistant:"
        ),
    },
    {
        "name": "WizardLM-2-7B",
        "tuning": "instruct",
        "datasets": ["mistral", "wizardlm"],
        "params": 7e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": "USER: {instruction} ASSISTANT:",
    },
    {
        "name": "neural-chat-7b-v3-1",
        "tuning": "instruct",
        "datasets": ["mistral", "slimorca"],
        "params": 7e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "### System:\n"
            "Be helpful\n"
            "### User:\n{instruction}\n"
            "### Assistant:\n"
        ),
    },
    {
        "name": "Mistral-7B-Instruct-v0.2",
        "tuning": "instruct",
        "datasets": ["mistral"],
        "params": 7e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": "<s>[INST] {instruction} [/INST]",
    },
    {
        "name": "flan-alpaca-gpt4-xl",
        "tuning": "instruct",
        "datasets": ["c4", "flan", "gpt4-alpaca"],
        "params": 3e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "flan-alpaca-xl",
        "tuning": "instruct",
        "datasets": ["c4", "flan", "alpaca"],
        "params": 3e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "flan-t5-xl",
        "tuning": "instruct",
        "datasets": ["c4", "flan"],
        "params": 3e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "Llama-3.2-3B-Instruct",
        "tuning": "instruct",
        "revision": "5da4ba8",
        "datasets": ["llama3"],
        "params": 3e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "llama3.2",
        "repetition_penalty": 1.1,
        "prompt_fmt": (
            "<|start_header_id|>user<|end_header_id|>\n\n"
            "{instruction}<|eot_id|>"
            "<|start_header_id|>assistant<|end_header_id|>\n\n"
        ),
    },
    {
        "name": "fastchat-t5-3b-v1.0",
        "tuning": "instruct",
        "datasets": ["c4", "flan", "sharegpt"],
        "params": 3e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "LaMini-Flan-T5-783M",
        "tuning": "instruct",
        "revision": "e5e20a1",
        "datasets": ["c4", "flan", "lamini"],
        "params": 783e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "cc-by-nc-4.0",
    },
    {
        "name": "flan-t5-large",
        "tuning": "instruct",
        "datasets": ["c4", "flan"],
        "params": 783e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "Llama-3.2-1B-Instruct",
        "tuning": "instruct",
        "revision": "6e3e3a1",
        "datasets": ["llama3"],
        "params": 1e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "llama3.2",
        "repetition_penalty": 1.1,
        "prompt_fmt": (
            "<|start_header_id|>user<|end_header_id|>\n\n"
            "{instruction}<|eot_id|>"
            "<|start_header_id|>assistant<|end_header_id|>\n\n"
        ),
    },
    {
        "name": "LaMini-Flan-T5-248M",
        "tuning": "instruct",
        "revision": "96cfe99",
        "datasets": ["c4", "flan", "lamini"],
        "params": 248e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "cc-by-nc-4.0",
    },
    {
        "name": "flan-t5-base",
        "tuning": "instruct",
        "datasets": ["c4", "flan"],
        "params": 248e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "flan-alpaca-base",
        "tuning": "instruct",
        "datasets": ["c4", "flan", "alpaca"],
        "params": 248e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "dialogstudio-t5-base-v1.0",
        "tuning": "instruct",
        "datasets": ["c4", "flan", "dialogstudio"],
        "params": 248e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "apache-2.0",
        "prompt_fmt": ("Instruction: Be helpful. <USER> {instruction}"),
    },
    {
        "name": "LaMini-Flan-T5-77M",
        "tuning": "instruct",
        "datasets": ["c4", "flan", "lamini"],
        "params": 77e6,
        "backend": "ct2",
        "quantization": "int8",
        "architecture": "encoder-decoder-transformer",
        "license": "cc-by-nc-4.0",
    },
    {
        "name": "flan-t5-small",
        "tuning": "instruct",
        "datasets": ["c4", "flan"],
        "params": 77e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "Phi-3-mini-4k-instruct-20240701",
        "tuning": "instruct",
        "datasets": ["phi-3"],
        "params": 3.8e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "mit",
        "prompt_fmt": "<|user|>\n{instruction}<|end|>\n<|assistant|>",
        "repetition_penalty": 1.1,
    },
    {
        "name": "Phi-3-mini-4k-instruct",
        "tuning": "instruct",
        "datasets": ["phi-3"],
        "params": 3.8e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "mit",
        "prompt_fmt": "<|user|>\n{instruction}<|end|>\n<|assistant|>",
        "repetition_penalty": 1.1,
    },
    {
        "name": "phi-2",
        "tuning": "instruct",
        "datasets": ["phi-2"],
        "params": 2.7e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "microsoft-research-license",
        "prompt_fmt": "Instruct: {instruction}\nOutput:",
    },
    {
        "name": "gemma-2b-it",
        "tuning": "instruct",
        "datasets": ["gemma"],
        "params": 2.5e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "gemma-terms-of-use",
        "prompt_fmt": "<bos><start_of_turn>user\n"
        "{instruction}<end_of_turn>\n"
        "<start_of_turn>model",
    },
    {
        "name": "h2o-danube3-4b-chat",
        "tuning": "instruct",
        "datasets": [],
        "params": 4.0e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": "<|prompt|>{instruction}</s><|answer|>",
    },
    {
        "name": "h2o-danube2-1.8b-chat",
        "tuning": "instruct",
        "datasets": [],
        "params": 1.8e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "other",
        "prompt_fmt": "<|prompt|>{instruction}</s><|answer|>",
    },
    {
        "name": "h2o-danube-1.8b-chat",
        "tuning": "instruct",
        "datasets": [],
        "params": 1.8e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "other",
        "prompt_fmt": "<|prompt|>{instruction}</s><|answer|>",
    },
    {
        "name": "Falcon3-3B-Instruct",
        "tuning": "instruct",
        "languages": ["en", "fr", "es", "pt"],
        "revision": "b183d4d",
        "datasets": [],
        "params": 3.23e9,
        "quantization": "int8",
        "backend": "ct2",
        "context_length": 8192,
        "repetition_penalty": 1.1,
        "architecture": "decoder-only-transformer",
        "license": "falcon",
        "prompt_fmt": (
            "<|system|>\nAnswer concisely.\n<|user|>\n{instruction}\n<|assistant|>\n"
        ),
    },
    {
        "name": "phi-1_5",
        "tuning": "instruct",
        "datasets": ["phi-1_5"],
        "params": 1.4e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "other",
        "prompt_fmt": "{instruction}\n\nAnswer:",
    },
    {
        "name": "h2o-danube3-500m-chat",
        "tuning": "instruct",
        "datasets": [],
        "params": 0.5e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": "<|prompt|>{instruction}</s><|answer|>",
    },
    {
        "name": "SmolLM2-1.7B-Instruct",
        "tuning": "instruct",
        "revision": "83b1658",
        "datasets": [],
        "params": 1.7e9,
        "quantization": "int8",
        "backend": "ct2",
        "context_length": 2048,
        "repetition_penalty": 1.0,
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "<|im_start|>system\nAnswer concisely.<|im_end|>\n"
            "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n"
        ),
    },
    {
        "name": "SmolLM-1.7B-Instruct",
        "tuning": "instruct",
        "revision": "dc3dfe2",
        "datasets": [],
        "params": 1.7e9,
        "quantization": "int8",
        "backend": "ct2",
        "context_length": 2048,
        "repetition_penalty": 1.1,
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "<|im_start|>system\nAnswer concisely.<|im_end|>\n"
            "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n"
        ),
    },
    {
        "name": "Falcon3-1B-Instruct",
        "tuning": "instruct",
        "languages": ["en", "fr", "es", "pt"],
        "revision": "74391aa",
        "datasets": [],
        "params": 1.7e9,
        "quantization": "int8",
        "backend": "ct2",
        "context_length": 8192,
        "repetition_penalty": 1.1,
        "architecture": "decoder-only-transformer",
        "license": "falcon",
        "prompt_fmt": (
            "<|system|>\nAnswer concisely.\n<|user|>\n{instruction}\n<|assistant|>\n"
        ),
    },
    {
        "name": "Qwen2.5-1.5B-Instruct",
        "tuning": "instruct",
        "languages": [
            "zh",
            "en",
            "fr",
            "es",
            "pt",
            "de",
            "it",
            "ru",
            "ja",
            "ko",
            "vi",
            "th",
            "ar",
        ],
        "revision": "5de22ab",
        "datasets": [],
        "params": 1.5e9,
        "quantization": "int8",
        "backend": "ct2",
        "context_length": 32 * 1024,
        "repetition_penalty": 1.1,
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "<|im_start|>system\nAnswer concisely.<|im_end|>\n"
            "<|im_start|>user\n{instruction}<|im_end|>\n"
            "<|im_start|>assistant\n"
        ),
    },
    {
        "name": "Qwen2.5-0.5B-Instruct",
        "tuning": "instruct",
        "languages": [
            "zh",
            "en",
            "fr",
            "es",
            "pt",
            "de",
            "it",
            "ru",
            "ja",
            "ko",
            "vi",
            "th",
            "ar",
        ],
        "revision": "554ffe5",
        "datasets": [],
        "params": 0.5e9,
        "quantization": "int8",
        "backend": "ct2",
        "context_length": 32 * 1024,
        "repetition_penalty": 1.1,
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "<|im_start|>system\nAnswer concisely.<|im_end|>\n"
            "<|im_start|>user\n{instruction}<|im_end|>\n"
            "<|im_start|>assistant\n"
        ),
    },
    {
        "name": "SmolLM2-360M-Instruct",
        "tuning": "instruct",
        "revision": "ed9c4fe",
        "datasets": [],
        "params": 360e6,
        "quantization": "int8",
        "backend": "ct2",
        "context_length": 2048,
        "repetition_penalty": 1.0,
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "<|im_start|>system\nAnswer concisely.<|im_end|>\n"
            "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n"
        ),
    },
    {
        "name": "SmolLM-360M-Instruct",
        "tuning": "instruct",
        "revision": "0b0e861",
        "datasets": [],
        "params": 360e6,
        "quantization": "int8",
        "backend": "ct2",
        "context_length": 2048,
        "repetition_penalty": 1.1,
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "<|im_start|>system\nAnswer concisely.<|im_end|>\n"
            "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n"
        ),
    },
    {
        "name": "SmolLM2-135M-Instruct",
        "tuning": "instruct",
        "revision": "e52a3dc",
        "datasets": [],
        "params": 135e6,
        "quantization": "int8",
        "backend": "ct2",
        "context_length": 2048,
        "repetition_penalty": 1.0,
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "<|im_start|>system\nAnswer concisely.<|im_end|>\n"
            "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n"
        ),
    },
    {
        "name": "SmolLM-135M-Instruct",
        "tuning": "instruct",
        "revision": "90046ba",
        "datasets": [],
        "params": 135e6,
        "quantization": "int8",
        "backend": "ct2",
        "context_length": 2048,
        "repetition_penalty": 1.3,
        "architecture": "decoder-only-transformer",
        "license": "apache-2.0",
        "prompt_fmt": (
            "<|im_start|>system\nAnswer concisely.<|im_end|>\n"
            "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n"
        ),
    },
    {
        "name": "LaMini-GPT-774M",
        "tuning": "instruct",
        "datasets": ["webtext", "lamini"],
        "params": 774e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "mit",
        "prompt_fmt": (
            "Below is an instruction that describes a task.\n"
            "Write a response that completes the request.\n\n"
            "### Instruction:\n{instruction}\n\n### Response:"
        ),
    },
    {
        "name": "LaMini-GPT-124M",
        "tuning": "instruct",
        "datasets": ["webtext", "lamini"],
        "params": 124e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "mit",
        "prompt_fmt": (
            "Below is an instruction that describes a task.\n"
            "Write a response that completes the request.\n\n"
            "### Instruction:\n{instruction}\n\n### Response:"
        ),
    },
    {
        "name": "TinyLlama-1.1B-Chat-v1.0",
        "tuning": "instruct",
        "datasets": ["slimpajama", "starcoderdata"],
        "params": 1.1e9,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "decoder-only-transformer",
        "license": "mit",
        "prompt_fmt": ("<|user|>{instruction}<|assistant|>"),
    },
    {
        "name": "codet5p-770m-py",
        "tuning": "code",
        "datasets": ["github-code"],
        "params": 770e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "bsd-3-clause",
    },
    {
        "name": "codet5p-220m-py",
        "tuning": "code",
        "datasets": ["github-code"],
        "params": 220e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-decoder-transformer",
        "license": "bsd-3-clause",
    },
    {
        "name": "all-MiniLM-L6-v2",
        "tuning": "embedding",
        "revision": "28efeb4",
        "params": 22e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-only-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "gte-tiny",
        "tuning": "embedding",
        "params": 22e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-only-transformer",
        "license": "mit",
    },
    {
        "name": "gte-small",
        "tuning": "embedding",
        "params": 33e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-only-transformer",
        "license": "mit",
    },
    {
        "name": "GIST-small-Embedding-v0",
        "tuning": "embedding",
        "params": 33e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-only-transformer",
        "license": "mit",
    },
    {
        "name": "bge-small-en",
        "tuning": "embedding",
        "query_prefix": "Represent this sentence for searching relevant passages: ",
        "params": 33e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-only-transformer",
        "license": "mit",
    },
    {
        "name": "e5-small-v2",
        "tuning": "embedding",
        "params": 33e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-only-transformer",
        "license": "mit",
    },
    {
        "name": "granite-embedding-125m-english",
        "tuning": "embedding",
        "params": 125e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-only-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "granite-embedding-107m-multilingual",
        "tuning": "embedding",
        "params": 107e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-only-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "granite-embedding-30m-english",
        "tuning": "embedding",
        "params": 30e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-only-transformer",
        "license": "apache-2.0",
    },
    {
        "name": "multilingual-e5-small",
        "tuning": "embedding",
        "params": 120e6,
        "quantization": "int8",
        "backend": "ct2",
        "architecture": "encoder-only-transformer",
        "license": "mit",
    },
]


class Config(dict):
    """
    Store configuration information for the package.

    This is a dictionary that provides basic data validation.

    Only appropriate keys and values are allowed to be set.

    >>> c = Config({'max_ram': '4gb'})
    >>> c
    {...'max_ram': 4.0...}

    >>> c = Config({'instruct_model': 'flan-t5-small'})
    >>> c
    {...'instruct_model': 'flan-t5-small'...}

    >>> c = Config({'model_license': 'apache|mit|bsd'})
    >>> c
    {...'model_license': re.compile('apache|mit|bsd')...}

    >>> c = Config({'instruct_model': 'flan-t5-bad'})
    Traceback (most recent call last):
      ...
    KeyError: 'flan-t5-bad'

    >>> c = Config({'bad_value': 1})
    Traceback (most recent call last):
      ...
    KeyError: 'bad_value'

    >>> c = Config()
    >>> c.update({'bad_value': 1})
    Traceback (most recent call last):
      ...
    KeyError: 'bad_value'
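
    Filters that no model can satisfy raise a ModelFilterException.
    A sketch of that behavior (the module-qualified exception name
    shown assumes the doctest runs against the installed module):

    >>> c = Config({'max_ram': '0.001'})
    Traceback (most recent call last):
      ...
    languagemodels.config.ModelFilterException: Unable to find models to match filters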

    """

    model_names = {m["name"]: m for m in models}

    def __init__(self, config={}):
        # Defaults are loaded first
        for key in Config.schema:
            self[key] = self.schema[key].default

        # Environment variables override defaults
        for key in Config.schema:
            value = os.environ.get(f"LANGUAGEMODELS_{key.upper()}")
            if value:
                self[key] = value

        # Any values passed in the config dict override environment vars
        for key in config.keys():
            self[key] = config[key]

    def __setitem__(self, key, value):
        super().__setitem__(key, Config.schema[key].initfn(value))

        # Auto-adjust the selected models when filters change
        if key == "max_ram" or key == "model_license":
            found = set()
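            # Estimate RAM use in GB: int8 stores roughly one byte per
            # parameter; q3_k_m and q4_k_m use roughly 0.48 and 0.59
            # bytes per parameter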
            for model in models:
                if model["quantization"] == "int8":
                    memsize = model["params"] / 1e9
                elif model["quantization"] == "q3_k_m":
                    memsize = model["params"] * 0.48 / 1e9
                elif model["quantization"] == "q4_k_m":
                    memsize = model["params"] * 0.59 / 1e9

                sizefit = memsize < self["max_ram"]

                if "model_license" in self:
                    licensematch = self["model_license"].match(model["license"])
                else:
                    licensematch = True

                if model["tuning"] not in found and sizefit and licensematch:
                    self[model["tuning"] + "_model"] = model["name"]
                    found.add(model["tuning"])

            if len(found) < 3:
                raise ModelFilterException("Unable to find models to match filters")

    def update(self, other):
        for key in other:
            self[key] = other[key]

    def use_hf_model(self, hf_path, revision, model_type="instruct"):
        """Load and use a model from Hugging Face

        :param hf_path: Path for the model e.g. "org/model"
        :param revision: The model git revision to load
        :param model_type: Model type to load
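
        A hypothetical usage sketch (the repository path is a
        placeholder; any CTranslate2 int8 conversion that ships a
        chat template in tokenizer_config.json should work):

        >>> config.use_hf_model("your-org/your-model-ct2-int8",
        ...                     "main")  # doctest: +SKIP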
        """

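        # Only CTranslate2 int8 conversions are supported, so the
        # repository path must contain both "ct2" and "int8"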
        assert "ct2" in hf_path.lower()
        assert "int8" in hf_path.lower()

        # We defer importing jinja2 until this point as it is only needed
        # for interpolating hf model chat templates and does not need
        # to be installed unless this method is used
        from jinja2 import Environment, BaseLoader

        tok_config = hf_hub_download(
            hf_path, "tokenizer_config.json", revision=revision
        )

        with open(tok_config) as f:
            chat_template = json.load(f)["chat_template"]

        env = Environment(loader=BaseLoader())

        template = env.from_string(chat_template)

        prompt_fmt = template.render(
            messages=[{"role": "user", "content": "{instruction}"}],
            add_generation_prompt=True,
        )

        model = {
            "name": hf_path,
            "backend": "ct2",
            "quantization": "int8",
            "architecture": "decoder-only-transformer",
            "max_tokens": 2048,
            "params": 0,
            "prompt_fmt": prompt_fmt,
        }

        models.insert(0, model)
        self.model_names[model["name"]] = model
        self[f"{model_type}_model"] = model["name"]

    @staticmethod
    def validate_model(model_name):
        return Config.model_names[model_name]["name"]

    @staticmethod
    def validate_device(device):
        assert device in ["auto", "cpu"]

        return device

    @staticmethod
    def convert_to_gb(space):
        """Convert a max RAM value to a float in gigabytes

        Accepts ints, floats, and strings with an optional G/M unit
        suffix or a named size

        If no unit is specified, the input is assumed to be in gigabytes

        >>> Config.convert_to_gb("512")
        512.0

        >>> Config.convert_to_gb(".5")
        0.5

        >>> Config.convert_to_gb("4G")
        4.0

        >>> Config.convert_to_gb("256mb")
        0.25

        >>> Config.convert_to_gb("256M")
        0.25

        >>> Config.convert_to_gb("small")
        0.2

        >>> Config.convert_to_gb("base")
        0.48

        >>> Config.convert_to_gb("large")
        1.0

        >>> Config.convert_to_gb("xl")
        4.0

        >>> Config.convert_to_gb("xxl")
        16.0
        """

        if isinstance(space, int) or isinstance(space, float):
            return float(space)

        size_names = {
            "small": 0.2,
            "base": 0.48,
            "large": 1.0,
            "xl": 4.0,
            "xxl": 16.0,
        }

        if space.lower().strip() in size_names:
            return size_names[space.lower().strip()]

        multipliers = {
            "g": 1.0,
            "m": 2**-10,
        }

        space = space.lower()
        space = space.rstrip("b")

        if space[-1] in multipliers:
            return float(space[:-1]) * multipliers[space[-1]]
        else:
            return float(space)


Config.schema = {
    "max_ram": ConfigItem(Config.convert_to_gb, 0.48),
    "max_tokens": ConfigItem(int, 200),
    "echo": ConfigItem(int, False),
    "device": ConfigItem(Config.validate_device, "cpu"),
    "model_license": ConfigItem(re.compile, ".*"),
    "instruct_model": ConfigItem(Config.validate_model, "LaMini-Flan-T5-248M"),
    "embedding_model": ConfigItem(Config.validate_model, "all-MiniLM-L6-v2"),
    "code_model": ConfigItem(Config.validate_model, "codet5p-220m-py"),
    "max_prompt_length": ConfigItem(int, 50_000),
}

config = Config()

if "COLAB_GPU" in os.environ:
    if len(os.environ["COLAB_GPU"]) > 0:
        # We have a Colab GPU, so default to using it
        config["device"] = "auto"
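
Because environment variables are read when the shared Config instance is
constructed at import time, they must be set before the package is imported.
A minimal sketch of environment-driven configuration (the selected model
depends on the model list above):

import os

# Must be set before languagemodels is imported
os.environ["LANGUAGEMODELS_MAX_RAM"] = "4gb"
os.environ["LANGUAGEMODELS_MODEL_LICENSE"] = "apache|mit|bsd"

from languagemodels.config import config

print(config["max_ram"])         # 4.0
print(config["instruct_model"])  # highest-priority matching model under 4GB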
class ConfigItem(builtins.tuple):

ConfigItem(initfn, default)

ConfigItem(initfn, default)

Create new instance of ConfigItem(initfn, default)

initfn

Alias for field number 0

default

Alias for field number 1

class ModelFilterException(builtins.Exception):
22class ModelFilterException(Exception):
23    pass

Common base class for all non-exit exceptions.

models = [{'name': 'openchat-3.5-0106', 'tuning': 'instruct', 'datasets': ['mistral', 'openorca', 'flan'], 'params': 7000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': 'GPT4 Correct User: {instruction}<|end_of_turn|>GPT4 Correct Assistant:'}, {'name': 'Llama-3.1-8B-Instruct', 'tuning': 'instruct', 'revision': 'd02fc85', 'datasets': ['llama3'], 'params': 8000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'llama3', 'prompt_fmt': '<|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'}, {'name': 'Meta-Llama-3-8B-Instruct', 'tuning': 'instruct', 'datasets': ['llama3'], 'params': 8000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'llama3', 'prompt_fmt': '<|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'}, {'name': 'openchat-3.5-1210', 'tuning': 'instruct', 'datasets': ['mistral', 'openorca', 'flan'], 'params': 7000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': 'GPT4 Correct User: {instruction}<|end_of_turn|>GPT4 Correct Assistant:'}, {'name': 'WizardLM-2-7B', 'tuning': 'instruct', 'datasets': ['mistral', 'wizardlm'], 'params': 7000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': 'USER: {instruction} ASSISTANT:'}, {'name': 'neural-chat-7b-v3-1', 'tuning': 'instruct', 'datasets': ['mistral', 'slimorca'], 'params': 7000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '### System:\nBe helpful\n### User:\n{instruction}\n### Assistant:\n'}, {'name': 'Mistral-7B-Instruct-v0.2', 'tuning': 'instruct', 'datasets': ['mistral'], 'params': 7000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<s>[INST] {instruction} [/INST]'}, {'name': 'flan-alpaca-gpt4-xl', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'gpt4-alpaca'], 'params': 3000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'}, {'name': 'flan-alpaca-xl', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'alpaca'], 'params': 3000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'}, {'name': 'flan-t5-xl', 'tuning': 'instruct', 'datasets': ['c4', 'flan'], 'params': 3000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'}, {'name': 'Llama-3.2-3B-Instruct', 'tuning': 'instruct', 'revision': '5da4ba8', 'datasets': ['llama3'], 'params': 1000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'llama3.2', 'repetition_penalty': 1.1, 'prompt_fmt': '<|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'}, {'name': 'fastchat-t5-3b-v1.0', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'sharegpt'], 'params': 3000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'}, {'name': 
'LaMini-Flan-T5-783M', 'tuning': 'instruct', 'revision': 'e5e20a1', 'datasets': ['c4', 'flan', 'lamini'], 'params': 783000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'cc-by-nc-4.0'}, {'name': 'flan-t5-large', 'tuning': 'instruct', 'datasets': ['c4', 'flan'], 'params': 783000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'}, {'name': 'Llama-3.2-1B-Instruct', 'tuning': 'instruct', 'revision': '6e3e3a1', 'datasets': ['llama3'], 'params': 1000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'llama3.2', 'repetition_penalty': 1.1, 'prompt_fmt': '<|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'}, {'name': 'LaMini-Flan-T5-248M', 'tuning': 'instruct', 'revision': '96cfe99', 'datasets': ['c4', 'flan', 'lamini'], 'params': 248000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'cc-by-nc-4.0'}, {'name': 'flan-t5-base', 'tuning': 'instruct', 'datasets': ['c4', 'flan'], 'params': 248000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'}, {'name': 'flan-alpaca-base', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'alpaca'], 'params': 248000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'}, {'name': 'dialogstudio-t5-base-v1.0', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'dialogstudio'], 'params': 248000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0', 'prompt_fmt': 'Instruction: Be helpful. 
<USER> {instruction}'}, {'name': 'LaMini-Flan-T5-77M', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'lamini'], 'params': 77000000.0, 'backend': 'ct2', 'quantization': 'int8', 'architecture': 'encoder-decoder-transformer', 'license': 'cc-by-nc-4.0'}, {'name': 'flan-t5-small', 'tuning': 'instruct', 'datasets': ['c4', 'flan'], 'params': 77000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'}, {'name': 'Phi-3-mini-4k-instruct-20240701', 'tuning': 'instruct', 'datasets': ['phi-3'], 'params': 3800000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'mit', 'prompt_fmt': '<|user|>\n{instruction}<|end|>\n<|assistant|>', 'repetition_penalty': 1.1}, {'name': 'Phi-3-mini-4k-instruct', 'tuning': 'instruct', 'datasets': ['phi-3'], 'params': 3800000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'mit', 'prompt_fmt': '<|user|>\n{instruction}<|end|>\n<|assistant|>', 'repetition_penalty': 1.1}, {'name': 'phi-2', 'tuning': 'instruct', 'datasets': ['phi-2'], 'params': 2700000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'microsoft-research-license', 'prompt_fmt': 'Instruct: {instruction}\nOutput:'}, {'name': 'gemma-2b-it', 'tuning': 'instruct', 'datasets': ['gemma'], 'params': 2500000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'gemma-terms-of-use', 'prompt_fmt': '<bos><start_of_turn>user\n{instruction}<end_of_turn>\n<start_of_turn>model'}, {'name': 'h2o-danube3-4b-chat', 'tuning': 'instruct', 'datasets': [], 'params': 4000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|prompt|>{instruction}</s><|answer|>'}, {'name': 'h2o-danube2-1.8b-chat', 'tuning': 'instruct', 'datasets': [], 'params': 1800000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'other', 'prompt_fmt': '<|prompt|>{instruction}</s><|answer|>'}, {'name': 'h2o-danube-1.8b-chat', 'tuning': 'instruct', 'datasets': [], 'params': 1800000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'other', 'prompt_fmt': '<|prompt|>{instruction}</s><|answer|>'}, {'name': 'Falcon3-3B-Instruct', 'tuning': 'instruct', 'languages': ['en', 'fr', 'es', 'pt'], 'revision': 'b183d4d', 'datasets': [], 'params': 3230000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 8192, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'falcon', 'prompt_fmt': '<|system|>\nAnswer concisely.\n<|user|>\n{instruction}\n<|assistant|>\n'}, {'name': 'phi-1_5', 'tuning': 'instruct', 'datasets': ['phi-1_5'], 'params': 1400000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'other', 'prompt_fmt': '{instruction}\n\nAnswer:'}, {'name': 'h2o-danube3-500m-chat', 'tuning': 'instruct', 'datasets': [], 'params': 500000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|prompt|>{instruction}</s><|answer|>'}, {'name': 'SmolLM2-1.7B-Instruct', 'tuning': 'instruct', 'revision': '83b1658', 'datasets': [], 'params': 1700000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.0, 'architecture': 
'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'}, {'name': 'SmolLM-1.7B-Instruct', 'tuning': 'instruct', 'revision': 'dc3dfe2', 'datasets': [], 'params': 1700000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'}, {'name': 'Falcon3-1B-Instruct', 'tuning': 'instruct', 'languages': ['en', 'fr', 'es', 'pt'], 'revision': '74391aa', 'datasets': [], 'params': 1700000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 8192, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'falcon', 'prompt_fmt': '<|system|>\nAnswer concisely.\n<|user|>\n{instruction}\n<|assistant|>\n'}, {'name': 'Qwen2.5-1.5B-Instruct', 'tuning': 'instruct', 'languages': ['zh', 'en', 'fr', 'es', 'pt', 'de', 'it', 'ru', 'ja', 'ko', 'vi', 'th', 'ar'], 'revision': '5de22ab', 'datasets': [], 'params': 1500000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 32768, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'}, {'name': 'Qwen2.5-0.5B-Instruct', 'tuning': 'instruct', 'languages': ['zh', 'en', 'fr', 'es', 'pt', 'de', 'it', 'ru', 'ja', 'ko', 'vi', 'th', 'ar'], 'revision': '554ffe5', 'datasets': [], 'params': 500000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 32768, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'}, {'name': 'SmolLM2-360M-Instruct', 'tuning': 'instruct', 'revision': 'ed9c4fe', 'datasets': [], 'params': 360000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.0, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'}, {'name': 'SmolLM-360M-Instruct', 'tuning': 'instruct', 'revision': '0b0e861', 'datasets': [], 'params': 360000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'}, {'name': 'SmolLM2-135M-Instruct', 'tuning': 'instruct', 'revision': 'e52a3dc', 'datasets': [], 'params': 135000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.0, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'}, {'name': 'SmolLM-135M-Instruct', 'tuning': 'instruct', 'revision': '90046ba', 'datasets': [], 'params': 135000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.3, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 
'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'}, {'name': 'LaMini-GPT-774M', 'tuning': 'instruct', 'datasets': ['webtext', 'lamini'], 'params': 774000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'mit', 'prompt_fmt': 'Below is an instruction that describes a task.\nWrite a response that completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:'}, {'name': 'LaMini-GPT-124M', 'tuning': 'instruct', 'datasets': ['webtext', 'lamini'], 'params': 124000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'mit', 'prompt_fmt': 'Below is an instruction that describes a task.\nWrite a response that completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:'}, {'name': 'TinyLlama-1.1B-Chat-v1.0', 'tuning': 'instruct', 'datasets': ['slimpajama', 'starcoderdata'], 'params': 1100000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'mit', 'prompt_fmt': '<|user|>{instruction}<|assistant|>'}, {'name': 'codet5p-770m-py', 'tuning': 'code', 'datasets': ['github-code'], 'params': 770000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'bsd-3-clause'}, {'name': 'codet5p-220m-py', 'tuning': 'code', 'datasets': ['github-code'], 'params': 220000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'bsd-3-clause'}, {'name': 'all-MiniLM-L6-v2', 'tuning': 'embedding', 'revision': '28efeb4', 'params': 22000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'apache-2.0'}, {'name': 'gte-tiny', 'tuning': 'embedding', 'params': 22000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'}, {'name': 'gte-small', 'tuning': 'embedding', 'params': 33000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'}, {'name': 'GIST-small-Embedding-v0', 'tuning': 'embedding', 'params': 33000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'}, {'name': 'bge-small-en', 'tuning': 'embedding', 'query_prefix': 'Represent this sentence for searching relevant passages: ', 'params': 33000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'}, {'name': 'e5-small-v2', 'tuning': 'embedding', 'params': 33000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'}, {'name': 'granite-embedding-125m-english', 'tuning': 'embedding', 'params': 30000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'apache-2.0'}, {'name': 'granite-embedding-107m-multilingual', 'tuning': 'embedding', 'params': 30000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'apache-2.0'}, {'name': 'granite-embedding-30m-english', 'tuning': 'embedding', 'params': 30000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'apache-2.0'}, {'name': 'multilingual-e5-small', 'tuning': 'embedding', 'params': 120000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'}]
class Config(builtins.dict):
744class Config(dict):
745    """
746    Store configuration information for the package.
747
748    This is a dictionary that provides data basic data validation.
749
750    Only appropriate keys and values are allowed to be set.
751
752    >>> c = Config({'max_ram': '4gb'})
753    >>> c
754    {...'max_ram': 4.0...}
755
756    >>> c = Config({'instruct_model': 'flan-t5-small'})
757    >>> c
758    {...'instruct_model': 'flan-t5-small'...}
759
760    >>> c = Config({'model_license': 'apache|mit|bsd'})
761    >>> c
762    {...'model_license': re.compile('apache|mit|bsd')...}
763
764    >>> c = Config({'instruct_model': 'flan-t5-bad'})
765    Traceback (most recent call last):
766      ...
767    KeyError: 'flan-t5-bad'
768
769    >>> c = Config({'bad_value': 1})
770    Traceback (most recent call last):
771      ...
772    KeyError: 'bad_value'
773
774    >>> c = Config()
775    >>> c.update({'bad_value': 1})
776    Traceback (most recent call last):
777      ...
778    KeyError: 'bad_value'
779
780    """
781
782    model_names = {m["name"]: m for m in models}
783
784    def __init__(self, config={}):
785        # Defaults are loaded first
786        for key in Config.schema:
787            self[key] = self.schema[key].default
788
789        # Environment variables override defaults
790        for key in Config.schema:
791            value = os.environ.get(f"LANGUAGEMODELS_{key.upper()}")
792            if value:
793                self[key] = value
794
795        # Any values passed in the config dict override environment vars
796        for key in config.keys():
797            self[key] = config[key]
798
799    def __setitem__(self, key, value):
800        super().__setitem__(key, Config.schema[key].initfn(value))
801
802        # Auto-adjust instruct_model when filters change
803        if key == "max_ram" or key == "model_license":
804            found = set()
805            for model in models:
806                if model["quantization"] == "int8":
807                    memsize = model["params"] / 1e9
808                elif model["quantization"] == "q3_k_m":
809                    memsize = model["params"] * 0.48 / 1e9
810                elif model["quantization"] == "q4_k_m":
811                    memsize = model["params"] * 0.59 / 1e9
812
813                sizefit = memsize < self["max_ram"]
814
815                if "model_license" in self:
816                    licensematch = self["model_license"].match(model["license"])
817                else:
818                    licensematch = True
819
820                if model["tuning"] not in found and sizefit and licensematch:
821                    self[model["tuning"] + "_model"] = model["name"]
822                    found.add(model["tuning"])
823
824            if len(found) < 3:
825                raise ModelFilterException("Unable to find models to match filters")
826
827    def update(self, other):
828        for key in other:
829            self[key] = other[key]
830
831    def use_hf_model(self, hf_path, revision, model_type="instruct"):
832        """Load and use a model from Huggingface
833
834        :param hf_path: Path for the model e.g. "org/model"
835        :param revision: The model git revision to load
836        :param model_type: Model type to load
837        """
838
839        assert "ct2" in hf_path.lower()
840        assert "int8" in hf_path.lower()
841
842        # We defer importing jinja2 until this point as it is only needed
843        # for interpolating hf model chat templates and does not need
844        # to be installed unless this method is used
845        from jinja2 import Environment, BaseLoader
846
847        tok_config = hf_hub_download(
848            hf_path, "tokenizer_config.json", revision=revision
849        )
850
851        with open(tok_config) as f:
852            chat_template = json.load(f)["chat_template"]
853
854        env = Environment(loader=BaseLoader())
855
856        template = env.from_string(chat_template)
857
858        prompt_fmt = template.render(
859            messages=[{"role": "user", "content": "{instruction}"}],
860            add_generation_prompt=True,
861        )
862
863        model = {
864            "name": hf_path,
865            "backend": "ct2",
866            "quantization": "int8",
867            "architecture": "decoder-only-transformer",
868            "max_tokens": 2048,
869            "params": 0,
870            "prompt_fmt": prompt_fmt,
871        }
872
873        models.insert(0, model)
874        self.model_names[model["name"]] = model
875        self[f"{model_type}_model"] = model["name"]
876
877    @staticmethod
878    def validate_model(model_name):
879        return Config.model_names[model_name]["name"]
880
881    @staticmethod
882    def validate_device(device):
883        assert device in ["auto", "cpu"]
884
885        return device
886
887    @staticmethod
888    def convert_to_gb(space):
889        """Convert max RAM string to int
890
891        Output will be in gigabytes
892
893        If not specified, input is assumed to be in gigabytes
894
895        >>> Config.convert_to_gb("512")
896        512.0
897
898        >>> Config.convert_to_gb(".5")
899        0.5
900
901        >>> Config.convert_to_gb("4G")
902        4.0
903
904        >>> Config.convert_to_gb("256mb")
905        0.25
906
907        >>> Config.convert_to_gb("256M")
908        0.25
909
910        >>> Config.convert_to_gb("small")
911        0.2
912
913        >>> Config.convert_to_gb("base")
914        0.48
915
916        >>> Config.convert_to_gb("large")
917        1.0
918
919        >>> Config.convert_to_gb("xl")
920        4.0
921
922        >>> Config.convert_to_gb("xxl")
923        16.0
924        """
925
926        if isinstance(space, int) or isinstance(space, float):
927            return float(space)
928
929        size_names = {
930            "small": 0.2,
931            "base": 0.48,
932            "large": 1.0,
933            "xl": 4.0,
934            "xxl": 16.0,
935        }
936
937        if space.lower().strip() in size_names:
938            return size_names[space.lower().strip()]
939
940        multipliers = {
941            "g": 1.0,
942            "m": 2**-10,
943        }
944
945        space = space.lower()
946        space = space.rstrip("b")
947
948        if space[-1] in multipliers:
949            return float(space[:-1]) * multipliers[space[-1]]
950        else:
951            return float(space)

Store configuration information for the package.

This is a dictionary that provides data basic data validation.

Only appropriate keys and values are allowed to be set.

>>> c = Config({'max_ram': '4gb'})
>>> c
{...'max_ram': 4.0...}
>>> c = Config({'instruct_model': 'flan-t5-small'})
>>> c
{...'instruct_model': 'flan-t5-small'...}
>>> c = Config({'model_license': 'apache|mit|bsd'})
>>> c
{...'model_license': re.compile('apache|mit|bsd')...}
>>> c = Config({'instruct_model': 'flan-t5-bad'})
Traceback (most recent call last):
  ...
KeyError: 'flan-t5-bad'
>>> c = Config({'bad_value': 1})
Traceback (most recent call last):
  ...
KeyError: 'bad_value'
>>> c = Config()
>>> c.update({'bad_value': 1})
Traceback (most recent call last):
  ...
KeyError: 'bad_value'
model_names = {
    'openchat-3.5-0106': {'name': 'openchat-3.5-0106', 'tuning': 'instruct', 'datasets': ['mistral', 'openorca', 'flan'], 'params': 7000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': 'GPT4 Correct User: {instruction}<|end_of_turn|>GPT4 Correct Assistant:'},
    'Llama-3.1-8B-Instruct': {'name': 'Llama-3.1-8B-Instruct', 'tuning': 'instruct', 'revision': 'd02fc85', 'datasets': ['llama3'], 'params': 8000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'llama3', 'prompt_fmt': '<|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'},
    'Meta-Llama-3-8B-Instruct': {'name': 'Meta-Llama-3-8B-Instruct', 'tuning': 'instruct', 'datasets': ['llama3'], 'params': 8000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'llama3', 'prompt_fmt': '<|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'},
    'openchat-3.5-1210': {'name': 'openchat-3.5-1210', 'tuning': 'instruct', 'datasets': ['mistral', 'openorca', 'flan'], 'params': 7000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': 'GPT4 Correct User: {instruction}<|end_of_turn|>GPT4 Correct Assistant:'},
    'WizardLM-2-7B': {'name': 'WizardLM-2-7B', 'tuning': 'instruct', 'datasets': ['mistral', 'wizardlm'], 'params': 7000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': 'USER: {instruction} ASSISTANT:'},
    'neural-chat-7b-v3-1': {'name': 'neural-chat-7b-v3-1', 'tuning': 'instruct', 'datasets': ['mistral', 'slimorca'], 'params': 7000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '### System:\nBe helpful\n### User:\n{instruction}\n### Assistant:\n'},
    'Mistral-7B-Instruct-v0.2': {'name': 'Mistral-7B-Instruct-v0.2', 'tuning': 'instruct', 'datasets': ['mistral'], 'params': 7000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<s>[INST] {instruction} [/INST]'},
    'flan-alpaca-gpt4-xl': {'name': 'flan-alpaca-gpt4-xl', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'gpt4-alpaca'], 'params': 3000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'},
    'flan-alpaca-xl': {'name': 'flan-alpaca-xl', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'alpaca'], 'params': 3000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'},
    'flan-t5-xl': {'name': 'flan-t5-xl', 'tuning': 'instruct', 'datasets': ['c4', 'flan'], 'params': 3000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'},
    'Llama-3.2-3B-Instruct': {'name': 'Llama-3.2-3B-Instruct', 'tuning': 'instruct', 'revision': '5da4ba8', 'datasets': ['llama3'], 'params': 1000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'llama3.2', 'repetition_penalty': 1.1, 'prompt_fmt': '<|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'},
    'fastchat-t5-3b-v1.0': {'name': 'fastchat-t5-3b-v1.0', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'sharegpt'], 'params': 3000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'},
    'LaMini-Flan-T5-783M': {'name': 'LaMini-Flan-T5-783M', 'tuning': 'instruct', 'revision': 'e5e20a1', 'datasets': ['c4', 'flan', 'lamini'], 'params': 783000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'cc-by-nc-4.0'},
    'flan-t5-large': {'name': 'flan-t5-large', 'tuning': 'instruct', 'datasets': ['c4', 'flan'], 'params': 783000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'},
    'Llama-3.2-1B-Instruct': {'name': 'Llama-3.2-1B-Instruct', 'tuning': 'instruct', 'revision': '6e3e3a1', 'datasets': ['llama3'], 'params': 1000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'llama3.2', 'repetition_penalty': 1.1, 'prompt_fmt': '<|start_header_id|>user<|end_header_id|>\n\n{instruction}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'},
    'LaMini-Flan-T5-248M': {'name': 'LaMini-Flan-T5-248M', 'tuning': 'instruct', 'revision': '96cfe99', 'datasets': ['c4', 'flan', 'lamini'], 'params': 248000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'cc-by-nc-4.0'},
    'flan-t5-base': {'name': 'flan-t5-base', 'tuning': 'instruct', 'datasets': ['c4', 'flan'], 'params': 248000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'},
    'flan-alpaca-base': {'name': 'flan-alpaca-base', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'alpaca'], 'params': 248000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'},
    'dialogstudio-t5-base-v1.0': {'name': 'dialogstudio-t5-base-v1.0', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'dialogstudio'], 'params': 248000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0', 'prompt_fmt': 'Instruction: Be helpful.\n<USER> {instruction}'},
    'LaMini-Flan-T5-77M': {'name': 'LaMini-Flan-T5-77M', 'tuning': 'instruct', 'datasets': ['c4', 'flan', 'lamini'], 'params': 77000000.0, 'backend': 'ct2', 'quantization': 'int8', 'architecture': 'encoder-decoder-transformer', 'license': 'cc-by-nc-4.0'},
    'flan-t5-small': {'name': 'flan-t5-small', 'tuning': 'instruct', 'datasets': ['c4', 'flan'], 'params': 77000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'apache-2.0'},
    'Phi-3-mini-4k-instruct-20240701': {'name': 'Phi-3-mini-4k-instruct-20240701', 'tuning': 'instruct', 'datasets': ['phi-3'], 'params': 3800000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'mit', 'prompt_fmt': '<|user|>\n{instruction}<|end|>\n<|assistant|>', 'repetition_penalty': 1.1},
    'Phi-3-mini-4k-instruct': {'name': 'Phi-3-mini-4k-instruct', 'tuning': 'instruct', 'datasets': ['phi-3'], 'params': 3800000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'mit', 'prompt_fmt': '<|user|>\n{instruction}<|end|>\n<|assistant|>', 'repetition_penalty': 1.1},
    'phi-2': {'name': 'phi-2', 'tuning': 'instruct', 'datasets': ['phi-2'], 'params': 2700000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'microsoft-research-license', 'prompt_fmt': 'Instruct: {instruction}\nOutput:'},
    'gemma-2b-it': {'name': 'gemma-2b-it', 'tuning': 'instruct', 'datasets': ['gemma'], 'params': 2500000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'gemma-terms-of-use', 'prompt_fmt': '<bos><start_of_turn>user\n{instruction}<end_of_turn>\n<start_of_turn>model'},
    'h2o-danube3-4b-chat': {'name': 'h2o-danube3-4b-chat', 'tuning': 'instruct', 'datasets': [], 'params': 4000000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|prompt|>{instruction}</s><|answer|>'},
    'h2o-danube2-1.8b-chat': {'name': 'h2o-danube2-1.8b-chat', 'tuning': 'instruct', 'datasets': [], 'params': 1800000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'other', 'prompt_fmt': '<|prompt|>{instruction}</s><|answer|>'},
    'h2o-danube-1.8b-chat': {'name': 'h2o-danube-1.8b-chat', 'tuning': 'instruct', 'datasets': [], 'params': 1800000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'other', 'prompt_fmt': '<|prompt|>{instruction}</s><|answer|>'},
    'Falcon3-3B-Instruct': {'name': 'Falcon3-3B-Instruct', 'tuning': 'instruct', 'languages': ['en', 'fr', 'es', 'pt'], 'revision': 'b183d4d', 'datasets': [], 'params': 3230000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 8192, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'falcon', 'prompt_fmt': '<|system|>\nAnswer concisely.\n<|user|>\n{instruction}\n<|assistant|>\n'},
    'phi-1_5': {'name': 'phi-1_5', 'tuning': 'instruct', 'datasets': ['phi-1_5'], 'params': 1400000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'other', 'prompt_fmt': '{instruction}\n\nAnswer:'},
    'h2o-danube3-500m-chat': {'name': 'h2o-danube3-500m-chat', 'tuning': 'instruct', 'datasets': [], 'params': 500000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|prompt|>{instruction}</s><|answer|>'},
    'SmolLM2-1.7B-Instruct': {'name': 'SmolLM2-1.7B-Instruct', 'tuning': 'instruct', 'revision': '83b1658', 'datasets': [], 'params': 1700000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.0, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'},
    'SmolLM-1.7B-Instruct': {'name': 'SmolLM-1.7B-Instruct', 'tuning': 'instruct', 'revision': 'dc3dfe2', 'datasets': [], 'params': 1700000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'},
    'Falcon3-1B-Instruct': {'name': 'Falcon3-1B-Instruct', 'tuning': 'instruct', 'languages': ['en', 'fr', 'es', 'pt'], 'revision': '74391aa', 'datasets': [], 'params': 1700000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 8192, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'falcon', 'prompt_fmt': '<|system|>\nAnswer concisely.\n<|user|>\n{instruction}\n<|assistant|>\n'},
    'Qwen2.5-1.5B-Instruct': {'name': 'Qwen2.5-1.5B-Instruct', 'tuning': 'instruct', 'languages': ['zh', 'en', 'fr', 'es', 'pt', 'de', 'it', 'ru', 'ja', 'ko', 'vi', 'th', 'ar'], 'revision': '5de22ab', 'datasets': [], 'params': 1500000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 32768, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'},
    'Qwen2.5-0.5B-Instruct': {'name': 'Qwen2.5-0.5B-Instruct', 'tuning': 'instruct', 'languages': ['zh', 'en', 'fr', 'es', 'pt', 'de', 'it', 'ru', 'ja', 'ko', 'vi', 'th', 'ar'], 'revision': '554ffe5', 'datasets': [], 'params': 500000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 32768, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'},
    'SmolLM2-360M-Instruct': {'name': 'SmolLM2-360M-Instruct', 'tuning': 'instruct', 'revision': 'ed9c4fe', 'datasets': [], 'params': 360000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.0, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'},
    'SmolLM-360M-Instruct': {'name': 'SmolLM-360M-Instruct', 'tuning': 'instruct', 'revision': '0b0e861', 'datasets': [], 'params': 360000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.1, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'},
    'SmolLM2-135M-Instruct': {'name': 'SmolLM2-135M-Instruct', 'tuning': 'instruct', 'revision': 'e52a3dc', 'datasets': [], 'params': 135000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.0, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'},
    'SmolLM-135M-Instruct': {'name': 'SmolLM-135M-Instruct', 'tuning': 'instruct', 'revision': '90046ba', 'datasets': [], 'params': 135000000.0, 'quantization': 'int8', 'backend': 'ct2', 'context_length': 2048, 'repetition_penalty': 1.3, 'architecture': 'decoder-only-transformer', 'license': 'apache-2.0', 'prompt_fmt': '<|im_start|>system\nAnswer concisely.<|im_end|>\n<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n'},
    'LaMini-GPT-774M': {'name': 'LaMini-GPT-774M', 'tuning': 'instruct', 'datasets': ['webtext', 'lamini'], 'params': 774000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'mit', 'prompt_fmt': 'Below is an instruction that describes a task.\nWrite a response that completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:'},
    'LaMini-GPT-124M': {'name': 'LaMini-GPT-124M', 'tuning': 'instruct', 'datasets': ['webtext', 'lamini'], 'params': 124000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'mit', 'prompt_fmt': 'Below is an instruction that describes a task.\nWrite a response that completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:'},
    'TinyLlama-1.1B-Chat-v1.0': {'name': 'TinyLlama-1.1B-Chat-v1.0', 'tuning': 'instruct', 'datasets': ['slimpajama', 'starcoderdata'], 'params': 1100000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'decoder-only-transformer', 'license': 'mit', 'prompt_fmt': '<|user|>{instruction}<|assistant|>'},
    'codet5p-770m-py': {'name': 'codet5p-770m-py', 'tuning': 'code', 'datasets': ['github-code'], 'params': 770000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'bsd-3-clause'},
    'codet5p-220m-py': {'name': 'codet5p-220m-py', 'tuning': 'code', 'datasets': ['github-code'], 'params': 220000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-decoder-transformer', 'license': 'bsd-3-clause'},
    'all-MiniLM-L6-v2': {'name': 'all-MiniLM-L6-v2', 'tuning': 'embedding', 'revision': '28efeb4', 'params': 22000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'apache-2.0'},
    'gte-tiny': {'name': 'gte-tiny', 'tuning': 'embedding', 'params': 22000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'},
    'gte-small': {'name': 'gte-small', 'tuning': 'embedding', 'params': 33000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'},
    'GIST-small-Embedding-v0': {'name': 'GIST-small-Embedding-v0', 'tuning': 'embedding', 'params': 33000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'},
    'bge-small-en': {'name': 'bge-small-en', 'tuning': 'embedding', 'query_prefix': 'Represent this sentence for searching relevant passages: ', 'params': 33000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'},
    'e5-small-v2': {'name': 'e5-small-v2', 'tuning': 'embedding', 'params': 33000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'},
    'granite-embedding-125m-english': {'name': 'granite-embedding-125m-english', 'tuning': 'embedding', 'params': 30000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'apache-2.0'},
    'granite-embedding-107m-multilingual': {'name': 'granite-embedding-107m-multilingual', 'tuning': 'embedding', 'params': 30000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'apache-2.0'},
    'granite-embedding-30m-english': {'name': 'granite-embedding-30m-english', 'tuning': 'embedding', 'params': 30000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'apache-2.0'},
    'multilingual-e5-small': {'name': 'multilingual-e5-small', 'tuning': 'embedding', 'params': 120000000.0, 'quantization': 'int8', 'backend': 'ct2', 'architecture': 'encoder-only-transformer', 'license': 'mit'},
}
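
The table above can be queried directly when a model's metadata is needed. A minimal sketch, assuming Config is imported from languagemodels.config (flan-t5-base is simply one of the entries above):

>>> from languagemodels.config import Config
>>> Config.model_names['flan-t5-base']['params']
248000000.0
>>> Config.model_names['flan-t5-base']['license']
'apache-2.0'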
def update(self, other):
827    def update(self, other):
828        for key in other:
829            self[key] = other[key]

D.update([E, ]**F) -> None.  Update D from mapping/iterable E and F.
If E is present and has a .keys() method, then does:  for k in E.keys(): D[k] = E[k]
If E is present and lacks a .keys() method, then does:  for k, v in E: D[k] = v
In either case, this is followed by: for k in F: D[k] = F[k]
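
Because each key is assigned through item assignment, the schema validation described in the module docstring still applies. A minimal usage sketch, assuming the module-level config instance shown at the end of this page:

>>> from languagemodels.config import config
>>> config.update({'max_ram': '4G', 'max_tokens': 400})
>>> config['max_ram']
4.0
>>> config['max_tokens']
400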

def use_hf_model(self, hf_path, revision, model_type='instruct'):
831    def use_hf_model(self, hf_path, revision, model_type="instruct"):
832        """Load and use a model from Hugging Face
833
834        :param hf_path: Path for the model e.g. "org/model"
835        :param revision: The model git revision to load
836        :param model_type: Model type to load
837        """
838
839        assert "ct2" in hf_path.lower()
840        assert "int8" in hf_path.lower()
841
842        # We defer importing jinja2 until this point as it is only needed
843        # for interpolating hf model chat templates and does not need
844        # to be installed unless this method is used
845        from jinja2 import Environment, BaseLoader
846
847        tok_config = hf_hub_download(
848            hf_path, "tokenizer_config.json", revision=revision
849        )
850
851        with open(tok_config) as f:
852            chat_template = json.load(f)["chat_template"]
853
854        env = Environment(loader=BaseLoader())
855
856        template = env.from_string(chat_template)
857
858        prompt_fmt = template.render(
859            messages=[{"role": "user", "content": "{instruction}"}],
860            add_generation_prompt=True,
861        )
862
863        model = {
864            "name": hf_path,
865            "backend": "ct2",
866            "quantization": "int8",
867            "architecture": "decoder-only-transformer",
868            "max_tokens": 2048,
869            "params": 0,
870            "prompt_fmt": prompt_fmt,
871        }
872
873        models.insert(0, model)
874        self.model_names[model["name"]] = model
875        self[f"{model_type}_model"] = model["name"]

Load and use a model from Hugging Face

Parameters
  • hf_path: Path for the model e.g. "org/model"
  • revision: The model git revision to load
  • model_type: Model type to load
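
A usage sketch; the repository path below is hypothetical. Note that the assertions in the source only accept paths containing "ct2" and "int8" (that is, int8 CTranslate2 conversions), and that the call downloads the model's tokenizer_config.json from the Hugging Face Hub to build the prompt format:

>>> from languagemodels.config import config
>>> config.use_hf_model('my-org/my-model-ct2-int8', 'main')  # doctest: +SKIP
>>> config['instruct_model']  # doctest: +SKIP
'my-org/my-model-ct2-int8'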
@staticmethod
def validate_model(model_name):
877    @staticmethod
878    def validate_model(model_name):
879        return Config.model_names[model_name]["name"]
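
validate_model simply looks the name up in model_names and returns the canonical name, so an unknown model raises the same KeyError shown at the top of this page. A minimal sketch (no-such-model is a deliberately invalid name):

>>> Config.validate_model('flan-t5-base')
'flan-t5-base'
>>> Config.validate_model('no-such-model')
Traceback (most recent call last):
    ...
KeyError: 'no-such-model'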
@staticmethod
def validate_device(device):
881    @staticmethod
882    def validate_device(device):
883        assert device in ["auto", "cpu"]
884
885        return device
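
Only 'auto' and 'cpu' pass the assertion; any other value (here 'cuda', as an example of invalid input) fails. A minimal sketch:

>>> Config.validate_device('auto')
'auto'
>>> Config.validate_device('cuda')
Traceback (most recent call last):
    ...
AssertionError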
@staticmethod
def convert_to_gb(space):
887    @staticmethod
888    def convert_to_gb(space):
889        """Convert max RAM string to a float
890
891        Output will be in gigabytes
892
893        If no unit is specified, input is assumed to be in gigabytes
894
895        >>> Config.convert_to_gb("512")
896        512.0
897
898        >>> Config.convert_to_gb(".5")
899        0.5
900
901        >>> Config.convert_to_gb("4G")
902        4.0
903
904        >>> Config.convert_to_gb("256mb")
905        0.25
906
907        >>> Config.convert_to_gb("256M")
908        0.25
909
910        >>> Config.convert_to_gb("small")
911        0.2
912
913        >>> Config.convert_to_gb("base")
914        0.48
915
916        >>> Config.convert_to_gb("large")
917        1.0
918
919        >>> Config.convert_to_gb("xl")
920        4.0
921
922        >>> Config.convert_to_gb("xxl")
923        16.0
924        """
925
926        if isinstance(space, int) or isinstance(space, float):
927            return float(space)
928
929        size_names = {
930            "small": 0.2,
931            "base": 0.48,
932            "large": 1.0,
933            "xl": 4.0,
934            "xxl": 16.0,
935        }
936
937        if space.lower().strip() in size_names:
938            return size_names[space.lower().strip()]
939
940        multipliers = {
941            "g": 1.0,
942            "m": 2**-10,
943        }
944
945        space = space.lower()
946        space = space.rstrip("b")
947
948        if space[-1] in multipliers:
949            return float(space[:-1]) * multipliers[space[-1]]
950        else:
951            return float(space)

Convert max RAM string to a float

Output will be in gigabytes

If no unit is specified, input is assumed to be in gigabytes

>>> Config.convert_to_gb("512")
512.0
>>> Config.convert_to_gb(".5")
0.5
>>> Config.convert_to_gb("4G")
4.0
>>> Config.convert_to_gb("256mb")
0.25
>>> Config.convert_to_gb("256M")
0.25
>>> Config.convert_to_gb("small")
0.2
>>> Config.convert_to_gb("base")
0.48
>>> Config.convert_to_gb("large")
1.0
>>> Config.convert_to_gb("xl")
4.0
>>> Config.convert_to_gb("xxl")
16.0
schema = {
    'max_ram': ConfigItem(initfn=<function Config.convert_to_gb>, default=0.48),
    'max_tokens': ConfigItem(initfn=<class 'int'>, default=200),
    'echo': ConfigItem(initfn=<class 'int'>, default=False),
    'device': ConfigItem(initfn=<function Config.validate_device>, default='cpu'),
    'model_license': ConfigItem(initfn=<function compile>, default='.*'),
    'instruct_model': ConfigItem(initfn=<function Config.validate_model>, default='LaMini-Flan-T5-248M'),
    'embedding_model': ConfigItem(initfn=<function Config.validate_model>, default='all-MiniLM-L6-v2'),
    'code_model': ConfigItem(initfn=<function Config.validate_model>, default='codet5p-220m-py'),
    'max_prompt_length': ConfigItem(initfn=<class 'int'>, default=50000),
}
config = {
    'max_ram': 0.48,
    'instruct_model': 'LaMini-Flan-T5-248M',
    'code_model': 'codet5p-220m-py',
    'embedding_model': 'all-MiniLM-L6-v2',
    'max_tokens': 200,
    'echo': 0,
    'device': 'cpu',
    'model_license': re.compile('.*'),
    'max_prompt_length': 50000,
}
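
Reading and writing this instance works like a plain dict, except that each value is normalized by its key's initfn from the schema above and unknown keys are rejected (the KeyError: 'bad_value' traceback at the top of this page is that rejection). A minimal sketch:

>>> from languagemodels.config import config
>>> config['max_ram'] = '256mb'
>>> config['max_ram']
0.25
>>> config['device'] = 'auto'
>>> config['device']
'auto'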