languagemodels
```python
import requests
import datetime
import json
import re
from typing import overload

from languagemodels.config import config
from languagemodels.inference import (
    generate,
    rank_instruct,
    parse_chat,
    list_tokens,
)
from languagemodels import embeddings

# Shared retrieval context used by store_doc, load_doc, and get_doc_context
docs = embeddings.RetrievalContext()
```
```python
def complete(prompt: str) -> str:
    """Provide one completion for a given open-ended prompt

    :param prompt: Prompt to use as input to the model
    :return: Completion returned from the language model

    Examples:

    >>> complete("Luke thought that he") #doctest: +SKIP
    'was going to be a doctor.'

    >>> complete("There are many mythical creatures who") #doctest: +SKIP
    'are able to fly'

    >>> complete("She hid in her room until") #doctest: +SKIP
    'she was sure she was safe'
    """

    result = generate(
        ["Write a sentence"],
        prefix=prompt,
        max_tokens=config["max_tokens"],
        temperature=0.7,
        topk=40,
    )[0]

    if result.startswith(prompt):
        prefix_length = len(prompt)
        return result[prefix_length:]
    else:
        return result
```
```python
@overload
def do(prompt: list) -> list:
    ...


@overload
def do(prompt: str) -> str:
    ...


def do(prompt, choices=None):
    """Follow a single-turn instructional prompt

    :param prompt: Instructional prompt(s) to follow
    :param choices: If provided, outputs are restricted to values in choices
    :return: Completion returned from the language model

    Note that this function is overloaded to return a list of results if
    a list of prompts is provided and a single string if a single
    prompt is provided as a string

    Examples:

    >>> do("Translate Spanish to English: Hola mundo!") #doctest: +SKIP
    'Hello world!'

    >>> do("Pick the planet from the list: baseball, Texas, Saturn")
    '...Saturn...'

    >>> do("Answer: What is the capital of England?")
    '...London...'

    >>> do(["Pick the planet from the list: baseball, Texas, Saturn"] * 2)
    ['...Saturn...', '...Saturn...']

    >>> do(["Say red", "Say blue"], choices=["red", "blue"])
    ['red', 'blue']

    >>> do("Classify as positive or negative: LLMs are bad",
    ...    choices=["Positive", "Negative"])
    'Negative'

    >>> do("Classify as positive or negative: LLMs are great",
    ...    choices=["Positive", "Negative"])
    'Positive'
    """

    prompts = [prompt] if isinstance(prompt, str) else prompt

    if choices:
        results = [r[0] for r in rank_instruct(prompts, choices)]
    else:
        results = generate(prompts, max_tokens=config["max_tokens"], topk=1)

        # Post-process free-form generations only; choice-constrained
        # outputs are returned exactly as supplied (see doctests above)
        for i, result in enumerate(results):
            if len(result.split()) == 1:
                results[i] = result.title()

            if result[-1] not in (".", "!", "?"):
                results[i] = results[i] + "."

    return results[0] if isinstance(prompt, str) else results
```
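Because `do` accepts either a single string or a list, and `choices` restricts outputs via `rank_instruct`, a batch of documents can be labeled in one call. A sketch of that pattern (the review strings are illustrative):

```python
import languagemodels as lm

reviews = [
    "The battery died after two days.",
    "Setup took thirty seconds and it just works.",
]

# One prompt per review; outputs are restricted to the two labels
prompts = [f"Classify as positive or negative: {r}" for r in reviews]
labels = lm.do(prompts, choices=["positive", "negative"])

for review, label in zip(reviews, labels):
    print(f"{label}: {review}")
```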
```python
@overload
def embed(doc: list) -> list:
    ...


@overload
def embed(doc: str) -> list:
    ...


def embed(doc):
    """Create embedding for a document

    :param doc: Document(s) to embed
    :return: Embedding

    Note that this function is overloaded to return a list of embeddings if
    a list of docs is provided and a single embedding if a single
    doc is provided as a string

    Examples:

    >>> embed("Hello, world")
    [-0.0...]

    >>> embed(["Hello", "world"])
    [[-0.0...]]
    """

    docs = [doc] if isinstance(doc, str) else doc

    # Create embeddings and convert to lists of floats
    emb = [[float(n) for n in e] for e in embeddings.embed(docs)]

    return emb[0] if isinstance(doc, str) else emb
```
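The returned embedding is a plain list of floats, so ordinary vector math applies directly. A small sketch; the cosine similarity helper below is not part of this module, just standard arithmetic:

```python
import math

import languagemodels as lm


def cosine_similarity(a: list, b: list) -> float:
    """Cosine similarity between two equal-length vectors"""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    return dot / (norm_a * norm_b)


query, doc1, doc2 = lm.embed([
    "What is the capital of France?",
    "Paris is the capital of France.",
    "The sky is blue.",
])

# The semantically related sentence should score higher
print(cosine_similarity(query, doc1) > cosine_similarity(query, doc2))
```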
````python
def chat(prompt: str) -> str:
    """Get new message from chat-optimized language model

    The `prompt` for this model is provided as a series of messages as a single
    plain-text string. Several special tokens are used to delineate chat
    messages.

    - `system:` - Indicates the start of a system message providing
      instructions about how the assistant should behave.
    - `user:` - Indicates the start of a prompter (typically user)
      message.
    - `assistant:` - Indicates the start of an assistant message.

    A complete prompt may look something like this:

    ```
    Assistant is helpful and harmless

    User: What is the capital of Germany?

    Assistant: The capital of Germany is Berlin.

    User: How many people live there?

    Assistant:
    ```

    The completion from the language model is returned.

    :param prompt: Prompt using formatting described above
    :return: Completion returned from the language model

    Examples:

    >>> response = chat('''
    ... System: Respond as a helpful assistant. It is 5:00pm.
    ...
    ... User: What time is it?
    ...
    ... Assistant:
    ... ''') # doctest: +SKIP
    "It's 5:00pm."
    """

    messages = parse_chat(prompt)

    # Suppress starts of assistant and user messages to avoid repeat generation
    suppress = [
        "Assistant: " + m["content"].split(" ")[0]
        for m in messages
        if m["role"] in ["assistant", "user"]
    ]

    # Suppress all user messages to avoid repeating them
    suppress += [m["content"] for m in messages if m["role"] == "user"]

    system_msgs = [m for m in messages if m["role"] == "system"]
    assistant_msgs = [m for m in messages if m["role"] == "assistant"]
    user_msgs = [m for m in messages if m["role"] == "user"]

    # The current model is tuned on instructions and tends to get
    # lost if it sees too many questions.
    # Use only the most recent user and assistant message for context
    # and keep all system messages.
    messages = system_msgs + assistant_msgs[-1:] + user_msgs[-1:]

    rolemap = {
        "system": "System",
        "user": "Question",
        "assistant": "Assistant",
    }

    messages = [f"{rolemap[m['role']]}: {m['content']}" for m in messages]

    prompt = "\n\n".join(messages) + "\n\n" + "Assistant:"

    if prompt.startswith("System:"):
        prompt = prompt[7:].strip()

    response = generate(
        [prompt],
        max_tokens=config["max_tokens"],
        temperature=0.3,
        topk=40,
        prefix="Assistant:",
        suppress=suppress,
    )[0]

    # Remove duplicated "Assistant:" prefix from the generated response
    if response.startswith("Assistant:"):
        response = response[10:]

    return response.strip()
````
```python
def code(prompt: str) -> str:
    """Complete a code prompt

    This assumes that users are expecting Python completions. Default models
    are fine-tuned on Python where applicable.

    :param prompt: Code context to complete
    :return: Completion returned from the language model

    Examples:

    >>> code("# Print Hello, world!\\n")
    'print("Hello, world!")\\n'

    >>> code("def return_4():")
    '...return 4...'
    """
    return generate([prompt], max_tokens=config["max_tokens"], topk=1, model="code")[0]
```
```python
def extract_answer(question: str, context: str) -> str:
    """Extract an answer to a `question` from a provided `context`

    The returned answer will always be a substring extracted from `context`.
    It may not always be a correct or meaningful answer, but it will never be
    an arbitrary hallucination.

    :param question: A question to answer using knowledge from context
    :param context: Knowledge used to answer the question
    :return: Answer to the question.

    Examples:

    >>> context = "There is a green ball and a red box"
    >>> extract_answer("What color is the ball?", context).lower()
    '...green...'

    >>> extract_answer("Who created Python?", get_wiki('Python')) #doctest: +SKIP
    '...Guido van Rossum...'
    """

    return generate([f"{context}\n\n{question}"])[0]
```
```python
def classify(doc: str, label1: str, label2: str) -> str:
    """Performs binary classification on an input

    :param doc: A plain text input document to classify
    :param label1: The first label to classify against
    :param label2: The second label to classify against
    :return: The closest matching class. The return value will always be
        `label1` or `label2`

    Examples:

    >>> classify("That book was good.","positive","negative")
    'positive'
    >>> classify("That movie was terrible.","positive","negative")
    'negative'
    """

    return do(
        f"Classify as {label1} or {label2}: {doc}\n\nClassification:",
        choices=[label1, label2],
    )
```
```python
def store_doc(doc: str, name: str = "") -> None:
    """Store document for later retrieval

    :param doc: A plain text document to store.
    :param name: Optional name for the document. This is used as a chunk prefix.

    Examples:

    >>> store_doc("The sky is blue.")
    """
    docs.store(doc, name)
```
```python
def load_doc(query: str) -> str:
    """Load a matching document

    A single document that best matches `query` will be returned.

    :param query: Query to compare to stored documents
    :return: Content of the closest matching document

    Examples:

    >>> store_doc("Paris is in France.")
    >>> store_doc("The sky is blue.")
    >>> load_doc("Where is Paris?")
    'Paris is in France.'
    """
    return docs.get_match(query)
```
```python
def get_doc_context(query: str) -> str:
    """Loads context from documents

    A string representing the most relevant content from all stored documents
    will be returned. This may be a blend of chunks from multiple documents.

    :param query: Query to compare to stored documents
    :return: Up to 128 tokens of context

    Examples:

    >>> store_doc("Paris is in France.")
    >>> store_doc("Paris is nice.")
    >>> store_doc("The sky is blue.")
    >>> get_doc_context("Where is Paris?")
    'Paris is in France.\\n\\nParis is nice.'
    """
    return docs.get_context(query)
```
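The document store combines naturally with `extract_answer` for simple retrieval-augmented question answering: store documents once, retrieve the most relevant chunks for a question, and answer from only that context. A sketch of the pattern using the documented calls:

```python
import languagemodels as lm

lm.store_doc("Paris is the capital of France.", "geography")
lm.store_doc("The Eiffel Tower was completed in 1889.", "landmarks")
lm.store_doc("The sky is blue.", "misc")

question = "When was the Eiffel Tower completed?"

# Retrieve up to 128 tokens of the most relevant stored content
context = lm.get_doc_context(question)

print(lm.extract_answer(question, context))
```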
```python
def get_wiki(topic: str) -> str:
    """
    Return Wikipedia summary for a topic

    This function ignores the complexity of disambiguation pages and simply
    returns the first result that is not a disambiguation page

    :param topic: Topic to search for on Wikipedia
    :return: Text content of the lead section of the most popular matching article

    Examples:

    >>> get_wiki('Python language')
    'Python is a high-level...'

    >>> get_wiki('Chemistry')
    'Chemistry is the scientific study...'
    """

    url = "https://api.wikimedia.org/core/v1/wikipedia/en/search/title"
    response = requests.get(url, params={"q": topic, "limit": 5})
    response = json.loads(response.text)

    for page in response["pages"]:
        wiki_result = requests.get(
            f"https://en.wikipedia.org/w/api.php?action=query&prop=extracts|pageprops&"
            f"exintro&redirects=1&titles={page['title']}&format=json"
        ).json()

        first = wiki_result["query"]["pages"].popitem()[1]
        # pageprops may be omitted entirely for pages with no page properties
        if "disambiguation" in first.get("pageprops", {}):
            continue

        summary = first["extract"]

        cutoffs = [
            "See_also",
            "Notes",
            "References",
            "Further_reading",
            "External_links",
        ]

        for cutoff in cutoffs:
            summary = summary.split(f'<span id="{cutoff}">', 1)[0]

        summary = re.sub(r"<p>", "\n\n", summary, flags=re.I)
        summary = re.sub(r"<!--.*?-->", "", summary, flags=re.I | re.DOTALL)
        summary = re.sub(r"<.*?>", "", summary, flags=re.I)
        summary = re.sub(r"\s*[\n\r]+\s*[\r\n]+[\s\r\n]*", "\n\n", summary, flags=re.I)
        summary = summary.strip()
        return summary
    else:
        return "No matching wiki page found."
```
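One way to seed the document store is with Wikipedia lead sections, which then become retrievable context for later questions. A sketch under that assumption (the topic list is illustrative):

```python
import languagemodels as lm

# Cache a few wiki summaries in the retrieval store
for topic in ["Python (programming language)", "Guido van Rossum"]:
    lm.store_doc(lm.get_wiki(topic), topic)

question = "Who created Python?"
print(lm.extract_answer(question, lm.get_doc_context(question)))
```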
```python
def get_weather(latitude, longitude):
    """Fetch the current weather for a supplied latitude and longitude

    Weather is provided by the US government and this function only supports
    locations in the United States.

    :param latitude: Latitude value representing this location
    :param longitude: Longitude value representing this location
    :return: Plain text description of the current weather forecast

    Examples:

    >>> get_weather(41.8, -87.6) # doctest: +SKIP
    'Scattered showers and thunderstorms before 1pm with a high of 73.'
    """

    res = requests.get(f"https://api.weather.gov/points/{latitude},{longitude}")
    points = json.loads(res.text)
    forecast_url = points["properties"]["forecast"]

    res = requests.get(forecast_url)
    forecast = json.loads(res.text)
    current = forecast["properties"]["periods"][0]

    return current["detailedForecast"]
```
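Helpers such as `get_date` and `get_weather` are convenient for injecting live facts into a system message before calling `chat`, so the model answers from current data rather than its training set. A sketch of that pattern (the coordinates are Chicago's, as in the example above):

```python
import languagemodels as lm

weather = lm.get_weather(41.8, -87.6)
now = lm.get_date()

# Ground the assistant in the current date and forecast
print(lm.chat(f"""
System: Respond as a helpful assistant. It is {now}. The weather is: {weather}

User: Should I bring an umbrella today?

Assistant:
"""))
```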
```python
def get_date() -> str:
    """Returns the current date and time in natural language

    >>> get_date() # doctest: +SKIP
    'Friday, May 12, 2023 at 09:27AM'
    """

    now = datetime.datetime.now()

    return now.strftime("%A, %B %d, %Y at %I:%M%p")
```
```python
def print_tokens(prompt: str) -> None:
    """Prints a list of tokens in a prompt

    :param prompt: Prompt to use as input to tokenizer
    :return: Nothing

    Examples:

    >>> print_tokens("Hello world")
    ' Hello' (token 8774)
    ' world' (token 296)

    >>> print_tokens("Hola mundo")
    ' Hol' (token 5838)
    'a' (token 9)
    ' mun' (token 13844)
    'd' (token 26)
    'o' (token 32)
    """

    tokens = list_tokens(prompt)

    for token in tokens:
        print(f"'{token[0].replace('▁', ' ')}' (token {token[1]})")
```
```python
def count_tokens(prompt: str) -> int:
    """Counts tokens in a prompt

    :param prompt: Prompt to use as input to tokenizer
    :return: Number of tokens in the prompt

    Examples:

    >>> count_tokens("Hello world")
    2

    >>> count_tokens("Hola mundo")
    5
    """

    return len(list_tokens(prompt))
```
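Since generation is capped by `config["max_tokens"]`, `count_tokens` is handy for checking that a prompt plus retrieved context fits a model's context window. A sketch with an assumed token budget (actual limits are model-specific):

```python
import languagemodels as lm

MAX_PROMPT_TOKENS = 512  # assumed budget, not a value defined by this package

question = "Where is Paris?"
context = lm.get_doc_context(question)
prompt = f"{context}\n\n{question}"

if lm.count_tokens(prompt) > MAX_PROMPT_TOKENS:
    # Fall back to the bare question rather than truncating mid-sentence
    prompt = question

print(lm.do(prompt))
```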
```python
def set_max_ram(value):
    """Sets max allowed RAM

    This value takes priority over environment variables

    Returns the numeric value set in GB

    >>> set_max_ram(16)
    16.0

    >>> set_max_ram('512mb')
    0.5
    """

    config["max_ram"] = value

    return config["max_ram"]
```
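Raising the RAM ceiling before the first generation call allows larger, generally more capable, models to be selected. A short sketch; the accepted input formats shown are those from the examples above:

```python
import languagemodels as lm

# Allow up to 4 GB for model weights; strings such as '512mb' also work
lm.set_max_ram(4)

print(lm.do("Answer: What is the capital of England?"))
```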
```python
def require_model_license(match_re):
    """Require models to match supplied regex

    This can be used to enforce certain licensing constraints when using this
    package.
    """
    config["model_license"] = match_re
```
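For example, a deployment restricted to permissive licenses might pass a regex matching the acceptable identifiers. The exact license strings come from each model's metadata, so the pattern below is illustrative:

```python
import languagemodels as lm

# Only allow models whose license metadata matches this pattern (illustrative)
lm.require_model_license("mit|apache.*")

print(lm.do("Say hello"))
```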