Use the NVIDIA NIM APIs to Quickly Access Small LLMs (Code Only)


Stock King Nvidia

Create a free account via the "Try NVIDIA NIM APIs" page, then generate an API key (also free).

Call Google GEMMA 7b Model

from openai import OpenAI

# NVIDIA NIM exposes an OpenAI-compatible API, so the stock OpenAI client is
# reused and simply pointed at NVIDIA's hosted endpoint.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key="$API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC",  # Replace with your api key!
)

# stream=True is required: the loop below consumes incremental chunks
# (chunk.choices[0].delta) rather than a single complete response.
completion = client.chat.completions.create(
    model="google/gemma-7b",  # model id as listed in the NVIDIA API catalog -- confirm
    messages=[{"role":"user","content":"Hi, Its a me, Mario!"}],
    stream=True,
)

# Print each text fragment as it arrives, without inserting newlines.
for chunk in completion:
    # Some streamed chunks may carry no choices (e.g. trailing metadata).
    if not chunk.choices:
        continue
    piece = chunk.choices[0].delta.content
    if piece is not None:
        print(piece, end="")

Call Google PaliGemma Model

import requests, base64

# Initial part just to grab image: read the file as raw bytes and base64-encode
# it into a text string so it can be embedded in the request body.
with open("dog.jpeg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode()

# Inline images must stay under the size limit checked here; larger images
# have to go through the assets API instead.
assert len(image_b64) < 180_000, \
    "To upload larger images, use the assets API (see docs)"

# Invoke Model

# Hosted PaliGemma endpoint (from the NVIDIA API catalog -- confirm before use).
invoke_url = "https://ai.api.nvidia.com/v1/vlm/google/paligemma"
# When True, the response is requested as a server-sent event stream.
stream = True

headers = {
    "Authorization": "Bearer $API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC",  # Replace with API key keep 'Bearer'
    # The Accept header must match the chosen response mode.
    "Accept": "text/event-stream" if stream else "application/json",
}

payload = {
"messages": [