Use NVIDIA NIM APIs to Quickly Access Small LLMs (Code Only)

--

Stock King Nvidia

Create a free account here: Try NVIDIA NIM APIs, then generate an API key (also free).

Call the Google Gemma 7B Model

from openai import OpenAI

# Point the OpenAI client at the NVIDIA NIM endpoint
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key="$API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC"  # Replace with your API key!
)

# Request a streaming chat completion from Gemma 7B
completion = client.chat.completions.create(
    model="google/gemma-7b",
    messages=[{"role": "user", "content": "Hi, it's-a me, Mario!"}],
    temperature=0.5,
    top_p=1,
    max_tokens=1024,
    stream=True
)

# Print the reply as tokens arrive
for chunk in completion:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")
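
If you'd rather not hardcode the key, you can read it from an environment variable and make a non-streaming call instead. Here is a minimal sketch, assuming you exported the key under the name NVIDIA_API_KEY (that variable name is just my choice for this example):

import os
from openai import OpenAI

# Assumes the key was exported as NVIDIA_API_KEY (name chosen for this example)
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ["NVIDIA_API_KEY"]
)

# Non-streaming variant: the full reply comes back in one response object
completion = client.chat.completions.create(
    model="google/gemma-7b",
    messages=[{"role": "user", "content": "Summarize what NVIDIA NIM is in one sentence."}],
    temperature=0.5,
    max_tokens=256,
)

print(completion.choices[0].message.content)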

Call the Google PaliGemma Model


import requests, base64

# Read the local image and base64-encode it for inline upload
with open("dog.jpeg", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode()

assert len(image_b64) < 180_000, \
    "To upload larger images, use the assets API (see docs)"

# Invoke the model
invoke_url = "https://ai.api.nvidia.com/v1/vlm/google/paligemma"
stream = True

headers = {
    "Authorization": "Bearer $API_KEY_REQUIRED_IF_EXECUTING_OUTSIDE_NGC",  # Replace with your API key; keep 'Bearer'
    "Accept": "text/event-stream" if stream else "application/json"
}

payload = {
    "messages": [
        {…
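
The payload is cut off in the snippet above. As a rough sketch only, assuming the same request shape NVIDIA uses for its other VLM endpoints (the base64 image embedded as an <img> tag inside the message text; exact field names may differ, so verify against the PaliGemma page in the NIM docs), the rest of the call typically looks like this:

# Hedged sketch: payload shape assumed from NVIDIA's other VLM examples; check the PaliGemma docs
payload = {
    "messages": [
        {
            "role": "user",
            "content": f'Describe what the dog is doing. <img src="data:image/jpeg;base64,{image_b64}" />'
        }
    ],
    "max_tokens": 512,
    "temperature": 0.2,
    "top_p": 0.7,
    "stream": stream
}

response = requests.post(invoke_url, headers=headers, json=payload, stream=stream)

if stream:
    # Server-sent events: print each line of the stream as it arrives
    for line in response.iter_lines():
        if line:
            print(line.decode("utf-8"))
else:
    print(response.json())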
