To continue from:
Image Generation
import os
import shutil
import requests
from openai import OpenAI
# Generate a single HD landscape image with DALL-E 3 and save it to disk.
client = OpenAI()
response = client.images.generate(
    model="dall-e-3",
    prompt="a white siamese cat",
    size="1792x1024",
    quality="hd",
    n=1,
)
url = response.data[0].url
output_image_file = 'cat.png'

# Stream the download straight into the file. The timeout keeps a stalled
# connection from hanging the script, and the with-block releases the
# connection once the copy is finished.
with requests.get(url, stream=True, timeout=60) as r:
    # Fail loudly on an HTTP error instead of silently skipping the write;
    # the later examples expect cat.png to exist.
    r.raise_for_status()
    # Have urllib3 undo any gzip/deflate transfer encoding while copying.
    r.raw.decode_content = True
    with open(output_image_file, 'wb') as f:
        shutil.copyfileobj(r.raw, f)
Image Recognition / Vision
import os
import base64
import requests
from openai import OpenAI
def encode_image(image_path) -> str:
    """Return the contents of the file at *image_path* as a Base64 string.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, Base64-encoded and decoded to ``str`` as UTF-8.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
# Ask GPT-4o to describe the image produced by the generation example.
base64_image = encode_image('cat.png')
client = OpenAI()
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What’s in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        # The image is embedded inline as a data URL. The
                        # declared media type must match the file, and
                        # cat.png is a PNG, not a JPEG.
                        "url": f"data:image/png;base64,{base64_image}"
                    },
                },
            ],
        }
    ],
    max_tokens=300,
)
# Prints the first returned choice object.
print(response.choices[0])
Image Comparison / Vision
import os
import base64
from openai import OpenAI
def encode_image(image_path) -> str:
    """Return the contents of the file at *image_path* as a Base64 string.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, Base64-encoded and decoded to ``str`` as UTF-8.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
# Send two images in one user message and ask GPT-4o to compare them.
base64_image1 = encode_image('data/sample2.png')
base64_image2 = encode_image('data/sample3.png')
client = OpenAI()
response = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What are in these images? Is there any difference between them?"},
                # Each image is its own "image_url" part, embedded inline as
                # a data URL. Both inputs are .png files, so the declared
                # media type is image/png.
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image1}"
                    },
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image2}"
                    },
                },
            ],
        }
    ],
    max_tokens=300,
)
# Prints the first returned choice object.
print(response.choices[0])
Text to Speech
import os
from openai import OpenAI
# Synthesize a short sentence with the tts-1 model and save it to disk.
speech_file_path = "speech.mp3"

client = OpenAI()

# Voices listed by the original example:
# alloy, echo, fable, onyx, nova, and shimmer
response = client.audio.speech.create(
    input="Today is a wonderful day to build something people love!",
    voice="alloy",
    model="tts-1",
)

# Write the returned audio to speech_file_path.
response.write_to_file(speech_file_path)
Speech to Text
import os
from openai import OpenAI
# Transcribe the audio file written by the text-to-speech example.
client = OpenAI()
speech_file_path = "speech.mp3"

# Open the audio inside a with-block so the file handle is closed once the
# upload finishes, even if the API call raises.
with open(speech_file_path, "rb") as audio_file:
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
    )

print(transcription.text)
For assistant examples, please continue with the article below:
Enjoy!