Python
import os

from openai import OpenAI

# BUG FIX: the original used os.environ without importing os (NameError).
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])


def get_embedding(text, model="text-embedding-3-small"):
    """Return the embedding vector (a list of floats) for *text*.

    Newlines are replaced with spaces before the request, per OpenAI's
    guidance for embedding inputs.

    Args:
        text: The input string to embed.
        model: Embedding model name (default: "text-embedding-3-small").

    Returns:
        The embedding for the single input, i.e. ``res.data[0].embedding``.
    """
    text = text.replace("\n", " ")
    res = client.embeddings.create(input=[text], model=model)
    return res.data[0].embedding

# https://platform.openai.com/docs/guides/embeddings/obtaining-the-embeddings
Async version:
Python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI(
    # This is the default and can be omitted
    # api_key=os.environ.get("OPENAI_API_KEY"),
)


async def get_embedding_call(text, model="text-embedding-3-large"):
    """Request an embedding for *text* and return the full API response.

    Note: this returns the whole response object, not just the vector;
    callers extract ``response.data[0].embedding`` themselves.
    """
    text = text.replace("\n", " ")
    # dimensions parameter introduced here:
    # https://platform.openai.com/docs/api-reference/embeddings/create#embeddings-create-dimensions
    # actual model dimensions are listed here:
    # https://openai.com/blog/new-embedding-models-and-api-updates
    # text-embedding-3-large has options: 256, 1024, 3072
    return await client.embeddings.create(
        input=[text],
        model=model,
        dimensions=1024,
    )


async def main():
    """Embed a small batch of strings concurrently; return their vectors."""
    # your data
    some_text = ["kale", "brok", "cali"]
    # create a list of awaitables
    calls = [get_embedding_call(text) for text in some_text]
    # gather and await — results come back in the same order as `calls`
    responses = await asyncio.gather(*calls)
    # BUG FIX: Python lists have no .map() method (the original
    # `response.map(lambda ...)` raises AttributeError); use a
    # list comprehension to extract each embedding.
    embeddings = [r.data[0].embedding for r in responses]
    return embeddings


embeddings = asyncio.run(main())
Sources
- https://platform.openai.com/docs/guides/embeddings
- https://github.com/openai/openai-cookbook/blob/main/examples/Semantic_text_search_using_embeddings.ipynb