✅ Create an index
✅ Convert a document into embeddings
✅ Upsert vectors with IDs
✅ Query similar content
Make sure you have the following installed (the `ServerlessSpec` API requires pinecone-client v3 or newer):
pip install "pinecone-client>=3.0" langchain openai
Also, set your environment variables (via a .env file or manually in your shell):
export PINECONE_API_KEY=your_pinecone_key
export OPENAI_API_KEY=your_openai_key
import os

from pinecone import Pinecone, ServerlessSpec

# Initialise the Pinecone client. NOTE: the module-level ``pinecone.init()``
# call was removed in pinecone-client v3 — the same release that introduced
# ``ServerlessSpec`` — so the two APIs must not be mixed; v3+ uses an
# explicit client object instead.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Index configuration: name and embedding dimension.
index_name = "demo-index"
dimension = 1536  # For OpenAI embeddings

# Create the index only if it does not already exist.
if index_name not in [idx.name for idx in pc.list_indexes()]:
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Connect to the index; this handle is used by the rest of the script
# for upserts and queries.
index = pc.Index(index_name)
from langchain.embeddings import OpenAIEmbeddings

# Embedding model; reads OPENAI_API_KEY from the environment.
embedding_model = OpenAIEmbeddings()

texts = [
    "AI is transforming the world.",
    "Pinecone helps you do similarity search.",
    "LangChain connects LLMs with data and tools.",
]

# Turn every document into an embedding vector (one vector per text).
embeddings = embedding_model.embed_documents(texts)

# Pair each vector with a stable ID and keep the raw text as metadata
# so query results can show the matched content.
to_upsert = []
for position, (doc, vector) in enumerate(zip(texts, embeddings)):
    to_upsert.append(
        {
            "id": f"text-{position}",
            "values": vector,
            "metadata": {"content": doc},
        }
    )

# Push the whole batch into the Pinecone index.
index.upsert(vectors=to_upsert)
query_text = "What does Pinecone do?"

# Embed the query with the same model used for the documents so the
# vectors live in the same space.
query_embedding = embedding_model.embed_query(query_text)

# Retrieve the two nearest neighbours, including their stored metadata.
result = index.query(vector=query_embedding, top_k=2, include_metadata=True)

# Show each hit: similarity score plus the original text.
for match in result.matches:
    score = match.score
    meta = match.metadata
    print(f"Score: {score:.2f}")
    print("Matched content:", meta['content'])

# Delete the index if needed
# pinecone.delete_index(index_name)
Score: 0.87
Matched content: Pinecone helps you do similarity search.
Score: 0.63
Matched content: LangChain connects LLMs with data and tools.