# ai/services/embedding_models/gemini_test.py
# Ahmed Tarek
# Add application file
# 61d9463
# from embedding_model import ONNXDistilUSEModel
# from services.embedding_models.embedding_e5_model import ONNXE5Model
from services.vector_db.similarity_model import VectorDB # Changed to absolute import
from services.embedding_models.google_vertex_embedding import GoogleVertexAIEmbeddingModel
import traceback
import os
import time
# Set KMP_DUPLICATE_LIB_OK so the process keeps running when more than one
# OpenMP runtime ends up loaded (presumably pulled in by the embedding and
# vector-search dependencies -- verify which ones). This is a workaround, not
# a fix for the underlying duplicate-runtime situation.
# NOTE(review): this assignment executes after the imports above; if any of
# them initialises OpenMP at import time the variable may be set too late --
# confirm, and move it above those imports if so.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
def _sample_items():
    """Return the three travel listings used to seed the vector database."""
    return [
        {
            "id": "1",
            "location": "Cairo",
            "description": "Enjoy the cultural festivals in Cairo this weekend!",
            "itineraries": [
                {
                    "dayNumber": 1,
                    "activities": [
                        "Egyptian Museum guided tour",
                        "Traditional music performance",
                        "Nile dinner cruise",
                    ],
                }
            ],
        },
        {
            "id": "2",
            "location": "Alexandria",
            "description": "Visit the stunning beaches of Alexandria",
            "itineraries": [
                {
                    "dayNumber": 1,
                    "activities": [
                        "Bibliotheca Alexandrina tour",
                        "Mediterranean beach visit",
                        "Seafood dining experience",
                    ],
                }
            ],
        },
        {
            "id": "3",
            "location": "Cairo",
            "description": "Music and dance night at Cairo Opera House",
            "itineraries": [
                {
                    "dayNumber": 1,
                    "activities": [
                        "Opera house tour",
                        "Classical concert",
                        "Downtown Cairo nightlife",
                    ],
                }
            ],
        },
    ]


def _updated_items():
    """Return the replacement payload for listing ``"1"`` used by the update step."""
    return [
        {
            "id": "1",
            "location": "Cairo",
            "description": "Updated festival description with new exiting sessions!",
            "itineraries": [
                {
                    "dayNumber": 1,
                    "activities": [
                        "New activity 1",
                        "New activity 2",
                        "Additional activity",
                    ],
                }
            ],
        }
    ]


def _require_response(response):
    """Return *response* unchanged, raising ``RuntimeError`` if it is ``None``."""
    if response is None:
        raise RuntimeError("No response received from recommendation query")
    return response


def _print_recommendations(response):
    """Print the id and similarity score of each entry under ``"recommendations"``.

    Args:
        response: Mapping returned by ``vector_db.get_similar_by_ids``. Only
            the ``"recommendations"`` key is read; a missing key prints
            nothing.
    """
    for item in response.get("recommendations", []):
        print("-" * 50)
        print(f"ID: {item['id']}")
        # NOTE(review): assumes 'distance' is a cosine-style distance, so that
        # 1 - distance can be read as a similarity -- confirm against VectorDB.
        print(f"Similarity Score: {1 - item['distance']:.4f}")


def main(project=None):
    """Run an end-to-end smoke test of the embedding model and vector database.

    The steps are: build the Vertex AI embedding model, build the vector
    database, encode the sample descriptions, store the sample items, update
    item ``"1"``, query for items similar to ``"1"``, then delete item ``"2"``
    and query again. Progress and results are printed to stdout.

    Any exception is reported (message plus traceback) instead of being
    propagated, so this function always returns ``None``.

    Args:
        project: Google Cloud project id passed to the embedding model. When
            omitted, the ``GOOGLE_CLOUD_PROJECT`` environment variable is
            used, falling back to the placeholder ``"YOUR_PROJECT_ID"``.
    """
    if project is None:
        project = os.environ.get("GOOGLE_CLOUD_PROJECT", "YOUR_PROJECT_ID")

    data = _sample_items()
    updated_data = _updated_items()

    try:
        print("Step 1: Starting model initialization...")
        model = GoogleVertexAIEmbeddingModel(project=project)
        print("Model initialization completed successfully")

        print("\nStep 2: Starting vector database initialization...")
        vector_db = VectorDB()
        print("Vector database initialization completed successfully")

        # Embeddings come from encode(); the model object is not called directly.
        print("\nTesting model encoding...")
        embeddings = model.encode([item["description"] for item in data])
        # NOTE(review): one row per description is expected; the embedding
        # width depends on the configured Vertex AI model -- confirm the value.
        print("Embeddings shape:", embeddings.shape)
        print("Testing completed successfully")

        print("\nStep 3: Starting embeddings storage...")
        vector_db.store_embeddings(data, model)
        print("Initial data stored successfully")

        print("\nStep 4: Updating embeddings...")
        vector_db.update_embeddings(updated_data, model)
        print("Updates completed successfully")

        print("\nStep 5: Starting recommendation test...")
        # perf_counter() is monotonic, so the elapsed time cannot be skewed by
        # wall-clock adjustments during the query.
        start_time = time.perf_counter()
        response = vector_db.get_similar_by_ids(["1"], top_k=2)
        end_time = time.perf_counter()
        print(f"Recommendation test completed in {end_time - start_time:.4f} seconds")

        response = _require_response(response)
        print("\nResults:")
        print("Query:", response.get("query", "No query found"))
        print("\nRecommendations:")
        _print_recommendations(response)

        print("\nStep 6: Testing delete functionality...")
        try:
            vector_db.delete_items(["2"])
            print("Delete operation completed successfully")

            print("\nGetting recommendations after deletion...")
            # Checked like the first query so a missing response is reported
            # as such rather than as an AttributeError on None.
            response = _require_response(
                vector_db.get_similar_by_ids(["1"], top_k=1)
            )
            print("\nUpdated Results:")
            _print_recommendations(response)
        except Exception as e:
            # The deletion test is best-effort: report the failure with its
            # traceback, but do not treat it as a failure of the whole run.
            print(f"Error during deletion test: {str(e)}")
            traceback.print_exc()
    except Exception as e:
        # Top-level boundary of the script: report and return.
        print(f"\nError occurred: {str(e)}")
        traceback.print_exc()


# Run the smoke test only when this file is executed as a script.
if __name__ == "__main__":
    main()