# Source-page residue (not part of the program): Space status "Sleeping"; file size 1,708 bytes; revision dbd74e6.
import os
import numpy as np
from google.cloud import aiplatform
# --- Configuration ---
# Every setting can be overridden through an environment variable so this file
# does not need to be edited per deployment. When a variable is unset, the
# original placeholder value is used, so the placeholders must still be
# replaced (or the variables exported) before the script can reach Vertex AI.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "your-gcp-project-id")  # Google Cloud project ID
REGION = os.environ.get("GOOGLE_CLOUD_REGION", "your-gcp-region")  # Google Cloud region, e.g. "us-central1"
# NOTE(review): confirm this model ID against the current Vertex AI model list;
# published IDs appear to follow the forms "textembedding-gecko@003" and
# "text-embedding-004" — TODO verify before relying on the default.
EMBEDDING_MODEL_NAME = os.environ.get("EMBEDDING_MODEL_NAME", "textembedding-gecko@004")

# --- Initialize Vertex AI ---
# Executed at import time, so importing this module configures the SDK with
# the project and region resolved above.
aiplatform.init(project=PROJECT_ID, location=REGION)
class GoogleTextEmbedding004:
    """Thin wrapper that turns texts into embedding vectors via a Vertex AI model.

    The model handle is created once in ``__init__`` and reused by every
    ``encode`` call.
    """

    def __init__(self, model_name=EMBEDDING_MODEL_NAME):
        """Load the pretrained embedding model identified by ``model_name``.

        Args:
            model_name (str): Model identifier passed to ``from_pretrained``.
                Defaults to the module-level ``EMBEDDING_MODEL_NAME``.
        """
        self.model_name = model_name
        # NOTE(review): confirm that `aiplatform` really exposes
        # `TextEmbeddingModel`; the Vertex AI SDK documents this class under
        # `vertexai.language_models` — TODO verify and adjust the import if so.
        self.embedding_model = aiplatform.TextEmbeddingModel.from_pretrained(self.model_name)
        print(f"[INFO] Loaded Google Text Embedding Model: {self.model_name}")

    def encode(self, texts):
        """Generate one embedding vector per input text.

        Args:
            texts (str or iterable of str): The texts to embed. A bare string
                is treated as a single text rather than being split into
                characters.

        Returns:
            numpy.ndarray: A 2D array in which row ``i`` is the embedding of
            ``texts[i]``. For empty input the result has shape ``(0, 0)`` and
            no request is sent to the service.
        """
        # Normalise the input to a concrete list: this accepts generators and
        # keeps a lone string from being interpreted as a sequence of texts.
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)

        # Nothing to embed: skip the remote call and still honour the
        # documented 2D return shape (np.array([]) would be 1D).
        if not texts:
            return np.empty((0, 0))

        embeddings_response = self.embedding_model.get_embeddings(texts)
        return np.array([e.values for e in embeddings_response])
def main():
    """Embed three sample sentences and print the result's shape and first two rows."""
    # Example Usage
    google_embedder = GoogleTextEmbedding004()
    sentences = [
        "This is a sample sentence for embedding.",
        "Another example text to generate a vector for.",
        "Google's powerful language models.",
    ]
    embeddings = google_embedder.encode(sentences)
    print("Embeddings shape:", embeddings.shape)
    print("Embeddings (first 2 vectors):\n", embeddings[:2])


if __name__ == "__main__":
    main()