Spaces:
Sleeping
Sleeping
| import os | |
| import numpy as np | |
| from google.cloud import aiplatform | |
# --- Configuration ---
# Project and region can be supplied through the environment so this file
# does not have to be edited per deployment; the string literals are only
# fallback placeholders and must be replaced if the variables are unset.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "your-gcp-project-id")  # Google Cloud project ID
REGION = os.environ.get("GOOGLE_CLOUD_REGION", "your-gcp-region")  # e.g. "us-central1"
# The "004" text embedding model is published as "text-embedding-004"; the
# older "textembedding-gecko" family is not known to have an @004 version.
# NOTE(review): confirm against the current Vertex AI model list.
EMBEDDING_MODEL_NAME = "text-embedding-004"

# --- Initialize Vertex AI ---
# Runs at import time: sets the default project/location used by the
# Vertex AI SDK calls made further down in this module.
aiplatform.init(project=PROJECT_ID, location=REGION)
class GoogleTextEmbedding004:
    """Thin wrapper around a Vertex AI text embedding model.

    Loads the model once at construction and exposes ``encode`` to turn
    text into a 2D numpy array of embedding vectors.
    """

    def __init__(self, model_name=EMBEDDING_MODEL_NAME):
        """Load the pretrained embedding model.

        Args:
            model_name (str): Vertex AI embedding model identifier.
                Defaults to the module-level ``EMBEDDING_MODEL_NAME``.
        """
        # TextEmbeddingModel is exposed by the ``vertexai`` namespace that
        # ships with google-cloud-aiplatform; ``google.cloud.aiplatform``
        # itself has no such attribute. Imported locally so this class does
        # not depend on a change to the file's import block.
        from vertexai.language_models import TextEmbeddingModel

        self.model_name = model_name
        self.embedding_model = TextEmbeddingModel.from_pretrained(self.model_name)
        print(f"[INFO] Loaded Google Text Embedding Model: {self.model_name}")

    def encode(self, texts, batch_size=250):
        """Generate embeddings for the given texts.

        Args:
            texts (str or iterable of str): Text(s) to embed. A single
                string is treated as a one-element list rather than being
                iterated character by character.
            batch_size (int): Maximum number of texts sent per API request.
                The service caps the number of inputs per request
                (presumably 250 for us-central1 and fewer elsewhere --
                confirm for your region/model and lower this if needed).

        Returns:
            numpy.ndarray: A 2D array where row ``i`` is the embedding of
            ``texts[i]``. Empty input yields an array of shape ``(0, 0)``
            without calling the API.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # Normalise the input to a list so it can be sliced into batches.
        texts = [texts] if isinstance(texts, str) else list(texts)
        if not texts:
            # Nothing to embed: skip the network round trip entirely.
            return np.empty((0, 0))

        vectors = []
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            response = self.embedding_model.get_embeddings(batch)
            vectors.extend(item.values for item in response)
        return np.array(vectors)
if __name__ == "__main__":
    # Demo run: embed a few sample sentences and show the result.
    sample_texts = [
        "This is a sample sentence for embedding.",
        "Another example text to generate a vector for.",
        "Google's powerful language models.",
    ]

    embedder = GoogleTextEmbedding004()
    vectors = embedder.encode(sample_texts)

    # Report the overall array shape, then the first two embedding rows.
    print("Embeddings shape:", vectors.shape)
    print("Embeddings (first 2 vectors):\n", vectors[:2])