File size: 1,708 Bytes
dbd74e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import numpy as np
from google.cloud import aiplatform

# --- Configuration ---
PROJECT_ID = "your-gcp-project-id"  # Replace with your Google Cloud project ID
REGION = "your-gcp-region"  # Replace with your Google Cloud region (e.g., "us-central1")
# Model ID as listed in the Vertex AI text embeddings model reference.
# NOTE(review): the "textembedding-gecko@NNN" naming appears to stop at @003;
# the 004 generation is published as "text-embedding-004" -- confirm against
# the model list for your region before deploying.
EMBEDDING_MODEL_NAME = "text-embedding-004"

# --- Initialize Vertex AI ---
# Runs at import time so every model loaded afterwards uses this project/region.
aiplatform.init(project=PROJECT_ID, location=REGION)

class GoogleTextEmbedding004:
    """Thin wrapper that turns text into embedding vectors via Vertex AI.

    The model is loaded once in ``__init__``; ``encode`` then sends texts to
    it and stacks the returned vectors into a NumPy array.
    """

    def __init__(self, model_name=EMBEDDING_MODEL_NAME):
        """Load the pretrained embedding model.

        Args:
            model_name (str): Model identifier handed to ``from_pretrained``.
                Defaults to the module-level ``EMBEDDING_MODEL_NAME``.
        """
        self.model_name = model_name
        # NOTE(review): the Vertex AI SDK documents TextEmbeddingModel under
        # ``vertexai.language_models``; confirm that the installed
        # google-cloud-aiplatform version also exposes it on ``aiplatform``.
        self.embedding_model = aiplatform.TextEmbeddingModel.from_pretrained(self.model_name)
        print(f"[INFO] Loaded Google Text Embedding Model: {self.model_name}")

    def encode(self, texts, batch_size=None):
        """
        Generates embeddings for the given texts using the loaded embedding model.

        Args:
            texts (list of str): The strings to embed. A single bare string is
                treated as a one-element list instead of being split into
                characters.
            batch_size (int, optional): Maximum number of texts sent per
                ``get_embeddings`` call. ``None`` (the default) sends
                everything in a single request, as before. Useful when the
                service caps the number of inputs per request.

        Returns:
            numpy.ndarray: A 2D numpy array where each row represents the embedding
                           for the corresponding input text. Empty input yields
                           an array of shape ``(0, 0)`` without calling the
                           service.

        Raises:
            ValueError: If ``batch_size`` is given and is smaller than 1.
        """
        # A lone string is iterable, so without this it would be embedded
        # one character at a time.
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)

        # Nothing to embed: skip the request and keep the 2D contract
        # (np.array([]) would be 1D with shape (0,)).
        if not texts:
            return np.empty((0, 0))

        if batch_size is None:
            batches = [texts]
        else:
            if batch_size < 1:
                raise ValueError("batch_size must be a positive integer")
            batches = [
                texts[start:start + batch_size]
                for start in range(0, len(texts), batch_size)
            ]

        vectors = []
        for batch in batches:
            response = self.embedding_model.get_embeddings(batch)
            vectors.extend(item.values for item in response)
        return np.array(vectors)

if __name__ == "__main__":
    # Demo run: embed a few sample sentences and show what comes back.
    demo_texts = [
        "This is a sample sentence for embedding.",
        "Another example text to generate a vector for.",
        "Google's powerful language models.",
    ]
    embedder = GoogleTextEmbedding004()
    vectors = embedder.encode(demo_texts)

    # Report the overall matrix shape, then a preview of the first two rows.
    print("Embeddings shape:", vectors.shape)
    preview = vectors[:2]
    print("Embeddings (first 2 vectors):\n", preview)