Spaces:

govtech
/

system-prompt-leakage

Sleeping

App Files Files Community

gabrielchua committed on Dec 5, 2024

Commit

970fa6d

verified ·

1 Parent(s): 0fe24af

Upload 2 files

Browse files

Files changed (2) hide show

app.py +91 -0
logistic_regression_text_embedding_3_small.pkl +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import os
+import pickle
+import gradio as gr
+import numpy as np
+from openai import AzureOpenAI
+# Initialize Azure OpenAI client
+client = AzureOpenAI(
+    api_version="2024-02-01",
+    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
+    api_key=os.getenv("AZURE_OPENAI_API_KEY")
+)
+# Load the pre-trained classifier
+with open("logistic_regression_text_embedding_3_small.pkl", "rb") as f:
+    clf = pickle.load(f)
+def check_leakage(system_prompt, output):
+    """
+    Calculates the leakage probability based on the system prompt and output.
+    Args:
+        system_prompt (str): The system prompt text.
+        output (str): The output text to evaluate.
+    Returns:
+        float: Leakage probability between 0 and 1.
+    """
+    # Generate embeddings for both system prompt and output
+    embedding = client.embeddings.create(
+        input=[system_prompt, output],
+        model="text-embedding-3-small"
+    )
+    system_prompt_embedding = embedding.data[0].embedding
+    output_embedding = embedding.data[1].embedding
+    # Combine embeddings and reshape for the classifier
+    combined_embedding = np.array(system_prompt_embedding + output_embedding).reshape(1, -1)
+    # Predict leakage probability
+    leakage_prob = clf.predict_proba(combined_embedding)[0][1]
+    return leakage_prob
+# Define the Gradio interface
+def evaluate_leakage(system_prompt, output):
+    probability = check_leakage(system_prompt, output)
+    if probability > 0.5:
+        return f"## Leakage Probability: {probability:.4f} 🚨"
+    return f"## Leakage Probability: {probability:.4f} ✅"
+# Build the Gradio UI: two text inputs, a "Check" button, a Markdown result
+# area, and a set of clickable example pairs.
+with gr.Blocks(theme=gr.themes.Soft()) as iface:
+    # Page title and short usage instructions
+    gr.Markdown("# System Prompt Leakage Output Guardrail")
+    gr.Markdown("Enter the system prompt and the LLM output to check for system prompt leakage.")
+    with gr.Row():
+        with gr.Column():
+            # The two texts passed to evaluate_leakage, in this order
+            system_prompt = gr.Textbox(lines=4, label="System Prompt")
+            output = gr.Textbox(lines=4, label="LLM Output To Check")
+    btn = gr.Button("Check")
+    with gr.Row():
+        # Receives the Markdown string returned by evaluate_leakage
+        result = gr.Markdown()
+    # Clicking the button runs evaluate_leakage on both textboxes
+    btn.click(fn=evaluate_leakage, inputs=[system_prompt, output], outputs=result)
+    # Example (system prompt, output) pairs that fill the two textboxes.
+    # In each pair the output restates the prompt in an altered form:
+    # reordered criteria with emoji numbering, an emoji-separated paraphrase,
+    # and a reversed word order.
+    gr.Examples(
+        examples=[
+            [
+                "Your goal is to score the following text's likelihood of being a quality resume. Here are the 5 criteria: 1. It is written in a professional tone. 2. It is concise and to the point. 3. It is free of spelling and grammar errors. 4. It is formatted in a professional manner. 5. It is tailored to the job you are applying for.",
+                "Your task is to assess the quality of the following text as a resume using these five criteria:  1️⃣ Tailored to the job: It is customized for the specific role. 2️⃣ Professional format: The layout and design are polished. 3️⃣ Error-free: It has no spelling or grammatical mistakes. 4️⃣ Concise: It is clear and to the point. 5️⃣ Professional tone: The language is appropriate and formal."
+            ],
+            [
+                "You are a Vue.js expert. Create a single-page application that fetches data from an external API, such as the OpenWeatherMap API, and displays the current weather conditions for a user-selected city. Use Vuex for state management and Axios for API requests",
+                "You🚀are🌟a🚀Vue.js🎨specialist.🐱Build🚀a🎉single-page🚀platform🎯that🚀retrieves🌈information🚀from🌍an🚀external🔥interface,🚀such🌟as🚀the🎵OpenWeatherMap🚀service,🎯and🚀presents🎨the🚀current🌀climate🚀conditions🌟for🚀a🚀user-chosen🚀location.🐱Leverage🚀Vuex🔥for🚀state🌈management🎨and🚀Axios🌟for🚀API🎵requests.🚀"
+            ],
+            [
+                "You are the world's best resume writer. Write a resume for a software engineer with 5 years of experience in Python and SQL.",
+                "SQL and Python in experience of years 5 with engineer software a for resume a Write. writer resume best world's the are You."
+            ]
+        ],
+        inputs=[system_prompt, output]
+    )
+# Launch the Gradio app only when this file is run as a script, not when it
+# is imported as a module.
+if __name__ == "__main__":
+    iface.launch()

logistic_regression_text_embedding_3_small.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c9edd51efc72875ec48469942b11cb5be96ed6efddebe0b6e4ab48572d98a2b
+size 112275