#
# SPDX-FileCopyrightText: Hadad <[email protected]>
# SPDX-License-Identifier: Apache-2.0
#
import os
from ollama import AsyncClient
import gradio as gr
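# Streaming chat handler for gr.ChatInterface. Gradio passes the latest user
# message, the running history, and the slider values defined in the sidebar
# below; the handler yields the growing reply so the UI streams it live.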
async def playground(
    message,
    history,
    num_ctx,
    temperature,
    repeat_penalty,
    min_p,
    top_k,
    top_p,
    presence_penalty
):
    # Ignore empty or non-string submissions instead of calling the model.
    if not isinstance(message, str) or not message.strip():
        yield []
        return
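    # The Ollama server URL and API key are read from the environment.
    # Assumed example values, not taken from this repo:
    #   OLLAMA_API_BASE_URL="http://localhost:11434"  (Ollama's default port)
    #   OLLAMA_API_KEY="<token sent as a Bearer header below>"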
    client = AsyncClient(
        host=os.getenv("OLLAMA_API_BASE_URL"),
        headers={
            "Authorization": f"Bearer {os.getenv('OLLAMA_API_KEY')}"
        }
    )
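    # Rebuild the conversation for Ollama, keeping only well-formed
    # {"role": ..., "content": ...} entries from the Gradio history.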
    messages = []
    for item in history:
        if isinstance(item, dict) and "role" in item and "content" in item:
            messages.append({
                "role": item["role"],
                "content": item["content"]
            })
    messages.append({"role": "user", "content": message})
response = ""
async for part in await client.chat(
model="qwen3:0.6b",
messages=messages,
options={
"num_ctx": int(num_ctx),
"temperature": float(temperature),
"repeat_penalty": float(repeat_penalty),
"min_p": float(min_p),
"top_k": int(top_k),
"top_p": float(top_p),
"presence_penalty": float(presence_penalty)
},
stream=True
):
response += part.get("message", {}).get("content", "")
yield response
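# A minimal direct-SDK sketch (assumed host value) equivalent to one turn of
# the handler above, useful for testing outside Gradio:
#
#   import asyncio
#   from ollama import AsyncClient
#
#   async def main():
#       client = AsyncClient(host="http://localhost:11434")
#       async for part in await client.chat(
#           model="qwen3:0.6b",
#           messages=[{"role": "user", "content": "Hello"}],
#           stream=True
#       ):
#           print(part.get("message", {}).get("content", ""), end="")
#
#   asyncio.run(main())
#
# UI layout: a sidebar with project notes and sampling-parameter sliders,
# plus a ChatInterface bound to the streaming handler above.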
with gr.Blocks(
    fill_height=True,
    fill_width=True
) as app:
    with gr.Sidebar():
        gr.Markdown("## Ollama Playground by UltimaX Intelligence")
        gr.HTML(
            """
            This Space runs the <b><a href=
            "https://huggingface.co/Qwen/Qwen3-0.6B"
            target="_blank">Qwen 3 (0.6B)</a></b> model from
            <b>Alibaba Cloud</b>, hosted on a server using <b>Ollama</b>
            and accessed via the <b>Ollama Python SDK</b>.<br><br>
            Official <b>documentation</b> for using Ollama with the
            Python SDK can be found
            <b><a href="https://github.com/ollama/ollama-python"
            target="_blank">here</a></b>.<br><br>
            Qwen 3 (0.6B) runs entirely on a <b>dual-core CPU</b>.
            Thanks to its small size, the model can
            operate efficiently on minimal hardware.<br><br>
            The Qwen 3 (0.6B) model can also be viewed or downloaded
            from the official Ollama website
            <b><a href="https://ollama.com/library/qwen3:0.6b"
            target="_blank">here</a></b>.<br><br>
            <b>Like this project? You can support me by buying a
            <a href="https://ko-fi.com/hadad" target="_blank">
            coffee</a></b>.
            """
        )
gr.Markdown("---")
gr.Markdown("## Model Parameters")
        num_ctx = gr.Slider(
            minimum=512,
            maximum=1024,
            value=512,
            step=128,
            label="Context Length (num_ctx)",
            info="Maximum context window size, kept small for CPU-only hosting"
        )
gr.Markdown("")
temperature = gr.Slider(
minimum=0.1,
maximum=2.0,
value=0.6,
step=0.1,
label="Temperature",
info="Controls randomness in generation"
)
gr.Markdown("")
repeat_penalty = gr.Slider(
minimum=0.1,
maximum=2.0,
value=1.0,
step=0.1,
label="Repeat Penalty",
info="Penalty for repeating tokens"
)
gr.Markdown("")
min_p = gr.Slider(
minimum=0.0,
maximum=1.0,
value=0.00,
step=0.01,
label="Min P",
info="Minimum probability threshold"
)
gr.Markdown("")
top_k = gr.Slider(
minimum=0,
maximum=100,
value=20,
step=1,
label="Top K",
info="Number of top tokens to consider"
)
gr.Markdown("")
top_p = gr.Slider(
minimum=0.0,
maximum=1.0,
value=0.95,
step=0.05,
label="Top P",
info="Cumulative probability threshold"
)
gr.Markdown("")
        presence_penalty = gr.Slider(
            minimum=0.0,
            maximum=2.0,
            value=1.5,
            step=0.1,
            label="Presence Penalty",
            info="Penalizes tokens already present, encouraging new topics"
        )
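    # ChatInterface calls playground() with additional_inputs appended after
    # (message, history), so the order here must match the parameter order
    # in the function signature.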
    gr.ChatInterface(
        fn=playground,
        additional_inputs=[
            num_ctx,
            temperature,
            repeat_penalty,
            min_p,
            top_k,
            top_p,
            presence_penalty
        ],
        chatbot=gr.Chatbot(
            label="Ollama | Qwen 3 (0.6B)",
            type="messages",
            show_copy_button=True,
            allow_tags=["think"],
            scale=1
        ),
        type="messages",
        examples=[
            ["Please introduce yourself."],
            ["What caused World War II?"],
            ["Give me a short introduction to large language models."],
            ["Explain quantum computers."]
        ],
        cache_examples=False,
        show_api=False
    )
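# Bind to all interfaces so the app is reachable inside its container;
# pwa=True lets browsers install the page as a progressive web app.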
app.launch(
    server_name="0.0.0.0",
    pwa=True
)