import asyncio

import streamlit as st
import tokonomics

from utils import create_model_hierarchy
from utils_on import analyze_hf_model  # New import for On Premise Estimator functionality

st.set_page_config(page_title="LLM Pricing Calculator", layout="wide")
# --------------------------
# Async Data Loading Function
# --------------------------
async def load_data():
    """Fetch the available models and their per-token pricing asynchronously."""
    AVAILABLE_MODELS = await tokonomics.get_available_models()
    hierarchy = create_model_hierarchy(AVAILABLE_MODELS)
    FILTERED_MODELS = []
    MODEL_PRICING = {}
    PROVIDERS = list(hierarchy.keys())
    # Walk provider -> family -> version -> region to collect every model ID
    # and look up its pricing.
    for provider in PROVIDERS:
        for model_family in hierarchy[provider]:
            for model_version in hierarchy[provider][model_family]:
                for region in hierarchy[provider][model_family][model_version]:
                    model_id = hierarchy[provider][model_family][model_version][region]
                    MODEL_PRICING[model_id] = await tokonomics.get_model_costs(model_id)
                    FILTERED_MODELS.append(model_id)
    return FILTERED_MODELS, MODEL_PRICING, PROVIDERS
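# A minimal sketch of the assumed hierarchy shape (illustrative IDs, not real
# tokonomics output), showing what the nested loops above flatten:
#
#   hierarchy = {
#       "azure": {"gpt-4": {"turbo": {"eastus": "azure/gpt-4-turbo"}}},
#   }
#
# which would yield FILTERED_MODELS == ["azure/gpt-4-turbo"] and MODEL_PRICING
# keyed by model ID, e.g. {"azure/gpt-4-turbo": {"input_cost_per_token": ...}}.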
# --------------------------
# Provider Change Function
# --------------------------
def provider_change(provider, selected_type, all_types=("text", "vision", "video", "image")):
    """Filter models based on the selected provider and type."""
    all_models = st.session_state.get("models", [])
    new_models = []
    others = [a_type for a_type in all_types if selected_type != a_type]
    for model_name in all_models:
        if provider in model_name:
            if selected_type in model_name:
                new_models.append(model_name)
            elif any(other in model_name for other in others):
                continue  # model belongs to a different modality
            else:
                new_models.append(model_name)  # untyped match for this provider
    return new_models if new_models else all_models
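# Illustrative example (hypothetical model IDs, for illustration only):
#   st.session_state["models"] = ["azure/gpt-4-text", "azure/dalle-image", "openai/gpt-4-text"]
#   provider_change("azure", "text")  ->  ["azure/gpt-4-text"]
# IDs mentioning another modality are skipped, and if nothing matches at all
# the full model list is returned as a fallback.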
# --------------------------
# Estimate Cost Function
# --------------------------
def estimate_cost(num_alerts, input_size, output_size, model_id):
    """Estimate daily, monthly, and yearly cost for the selected model."""
    pricing = st.session_state.get("pricing", {})
    cost_token = pricing.get(model_id)
    if not cost_token:
        return "NA"
    # Simple heuristic used by the app: ~1.3 tokens per unit of input size.
    input_tokens = round(input_size * 1.3)
    output_tokens = round(output_size * 1.3)
    price_per_alert = cost_token.get("input_cost_per_token", 0) * input_tokens + \
                      cost_token.get("output_cost_per_token", 0) * output_tokens
    price_day = price_per_alert * num_alerts
    return f"""## Estimated Cost:

Day Price: {price_day:0.2f} USD

Month Price: {price_day * 31:0.2f} USD

Year Price: {price_day * 365:0.2f} USD
"""
# --------------------------
# Load Data into Session State (only once)
# --------------------------
if "data_loaded" not in st.session_state:
    with st.spinner("Loading pricing data..."):
        models, pricing, providers = asyncio.run(load_data())
    st.session_state["models"] = models
    st.session_state["pricing"] = pricing
    st.session_state["providers"] = providers
    st.session_state["data_loaded"] = True
# --------------------------
# Sidebar
# --------------------------
with st.sidebar:
    st.image(
        "https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
        use_container_width=True,
    )
    st.markdown("Visit: [https://www.priam.ai](https://www.priam.ai)")
    st.divider()
    st.title("LLM Pricing Calculator")
# --------------------------
# Pills Navigation (Using st.pills)
# --------------------------
# st.pills creates a pill-style selection widget.
page = st.pills(
    "Head",
    options=["Model Selection", "On Premise Estimator", "About"],
    selection_mode="single",
    default="Model Selection",  # change this if you want a different default page
    label_visibility="hidden",
)
# --------------------------
# Helper: Format Analysis Report
# --------------------------
def format_analysis_report(analysis_result: dict) -> str:
    """Convert the raw analysis_result dict into a human-readable report."""
    if "error" in analysis_result:
        return f"**Error:** {analysis_result['error']}"
    lines = []
    lines.append(f"### Model Analysis Report for `{analysis_result.get('model_id', 'Unknown Model')}`\n")
    lines.append(f"**Parameter Size:** {analysis_result.get('parameter_size', 'N/A')} Billion parameters\n")
    lines.append(f"**Precision:** {analysis_result.get('precision', 'N/A')}\n")
    vram = analysis_result.get("vram_requirements", {})
    lines.append("#### VRAM Requirements:")
    lines.append(f"- Model Size: {vram.get('model_size_gb', 0):.2f} GB")
    lines.append(f"- KV Cache: {vram.get('kv_cache_gb', 0):.2f} GB")
    lines.append(f"- Activations: {vram.get('activations_gb', 0):.2f} GB")
    lines.append(f"- Overhead: {vram.get('overhead_gb', 0):.2f} GB")
    lines.append(f"- **Total VRAM:** {vram.get('total_vram_gb', 0):.2f} GB\n")
    compatible_gpus = analysis_result.get("compatible_gpus", [])
    lines.append("#### Compatible GPUs:")
    if compatible_gpus:
        for gpu in compatible_gpus:
            lines.append(f"- {gpu}")
    else:
        lines.append("- None found")
    lines.append(f"\n**Largest Compatible GPU:** {analysis_result.get('largest_compatible_gpu', 'N/A')}\n")
    # GPU performance reporting is currently disabled:
    # gpu_perf = analysis_result.get("gpu_performance", {})
    # if gpu_perf:
    #     lines.append("#### GPU Performance:")
    #     for gpu, perf in gpu_perf.items():
    #         lines.append(f"**{gpu}:**")
    #         lines.append(f" - Tokens per Second: {perf.get('tokens_per_second', 0):.2f}")
    #         lines.append(f" - FLOPs per Token: {perf.get('flops_per_token', 0):.2f}")
    #         lines.append(f" - Effective TFLOPS: {perf.get('effective_tflops', 0):.2f}\n")
    # else:
    #     lines.append("#### GPU Performance: N/A\n")
    return "\n".join(lines)
# --------------------------
# Render Content Based on Selected Pill
# --------------------------
if page == "Model Selection":
    st.divider()
    st.header("LLM Pricing App")

    # --- Row 1: Provider/Type and Model Selection ---
    col_left, col_right = st.columns(2)
    with col_left:
        selected_provider = st.selectbox(
            "Select a provider",
            st.session_state["providers"],
            index=st.session_state["providers"].index("azure") if "azure" in st.session_state["providers"] else 0,
        )
        selected_type = st.radio("Select type", options=["text", "image"], index=0)
    with col_right:
        filtered_models = provider_change(selected_provider, selected_type)
        if filtered_models:
            default_model = "o1" if "o1" in filtered_models else filtered_models[0]
            selected_model = st.selectbox(
                "Select a model",
                options=filtered_models,
                index=filtered_models.index(default_model),
            )
        else:
            selected_model = None
            st.write("No models available")
    # --- Row 2: Alert Stats ---
    col1, col2, col3 = st.columns(3)
    with col1:
        num_alerts = st.number_input(
            "Security Alerts Per Day", value=100, min_value=1, step=1,
            help="Number of security alerts to analyze daily",
        )
    with col2:
        input_size = st.number_input(
            "Alert Content Size (characters)", value=1000, min_value=1, step=1,
            help="Include logs, metadata, and context per alert",
        )
    with col3:
        output_size = st.number_input(
            "Analysis Output Size (characters)", value=500, min_value=1, step=1,
            help="Expected length of security analysis and recommendations",
        )
    # --- Row 3: Buttons ---
    btn_col1, btn_col2 = st.columns(2)
    with btn_col1:
        if st.button("Estimate"):
            if selected_model:
                st.session_state["result"] = estimate_cost(num_alerts, input_size, output_size, selected_model)
            else:
                st.session_state["result"] = "No model selected."
    with btn_col2:
        if st.button("Refresh Pricing Data"):
            with st.spinner("Refreshing pricing data..."):
                models, pricing, providers = asyncio.run(load_data())
            st.session_state["models"] = models
            st.session_state["pricing"] = pricing
            st.session_state["providers"] = providers
            st.success("Pricing data refreshed!")
    st.divider()
    st.markdown("### Results")
    if "result" in st.session_state:
        st.write(st.session_state["result"])
    else:
        st.write("Use the buttons above to estimate costs.")
    if st.button("Clear"):
        st.session_state.pop("result", None)
        st.rerun()  # rerun immediately so the cleared result disappears
elif page == "On Premise Estimator":
    st.divider()
    st.header("On Premise Estimator")
    st.markdown("Enter a Hugging Face model ID to perform an on-premise analysis using the provided estimator.")
    hf_model_id = st.text_input("Hugging Face Model ID", value="meta-llama/Llama-4-Scout-17B-16E")
    if st.button("Analyze Model"):
        with st.spinner("Analyzing model..."):
            analysis_result = analyze_hf_model(hf_model_id)
        st.session_state["analysis_result"] = analysis_result
    if "analysis_result" in st.session_state:
        report = format_analysis_report(st.session_state["analysis_result"])
        st.markdown(report)
elif page == "About":
    st.divider()
    st.markdown(
        """
## About This App

This app is built on the tokonomics package.

- Downloads the latest pricing data from the LiteLLM repository.
- Uses simple math to estimate the total token counts.
- Helps you estimate hardware requirements for running open-source large language models (LLMs) on-premise, using only a Hugging Face model ID.
- Latest version: v1.1

---

### 📌 Version History

| Version | Release Date | Key Feature Updates |
|---------|--------------|---------------------|
| `v1.1`  | 2025-04-06   | Added On Premise Estimator feature |
| `v1.0`  | 2025-03-26   | Initial release with basic total token estimation |

---

Website: [https://www.priam.ai](https://www.priam.ai)
"""
    )
    st.markdown(
        """
### Found a Bug?

If you encounter any issues or have feedback, please email **[email protected]**.
Your input helps us improve the app!
"""
    )