Merge pull request #53 from valueonag/feat/power-bi-tool
feat: add valueon powerbi tool
Commit: e8e3b0c0db
1 changed file with 362 additions and 0 deletions
@@ -0,0 +1,362 @@
"""Power BI Query Tool for LangGraph.

This tool provides DAX query capabilities for Power BI datasets
via the Power BI REST API. Only read-only queries are allowed.
"""

import functools
import logging
from typing import Annotated

import anyio
import httpx
from langchain_core.tools import tool
from msal import ConfidentialClientApplication, SerializableTokenCache

from modules.shared.configuration import APP_CONFIG

logger = logging.getLogger(__name__)


# Configuration constants - encapsulated in this file
POWERBI_DATASET_ID = APP_CONFIG.get("VALUEON_POWERBI_DATASET_ID", "")
POWERBI_CLIENT_ID = APP_CONFIG.get("VALUEON_POWERBI_CLIENT_ID", "")
POWERBI_CLIENT_SECRET = APP_CONFIG.get("VALUEON_POWERBI_CLIENT_SECRET", "")
POWERBI_TENANT_ID = APP_CONFIG.get("VALUEON_POWERBI_TENANT_ID", "")
POWERBI_BASE_URL = "https://api.powerbi.com/v1.0/myorg"
POWERBI_AUTHORITY_BASE = "https://login.microsoftonline.com"
POWERBI_SCOPE = ["https://analysis.windows.net/powerbi/api/.default"]

# Limit results to prevent excessive context usage
MAX_ROWS_LIMIT = 100
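
# For reference, a sketch of the configuration this module expects. The key
# names come from the APP_CONFIG lookups above; the placeholder values and
# the loading mechanism (e.g. an .env file) are assumptions about deployment.
#
# EXAMPLE_CONFIG = {
#     "VALUEON_POWERBI_DATASET_ID": "00000000-0000-0000-0000-000000000000",
#     "VALUEON_POWERBI_CLIENT_ID": "<azure-ad-app-client-id>",
#     "VALUEON_POWERBI_CLIENT_SECRET": "<azure-ad-app-client-secret>",
#     "VALUEON_POWERBI_TENANT_ID": "<azure-ad-tenant-id>",
# }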


def _validate_environment() -> tuple[bool, str]:
    """Validate that all required environment variables are set.

    Returns:
        A tuple of (is_valid, error_message)
    """
    missing = []
    if not POWERBI_DATASET_ID:
        missing.append("VALUEON_POWERBI_DATASET_ID")
    if not POWERBI_CLIENT_ID:
        missing.append("VALUEON_POWERBI_CLIENT_ID")
    if not POWERBI_CLIENT_SECRET:
        missing.append("VALUEON_POWERBI_CLIENT_SECRET")
    if not POWERBI_TENANT_ID:
        missing.append("VALUEON_POWERBI_TENANT_ID")

    if missing:
        return False, f"Missing required environment variables: {', '.join(missing)}"

    return True, ""


def _validate_dax_query(*, dax_query: str) -> tuple[bool, str]:
    """Validate that the query is a valid DAX query.

    Args:
        dax_query: The DAX query to validate

    Returns:
        A tuple of (is_valid, error_message)
    """
    # Remove leading/trailing whitespace
    normalized_query = dax_query.strip()

    if not normalized_query:
        return False, "Query cannot be empty"

    # DAX queries typically start with EVALUATE or DEFINE, or are table
    # expressions. DAX is read-only by nature, so this validation is
    # deliberately lenient and only checks basic structure.

    # Check for minimum length
    if len(normalized_query) < 5:
        return False, "Query is too short to be valid"

    return True, ""


def _get_access_token_sync(
    *,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    authority_base: str = POWERBI_AUTHORITY_BASE,
    cache: SerializableTokenCache | None = None,
) -> str:
    """Get Power BI access token using MSAL (synchronous).

    Args:
        tenant_id: Azure AD tenant ID
        client_id: Application client ID
        client_secret: Application client secret
        authority_base: Azure AD authority base URL
        cache: Optional token cache for reuse

    Returns:
        Access token string

    Raises:
        RuntimeError: If token acquisition fails
    """
    authority = f"{authority_base}/{tenant_id}"

    app = ConfidentialClientApplication(
        client_id=client_id,
        authority=authority,
        client_credential=client_secret,
        token_cache=cache,
    )

    # Try the cache first; fall back to the client credentials flow
    result = app.acquire_token_silent(
        POWERBI_SCOPE, account=None
    ) or app.acquire_token_for_client(scopes=POWERBI_SCOPE)

    if "access_token" not in result:
        raise RuntimeError(
            f"MSAL token error: {result.get('error')} - {result.get('error_description')}"
        )

    return result["access_token"]
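
# A hedged sketch of reusing a token cache across calls; the module itself
# does not persist one, so the cache object below is purely illustrative:
#
# _token_cache = SerializableTokenCache()
# token = _get_access_token_sync(
#     tenant_id=POWERBI_TENANT_ID,
#     client_id=POWERBI_CLIENT_ID,
#     client_secret=POWERBI_CLIENT_SECRET,
#     cache=_token_cache,
# )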


async def _get_access_token_async(
    *,
    tenant_id: str,
    client_id: str,
    client_secret: str,
    **kwargs,
) -> str:
    """Get Power BI access token using MSAL (asynchronous).

    Args:
        tenant_id: Azure AD tenant ID
        client_id: Application client ID
        client_secret: Application client secret
        **kwargs: Additional arguments for _get_access_token_sync

    Returns:
        Access token string
    """
    # Create a partial function with arguments pre-filled
    func = functools.partial(
        _get_access_token_sync,
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret,
        **kwargs,
    )
    # Offload the blocking MSAL HTTP call to a worker thread
    return await anyio.to_thread.run_sync(func)


async def _execute_dax_query(
    *, dax_query: str, dataset_id: str, access_token: str
) -> dict:
    """Execute a DAX query against a Power BI dataset.

    Args:
        dax_query: The DAX query to execute
        dataset_id: Power BI dataset ID
        access_token: Access token for authentication

    Returns:
        Dictionary containing query results

    Raises:
        RuntimeError: If query execution fails
    """
    url = f"{POWERBI_BASE_URL}/datasets/{dataset_id}/executeQueries"

    body = {
        "queries": [{"query": dax_query}],
        "serializerSettings": {"includeNulls": True},
    }

    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }

    async with httpx.AsyncClient(timeout=60.0) as client:
        resp = await client.post(url, headers=headers, json=body)

    if resp.status_code != 200:
        raise RuntimeError(
            f"Power BI executeQueries failed: {resp.status_code} - {resp.text}"
        )

    payload = resp.json()

    try:
        rows = payload["results"][0]["tables"][0]["rows"]
    except (KeyError, IndexError) as e:
        raise RuntimeError("Unexpected executeQueries response structure") from e

    # Extract column names from the first row if available
    if rows:
        columns = list(rows[0].keys())
    else:
        columns = []

    return {"columns": columns, "rows": rows}
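
# For orientation, the executeQueries payload that the parsing above expects
# looks roughly like this hypothetical two-column result (row keys use the
# 'Table[Column]' form; exact envelope fields may vary by API version):
#
# {
#     "results": [
#         {
#             "tables": [
#                 {
#                     "rows": [
#                         {"data_full[Article]": "A-100", "data_full[Qty]": 5}
#                     ]
#                 }
#             ]
#         }
#     ]
# }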


def _strip_table_qualifier(*, column_name: str) -> str:
    """Strip the table qualifier from a column name.

    Power BI often returns columns as 'Table[Column]'. This strips them to
    'Column', e.g. 'data_full[Article]' becomes 'Article'.

    Args:
        column_name: The column name to process

    Returns:
        Processed column name
    """
    if "[" in column_name and column_name.endswith("]"):
        return column_name.split("[", 1)[1][:-1]
    return column_name


def _format_results(*, columns: list[str], rows: list[dict], max_rows: int) -> str:
    """Format query results into a readable string.

    Args:
        columns: List of column names
        rows: List of row data (as dictionaries)
        max_rows: Maximum number of rows to display

    Returns:
        Formatted string representation of the results
    """
    total_rows = len(rows)

    if total_rows == 0:
        return "Query executed successfully but returned no results."

    # Strip table qualifiers from column names for display
    clean_columns = [_strip_table_qualifier(column_name=col) for col in columns]

    # Limit rows to max_rows
    display_rows = rows[:max_rows]
    truncated = total_rows > max_rows

    # Calculate column widths from headers and displayed values
    col_widths = [len(str(col)) for col in clean_columns]
    for row in display_rows:
        for i, col in enumerate(columns):
            value = row.get(col, "")
            col_widths[i] = max(col_widths[i], len(str(value)))

    # Build the header and a separator of matching width
    header_parts = []
    for col, width in zip(clean_columns, col_widths):
        header_parts.append(str(col).ljust(width))
    header = " | ".join(header_parts)
    separator = "-" * len(header)

    # Build data rows, looking up values by their original (qualified) names
    row_lines = []
    for row in display_rows:
        row_parts = []
        for col, width in zip(columns, col_widths):
            value = row.get(col, "")
            row_parts.append(str(value).ljust(width))
        row_lines.append(" | ".join(row_parts))

    # Combine all parts
    result_parts = [
        f"Query returned {total_rows} row(s):",
    ]

    if truncated:
        result_parts.append(
            f"(Results limited to {max_rows} rows for context efficiency)\n"
        )
    else:
        result_parts.append("")

    result_parts.extend([header, separator, "\n".join(row_lines)])

    return "\n".join(result_parts)
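
# A hypothetical rendering of _format_results for a two-column, two-row
# result (made-up values; trailing padding omitted):
#
# Query returned 2 row(s):
#
# Article | Qty
# -------------
# A-100   | 5
# B-200   | 12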


@tool
async def query_powerbi_data(
    dax_query: Annotated[str, "The DAX query to execute against the Power BI dataset"],
) -> str:
    """Execute a DAX query against the Power BI dataset to access warehouse inventory data.

    This tool provides access to a Power BI table called 'data_full' which contains
    the articles available in the user's warehouse. Use DAX (Data Analysis Expressions)
    queries to retrieve and analyze this inventory data.

    Available table:
    - 'data_full': Contains warehouse inventory articles and their details

    Common query patterns:
    - View all data: EVALUATE 'data_full'
    - With filter: EVALUATE FILTER('data_full', 'data_full'[Column] = "Value")
    - Top N rows: EVALUATE TOPN(10, 'data_full', 'data_full'[Column], DESC)
    - Calculated: EVALUATE SUMMARIZE('data_full', 'data_full'[Column1], "Total", SUM('data_full'[Column2]))

    Results are limited to 100 rows maximum for efficiency.

    Args:
        dax_query: The DAX query to execute (e.g., "EVALUATE 'data_full'")

    Returns:
        A formatted string containing the query results with columns and rows
    """
    try:
        # Validate environment configuration
        is_valid_env, error_msg = _validate_environment()
        if not is_valid_env:
            logger.error(f"Environment validation failed: {error_msg}")
            return f"Configuration Error: {error_msg}"

        # Validate the query
        is_valid_query, error_msg = _validate_dax_query(dax_query=dax_query)
        if not is_valid_query:
            logger.warning(f"Invalid query attempt: {dax_query[:100]}...")
            return f"Query Validation Error: {error_msg}"

        logger.info(f"Executing Power BI query: {dax_query[:100]}...")

        # Get access token
        access_token = await _get_access_token_async(
            tenant_id=POWERBI_TENANT_ID,
            client_id=POWERBI_CLIENT_ID,
            client_secret=POWERBI_CLIENT_SECRET,
        )

        # Execute the query
        result = await _execute_dax_query(
            dax_query=dax_query,
            dataset_id=POWERBI_DATASET_ID,
            access_token=access_token,
        )

        # Format and return results
        formatted_output = _format_results(
            columns=result["columns"], rows=result["rows"], max_rows=MAX_ROWS_LIMIT
        )

        logger.info(
            f"Query completed successfully, returned {len(result['rows'])} row(s)"
        )
        return formatted_output

    except RuntimeError as e:
        logger.error(f"Runtime error in query_powerbi_data tool: {str(e)}")
        return f"Error executing query: {str(e)}"
    except Exception as e:
        logger.error(f"Unexpected error in query_powerbi_data tool: {str(e)}")
        return f"Unexpected error: {str(e)}"