Merge pull request #8 from valueonag/feat/powerbi-measures

feat: support powerbi measures
This commit is contained in:
Christopher Gondek 2025-11-05 08:36:01 +01:00 committed by GitHub
commit e7dd3ea999
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 146 additions and 3 deletions

View file

@ -13,22 +13,69 @@ class PowerBIReader:
table_name: str
base_url: str = settings.POWERBI_BASE_URL
include_nulls: bool = True
measures: list[str] = None
group_by_columns: list[str] = None
@classmethod
async def create(
    cls,
    *,
    dataset_id: str,
    access_token: str,
    table_name: str,
    measures: list[str] = None,
    group_by_columns: list[str] = None,
    **kwargs,
):
    """Alternate constructor that normalises the optional list arguments.

    All arguments are keyword-only. ``measures`` and ``group_by_columns``
    are replaced by a fresh empty list when they are ``None`` (or otherwise
    falsy). Any extra keyword arguments are forwarded to the class
    constructor unchanged.

    The method is declared ``async`` but awaits nothing itself; callers
    must still ``await`` it to obtain the instance.

    Returns:
        A new instance of ``cls`` built from the given arguments.
    """
    init_args = {
        "dataset_id": dataset_id,
        "access_token": access_token,
        "table_name": table_name,
        # Falsy values (None, empty list) collapse to a new empty list.
        "measures": measures if measures else [],
        "group_by_columns": group_by_columns if group_by_columns else [],
    }
    # Named parameters can never appear in kwargs, so this only adds keys.
    init_args.update(kwargs)
    return cls(**init_args)
def _dax_query(self) -> str:
    """Build the DAX query for the configured table, measures and grouping.

    One of three query shapes is produced:

    1. No measures: ``EVALUATE 'TableName'``
       Evaluates the table as-is. ``group_by_columns`` is ignored in
       this case.
    2. Measures only:
       ``EVALUATE ADDCOLUMNS('TableName', "Measure1", [Measure1], ...)``
       Evaluates the table with one extra column per measure.
    3. Measures and group_by_columns:
       ``EVALUATE SUMMARIZECOLUMNS('Table'[Col1], ..., "Measure1", [Measure1], ...)``
       Evaluates the measures grouped by the listed columns.

    Table, column and measure names are escaped by doubling their closing
    delimiter (``'`` in table names, ``]`` in bracketed references, ``"``
    in output column name strings) so that such characters in a name
    cannot break out of the identifier.

    Returns:
        DAX query string to execute against Power BI.
    """
    # Escape single quotes in table names per DAX rules.
    safe_table = self.table_name.replace("'", "''")

    # Case 1: no measures - simple table evaluation.
    if not self.measures:
        return f"EVALUATE '{safe_table}'"

    def bracketed(name: str) -> str:
        # A literal "]" inside [..] must be written as "]]".
        return "[" + name.replace("]", "]]") + "]"

    def quoted(name: str) -> str:
        # A literal '"' inside a DAX string must be written as '""'.
        return '"' + name.replace('"', '""') + '"'

    # Each measure contributes an output column name and a measure reference.
    measure_clauses = ", ".join(
        f"{quoted(measure)}, {bracketed(measure)}" for measure in self.measures
    )

    # Case 2: measures without grouping - use ADDCOLUMNS.
    if not self.group_by_columns:
        return f"EVALUATE ADDCOLUMNS('{safe_table}', {measure_clauses})"

    # Case 3: measures with grouping - use SUMMARIZECOLUMNS.
    group_cols = ", ".join(
        f"'{safe_table}'{bracketed(col)}" for col in self.group_by_columns
    )
    return f"EVALUATE SUMMARIZECOLUMNS({group_cols}, {measure_clauses})"
async def read_data(self) -> pd.DataFrame:
"""

View file

@ -147,6 +147,8 @@ class TableConfig:
name: str
powerbi_table_name: str
measures: List[str] = field(default_factory=list)
group_by_columns: List[str] = field(default_factory=list)
steps: List[Dict[str, Any]] = field(default_factory=list)
@ -184,6 +186,8 @@ class Preprocessor:
table_config = TableConfig(
name=table_data.get("name", ""),
powerbi_table_name=table_data.get("powerbi_table_name", ""),
measures=table_data.get("measures", []),
group_by_columns=table_data.get("group_by_columns", []),
steps=table_data.get("steps", []),
)
table_configs.append(table_config)
@ -253,6 +257,8 @@ class Preprocessor:
table_config = TableConfig(
name=table_data.get("name", ""),
powerbi_table_name=table_data.get("powerbi_table_name", ""),
measures=table_data.get("measures", []),
group_by_columns=table_data.get("group_by_columns", []),
steps=table_data.get("steps", []),
)
table_configs.append(table_config)

View file

@ -50,6 +50,53 @@ async def update_db_with_config(
3. **Saves to local database**: The processed data is saved to the local SQLite
database with the specified table name.
## Power BI Measures Support
In addition to retrieving physical/calculated columns from Power BI tables, you can
now retrieve Power BI measures using the optional `measures` and `group_by_columns`
fields in your table configuration.
### Retrieving Measures
Power BI measures are calculated values that live only in the model and are computed
at query time. To retrieve them alongside your table data, add a `measures` array
to your table configuration:
```json
{
"name": "Einkaufspreis",
"powerbi_table_name": "Einkaufspreis",
"measures": ["EP in CHF", "Gesamtbetrag in CHF"],
"steps": [...]
}
```
This uses the DAX ADDCOLUMNS pattern: `EVALUATE ADDCOLUMNS('TableName', "MeasureName", [MeasureName], ...)`
### Grouping Measures
If your measures need to be aggregated by specific columns, add the `group_by_columns`
field. This is useful when measures are defined with aggregation functions:
```json
{
"name": "Einkaufspreis_Aggregated",
"powerbi_table_name": "Einkaufspreis",
"measures": ["EP in CHF", "Gesamtbetrag in CHF"],
"group_by_columns": ["m_Artikel"],
"steps": [...]
}
```
This uses the DAX SUMMARIZECOLUMNS pattern: `EVALUATE SUMMARIZECOLUMNS('Table'[Column], "MeasureName", [MeasureName], ...)`
### Measure Name Formatting
- Measure names with spaces are automatically handled (e.g., "EP in CHF" becomes `[EP in CHF]` in DAX)
- If `measures` is empty or not provided, the standard table evaluation is used (any `group_by_columns` are ignored in that case)
- If `measures` is provided without `group_by_columns`, ADDCOLUMNS is used
- If both `measures` and `group_by_columns` are provided, SUMMARIZECOLUMNS is used
## Available Preprocessing Steps
The following preprocessing steps are supported. Each step is specified as a

View file

@ -18,6 +18,8 @@ class TableConfigSchema(BaseModel):
Attributes:
name: The name to use for the table in the local SQLite database
powerbi_table_name: The name of the source table in Power BI dataset
measures: Optional list of Power BI measures to retrieve
group_by_columns: Optional list of columns to group by when retrieving measures
steps: List of preprocessing steps to apply to the table data
"""
@ -29,6 +31,16 @@ class TableConfigSchema(BaseModel):
description="Name of the table in the Power BI dataset",
example="data_full",
)
measures: List[str] = Field(
default_factory=list,
description="List of Power BI measure names to retrieve",
example=["EP in CHF", "Gesamtbetrag in CHF"],
)
group_by_columns: List[str] = Field(
default_factory=list,
description="Columns to group by when retrieving measures (triggers SUMMARIZECOLUMNS)",
example=["m_Artikel"],
)
steps: List[Dict[str, Any]] = Field(
default_factory=list,
description="List of preprocessing steps to apply",

View file

@ -54,6 +54,8 @@ class DataProcessorService:
dataset_id=settings.POWERBI_DATASET_ID,
access_token=self.access_token,
table_name=table_config.powerbi_table_name,
measures=table_config.measures,
group_by_columns=table_config.group_by_columns,
)
# Step 2: Read data from Power BI
@ -147,6 +149,8 @@ class DataProcessorService:
dataset_id=settings.POWERBI_DATASET_ID,
access_token=self.access_token,
table_name=table_config.powerbi_table_name,
measures=table_config.measures,
group_by_columns=table_config.group_by_columns,
)
# Step 2: Read data from Power BI

View file

@ -30,3 +30,30 @@ tables:
"Einheit",
"EP in CHF",
]
# Example: Retrieving Power BI measures with ADDCOLUMNS
# Uncomment to retrieve measures alongside all table columns
# - name: "Einkaufspreis_With_Measures"
# powerbi_table_name: "Einkaufspreis"
# measures:
# - "EP in CHF"
# - "Gesamtbetrag in CHF"
# steps:
# - to_numeric:
# column: "EP_CHF"
# errors: "coerce"
# - dropna:
# subset: ["EP_CHF"]
# Example: Retrieving aggregated measures with SUMMARIZECOLUMNS
# Uncomment to retrieve measures grouped by specific columns
# - name: "Einkaufspreis_Aggregated"
# powerbi_table_name: "Einkaufspreis"
# measures:
# - "EP in CHF"
# - "Gesamtbetrag in CHF"
# group_by_columns:
# - "m_Artikel"
# steps:
# - dropna:
# subset: ["m_Artikel"]