## ----setup, include = FALSE---------------------------------------------------
# Chunk output settings for the rendered vignette, followed by the two
# packages the examples below rely on.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(data.table)
library(DBmaps)

## ----customers-example, echo = TRUE-------------------------------------------
# Example customers table: five customers, each assigned to a region.
customers <- data.table(
  customer_id = c("C001", "C002", "C003", "C004", "C005"),
  region = c("Asia", "Europe", "Asia", "Americas", "Europe")
)

# Record the metadata for the customers table with table_info().
customers_info_dt <- table_info(
  table_name = "customers",
  source_identifier = "customers.csv",
  identifier_columns = "customer_id",
  key_outcome_specs = list(
    list(
      OutcomeName = "CustomerCount",
      ValueExpression = 1, # every row counts as one customer
      AggregationMethods = list(
        # Only aggregations with a grouping variable are declared here.
        # A table-wide total is deliberately left out because it does not
        # prepare the table for a join on a key.
        list(
          AggregatedName = "CustomersByRegion",
          AggregationFunction = "sum",
          GroupingVariables = "region" # one count per region
        )
      )
    )
  )
)
print(customers_info_dt)

## ----products-example, echo = TRUE--------------------------------------------
# Example products table: six products spread over three categories.
products <- data.table(
  product_id = c("P001", "P002", "P003", "P004", "P005", "P006"),
  category = c("A", "B", "A", "C", "B", "C")
)

# Record the metadata for the products table with table_info().
products_info_dt <- table_info(
  table_name = "products",
  source_identifier = "products.csv",
  identifier_columns = "product_id",
  key_outcome_specs = list(
    list(
      OutcomeName = "ProductCount",
      ValueExpression = 1, # every row counts as one product
      AggregationMethods = list(
        list(
          AggregatedName = "ProductsPerCategory",
          AggregationFunction = "sum",
          GroupingVariables = "category"
        )
      )
    )
  )
)
print(products_info_dt)

## ----transactions-example, echo = TRUE----------------------------------------
# Small in-memory stand-in for "transactions.csv"; each row references a
# customer_id and a product_id and carries a price and a quantity.
transactions <- data.table(
  transaction_id = c("T001", "T002", "T003", "T004", "T005"),
  customer_id = c("C001", "C002", "C001", "C003", "C004"),
  product_id = c("P001", "P002", "P001", "P003", "P002"),
  price = c(10, 20, 22, 11, 21),
  quantity = c(1, 2, 1, 3, 2)
)

# Record the metadata for the transactions table. The single outcome,
# Revenue, is given as the unevaluated expression price * quantity and is
# declared with two aggregations: one by customer_id and one by product_id.
transactions_info_dt <- table_info(
  table_name = "transactions",
  source_identifier = "transactions.csv",
  identifier_columns = "transaction_id",
  key_outcome_specs = list(
    list(
      OutcomeName = "Revenue",
      ValueExpression = quote(price * quantity),
      AggregationMethods = list(
        list(
          AggregatedName = "RevenueByCustomer",
          AggregationFunction = "sum",
          GroupingVariables = "customer_id"
        ),
        list(
          AggregatedName = "RevenueByProduct",
          AggregationFunction = "sum",
          GroupingVariables = "product_id"
        )
      )
    )
  )
)
print(transactions_info_dt)

## ----all-example, echo = TRUE-------------------------------------------------
# Stack the three per-table metadata objects into one master data.table.
master_metadata_dt <- rbindlist(
  list(customers_info_dt, products_info_dt, transactions_info_dt)
)

# Display the combined metadata
print(master_metadata_dt)

# Display its structure
cat("\nStructure of the master metadata data.table:\n")
str(master_metadata_dt)