Query Optimization di MariaDB dengan Window Functions dan CTE: Teknik Advanced
Query Optimization di MariaDB dengan Window Functions dan CTE: Teknik Advanced
Query yang kompleks dengan multiple subqueries seringkali menjadi bottleneck performa. MariaDB 10.2+ membawa window functions dan Common Table Expressions (CTEs) yang powerful untuk menulis query yang lebih readable dan lebih cepat. Artikel ini akan membahas teknik-teknik advanced ini.
Window Functions Overview
1. Apa itu Window Functions?
Window functions melakukan kalkulasi di set of rows yang related dengan current row, tanpa meng-group rows seperti aggregate functions.
-- Aggregate: menghasilkan 1 row per group
SELECT department, AVG(salary) FROM employees GROUP BY department;
-- Window: menghasilkan 1 row per input row
SELECT
name,
department,
salary,
AVG(salary) OVER (PARTITION BY department) as dept_avg
FROM employees;
2. Syntax Dasar
function_name([arguments]) OVER (
[PARTITION BY partition_expression]
[ORDER BY order_expression]
[frame_clause]
)
3. Jenis-Jenis Window Functions
Ranking Functions:
SELECT
name,
salary,
RANK() OVER (ORDER BY salary DESC) as salary_rank,
DENSE_RANK() OVER (ORDER BY salary DESC) as dense_rank,
ROW_NUMBER() OVER (ORDER BY salary DESC) as row_num,
NTILE(4) OVER (ORDER BY salary DESC) as quartile
FROM employees;
Aggregate Window Functions:
SELECT
name,
department,
salary,
SUM(salary) OVER () as total_salary,
SUM(salary) OVER (PARTITION BY department) as dept_total,
AVG(salary) OVER (PARTITION BY department) as dept_avg,
MIN(salary) OVER (PARTITION BY department) as dept_min,
MAX(salary) OVER (PARTITION BY department) as dept_max,
COUNT(*) OVER (PARTITION BY department) as dept_count
FROM employees;
Window Functions untuk Business Cases
1. Running Totals dan Moving Averages
-- Running total penjualan
SELECT
order_date,
daily_amount,
SUM(daily_amount) OVER (ORDER BY order_date) as running_total,
AVG(daily_amount) OVER (ORDER BY order_date ROWS 6 PRECEDING) as moving_avg_7d
FROM (
SELECT order_date, SUM(amount) as daily_amount
FROM sales
GROUP BY order_date
) daily_sales;
2. Top N per Category
Masalah: Ambil top 3 employee per department
-- Dari subquery yang lambat
SELECT * FROM employees e1
WHERE (
SELECT COUNT(*) FROM employees e2
WHERE e2.department = e1.department
AND e2.salary >= e1.salary
) <= 3;
-- Ke window function yang cepat
WITH ranked_employees AS (
SELECT
*,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank
FROM employees
)
SELECT * FROM ranked_employees WHERE dept_rank <= 3;
3. Year-over-Year Comparison
WITH monthly_sales AS (
SELECT
DATE_FORMAT(order_date, '%Y-%m') as month,
SUM(amount) as monthly_revenue
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 2 YEAR)
GROUP BY month
)
SELECT
month,
monthly_revenue,
LAG(monthly_revenue, 12) OVER (ORDER BY month) as revenue_last_year,
ROUND(
(monthly_revenue - LAG(monthly_revenue, 12) OVER (ORDER BY month)) /
LAG(monthly_revenue, 12) OVER (ORDER BY month) * 100,
2
) as yoy_growth_percent
FROM monthly_sales
ORDER BY month;
4. First/Last Value dalam Group
-- Customer first dan last order
SELECT DISTINCT
customer_id,
FIRST_VALUE(order_id) OVER (
PARTITION BY customer_id
ORDER BY order_date
) as first_order,
LAST_VALUE(order_id) OVER (
PARTITION BY customer_id
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) as last_order,
FIRST_VALUE(order_date) OVER (
PARTITION BY customer_id
ORDER BY order_date
) as first_order_date,
LAST_VALUE(order_date) OVER (
PARTITION BY customer_id
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) as last_order_date
FROM orders;
5. Gap Analysis
-- Temukan missing sequence numbers
WITH numbered AS (
SELECT
id,
invoice_number,
invoice_number - ROW_NUMBER() OVER (ORDER BY invoice_number) as grp
FROM invoices
WHERE invoice_date >= '2024-01-01'
)
SELECT
MIN(invoice_number) as range_start,
MAX(invoice_number) as range_end,
COUNT(*) as count
FROM numbered
GROUP BY grp
ORDER BY range_start;
Common Table Expressions (CTEs)
1. Non-Recursive CTEs
-- Bagi query kompleks jadi readable parts
WITH
monthly_revenue AS (
SELECT
DATE_FORMAT(order_date, '%Y-%m') as month,
SUM(total_amount) as revenue
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY month
),
top_customers AS (
SELECT
customer_id,
SUM(total_amount) as total_spent
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY customer_id
ORDER BY total_spent DESC
LIMIT 10
)
SELECT
r.month,
r.revenue,
COUNT(DISTINCT tc.customer_id) as top_customer_count
FROM monthly_revenue r
LEFT JOIN orders o ON DATE_FORMAT(o.order_date, '%Y-%m') = r.month
LEFT JOIN top_customers tc ON o.customer_id = tc.customer_id
GROUP BY r.month, r.revenue
ORDER BY r.month;
2. Recursive CTEs untuk Hierarchical Data
-- Employee hierarchy dengan recursive CTE
WITH RECURSIVE employee_hierarchy AS (
-- Anchor member: top-level managers
SELECT
id,
name,
manager_id,
0 as level,
CAST(name AS CHAR(1000)) as hierarchy_path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive member: direct reports
SELECT
e.id,
e.name,
e.manager_id,
eh.level + 1,
CONCAT(eh.hierarchy_path, ' > ', e.name)
FROM employees e
INNER JOIN employee_hierarchy eh ON e.manager_id = eh.id
)
SELECT
id,
REPEAT(' ', level) || name as indented_name,
level,
hierarchy_path
FROM employee_hierarchy
ORDER BY hierarchy_path;
3. Recursive CTE untuk Tree Traversal
-- Category tree dengan all descendants
WITH RECURSIVE category_tree AS (
SELECT
id,
name,
parent_id,
0 as depth,
CAST(id AS CHAR(1000)) as path
FROM categories
WHERE parent_id IS NULL
UNION ALL
SELECT
c.id,
c.name,
c.parent_id,
ct.depth + 1,
CONCAT(ct.path, ',', c.id)
FROM categories c
INNER JOIN category_tree ct ON c.parent_id = ct.id
)
SELECT
id,
REPEAT('→ ', depth) || name as tree_view,
depth,
path
FROM category_tree
ORDER BY path;
CTEs vs Subqueries: Performance Comparison
1. Multiple References
Subquery (executed multiple times):
SELECT
(SELECT AVG(salary) FROM employees WHERE department = 'Sales') as sales_avg,
(SELECT AVG(salary) FROM employees WHERE department = 'Sales') * 1.1 as target_avg,
(SELECT AVG(salary) FROM employees WHERE department = 'Sales') * 0.9 as min_avg
FROM dual;
CTE (executed once, referenced multiple times):
WITH sales_avg AS (
SELECT AVG(salary) as avg_sal
FROM employees
WHERE department = 'Sales'
)
SELECT
avg_sal,
avg_sal * 1.1 as target_avg,
avg_sal * 0.9 as min_avg
FROM sales_avg;
2. Readability dan Maintenance
-- Complex subquery (hard to read)
SELECT
c.customer_name,
(SELECT COUNT(*) FROM orders o WHERE o.customer_id = c.id AND o.status = 'completed') as total_orders,
(SELECT SUM(total) FROM orders o WHERE o.customer_id = c.id AND o.status = 'completed') as total_revenue,
(SELECT AVG(total) FROM orders o WHERE o.customer_id = c.id AND o.status = 'completed') as avg_order
FROM customers c
WHERE c.status = 'active';
-- CTE version (clean and readable)
WITH customer_stats AS (
SELECT
customer_id,
COUNT(*) as total_orders,
SUM(total) as total_revenue,
AVG(total) as avg_order
FROM orders
WHERE status = 'completed'
GROUP BY customer_id
)
SELECT
c.customer_name,
COALESCE(cs.total_orders, 0) as total_orders,
COALESCE(cs.total_revenue, 0) as total_revenue,
COALESCE(cs.avg_order, 0) as avg_order
FROM customers c
LEFT JOIN customer_stats cs ON c.id = cs.customer_id
WHERE c.status = 'active';
Performance Tips
1. Index untuk Window Functions
-- Buat index untuk partitioning dan ordering
CREATE INDEX idx_dept_salary ON employees(department, salary);
CREATE INDEX idx_order_date ON orders(order_date);
2. Materialized CTEs (MariaDB 10.6+)
-- Materialized CTE (hasil disimpan temporary)
WITH MATERIALIZED heavy_calculation AS (
SELECT /* expensive query */ ...
)
SELECT * FROM heavy_calculation WHERE ...;
3. Avoid Unnecessary Window Frames
-- Default frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
-- Gunakan eksplisit frame untuk kalkulasi yang lebih cepat
-- Untuk running total sampai row saat ini
SUM(amount) OVER (ORDER BY order_date ROWS UNBOUNDED PRECEDING)
-- Untuk moving average window kecil
AVG(amount) OVER (ORDER BY order_date ROWS 2 PRECEDING AND 2 FOLLOWING)
Use Cases Kompleks
1. Sessionization (Clickstream Analysis)
-- Identifikasi user sessions (30-minute gap)
WITH user_events AS (
SELECT
user_id,
event_timestamp,
LAG(event_timestamp) OVER (
PARTITION BY user_id
ORDER BY event_timestamp
) as prev_timestamp
FROM user_clicks
WHERE event_date = '2024-01-15'
)
SELECT
user_id,
event_timestamp,
SUM(CASE
WHEN prev_timestamp IS NULL
OR TIMESTAMPDIFF(MINUTE, prev_timestamp, event_timestamp) > 30
THEN 1
ELSE 0
END) OVER (
PARTITION BY user_id
ORDER BY event_timestamp
) as session_id
FROM user_events;
2. Cohort Analysis
-- Retention cohort analysis
WITH first_orders AS (
SELECT
customer_id,
MIN(order_date) as first_order_date,
DATE_FORMAT(MIN(order_date), '%Y-%m') as cohort_month
FROM orders
GROUP BY customer_id
),
order_months AS (
SELECT
customer_id,
DATE_FORMAT(order_date, '%Y-%m') as order_month,
PERIOD_DIFF(
DATE_FORMAT(order_date, '%Y%m'),
DATE_FORMAT(first_order_date, '%Y%m')
) as months_since_first
FROM orders
JOIN first_orders USING (customer_id)
)
SELECT
cohort_month,
months_since_first,
COUNT(DISTINCT customer_id) as active_customers,
ROUND(
COUNT(DISTINCT customer_id) * 100.0 /
FIRST_VALUE(COUNT(DISTINCT customer_id)) OVER (
PARTITION BY cohort_month
ORDER BY months_since_first
),
2
) as retention_rate
FROM order_months
GROUP BY cohort_month, months_since_first
ORDER BY cohort_month, months_since_first;
Kesimpulan
Window functions dan CTEs mengubah cara kita menulis SQL:
- Query kompleks menjadi readable
- Performa lebih baik dari nested subqueries
- Recursive queries untuk hierarchical data
- Analytical functions untuk business intelligence
Investasi waktu untuk belajar teknik ini akan sangat membayar dalam produktivitas dan performa query.
Artikel Terkait
Link Postingan : https://www.tirinfo.com/query-optimization-mariadb-window-functions-cte/