Data Wrangling in SQL
Table: customer_orders
Changes:
- Changing all the null and blank to Null
CREATE TEMPORARY TABLE customer_orders_cleaned
SELECT
order_id,
customer_id,
pizza_id,
CASE
WHEN exclusions = '' THEN NULL
WHEN exclusions = 'null' THEN NULL
ELSE exclusions
END AS exclusions_cleaned,
CASE
WHEN extras = '' THEN null
WHEN extras = 'NaN' THEN null
ELSE extras
END AS extras_cleaned,
order_time
FROM customer_orders;
select * from customer_orders_cleaned
Table: runner_orders
Changes:
- Changing all the null and blank to Null
- Removing 'km' from distance
- Removing anything after the numbers from duration
- Creating a clean temp table
CREATE TEMPORARY TABLE runner_orders_cleaned_1
SELECT
order_id,
runner_id,
CASE
WHEN pickup_time = 'null' THEN null
ELSE pickup_time
END AS pick_up_time,
CASE
WHEN distance = 'null' THEN null
ELSE regexp_replace(distance, '[a-z]+', '')
END AS distance_km,
CASE
WHEN duration = 'null' THEN null
ELSE regexp_replace(duration, '[a-z]+', '')
END AS duration_mins,
CASE
WHEN cancellation = '' THEN null
WHEN cancellation = 'null' THEN null
ELSE cancellation
END AS cancellation
FROM runner_orders;
select * from runner_orders_cleaned_1
Changing data types
For runner_orders table:
- Change pickup_time DATETIME
- Change distance to FLOAT
- Change duration to INT
CREATE TEMPORARY TABLE runner_orders_cleaned
SELECT
order_id,
runner_id,
pick_up_time,
CAST(distance_km AS DECIMAL(3,1)) AS distance_km,
CAST(duration_mins AS SIGNED INT) AS duration_mins,
cancellation
FROM runner_orders_cleaned_1;
Questions
How many pizzas were ordered?
SELECT COUNT(order_id) AS total_pizzas
FROM customer_orders_cleaned;