Skip to main content

Data Wrangling in SQL

Table: customer_orders

Changes:

  • Changing all the null and blank to Null
CREATE TEMPORARY TABLE customer_orders_cleaned
SELECT
order_id,
customer_id,
pizza_id,
CASE
WHEN exclusions = '' THEN NULL
WHEN exclusions = 'null' THEN NULL
ELSE exclusions
END AS exclusions_cleaned,
CASE
WHEN extras = '' THEN null
WHEN extras = 'NaN' THEN null
ELSE extras
END AS extras_cleaned,
order_time
FROM customer_orders;
select * from customer_orders_cleaned

Table: runner_orders

Changes:

  • Changing all the null and blank to Null
  • Removing 'km' from distance
  • Removing anything after the numbers from duration
  • Creating a clean temp table
CREATE TEMPORARY TABLE runner_orders_cleaned_1
SELECT
order_id,
runner_id,
CASE
WHEN pickup_time = 'null' THEN null
ELSE pickup_time
END AS pick_up_time,
CASE
WHEN distance = 'null' THEN null
ELSE regexp_replace(distance, '[a-z]+', '')
END AS distance_km,
CASE
WHEN duration = 'null' THEN null
ELSE regexp_replace(duration, '[a-z]+', '')
END AS duration_mins,
CASE
WHEN cancellation = '' THEN null
WHEN cancellation = 'null' THEN null
ELSE cancellation
END AS cancellation
FROM runner_orders;
select * from runner_orders_cleaned_1

Changing data types

For runner_orders table:

  • Change pickup_time DATETIME
  • Change distance to FLOAT
  • Change duration to INT
CREATE TEMPORARY TABLE runner_orders_cleaned
SELECT
order_id,
runner_id,
pick_up_time,
CAST(distance_km AS DECIMAL(3,1)) AS distance_km,
CAST(duration_mins AS SIGNED INT) AS duration_mins,
cancellation
FROM runner_orders_cleaned_1;

Questions

How many pizzas were ordered?

SELECT COUNT(order_id) AS total_pizzas
FROM customer_orders_cleaned;