import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

# Open (or create) the SQLite database file that will hold the CSV data.
conn = sqlite3.connect("mydatabase.db")

# Destination table name -> CSV file the table is loaded from.
csv_files = dict(
    orders="orders.csv",
    staffs="staffs.csv",
    customers="customers.csv",
    stores="stores.csv",
    order_items="order_items.csv",
)

# Load each CSV into its table; a table left over from an earlier run is
# replaced rather than appended to.
for table_name in csv_files:
    csv_file = csv_files[table_name]
    df = pd.read_csv(csv_file)
    df.to_sql(name=table_name, con=conn, if_exists="replace", index=False)

# Sanity check: print the names of the tables now present in the database.
cursor = conn.cursor()
table_rows = cursor.execute(
    "SELECT name FROM sqlite_master WHERE type='table';"
).fetchall()
print(table_rows)

# Expected output: [('orders',), ('staffs',), ('customers',), ('stores',), ('order_items',)]

# A function to display any table by putting its name into the function
def view_table(name):
    """Show the first five rows of the table called *name*.

    The query runs against the module-level ``conn`` connection. ``name`` is
    interpolated directly into the SQL text (identifiers cannot be bound as
    query parameters), so only pass trusted table names.

    Args:
        name: Name of the table to preview.
    """
    df = pd.read_sql_query(f"SELECT * FROM {name} LIMIT 5", conn)
    # ``display`` is only injected by IPython/Jupyter; fall back to ``print``
    # so the helper also works when this file is run as a plain script.
    try:
        show = display
    except NameError:
        show = print
    show(df)
    
# Preview the first rows of the orders table.
view_table("orders")

# Defining a function to use to easily run SQL queries
def run_query(sql_query, conn):
    """Run a SQL query, show the result, and return it as a DataFrame.

    Args:
        sql_query: SQL text to execute.
        conn: Open database connection the query is run against.

    Returns:
        A pandas DataFrame holding the query result.
    """
    df = pd.read_sql_query(sql_query, conn)
    # ``display`` is only injected by IPython/Jupyter; fall back to ``print``
    # so the helper also works when this file is run as a plain script.
    try:
        show = display
    except NameError:
        show = print
    show(df)
    return df

# Joining orders + customers tables using an inner join.
# ORDER BY makes the 10-row preview deterministic: without it SQLite may
# return the joined rows in any order, so LIMIT could pick different rows.

query1 = """
SELECT
    orders.order_id,
    orders.order_date,
    customers.first_name,
    customers.last_name
FROM orders
JOIN customers ON orders.customer_id = customers.customer_id
ORDER BY orders.order_id
LIMIT 10;
"""

df1 = run_query(query1, conn)

# Joining staffs + stores tables using inner join.
# ORDER BY makes the 10-row preview deterministic: without it SQLite may
# return the joined rows in any order, so LIMIT could pick different rows.

query2 = """
SELECT
    staffs.first_name,
    staffs.last_name,
    stores.store_name
FROM staffs
JOIN stores ON staffs.store_id = stores.store_id
ORDER BY staffs.staff_id
LIMIT 10;
"""

df2 = run_query(query2, conn)

# Total revenue per order.
# Line-item revenue is quantity * list_price * (1 - discount), summed per
# order. ORDER BY makes the 10-row preview deterministic: GROUP BY alone does
# not guarantee the order in which groups are returned.

query3 = """
SELECT
    order_id,
    SUM(quantity * list_price * (1 - discount)) AS total_revenue
FROM order_items
GROUP BY order_id
ORDER BY order_id
LIMIT 10;
"""

df3 = run_query(query3, conn)

# Total revenue per customer
# Revenue for a line item is quantity * list_price * (1 - discount); the
# query sums it over all of a customer's order items and keeps the ten
# customers with the largest totals (ORDER BY total_spent DESC, LIMIT 10).

query4 = """
SELECT 
    customers.customer_id,
    customers.first_name,
    customers.last_name,
    SUM(order_items.quantity * order_items.list_price * (1 - order_items.discount)) AS total_spent
FROM customers
JOIN orders ON customers.customer_id = orders.customer_id
JOIN order_items ON orders.order_id = order_items.order_id
GROUP BY customers.customer_id, customers.first_name, customers.last_name
ORDER BY total_spent DESC
LIMIT 10;
"""

# run_query displays the result and returns it as a DataFrame.
df4 = run_query(query4, conn)

# List the first and last names of customers and the dates of the orders they placed.
# ORDER BY makes the 10-row preview deterministic: without it SQLite may
# return the joined rows in any order, so LIMIT could pick different rows.

query5 = """
SELECT
    customers.first_name,
    customers.last_name,
    orders.order_date
FROM customers
JOIN orders ON customers.customer_id = orders.customer_id
ORDER BY customers.customer_id, orders.order_date, orders.order_id
LIMIT 10;
"""

df5 = run_query(query5, conn)

# Where the staff work and their managers.
# The manager lookup is a self-join on staffs. It is a LEFT JOIN so that staff
# without a manager (NULL manager_id) are still listed, with NULL manager
# columns, instead of being silently dropped by an inner join.
# ORDER BY makes the 10-row preview deterministic.

query6 = """
SELECT
    s.first_name AS staff_first,
    s.last_name AS staff_last,
    st.store_name,
    m.first_name AS manager_first,
    m.last_name AS manager_last
FROM staffs AS s
JOIN stores AS st ON s.store_id = st.store_id
LEFT JOIN staffs AS m ON s.manager_id = m.staff_id
ORDER BY s.staff_id
LIMIT 10;
"""

df6 = run_query(query6, conn)

# Top-Spending Customers (from query4)

# Label each bar with the customer's full name: plotting 'first_name' alone
# would merge customers who share a first name into one aggregated bar.
top_customers = df4.assign(customer=df4['first_name'] + ' ' + df4['last_name'])

plt.figure(figsize=(10, 6))
sns.barplot(x='total_spent', y='customer', data=top_customers)
plt.title('Top 10 Customers by Total Spending')
plt.xlabel('Total Spent (USD)')
plt.ylabel('Customer')
plt.tight_layout()
plt.show()

# Staff Count per Store
# Counted from every staff row rather than from df2: df2 comes from a query
# with LIMIT 10, which would undercount once there are more than ten staff.

staff_stores = pd.read_sql_query(
    """
    SELECT stores.store_name
    FROM staffs
    JOIN stores ON staffs.store_id = stores.store_id;
    """,
    conn,
)

plt.figure(figsize=(10, 6))
sns.countplot(
    data=staff_stores,
    y='store_name',
    # Show the stores with the most staff first.
    order=staff_stores['store_name'].value_counts().index,
)
plt.title('Number of Staff per Store')
plt.xlabel('Number of Staff')
plt.ylabel('Store')
plt.tight_layout()
plt.show()

# Order Revenue Distribution
# Computed over every order rather than from df3: df3 comes from a query with
# LIMIT 10, so a histogram of it would describe only ten orders.

order_revenue = pd.read_sql_query(
    """
    SELECT
        order_id,
        SUM(quantity * list_price * (1 - discount)) AS total_revenue
    FROM order_items
    GROUP BY order_id;
    """,
    conn,
)

plt.figure(figsize=(8, 5))
sns.histplot(order_revenue['total_revenue'], bins=20)
plt.title('Distribution of Order Revenues')
plt.xlabel('Total Revenue per Order')
plt.ylabel('Frequency')
plt.tight_layout()
plt.show()

	order_id	customer_id	order_status	order_date	required_date	shipped_date	store_id	staff_id
0	1	259	4	2016-01-01	2016-01-03	2016-01-03	1	2
1	2	1212	4	2016-01-01	2016-01-04	2016-01-03	2	6
2	3	523	4	2016-01-02	2016-01-05	2016-01-03	2	7
3	4	175	4	2016-01-03	2016-01-04	2016-01-05	1	3
4	5	1324	4	2016-01-03	2016-01-06	2016-01-06	2	6

	order_id	order_date	first_name	last_name
0	1	2016-01-01	Johnathan	Velazquez
1	2	2016-01-01	Jaqueline	Cummings
2	3	2016-01-02	Joshua	Robertson
3	4	2016-01-03	Nova	Hess
4	5	2016-01-03	Arla	Ellis
5	6	2016-01-04	Sharyn	Hopkins
6	7	2016-01-04	Laureen	Paul
7	8	2016-01-04	Leslie	Higgins
8	9	2016-01-05	Neil	Mccall
9	10	2016-01-05	Alane	Munoz

	first_name	last_name	store_name
0	Fabiola	Jackson	Santa Cruz Bikes
1	Mireya	Copeland	Santa Cruz Bikes
2	Genna	Serrano	Santa Cruz Bikes
3	Virgie	Wiggins	Santa Cruz Bikes
4	Jannette	David	Baldwin Bikes
5	Marcelene	Boyer	Baldwin Bikes
6	Venita	Daniel	Baldwin Bikes
7	Kali	Vargas	Rowlett Bikes
8	Layla	Terrell	Rowlett Bikes
9	Bernardine	Houston	Rowlett Bikes

	order_id	total_revenue
0	1	10231.0464
1	2	1697.9717
2	3	1519.9810
3	4	1349.9820
4	5	3900.0607
5	6	9442.5048
6	7	2165.0817
7	8	1372.4719
8	9	7199.9820
9	10	242.9910

	customer_id	first_name	last_name	total_spent
0	94	Sharyn	Hopkins	34807.9392
1	10	Pamelia	Newman	33634.2604
2	75	Abby	Gamble	32803.0062
3	6	Lyndsey	Bean	32675.0725
4	16	Emmitt	Sanchez	31925.8857
5	73	Melanie	Hayes	31913.6902
6	1	Debra	Burks	27888.1834
7	61	Elinore	Aguilar	25636.4531
8	93	Corrina	Sawyer	25612.7021
9	122	Shena	Carter	24890.6244

Step 1: Connect to SQLite Database

Step 2: Database Keys and Relationships

Step 3: Writing SQL Queries

Step 4: Visualize Data

💸 Top Spending Customers – Insights

🏬 Staff Distribution Across Stores – Insights

💰 Total Revenue per Order – Insights

	first_name	last_name	order_date
0	Debra	Burks	2016-12-09
1	Debra	Burks	2018-04-18
2	Debra	Burks	2018-11-18
3	Kasha	Todd	2017-02-05
4	Kasha	Todd	2017-08-21
5	Kasha	Todd	2018-04-09
6	Tameka	Fisher	2018-03-27
7	Tameka	Fisher	2018-04-06
8	Tameka	Fisher	2018-10-21
9	Daryl	Spence	2017-02-07