NumPy (Numerical Python) is the fundamental library for scientific computing in Python. Originally developed by Travis Oliphant in 2005, it has become the backbone of virtually the entire modern Python data science ecosystem.

If you work with data analysis, machine learning, engineering, or any field requiring efficient numerical calculations, NumPy is essential. In this complete guide, you'll master everything from basic concepts to advanced array manipulation techniques.

📦 What is NumPy and Why Use It?

NumPy is a library that provides support for high-performance multidimensional arrays, along with high-level mathematical functions to work with those arrays. The main advantage of NumPy over traditional Python lists is speed: operations with NumPy arrays are tens to hundreds of times faster than equivalents with native lists.

This efficiency comes from two things: NumPy stores elements of a single data type in contiguous memory, and its vectorized operations run in optimized, compiled C loops (often using SIMD instructions) instead of executing element by element in the Python interpreter. Additionally, NumPy is the foundation for other essential libraries like Pandas, Matplotlib, Scikit-learn, and TensorFlow.

🚀 Installation and Setup

Installation via pip

# Standard installation (run this command in your terminal, not in Python)
pip install numpy

# Check version (run these lines in Python)
import numpy as np
print(np.__version__)

Installation via conda

# For Anaconda/Miniconda users
conda install numpy

🔧 NumPy Arrays: The Heart of the Library

Creating Basic Arrays

import numpy as np

# Array from a list
arr1 = np.array([1, 2, 3, 4, 5])
print(arr1)

# Array of zeros
zeros = np.zeros(5)
print(zeros)

# Array of ones
ones = np.ones((3, 3))  # 3x3 matrix
print(ones)

# Array with random values
random_values = np.random.rand(5)
print(random_values)

# Array with sequence
sequence = np.arange(0, 10, 2)  # from 0 up to (but not including) 10, step 2 -> [0 2 4 6 8]
print(sequence)

# Array with linear spacing
spaced = np.linspace(0, 1, 5)  # 5 evenly spaced values from 0 to 1, both endpoints included
print(spaced)

Array Properties

import numpy as np

arr = np.array([[1, 2, 3], [4, 5, 6]])

print(f"Dimensions: {arr.ndim}")       # 2
print(f"Shape: {arr.shape}")           # (2, 3)
print(f"Total elements: {arr.size}")   # 6
print(f"Data type: {arr.dtype}")       # int64 (int32 on Windows with NumPy < 2.0)
print(f"Memory (bytes): {arr.itemsize * arr.size}")

📊 Mathematical Operations with NumPy

Arithmetic Operations

import numpy as np

arr = np.array([1, 2, 3, 4, 5])

# Basic operations - applied element by element
print(arr + 10)    # [11, 12, 13, 14, 15]
print(arr - 5)     # [-4, -3, -2, -1, 0]
print(arr * 2)     # [2, 4, 6, 8, 10]
print(arr / 2)     # [0.5, 1.0, 1.5, 2.0, 2.5]
print(arr ** 2)    # [1, 4, 9, 16, 25]

# Operations between arrays
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(a + b)       # [5, 7, 9]
print(a * b)       # [4, 10, 18]

Mathematical Functions

import numpy as np

arr = np.array([0, np.pi/2, np.pi, 3*np.pi/2])

# Trigonometric functions
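print(np.sin(arr))   # ~[0. 1. 0. -1.]  (sin(pi) prints as ~1.2e-16, not exactly 0)
print(np.cos(arr))   # ~[1. 0. -1. 0.]  (the "zeros" print as ~6.1e-17 and ~-1.8e-16)
print(np.tan(arr))   # ~[0. 1.63e+16 -1.22e-16 5.44e+15]  (huge finite values, not inf: pi/2 is not exactly representable)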

# Other important functions
arr2 = np.array([1, 4, 9, 16])
print(np.sqrt(arr2))   # [1. 2. 3. 4.]
print(np.log(arr2))    # natural log: [0. 1.386 2.197 2.773]
print(np.abs([-1, -2, 3]))  # [1, 2, 3]

# Statistics
grades = np.array([7.5, 8.0, 6.5, 9.0, 7.0])
print(f"Mean: {np.mean(grades):.2f}")      # 7.6
print(f"Median: {np.median(grades):.2f}")  # 7.5
print(f"Std deviation: {np.std(grades):.2f}")  # 0.86 (population std, ddof=0)
print(f"Variance: {np.var(grades):.2f}")   # 0.74
print(f"Max: {np.max(grades)}")         # 9.0
print(f"Min: {np.min(grades)}")          # 6.5

🎯 Indexing and Slicing

Accessing Elements

import numpy as np

arr = np.array([[1, 2, 3, 4],
                [5, 6, 7, 8],
                [9, 10, 11, 12]])

# Access by index
print(arr[0, 0])      # 1
print(arr[1, 2])      # 7
print(arr[-1, -1])    # 12

# Negative indexing
print(arr[-1])        # [9, 10, 11, 12]
print(arr[-1, -2])   # 11

Slicing

import numpy as np

arr = np.arange(1, 21).reshape(4, 5)

# Specific rows and columns
print(arr[0, :])      # first row
print(arr[:, 2])      # third column

# Ranges
print(arr[1:3, 1:4])  # rows 1-2, columns 1-3

# With step
print(arr[::2])       # all rows, step 2
print(arr[::2, ::2])  # rows and columns with step 2

# Boolean mask
arr2 = np.array([10, 20, 30, 40, 50])
mask = arr2 > 25
print(arr2[mask])  # [30, 40, 50]

# Fancy indexing
indices = [0, 2, 4]
print(arr2[indices])  # [10, 30, 50]

🔄 Array Manipulation

reshape() and flatten()

import numpy as np

arr = np.arange(1, 13)
print(arr.shape)  # (12,)

# Reshape to 3x4 matrix
matrix = arr.reshape(3, 4)
print(matrix)
# [[1  2  3  4]
#  [5  6  7  8]
#  [9 10 11 12]]

# flatten() - flatten to 1D
print(matrix.flatten())

# ravel() - like flatten(), but returns a view when possible (flatten() always returns a copy)
print(matrix.ravel())

transpose() and swapaxes()

import numpy as np

matrix = np.array([[1, 2, 3],
                   [4, 5, 6]])

print(matrix.T)
# [[1 4]
#  [2 5]
#  [3 6]]

# swapaxes() - swap axes
arr3d = np.arange(1, 13).reshape(2, 3, 2)
print(arr3d.swapaxes(1, 2).shape)  # (2, 2, 3)

Concatenation and Split

import numpy as np

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# Concatenate
print(np.concatenate([a, b]))        # [1 2 3 4 5 6]
print(np.hstack([a, b]))             # horizontal stack
print(np.vstack([a, b]))             # vertical stack

# Split
arr = np.array([1, 2, 3, 4, 5, 6])
print(np.split(arr, 3))  # [array([1,2]), array([3,4]), array([5,6])]

💡 Broadcasting

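Broadcasting is a powerful NumPy technique that allows operations between arrays of different but compatible shapes. NumPy virtually stretches the smaller array along its missing or size-1 dimensions so that it matches the larger one, without actually copying the data.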

import numpy as np

# Example 1: arrays of different shapes
a = np.array([[1, 2, 3],
              [4, 5, 6]])
b = np.array([10, 20, 30])

# b is "broadcast" to match a's shape
print(a + b)
# [[11 22 33]
#  [14 25 36]]

# Example 2: scalar with array
arr = np.array([1, 2, 3, 4, 5])
print(arr * 10)  # [10 20 30 40 50]

# Example 3: 2D matrix with vector
matrix = np.ones((3, 3))
vector = np.array([1, 2, 3])
print(matrix + vector)
# [[2. 3. 4.]
#  [2. 3. 4.]
#  [2. 3. 4.]]

🧮 Linear Algebra with NumPy

import numpy as np

# Dot product
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(np.dot(a, b))      # 32

# Matrix multiplication
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
print(np.matmul(A, B))
# [[19 22]
#  [43 50]]

# Determinant
print(np.linalg.det(A))  # approximately -2.0 (prints -2.0000000000000004 due to floating-point rounding)

# Inverse
print(np.linalg.inv(A))
# [[-2.   1. ]
#  [ 1.5 -0.5]]

# Eigenvalues
w, v = np.linalg.eig(A)
print(f"Eigenvalues: {w}")
print(f"Eigenvectors: {v}")

🎓 Practical Project: Sales Analysis

import numpy as np

# Simulate store sales data
np.random.seed(42)

days = 30
products = ["Electronics", "Clothing", "Food", "Home"]

# Generate random sales for each product
sales = np.random.randint(1000, 5000, size=(days, len(products)))
sales = sales.astype(float)

# Unit price of each product (same order as the products list)
prices = np.array([500.00, 80.00, 25.00, 150.00])

# Calculate daily revenue
daily_revenue = sales * prices

print("=" * 50)
print("📊 SALES REPORT - MIX STORE")
print("=" * 50)

# Total revenue per product
revenue_per_product = daily_revenue.sum(axis=0)
print("\n💰 Revenue by Product:")
for i, prod in enumerate(products):
    print(f"  {prod}: $ {revenue_per_product[i]:,.2f}")

# Best selling product, measured by total revenue (not by units sold)
best_product = products[np.argmax(revenue_per_product)]
print(f"\n🏆 Best selling product: {best_product}")

# Average daily sales
avg_daily_sales = sales.mean(axis=1)
print(f"\n📈 Average daily sales: {avg_daily_sales.mean():,.0f}")

# Day with highest revenue
best_day = np.argmax(daily_revenue.sum(axis=1)) + 1
max_revenue = daily_revenue.sum(axis=1).max()
print(f"\n📅 Best day: Day {best_day} ($ {max_revenue:,.2f})")

# Sales standard deviation (volatility)
volatility = sales.std(axis=0)
print(f"\n📉 Volatility by product:")
for i, prod in enumerate(products):
    print(f"  {prod}: {volatility[i]:,.0f}")

🔗 Integration with Other Libraries

NumPy is the foundation of the Python data science ecosystem. Here's how it integrates with the main libraries:

NumPy + Pandas

import numpy as np
import pandas as pd

# Create DataFrame from NumPy arrays
data = np.random.randn(100, 3)
df = pd.DataFrame(data, columns=['A', 'B', 'C'])
print(df.describe())

NumPy + Matplotlib

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 2 * np.pi, 100)
y = np.sin(x)

plt.plot(x, y)
plt.title('Sine with NumPy')
plt.show()

NumPy + SciPy

from scipy import stats
import numpy as np

data = np.array([23, 25, 28, 30, 32, 35, 38, 40])
mean = np.mean(data)
std = np.std(data)
result = stats.normaltest(data)
print(f"Mean: {mean:.2f}")
print(f"Normality test: {result}")

⚡ Performance Tips

  • ✅ Use NumPy arrays instead of Python lists for numerical operations
  • ✅ Prefer vectorized operations over for loops
  • ✅ Use np.where() for conditionals on arrays
  • ✅ Use @njit (Numba) for functions that can't be vectorized
  • ✅ Avoid using append() in loops - pre-allocate memory
  • ✅ Use view instead of copy when possible
  • ✅ Set the correct dtype to save memory
  • ❌ Avoid converting NumPy arrays to Python lists

🚀 Next Steps

Now that you've mastered NumPy, the next step is exploring libraries that use it as a foundation, such as Pandas, Matplotlib, SciPy, and Scikit-learn.

NumPy is just the beginning of your journey into data science with Python. Keep practicing and exploring the endless possibilities this powerful library offers!