Advanced Python OpenCV Cheat Sheet #
A guide to advanced computer vision with OpenCV, focusing on performance, powerful techniques, and best practices.
Setup & Core Concepts #
- Color Order: OpenCV uses BGR order by default, while most other libraries (Pillow, Matplotlib) use RGB. Always be mindful of color space conversions.
- NumPy Backend: OpenCV images are NumPy arrays. This allows for powerful, efficient, and simple slicing and manipulation.
- Performance: Use `cv2.setUseOptimized(True)` to enable optimized code paths.
import cv2
import numpy as np
# Load image. The result is checked against None below because a failed
# read does not raise.
img = cv2.imread('image.jpg')
# Check if image loaded correctly
if img is None:
    print("Error: Could not read image.")
else:
    # Get properties: shape is (rows, cols, channels), i.e. height comes first
    height, width, channels = img.shape
    print(f'Dimensions: {width}x{height}, Channels: {channels}')
    # BGR to RGB conversion for display with other libs
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Grayscale conversion
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
Image Processing & Enhancement #
Blurring & Filtering #
- Gaussian Blur: For reducing high-frequency noise.
- Median Blur: Effective against salt-and-pepper noise.
- Bilateral Filtering: Preserves edges while reducing noise.
# Gaussian Blur: odd-sized kernel; sigmaX=0 lets OpenCV derive sigma from the kernel size
gaussian = cv2.GaussianBlur(img, ksize=(5, 5), sigmaX=0)
# Median Blur: aperture size must be odd
median = cv2.medianBlur(img, ksize=5)
# Bilateral Filter: neighborhood diameter 9, color/space sigmas of 75
bilateral = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)
Thresholding #
Used to segment parts of an image.
# Simple binary thresholding: pixels above 127 become 255, all others 0
ret, thresh1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
# Adaptive Thresholding: better for varying lighting conditions
# - ADAPTIVE_THRESH_MEAN_C: Threshold is the mean of the neighborhood area.
# - ADAPTIVE_THRESH_GAUSSIAN_C: Threshold is a weighted sum of neighborhood values (gaussian window).
adap_thresh = cv2.adaptiveThreshold(
    gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY, 11, 2,  # blockSize=11 (must be odd), C=2
)
# Otsu's Binarization: automatically finds the optimal threshold value
# (the 0 passed as the threshold is ignored; ret2 holds the value Otsu chose)
ret2, otsu_thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
Morphological Transformations #
Operations on image shapes, typically on binary images.
# 5x5 structuring element shared by every operation below
kernel = np.ones((5, 5), dtype=np.uint8)
# Erosion: shrinks the foreground by eating away at object boundaries
erosion = cv2.erode(otsu_thresh, kernel, iterations=1)
# Dilation: grows the foreground area
dilation = cv2.dilate(otsu_thresh, kernel, iterations=1)
# Opening = erosion then dilation (removes small noise specks)
opening = cv2.morphologyEx(otsu_thresh, op=cv2.MORPH_OPEN, kernel=kernel)
# Closing = dilation then erosion (fills small holes)
closing = cv2.morphologyEx(otsu_thresh, op=cv2.MORPH_CLOSE, kernel=kernel)
# Morphological gradient = dilation minus erosion (object outline)
gradient = cv2.morphologyEx(otsu_thresh, op=cv2.MORPH_GRADIENT, kernel=kernel)
Histograms & Image Analysis #
Histograms are a graphical representation of the intensity distribution of an image.
import matplotlib.pyplot as plt
# Intensity histogram of the grayscale image: channel 0, no mask, 256 bins over [0, 256)
hist = cv2.calcHist(images=[gray], channels=[0], mask=None, histSize=[256], ranges=[0, 256])
# Plot histogram
# plt.plot(hist)
# plt.show()
# Global histogram equalization (improves contrast)
equalized_gray = cv2.equalizeHist(gray)
# CLAHE (Contrast Limited Adaptive Histogram Equalization): equalizes per tile
# of an 8x8 grid, with the contrast clip limit set to 2.0
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
clahe_img = clahe.apply(gray)
Feature Detection & Description #
Edge Detection #
# Canny Edge Detection: threshold1/threshold2 are the lower/upper hysteresis bounds (minVal, maxVal)
edges = cv2.Canny(img, threshold1=100, threshold2=200)
Corner Detection #
# Harris Corner Detection (the input is converted to float32 first)
gray_float = np.float32(gray)
dst = cv2.cornerHarris(gray_float, 2, 3, 0.04)  # (image, blockSize, ksize, k)
dst = cv2.dilate(dst, None)  # None selects the default kernel; thickens the corner marks
# NOTE: this draws directly on img; work on img.copy() to keep the original clean
img[dst > 0.01 * dst.max()] = [0, 0, 255]  # Mark corners in red (BGR order)
# Shi-Tomasi Corner Detector (often better)
corners = cv2.goodFeaturesToTrack(gray, 25, 0.01, 10)  # (image, maxCorners, qualityLevel, minDistance)
# goodFeaturesToTrack returns None when no corner passes the quality test,
# so guard before converting and iterating.
if corners is not None:
    corners = np.intp(corners)
    for corner in corners:
        x, y = corner.ravel()
        # A scalar color of 255 fills only the first (blue) channel in BGR
        cv2.circle(img, (int(x), int(y)), 3, 255, -1)
Contours #
Contours are curves joining all continuous points along a boundary with the same color or intensity.
# Find contours (two-value return as in OpenCV 2.x / 4.x; 3.x returns three values)
contours, hierarchy = cv2.findContours(otsu_thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Draw all contours (-1) in green, 3 px thick, on a copy so img stays untouched
contour_img = cv2.drawContours(img.copy(), contours, -1, (0, 255, 0), 3)
# Get specific contour info; an image with no contours yields an empty
# sequence, so guard before indexing.
if contours:
    cnt = contours[0]
    M = cv2.moments(cnt)
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)  # True: treat the contour as closed
SIFT, SURF, ORB #
Algorithms for detecting and describing local features in images.
# ORB (Oriented FAST and Rotated BRIEF) - Free alternative to SIFT/SURF
orb = cv2.ORB_create()
# Detect keypoints and compute their descriptors in one pass (no mask)
kp, des = orb.detectAndCompute(gray, mask=None)
# Draw keypoints in green; outImage=None makes OpenCV allocate the output
img_kp = cv2.drawKeypoints(img, kp, outImage=None, color=(0, 255, 0), flags=0)
Video Analysis #
Reading & Displaying Video #
cap = cv2.VideoCapture('video.mp4')  # or 0 for webcam
try:
    if not cap.isOpened():
        print("Error: Could not open video.")
    else:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was returned: stop reading
                break
            # cv2.imshow('Frame', frame)
            # if cv2.waitKey(25) & 0xFF == ord('q'):
            #     break
            # process frame here
finally:
    # Always release the capture and close any windows, even if
    # frame processing raised.
    cap.release()
    cv2.destroyAllWindows()
Background Subtraction #
Common technique for motion detection.
# fgbg = cv2.createBackgroundSubtractorMOG2()
# fgmask = fgbg.apply(frame)
# fgbg_knn = cv2.createBackgroundSubtractorKNN()
# fgmask_knn = fgbg_knn.apply(frame)
Optical Flow #
Tracks the movement of objects between video frames.
# Lucas-Kanade Optical Flow
# Requires good features to track (e.g., from Shi-Tomasi)
# ... setup for lucas_kanade ...
# new_pts, status, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, old_pts, None, **lk_params)
Object Detection (Haar Cascades) #
A machine learning based approach where a cascade function is trained from a lot of positive and negative images.
# Load pre-trained classifiers from the cv2.data.haarcascades directory
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')
# Detect faces; each detection is an (x, y, w, h) rectangle
faces = face_cascade.detectMultiScale(gray, 1.3, 5)  # (image, scaleFactor, minNeighbors)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
    # Slices taken from gray and img; eye coordinates found below are
    # therefore relative to the face rectangle.
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = img[y:y+h, x:x+w]
    # Detect eyes within the face ROI
    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex, ey, ew, eh) in eyes:
        cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)