OpenCV (Computer Vision)

Install the opencv-contrib-python and numpy libraries:

pip install opencv-contrib-python numpy

OpenCV is a sophisticated image processing library. It takes considerably more disk space than Pillow, so use Pillow instead if you don’t need the features of OpenCV. OpenCV is commonly used for machine-learning computer vision tasks such as face detection/recognition and object detection/recognition.

Sample code recipes

Use the camera, display the video frames and save to PNG

import cv2
import numpy as np

# Show a live camera preview until ESC is pressed, then save the last
# successfully captured frame to "final-frame.png".
capture = cv2.VideoCapture(0)                   # Camera number
capture.set(cv2.CAP_PROP_FRAME_WIDTH, 800)      # Request camera width
capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 600)     # Request camera height
last_frame = None                               # Most recent frame that was read successfully
try:
    while True:
        ret, img = capture.read()               # Get success code and image from camera
        if not ret:                             # No frame (camera missing/unplugged): img is None
            break
        last_frame = img
        cv2.imshow('window label', img)         # Show on screen
        k = cv2.waitKey(100)                    # Wait 100 ms for keypress
        if k % 256 == 27:                       # If ESC key pressed
            break
finally:
    capture.release()                           # Free the camera for other programs
    cv2.destroyAllWindows()                     # Close the preview window
if last_frame is not None:                      # Nothing to save if the camera never delivered a frame
    cv2.imwrite("final-frame.png", last_frame)  # Save final frame as a PNG

Use the camera, save video file.

import cv2

# Record the camera to "video.mp4" (800x600, 25 fps) until ESC is pressed.
capture = cv2.VideoCapture(0)                   # Camera number
fourcc = cv2.VideoWriter_fourcc(*'mp4v')        # Codec used for the .mp4 file
video = cv2.VideoWriter('video.mp4', fourcc, 25, (800, 600))
try:
    while True:
        ret, img = capture.read()               # Get success code and image from camera
        if not ret:                             # No frame (camera missing/unplugged): img is None
            break
        frame = cv2.resize(img, (800, 600))     # Resize image to 800x600 to match the writer
        video.write(frame)                      # Add frame to video
        # waitKey() only receives key presses while an OpenCV window exists,
        # so a preview window is needed for the ESC check below to ever fire.
        cv2.imshow('recording', frame)
        # Poll briefly: a 100 ms wait would cap capture at ~10 fps while the
        # file is declared as 25 fps, making playback run too fast.
        k = cv2.waitKey(1)
        if k % 256 == 27:                       # If ESC key pressed
            break
finally:
    video.release()                             # Close video file, even after Ctrl+C or an error
    capture.release()                           # Free the camera for other programs
    cv2.destroyAllWindows()                     # Close the preview window

  Basic face detection

Requires the cascade file (contains the pattern information to detect a face) from https://github.com/opencv/opencv/blob/master/data/haarcascades/haarcascade_frontalface_default.xml

import os

import cv2
import numpy as np

# Detect faces in the live camera image. Every detected face is saved as
# "face-<n>.png", the preview shows a yellow box around each face, and the
# last displayed frame is saved to "final-frame.png" when ESC is pressed.
yellow = (0, 255, 255)              # Yellow in BGR colors
n = 0                               # Number of faces seen

# Build the path from its parts: a "\h" inside a normal string literal is an
# invalid escape sequence, and a backslash separator only works on Windows.
cascade_file = os.path.join("imagestuff", "haarcascade_frontalface_default.xml")
if not os.path.isfile(cascade_file):
    # Fail early with a clear message instead of a cryptic error during detection
    raise FileNotFoundError(f"Cascade file not found: {cascade_file}")
cascade = cv2.CascadeClassifier(cascade_file)

capture = cv2.VideoCapture(0)                   # Camera number
capture.set(cv2.CAP_PROP_FRAME_WIDTH, 800)      # Request camera width
capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 600)     # Request camera height
last_frame = None                               # Most recent frame that was read successfully
try:
    while True:
        status, img = capture.read()            # Get success code and image from camera
        if not status:                          # No frame (camera missing/unplugged): img is None
            break
        last_frame = img
        # By default image is BGR (Blue Green Red) rather than RGB
        # Create gray scale image for face detection algorithm to use
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Detect any faces in the image and collect their coordinates
        faces = cascade.detectMultiScale(
            gray,               # Use the grayscale image
            scaleFactor=1.2,
            minNeighbors=5,
            minSize=(100, 100)  # Minimum pixel size to recognise as a "face"
        )
        # Save every face crop first, while the frame is still unmarked, so
        # the yellow outline is not baked into the saved face images.
        for (x, y, w, h) in faces:
            face = img[y:y+h, x:x+w]            # Extract the face portion of the image
            cv2.imwrite(f"face-{n}.png", face)  # Save the face image to disk
            n = n + 1
        # Then draw a rectangle around each face for the on-screen preview
        for (x, y, w, h) in faces:
            cv2.rectangle(img, (x, y), (x+w, y+h), yellow, 2)
        cv2.imshow('window label', img)         # Show on screen
        k = cv2.waitKey(100)                    # Wait 100 ms for keypress
        if k % 256 == 27:                       # If ESC key pressed
            break
finally:
    capture.release()                           # Free the camera for other programs
    cv2.destroyAllWindows()                     # Close the preview window
if last_frame is not None:                      # Nothing to save if the camera never delivered a frame
    cv2.imwrite("final-frame.png", last_frame)  # Save final frame as a PNG

Commonly used functions

Read an image file

# Returns a numpy array containing the pixel values. For colour images each pixel is
# an array of Blue, Green and Red values - OpenCV images are BGR by default, not RGB.
# NOTE(review): per the OpenCV docs, the default flags drop any alpha channel (pass
# cv2.IMREAD_UNCHANGED to keep it) and imread() returns None, rather than raising,
# when the file cannot be read - confirm and check the result before use.
img = cv2.imread("image.png")

Read a video file

# Open a video file as a frame source; frames are then fetched one at a time with video.read()
video = cv2.VideoCapture("video.mp4")

Read a video stream from camera

# Open a camera as a frame source; frames are then fetched one at a time with video.read()
video = cv2.VideoCapture(0) # parameter = camera number (0 is used throughout these recipes)

Read an image (frame) from the video file or camera

# status is the success code and image is the frame that was read.
# NOTE(review): when no frame is available status is expected to be False and
# image None - check status before using image.
status, image = video.read()

Get information about the image (for colour images)

# shape is (rows, columns[, channels]): rows give the height, columns the width
height, width = img.shape[:2]
# Grayscale arrays are 2-D and have no third entry, so report a single channel
# instead of failing with an IndexError
channels = img.shape[2] if img.ndim == 3 else 1

Get an individual pixel

# Index order is [row, column], i.e. y first and then x.
# For a colour image the result holds that pixel's channel values (BGR order by default).
pixel = img[y, x]

Resize an image

# The target size is given as (width, height); the resized image is returned as img2
img2 = cv2.resize(img, (800,600))

Overlay text onto an image

  • Parameters of the putText() function: image, string message, bottom-left coordinates of the text, font face, font scale, color, line thickness.
font = cv2.FONT_HERSHEY_SIMPLEX
# Arguments: image, text, bottom-left corner of the text, font face, font scale,
# color, line thickness. The font face must be passed explicitly; leaving it out
# shifts every later argument by one position (the color would be read as the scale).
cv2.putText(img, "Message", (x, y), font, 1, yellow, 2)

Display an image on screen

# First argument is the window label, second is the image to show.
# NOTE(review): the recipes in this file always follow imshow() with cv2.waitKey();
# the window is presumably only drawn/refreshed during that call - confirm.
cv2.imshow('window label',img)

Convert colour modes

# Image conversions - examples
# Each call returns the converted image and leaves img itself untouched here.
# NOTE(review): the input must really be in the "from" format of the chosen code
# (e.g. a 4-channel image for COLOR_RGBA2BGR) - confirm against the caller's data.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)    # From BGR to grayscale
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)      # From BGR to RGB
bgr = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)      # From RGBA to BGR

Convert PIL image object to CV2 numpy array

# np.array() turns the PIL image into a pixel array, then the channel order is
# swapped from RGB to the BGR order that OpenCV uses by default.
# NOTE(review): assumes pil_image is in "RGB" mode - confirm, or convert it first.
cv2_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

Convert CV2 numpy array to PIL image object

# Swap the channel order from BGR to RGB, then build the PIL image from that array.
# NOTE(review): needs `Image` in scope, presumably via `from PIL import Image` -
# no such import is shown in this file.
pil_image = Image.fromarray(cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB))

Paste one image onto another

h, w = img2.shape[:2]       # Height and width of img2 (works for grayscale and colour arrays)
x, y = 50, 50               # Coordinates to apply paste in img1
# Clip the pasted region to what fits inside img1, so an img2 that overhangs the
# right/bottom edge is cropped instead of raising a shape-mismatch error
h = max(0, min(h, img1.shape[0] - y))
w = max(0, min(w, img1.shape[1] - x))
img1[y:y+h, x:x+w] = img2[:h, :w]   # Will paste without regard to alpha transparency
cv2.imshow("Final product", img1)

Copyright © Paul Baumgarten.