
computer vision examples

Andrey Koryagin 6 months ago
parent
commit
64f923dadc

+ 53 - 0
face_blur/face-blur.py

@@ -0,0 +1,53 @@
+from ultralytics import YOLO
+from ultralytics.utils.plotting import Annotator, colors
+import cv2
+import numpy as np
+
+# Load model
+model = YOLO('yolov8m-face.pt')
+names = model.names
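+# Kernel size for cv2.blur below; larger values blur detected faces more strongly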
+blur_ratio = 50
+
+# Open the default camera
+cap = cv2.VideoCapture(0)
+
+if not cap.isOpened():
+    raise Exception("Error: Could not open video.")
+
+while cap.isOpened():
+    # Read a frame from the camera
+    success, frame = cap.read()
+
+    if success:
+        # Run YOLOv8 face detection on the frame
+        results = model.predict(frame, iou=0.65, conf=0.40, verbose=False)
+
+        # Process results list
+        for result in results:
+            boxes = result.boxes.xyxy.cpu().tolist()
+            clss = result.boxes.cls.cpu().tolist()
+            annotator = Annotator(frame, line_width=2, example=names)
+
+            if boxes is not None:
+                for box, cls in zip(boxes, clss):
+                    annotator.box_label(box, color=colors(int(cls), True), label=names[int(cls)])
+
+                    obj = frame[int(box[1]):int(box[3]), int(box[0]):int(box[2])]
+                    blur_obj = cv2.blur(obj, (blur_ratio, blur_ratio))
+
+                    frame[int(box[1]):int(box[3]), int(box[0]):int(box[2])] = blur_obj
+
+
+        cv2.imshow("YOLOv8 Face blur", frame)
+
+        # Check for the 'q' key to exit
+        if cv2.waitKey(10) & 0xFF == ord('q'):
+            break
+    else:
+        break
+
+# Release the video capture
+cap.release()
+
+# Close all OpenCV windows
+cv2.destroyAllWindows()

+ 12 - 0
face_detect/face.py

@@ -0,0 +1,12 @@
+from ultralytics import YOLO
+
+# Load model
+model = YOLO('yolov8m-face.pt')
+#model = YOLO('path/to/best.pt')  # Load a custom trained model
+
+# Run face detection on the default camera stream
+results = model.predict(source=0, show=True)
+#results = model.track(source="https://www.youtube.com/watch?v=6n5d1C1Alh4", show=True)  # Tracking with default tracker
+#results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml")  # Tracking with ByteTrack tracker
+
+

+ 51 - 0
geometry/circle.py

@@ -0,0 +1,51 @@
+import cv2
+import numpy as np
+
+# define a video capture object
+vid = cv2.VideoCapture(0)
+
+while True:
+
+    # Capture the video frame by frame
+    ret, frame = vid.read()
+    if not ret:
+        break
+
+    # Convert to grayscale.
+    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+    # Blur using 3 * 3 kernel.
+    gray_blurred = cv2.blur(gray, (3, 3))
+
+    # Apply Hough transform on the blurred image.
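+    # param1 is the upper Canny threshold, param2 the accumulator threshold;
+    # minDist=1000 effectively keeps a single circle per region of the frame.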
+    detected_circles = cv2.HoughCircles(gray_blurred, cv2.HOUGH_GRADIENT, 1, 1000,
+                                        param1=60, param2=30,
+                                        minRadius=100, maxRadius=200)
+
+    # Draw circles that are detected.
+    if detected_circles is not None:
+
+        # Convert the circle parameters a, b and r to integers.
+        detected_circles = np.uint16(np.around(detected_circles))
+
+        for pt in detected_circles[0, :]:
+            a, b, r = pt[0], pt[1], pt[2]
+
+            # Draw the circumference of the circle.
+            cv2.circle(frame, (a, b), r, (0, 255, 0), 2)
+
+            # Draw a small circle (of radius 1) to show the center.
+            cv2.circle(frame, (a, b), 1, (0, 0, 255), 3)
+
+    # Display the resulting frame
+    cv2.imshow('frame', frame)
+
+    # Press 'q' to quit
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+# After the loop release the cap object
+vid.release()
+# Destroy all the windows
+cv2.destroyAllWindows()

+ 70 - 0
geometry/line1.py

@@ -0,0 +1,70 @@
+import cv2
+import numpy as np
+
+# define a video capture object
+vid = cv2.VideoCapture(0)
+
+while True:
+
+    # Capture the video frame by frame
+    ret, frame = vid.read()
+    if not ret:
+        break
+
+    # Convert to grayscale.
+    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+    # Apply edge detection method on the image
+    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
+
+    # This returns an array of r and theta values
+    lines = cv2.HoughLines(edges, 1, np.pi/180, 200)
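+    # rho resolution 1 px, theta resolution 1 degree, at least 200 accumulator votes per line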
+
+    # Convert each detected (r, theta) pair into two endpoints and draw the line
+
+    if lines is not None:
+        for r_theta in lines:
+            arr = np.array(r_theta[0], dtype=np.float64)
+            r, theta = arr
+            # Stores the value of cos(theta) in a
+            a = np.cos(theta)
+
+            # Stores the value of sin(theta) in b
+            b = np.sin(theta)
+
+            # x0 stores the value rcos(theta)
+            x0 = a*r
+
+            # y0 stores the value rsin(theta)
+            y0 = b*r
+
+            # x1 stores the rounded off value of (rcos(theta)-1000sin(theta))
+            x1 = int(x0 + 1000*(-b))
+
+            # y1 stores the rounded off value of (rsin(theta)+1000cos(theta))
+            y1 = int(y0 + 1000*(a))
+
+            # x2 stores the rounded off value of (rcos(theta)+1000sin(theta))
+            x2 = int(x0 - 1000*(-b))
+
+            # y2 stores the rounded off value of (rsin(theta)-1000cos(theta))
+            y2 = int(y0 - 1000*(a))
+
+            # cv2.line draws a line in img from the point(x1,y1) to (x2,y2).
+            # (0,0,255) denotes the colour of the line to be
+            # drawn. In this case, it is red.
+            cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
+
+    # Display the resulting frame
+    cv2.imshow('frame', frame)
+
+    # Press 'q' to quit
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+# After the loop release the cap object
+vid.release()
+# Destroy all the windows
+cv2.destroyAllWindows()

+ 43 - 0
geometry/line2.py

@@ -0,0 +1,43 @@
+import cv2
+import numpy as np
+
+# define a video capture object
+vid = cv2.VideoCapture(0)
+
+while True:
+
+    # Capture the video frame by frame
+    ret, frame = vid.read()
+    if not ret:
+        break
+
+    # Convert to grayscale.
+    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+    # Create a Line Segment Detector with standard refinement
+    lsd = cv2.createLineSegmentDetector(cv2.LSD_REFINE_STD)
+
+    lines = lsd.detect(gray)[0]  # index 0 of the returned tuple holds the detected lines
+
+    if lines is not None:
+        # Print the endpoints of each detected segment
+        for line in lines:
+            print(line)
+
+        # Draw the detected segments on the frame
+        frame = lsd.drawSegments(frame, lines)
+
+    # Display the resulting frame
+    cv2.imshow('frame', frame)
+
+    # Press 'q' to quit
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+# After the loop release the cap object
+vid.release()
+# Destroy all the windows
+cv2.destroyAllWindows()

+ 46 - 0
geometry/rectangle.py

@@ -0,0 +1,46 @@
+import cv2
+import numpy as np
+
+# define a video capture object
+vid = cv2.VideoCapture(0)
+
+while True:
+
+    # Capture the video frame by frame
+    ret, frame = vid.read()
+    if not ret:
+        break
+
+    # Convert to grayscale.
+    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+    gray_blurred = cv2.blur(gray, (3, 3))
+
+    ret, thresh = cv2.threshold(gray_blurred, 50, 255, cv2.THRESH_BINARY)
+    contours, hierarchy = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+
+    for cnt in contours:
+        # Approximate the contour; quadrilaterals have four vertices
+        approx = cv2.approxPolyDP(cnt, 0.1 * cv2.arcLength(cnt, True), True)
+        if len(approx) == 4:
+            x, y, w, h = cv2.boundingRect(approx)
+            # Keep only rectangles of a reasonable size
+            if 40 < w < 200:
+                rect = cv2.minAreaRect(approx)
+                box = cv2.boxPoints(rect)
+                box = np.intp(box)
+                frame = cv2.drawContours(frame, [box], 0, (0, 0, 255), 2)
+
+    # Display the resulting frame
+    cv2.imshow('frame', frame)
+    #cv2.imshow('frame', thresh)
+
+    # Press 'q' to quit
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        break
+
+# After the loop release the cap object
+vid.release()
+# Destroy all the windows
+cv2.destroyAllWindows()

+ 90 - 0
nn/nn.py

@@ -0,0 +1,90 @@
+import numpy as np
+
+class NeuralNetwork:
+    def __init__(self, input_layer_size=3, hidden_layer_size=4, output_layer_size=2):
+        self.input_layer_size = input_layer_size
+        self.hidden_layer_size = hidden_layer_size
+        self.output_layer_size = output_layer_size
+
+        self.weights_input_to_hidden = np.random.uniform(-0.5, 0.5, (self.hidden_layer_size, self.input_layer_size))
+        self.weights_hidden_to_output = np.random.uniform(-0.5, 0.5, (self.output_layer_size, self.hidden_layer_size))
+        self.bias_input_to_hidden = np.zeros((self.hidden_layer_size, 1))
+        self.bias_hidden_to_output = np.zeros((self.output_layer_size, 1))
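+        # Weight matrices are shaped (to_layer, from_layer); biases are column vectors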
+
+        self.epochs = 3000 # Default
+        self.learning_rate = 0.1 # Default
+
+        return
+    
+    def feedforward(self, data):
+        # Forward propagation (to hidden layer)
+        hidden_raw = self.bias_input_to_hidden + self.weights_input_to_hidden @ data
+        self.hidden = 1 / (1 + np.exp(-hidden_raw)) # sigmoid
+
+        # Forward propagation (to output layer)
+        output_raw = self.bias_hidden_to_output + self.weights_hidden_to_output @ self.hidden
+        output = 1 / (1 + np.exp(-output_raw))
+        return output
+    
+    def backprop(self, data, output, result):
+        # Backpropagation (output layer)
+        delta_output = output - result
+        self.weights_hidden_to_output += -self.learning_rate * delta_output @ np.transpose(self.hidden)
+        self.bias_hidden_to_output += -self.learning_rate * delta_output
+
+        # Backpropagation (hidden layer)
+        delta_hidden = np.transpose(self.weights_hidden_to_output) @ delta_output * (self.hidden * (1 - self.hidden))
+        self.weights_input_to_hidden += -self.learning_rate * delta_hidden @ np.transpose(data)
+        self.bias_input_to_hidden += -self.learning_rate * delta_hidden
+        return
+    
+    def get(self, data):
+        data = np.reshape(data, (-1, 1))
+        return self.feedforward(data)
+
+    def learning(self, dataset, results, epochs, learning_rate):
+        self.epochs = epochs
+        self.learning_rate = learning_rate
+        e_loss = 0
+        e_correct = 0
+
+        # Learning
+        for epoch in range(epochs):
+            print(f"Epoch {epoch}")
+
+            for data, result in zip(dataset, results):
+                data = np.reshape(data, (-1, 1))
+                result = np.reshape(result, (-1, 1))
+                
+                output = self.feedforward(data)
+
+                # Loss / Error calculation
+                e_loss += 1 / len(output) * np.sum((output - result) ** 2, axis=0)
+                e_correct += int(np.argmax(output) == np.argmax(result))
+
+                self.backprop(data, output, result)
+
+            # print some debug info between epochs
+            print(f"Loss: {round((e_loss[0] / len(dataset)) * 100, 3)}%")
+            print(f"Accuracy: {round((e_correct / len(dataset)) * 100, 3)}%")
+            e_loss = 0
+            e_correct = 0
+
+        return
+    
+    def save(self, filename):
+        np.savez(filename,
+                 weights_input_to_hidden=self.weights_input_to_hidden,
+                 weights_hidden_to_output=self.weights_hidden_to_output,
+                 bias_input_to_hidden=self.bias_input_to_hidden,
+                 bias_hidden_to_output=self.bias_hidden_to_output
+                 )
+        return
+
+    def load(self, filename):
+        with np.load(filename) as f:
+            self.weights_input_to_hidden = f['weights_input_to_hidden']
+            self.weights_hidden_to_output = f['weights_hidden_to_output']
+            self.bias_input_to_hidden = f['bias_input_to_hidden']
+            self.bias_hidden_to_output = f['bias_hidden_to_output']
+        return

BIN
nn/test.npz


+ 135 - 0
nn/test.py

@@ -0,0 +1,135 @@
+import numpy as np
+from nn import NeuralNetwork
+
+#  _
+# | |
+#  _
+# | |
+#  _
+
+#   1
+# 2   3
+#   4
+# 5   6
+#   7
+
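+# The test vectors below are noisy variants of the clean 0/1 training patterns.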
+nn = NeuralNetwork(input_layer_size=7, hidden_layer_size=16, output_layer_size=10)
+nn.load('test.npz')
+
+
+# Each tuple pairs a label with the seven segment activations for that digit.
+test_cases = [
+    ("1", [0.0, 0.0, 0.98, 0.0, 0.0, 0.99, 0.0]),
+    ("2", [0.89, 0.0, 0.92, 0.97, 0.87, 0.0, 0.87]),
+    ("3", [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0]),
+    ("4", [0.08, 0.97, 0.98, 0.97, 0.0, 0.89, 0.12]),
+    ("5", [0.82, 0.97, 0.09, 0.97, 0.0, 0.89, 0.92]),
+    ("6", [0.82, 0.97, 0.09, 0.97, 0.89, 0.89, 0.92]),
+    ("7", [0.82, 0.09, 0.92, 0.09, 0.08, 0.91, 0.07]),
+    ("8", [0.82, 0.97, 0.91, 0.97, 0.89, 0.89, 0.92]),
+    ("9", [0.82, 0.97, 0.91, 0.97, 0.08, 0.89, 0.92]),
+    ("0", [0.8, 1.0, 0.98, 0.0, 0.89, 0.89, 0.99]),
+    ("Nothing", [0.1, 0.12, 0.15, 0.05, 0.09, 0.098, 0.11]),
+    ("Absolutely nothing :)", [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
+]
+
+for label, test_data in test_cases:
+    output = nn.get(test_data)
+    print(f"Test: {label}")
+    print(output)
+
+    # Output index i corresponds to digit i + 1; the last index (9) stands for 0.
+    index = np.argmax(output)
+    predict = (index + 1) % 10
+    confidence = round(output[index][0] * 100, 2)
+    print(f'Predict: {predict}; Confidence: {confidence}%\r\n')

+ 44 - 0
nn/train.py

@@ -0,0 +1,44 @@
+import numpy as np
+from nn import NeuralNetwork
+
+#  _
+# | |
+#  _
+# | |
+#  _
+
+#   1
+# 2   3
+#   4
+# 5   6
+#   7
+
+
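+# Segment k in the sketch above maps to index k-1 of each input vector (1.0 = lit).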
+dataset = [
+    [0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0], #1
+    [1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0], #2
+    [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0], #3
+    [0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0], #4
+    [1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0], #5
+    [1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0], #6
+    [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0], #7
+    [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], #8
+    [1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0], #9
+    [1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0], #0
+]
+
+results = [
+    [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+    [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+    [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0],
+    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
+]
+
+nn = NeuralNetwork(input_layer_size=7, hidden_layer_size=16, output_layer_size=10)
+nn.learning(dataset, results, 500, 0.1)
+nn.save('test.npz')

+ 32 - 0
obb/obb.py

@@ -0,0 +1,32 @@
+import cv2
+from ultralytics import YOLO
+
+# Load a model
+model = YOLO('yolov8n-obb.pt')  # load an official model
+
+# Open the input video
+cap = cv2.VideoCapture('obb.mp4')
+
+if not cap.isOpened():
+    raise Exception("Error: Could not open video.")
+
+while cap.isOpened():
+    # Read a frame from the input video
+    success, frame = cap.read()
+
+    if success:
+        results = model.predict(source=frame, verbose=False)
+        for result in results:
+            # Each oriented box is returned as four (x, y) corner points
+            boxes = result.obb.xyxyxyxy.cpu().numpy().astype(int)
+            for box in boxes:
+                color = (0, 255, 0)
+                cv2.polylines(frame, [box], True, color, 1)
+
+        cv2.imshow("Obb example", frame)
+
+        key = cv2.waitKey(1)
+        if key & 0xff == ord('q'):
+            break
+    else:
+        # End of the input video
+        break
+
+cap.release()
+cv2.destroyAllWindows()

+ 12 - 0
pose/pose.py

@@ -0,0 +1,12 @@
+from ultralytics import YOLO
+
+# Load an official or custom model
+#model = YOLO('yolov8s.pt')  # Load an official Detect model
+#model = YOLO('yolov8s-seg.pt')  # Load an official Segment model
+model = YOLO('yolov8s-pose.pt')  # Load an official Pose model
+#model = YOLO('path/to/best.pt')  # Load a custom trained model
+
+# Run pose estimation on the default camera stream
+results = model.predict(source=0, show=True)
+#results = model.track(source="https://www.youtube.com/watch?v=6n5d1C1Alh4", show=True)  # Tracking with default tracker
+#results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml")  # Tracking with ByteTrack tracker

+ 14 - 0
predict/predict.py

@@ -0,0 +1,14 @@
+from ultralytics import YOLO
+
+# Load an official or custom model
+model = YOLO('yolov8m.pt')  # Load an official Detect model
+#model = YOLO('yolov8n-seg.pt')  # Load an official Segment model
+#model = YOLO('yolov8n-pose.pt')  # Load an official Pose model
+#model = YOLO('path/to/best.pt')  # Load a custom trained model
+
+# Run detection on the default camera stream
+results = model.predict(source=0, show=True)
+#results = model.track(source="https://www.youtube.com/watch?v=6n5d1C1Alh4", show=True)  # Tracking with default tracker
+#results = model.track(source="https://youtu.be/LNwODJXcvt4", show=True, tracker="bytetrack.yaml")  # Tracking with ByteTrack tracker
+
+

+ 61 - 0
read_text/read_text1.py

@@ -0,0 +1,61 @@
+# sudo apt install tesseract-ocr
+# pip install pytesseract
+
+from PIL import Image
+
+import pytesseract
+
+# If you don't have tesseract executable in your PATH, include the following:
+#pytesseract.pytesseract.tesseract_cmd = r'<full_path_to_your_tesseract_executable>'
+# Example tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract'
+
+# Simple image to string
+print('Test1:')
+print(pytesseract.image_to_string(Image.open('test1.png')))
+print('Test2:')
+print(pytesseract.image_to_string(Image.open('test2.jpg')))
+print('Test3:')
+print(pytesseract.image_to_string(Image.open('test3.jpg')))
+
+# The calls below are kept for reference only and are never reached
+exit()
+
+# In order to bypass the image conversions of pytesseract, just use relative or absolute image path
+# NOTE: In this case you should provide tesseract supported images or tesseract will return error
+print(pytesseract.image_to_string('test.png'))
+
+# List of available languages
+print(pytesseract.get_languages(config=''))
+
+# French text image to string
+print(pytesseract.image_to_string(Image.open('test-european.jpg'), lang='fra'))
+
+# Batch processing with a single file containing the list of multiple image file paths
+print(pytesseract.image_to_string('images.txt'))
+
+# Timeout/terminate the tesseract job after a period of time
+try:
+    print(pytesseract.image_to_string('test.jpg', timeout=2)) # Timeout after 2 seconds
+    print(pytesseract.image_to_string('test.jpg', timeout=0.5)) # Timeout after half a second
+except RuntimeError as timeout_error:
+    # Tesseract processing is terminated
+    pass
+
+# Get bounding box estimates
+print(pytesseract.image_to_boxes(Image.open('test.png')))
+
+# Get verbose data including boxes, confidences, line and page numbers
+print(pytesseract.image_to_data(Image.open('test.png')))
+
+# Get information about orientation and script detection
+print(pytesseract.image_to_osd(Image.open('test.png')))
+
+# Get a searchable PDF
+pdf = pytesseract.image_to_pdf_or_hocr('test.png', extension='pdf')
+with open('test.pdf', 'w+b') as f:
+    f.write(pdf) # pdf type is bytes by default
+
+# Get HOCR output
+hocr = pytesseract.image_to_pdf_or_hocr('test.png', extension='hocr')
+
+# Get ALTO XML output
+xml = pytesseract.image_to_alto_xml('test.png')

+ 11 - 0
read_text/read_text2.py

@@ -0,0 +1,11 @@
+#pip install easyocr
+
+import easyocr
+reader = easyocr.Reader(['ch_sim','en'], gpu=False) # this needs to run only once to load the model into memory
+result = reader.readtext('test1.png')
+print(result)
+result = reader.readtext('test2.jpg')
+print(result)
+result = reader.readtext('test3.jpg')
+print(result)
+
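+# Each entry of `result` is a (bounding_box, text, confidence) tuple, so the plain
+# text alone could be printed like this (a minimal sketch):
+#for bbox, text, confidence in result:
+#    print(text, confidence)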

BIN
read_text/test1.png


BIN
read_text/test2.jpg


BIN
read_text/test3.jpg


+ 81 - 0
seg/seg.py

@@ -0,0 +1,81 @@
+from ultralytics import YOLO
+import cv2
+import numpy as np
+
+# Load an official or custom model
+model = YOLO('yolov8s-seg.pt')  # Load an official Segment model
+
+# Open the input video
+cap = cv2.VideoCapture(0)
+
+if not cap.isOpened():
+    raise Exception("Error: Could not open video.")
+
+while cap.isOpened():
+    # Read a frame from the input video
+    success, frame = cap.read()
+
+    if success:
+        # Run YOLOv8 tracking on the frame, persisting tracks between frames
+        results = model.track(frame, iou=0.65, conf=0.40, persist=True, imgsz=640, verbose=False, tracker="botsort.yaml")
+
+        # Process results list
+        for result in results:
+            if result.boxes.id is not None:  # tracks exist only when IDs have been assigned
+                boxes = result.boxes.xyxy.cpu().numpy().astype(int)
+                masks = result.masks.data.cpu().numpy().astype(np.uint8)  # uint8 masks work with cv2.resize and findContours
+                ids = result.boxes.id.cpu().numpy().astype(int)
+                classes = result.boxes.cls.cpu().numpy()
+                class_names = result.names
+
+                for box, mask, id, class_id in zip(boxes, masks, ids, classes):
+                    color = (0, 0, 255)
+
+                    # Mask
+                    color_mask = np.zeros_like(frame)
+                    mask = cv2.resize(mask, (frame.shape[1], frame.shape[0]), interpolation=cv2.INTER_NEAREST)
+                    color_mask[mask > 0] = color
+                    alpha = 0.3
+                    frame = cv2.addWeighted(frame, 1, color_mask, alpha, 0)
+
+                    # Mask border
+                    mask_contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+                    frame = cv2.drawContours(frame, mask_contours, -1, color, 2)
+
+                    # Text
+                    class_name = class_names[class_id]
+
+                    text = f"{class_name}"
+                    font = cv2.FONT_HERSHEY_SIMPLEX
+                    fontScale = 1.2
+                    fontColor = (0, 0, 255)
+                    thickness = 3
+                    textSize = cv2.getTextSize(text, font, fontScale, thickness)[0]
+                    textWidth, textHeight = textSize[0], textSize[1]
+
+                    centerX = (box[0]+box[2])//2 - textWidth // 2
+                    centerY = (box[1]+box[3])//2 + textHeight // 2
+
+                    frame = cv2.putText(
+                        frame,
+                        text,
+                        (centerX, centerY),
+                        font,
+                        fontScale,
+                        fontColor,
+                        thickness
+                    )
+
+        cv2.imshow("YOLOv8 Segmentation", frame)
+
+        # Check for the 'q' key to exit
+        if cv2.waitKey(10) & 0xFF == ord('q'):
+            break
+    else:
+        break
+
+# Release the video capture
+cap.release()
+
+# Close all OpenCV windows
+cv2.destroyAllWindows()