Ros2 for unity help

hi ,
its me again . ive seen that you guys have worked with unity and on a ros2 package awhile ago.(RDP093: Using Unity with ROS with Sarah Gibson - YouTube)
if there is anyone withing the company that could give me some help with reading and minupilating my geometry_msg/Point(). even if its possible for me to be able to do a discord call with someone to help me with this it would be great. cause this is difficult to exaplin over message. im having difficulty still being able to manipulate the x,y,z points within the Point msg that i am publishing.
many thanks,

This version is able to display the video feed with tracked objects on multiple cameras

the spatial coordinates of the tracked object are then sent to Unity using Ros2, from there

a model of a person can be placed into the model in Unity

import rclpy
from rclpy.node import Node
from std_msgs.msg import String
from geometry_msgs.msg import Point
from pathlib import Path
import cv2
import sys
import depthai as dai
import contextlib
import blobconverter
import time
import json
import argparse
import numpy as np
from time import monotonic
from import VideoStream
from depthai_sdk import PipelineManager, NNetManager, PreviewManager, Previews, FPSHandler

Label Map for the objects in the network

labelMap = [“background”, “aeroplane”, “bicycle”, “bird”, “boat”, “bottle”, “bus”, “car”, “cat”, “chair”, “cow”,
“diningtable”, “dog”, “horse”, “motorbike”, “person”, “pottedplant”, “sheep”, “sofa”, “train”, “tvmonitor”]

Path to the network

nnPathDefault = str((Path(file).parent / Path(‘models/mobilenet-ssd_openvino_2021.4_5shave.blob’)).resolve().absolute())

parser = argparse.ArgumentParser()
parser.add_argument(‘nnPath’, nargs=’?’,
help=“Path to mobilenet detection network blob”, default=nnPathDefault)
parser.add_argument(’-ff’, ‘–full_frame’, action=“store_true”,
help=“Perform tracking on full RGB frame”, default=False)
args = parser.parse_args()

fullFrameTracking = args.full_frame
syncNN = True

def getPipeline():
# Start defining a pipeline
pipeline = dai.Pipeline()

# Define a source - color camera
cam_rgb = pipeline.create(dai.node.ColorCamera)
# Define Network
spatialDetectionNetwork = pipeline.create(dai.node.MobileNetSpatialDetectionNetwork)
# Define Mono cameras
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
# Define Object Tracker
objectTracker = pipeline.create(dai.node.ObjectTracker)

# Create output
xout_rgb = pipeline.create(dai.node.XLinkOut)
xout_nn = pipeline.create(dai.node.XLinkOut)
xoutBoundingBoxDepthMapping = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)
# Create output for Object Tracker
trackerOut = pipeline.create(dai.node.XLinkOut)

# Set the names for the output stream
# Set stream name for Object Tracker

# Properties
cam_rgb.setPreviewSize(300, 300)

# Properties for the left and right mono cameras

# Setting Node Configs

stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())

spatialDetectionNetwork.setConfidenceThreshold(0.6) # Confidence level for a detection, less that 60% sure ignore, can be changed if desired
spatialDetectionNetwork.setDepthLowerThreshold(100)# Set min distance
spatialDetectionNetwork.setDepthUpperThreshold(30000)# Set the max distance

objectTracker.setDetectionLabelsToTrack([15])  # track only person can add more tracking class options for the network e.g for bottle and person ([5,15])
# take the smallest ID when new object is tracked, possible options: SMALLEST_ID, UNIQUE_ID

# Linking

if fullFrameTracking:
    # do not block the pipeline if it's too slow on full frame

return pipeline    

class TalkerNode(Node):

def __init__(self):
    self.publisher = self.create_publisher(
    timer_period = 0.5
    self.timer = self.create_timer(

def timer_callback(self): # Outputs the final x, y, z coordinates of the tracked object, need to try output everytime coords change
    p = getPipeline() # Function for setting the cameras pipeline

    # Iterate through the connected devices and display the output video 
    # and send the tracked objects coordinates to unity
    with contextlib.ExitStack() as stack: 
        device_infos = dai.Device.getAllAvailableDevices()
        # Check for number of avaliable devices connected
        if len(device_infos) == 0:
            raise RuntimeError("No devices found!")
            print("Found", len(device_infos), "devices") # Display device name
        devices = {}

        for device_info in device_infos:
            # Note: the pipeline isn't set here, as we don't know yet what device it is.
            # The extra arguments passed are required by the existing overload variants
            openvino_version = dai.OpenVINO.Version.VERSION_2021_4
            usb2_mode = False
            device = stack.enter_context(dai.Device(openvino_version, device_info, usb2_mode))

            # Note: currently on POE, DeviceInfo.getMxId() and Device.getMxId() are different!
            print("=== Connected to " + device_info.getMxId()) # Display what device has been connected to
            mxid = device.getMxId()

            # Get a customized pipeline based on identified device type
            pipeline = p

            # Output queue will be used to get the rgb frames from the output defined above
            devices[mxid] = {
                'rgb': device.getOutputQueue(name="rgb"),
                'nn': device.getOutputQueue(name="detections"),
                'boundingBoxDepthMapping' : device.getOutputQueue(name="boundingBoxDepthMapping"),
                'depth' : device.getOutputQueue(name="depth"),
                'tracklets' : device.getOutputQueue(name="tracklets"),
            startTime = time.monotonic()
            counter = 0
            frame = 0
            fps = 0
            coordinates = "ooo"
            location = Point()
            location.x = 1.0
            location.y = 0.0
            location.z = 0.0
            while True:
                # Iterate through all connected devices
                for mxid, q in devices.items():
                        frame = q['rgb'].get().getCvFrame()
                        tracklets = q['tracklets'].get()

                        current_time = time.monotonic()
                        # Counter to find the FPS of the output video
                        if (current_time - startTime) > 1 : 
                            fps = counter / (current_time - startTime)
                            counter = 0
                            startTime = current_time

                        trackletsData = tracklets.tracklets
                        # Hold the values of the spatial coordinates and the Object ID number
                        # x = 0
                        # y = 0
                        # z = 0
                        ID_num = 0
                        # Iterate over the detected object data and extract desired info
                        for detection in trackletsData:
                            # Denormalize Bounding Box
                            roi = detection.roi.denormalize(frame.shape[1], frame.shape[0])
                            x1 = int(roi.topLeft().x)
                            y1 = int(roi.topLeft().y)
                            x2 = int(roi.bottomRight().x)
                            y2 = int(roi.bottomRight().y)
                            # Get the label ID  
                                label = labelMap[detection.label]
                                label = detection.label

                            cv2.putText(frame, str(label), (x1+10, y1+20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, 255)
                            #cv2.putText(frame, "{:.2f}".format(detection.confidence*100), (x1+10, y1+35), cv2.FONT_HERSHEY_SIMPLEX, 0.5, 255)
                            cv2.putText(frame, f"ID: {[]}", (x1 + 10, y1 + 35), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (140, 250, 15))
                            # Extract the spatial coords of the tracked object
                            cv2.putText(frame, f"X: {int(detection.spatialCoordinates.x)}mm", (x1+10, y1+50), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (48,213,200))
                            cv2.putText(frame, f"Y: {int(detection.spatialCoordinates.y)}mm", (x1+10, y1+65), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (48,213,200))
                            cv2.putText(frame, f"Z: {int(detection.spatialCoordinates.z)}mm", (x1+10, y1+80), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (48,213,200))
                            # Put Bounding Box around the tracked object
                            cv2.rectangle(frame, (x1,y1),(x2,y2),(255,0,0), cv2.FONT_HERSHEY_SIMPLEX)
                            # Save the spatial coordinates of the bounding box of the detected object (X,Y,Z)
                            x = float(detection.spatialCoordinates.x)
                            y = float(detection.spatialCoordinates.y)
                            z = float(detection.spatialCoordinates.z)
                            location.x = x
                            location.y = y
                            location.z = z
                            #location =  location.x  + location.y +  location.z #+ "\n"
                            # Save the ID number for the detected object
                            ID_num =
                        # Show the frame and FPS
                        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0)) #Display FPS when there is an object detected
                        cv2.imshow(f"rgb - {mxid}", frame)

                if cv2.waitKey(1) == ord('q'): # Press q key to stop

                msg = location
       = location
                #self.get_logger().info('Publishing: "%s"' % msg)

def main(args = None):

talker_node = TalkerNode()



using System;
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

namespace ROS2


/// An example class provided for testing of basic ROS2 communication

public class My_Listener : MonoBehaviour

private ROS2UnityComponent ros2Unity;
private ROS2Node ros2Node;
private ISubscription<geometry_msgs.msg.Point> point_sub;

void Start()
    ros2Unity = GetComponent<ROS2UnityComponent>();

void Update()
    if (ros2Node == null && ros2Unity.Ok())
        ros2Node = ros2Unity.CreateNode("ROS2UnityListenerNode");

        point_sub = ros2Node.CreateSubscription<geometry_msgs.msg.Point>("talker", msg => Debug.Log("Unity listener heard: [X =" + msg.X + ", Y = " + msg.Y + ", Z = " + msg.Z  + "]"));
        point_sub = ros2Node.CreateSubscription<geometry_msgs.msg.Point>(msg.X);



} // namespace ROS2

// pos_x = msg.X;
// pos_y = msg.Y;
// pos_z = msg.Z;

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using ROS2;

public class Spawner_Script : My_Listener
[SerializeField] private GameObject ethan;
bool isDone = false;
// Start is called before the first frame update
private void Start()


// Update is called once per frame
void Update()
        //transform.position = new Vector3(ethan,transform.position,Quaternion.identity);

        isDone = true;


so the first code is my code for camera vision person detection in a ros2 package. it is publishing the identified positions of the people and publishes them to the unity subscriber.
the next two pueces of code is the subscriber/lisener in unity and then the third code is the script where i am able to spawn a gameobject of a person into that virtual position from the subscriber.

what i would like to know i HOW DO I ACCESS THE MSG.X,Y,Z from the point msg. as i keep on getting the error that msg does not exist in this context. so any help would be great thanks

I’m sorry we do not use or support Unity with ros2 at the moment.

This topic was automatically closed 3 days after the last reply. New replies are no longer allowed.