Commit e6dcc86c authored by Radek Ošlejšek
Browse files

Merge branch '395-multi-gpu' into 'master'

Adds multi-GPU support for parallel batch registration

Closes #395

See merge request grp-fidentis/analyst2!454
parents 067cceb8 c8318407
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
package cz.fidentis.analyst.engines.face;

import com.jogamp.opencl.CLContext;
import com.jogamp.opencl.CLCommandQueue;
import cz.fidentis.analyst.data.face.HumanFace;
import cz.fidentis.analyst.data.landmarks.Landmark;
import cz.fidentis.analyst.data.shapes.Plane;
@@ -22,8 +22,8 @@ public class FaceRegistrationServicesOpenCL implements CLResources {

    private final IcpServicesOpenCL icpServicesOpenCL;

    public FaceRegistrationServicesOpenCL(CLContext clContext) {
        this.icpServicesOpenCL = new IcpServicesOpenCL(clContext);
    public FaceRegistrationServicesOpenCL(CLCommandQueue queue) {
        this.icpServicesOpenCL = new IcpServicesOpenCL(queue);
    }

    /**
+9 −5
Original line number Diff line number Diff line
package cz.fidentis.analyst.engines.face;

import com.jogamp.opencl.CLCommandQueue;
import com.jogamp.opencl.CLContext;
import cz.fidentis.analyst.opencl.memory.CLResourcePool;
import cz.fidentis.analyst.opencl.execution.CLResourcePool;

import java.util.List;

/**
 * Manages a pool of reusable instances of {@link FaceRegistrationServicesOpenCL}
@@ -9,12 +12,13 @@ import cz.fidentis.analyst.opencl.memory.CLResourcePool;
 * @author Marek Horský
 */
public class FaceRegistrationServicesOpenCLPool extends CLResourcePool<FaceRegistrationServicesOpenCL> {
    public FaceRegistrationServicesOpenCLPool(CLContext context) {
        super(context, 4);

    public FaceRegistrationServicesOpenCLPool(List<CLContext> contexts) {
        super(contexts);
    }

    @Override
    protected FaceRegistrationServicesOpenCL createResource(CLContext clContext) {
        return new FaceRegistrationServicesOpenCL(clContext);
    protected FaceRegistrationServicesOpenCL createResource(CLCommandQueue queue) {
        return new FaceRegistrationServicesOpenCL(queue);
    }
}
+4 −4
Original line number Diff line number Diff line
package cz.fidentis.analyst.engines.face.batch.registration;

import com.jogamp.opencl.CLContext;
import com.jogamp.opencl.CLCommandQueue;
import cz.fidentis.analyst.data.face.HumanFace;
import cz.fidentis.analyst.data.kdtree.KdTreeVisitor;
import cz.fidentis.analyst.data.kdtree.LeftBalancedKdTreeVisitor;
@@ -53,10 +53,10 @@ public class AverageFaceServices {
     * @param superimposedFace A face newly registered to the {@code initFace}. Its geometry is used to update the average face geometry
     * @param avgFaceVisitor   Average face visitor. If {@code null}, then new visitor is created.
     * @param config           Configuration object
     * @param clContext        OpenCL context
     * @param clCommandQueue   OpenCL command queue
     * @return Either newly created visitor or the {@code avgFaceVisitor}
     */
    public static AvgMeshVisitor computeAvgFaceNNGPU(HumanFace initFace, HumanFace superimposedFace, AvgMeshVisitor avgFaceVisitor, BatchFaceRegistrationConfig config, CLContext clContext) {
    public static AvgMeshVisitor computeAvgFaceNNGPU(HumanFace initFace, HumanFace superimposedFace, AvgMeshVisitor avgFaceVisitor, BatchFaceRegistrationConfig config, CLCommandQueue clCommandQueue) {
        // If the face was moved by registration, then the spatial ordering structure was removed => create it
        // If no transformation was made, then you can use old structure, if exists
        FaceStateServices.updateLeftBalancedKdTree(
@@ -65,7 +65,7 @@ public class AverageFaceServices {
        );

        AvgMeshVisitor ret = (avgFaceVisitor == null)
                ? (new AvgMeshConfig(initFace.getMeshModel(), null)).getNearestNeighborsGpuVisitor(clContext)
                ? (new AvgMeshConfig(initFace.getMeshModel(), null)).getNearestNeighborsGpuVisitor(clCommandQueue)
                : avgFaceVisitor;

        ((LeftBalancedKdTreeVisitor) ret).visitKdTree(superimposedFace.getLeftBalancedKdTree()); // replace with accept after resolving conflict
+18 −10
Original line number Diff line number Diff line
package cz.fidentis.analyst.engines.face.batch.registration.impl;

import com.jogamp.opencl.CLCommandQueue;
import com.jogamp.opencl.CLContext;
import cz.fidentis.analyst.data.face.HumanFace;
import cz.fidentis.analyst.data.kdtree.KdTreeVisitor;
@@ -18,7 +19,7 @@ import cz.fidentis.analyst.engines.face.batch.registration.BatchFaceRegistration
import cz.fidentis.analyst.engines.face.batch.registration.BatchFaceRegistrationServices;
import cz.fidentis.analyst.engines.icp.IcpConfig;
import cz.fidentis.analyst.engines.sampling.PointSamplingConfig;
import cz.fidentis.analyst.opencl.memory.CLResourcePool;
import cz.fidentis.analyst.opencl.execution.CLResourcePool;
import cz.fidentis.analyst.opencl.memory.CLResources;

import java.util.concurrent.BlockingQueue;
@@ -35,7 +36,7 @@ import static cz.fidentis.analyst.engines.face.batch.registration.BatchFaceRegis
public class AsyncBatchFaceRegistrationImpl implements AsyncBatchFaceRegistration {
    private final AsynchronousWorker<HumanFace, HumanFace> faceRegistrationWorker = new AsynchronousRegistrationWorker();
    private final AsynchronousWorker<HumanFace, HumanFace> kdTreeUpdateWorker = new AsynchronousFaceStructureUpdateWorker();
    private final CLContext clContext;
    private final CLCommandQueue clCommandQueue;
    private final CLResourcePool<FaceRegistrationServicesOpenCL> pool;
    private final int pollTimeLimit;
    private final MemoryLimiter memoryLimiter;
@@ -43,6 +44,7 @@ public class AsyncBatchFaceRegistrationImpl implements AsyncBatchFaceRegistratio
    private BatchFaceRegistrationConfig config;
    private IcpConfig icpConfig;
    private AvgMeshVisitor avgFaceVisitor;
    private BatchFaceRegistrationServices.AverageFaceStrategy lastAvgFaceStrategy;

    public AsyncBatchFaceRegistrationImpl(CLContext clContext,
                                          FaceRegistrationServicesOpenCLPool pool,
@@ -50,7 +52,7 @@ public class AsyncBatchFaceRegistrationImpl implements AsyncBatchFaceRegistratio
                                          int pollTimeLimit,
                                          int faceRegistrationsThreadCount,
                                          int updateFaceStructuresThreadCount) {
        this.clContext = clContext;
        this.clCommandQueue = clContext.getMaxFlopsDevice().createCommandQueue();
        this.pool = pool;
        this.faceRegistrationWorker.setThreadCount(faceRegistrationsThreadCount);
        this.kdTreeUpdateWorker.setThreadCount(updateFaceStructuresThreadCount);
@@ -110,9 +112,19 @@ public class AsyncBatchFaceRegistrationImpl implements AsyncBatchFaceRegistratio
    public void release() {
        // Release both asynchronous workers first, then the OpenCL-backed state held by this instance.
        faceRegistrationWorker.release();
        kdTreeUpdateWorker.release();

        // A pattern instanceof is already false for null, so no separate null check is needed.
        if (avgFaceVisitor instanceof CLResources clResources) {
            clResources.release();
        }
        // Drop the reference after releasing it, the same way computeAvgMeshModel() does,
        // so that a released visitor can neither be reused nor released a second time.
        avgFaceVisitor = null;

        // The command queue is released last, after the visitor that was created with it.
        clCommandQueue.release();
    }

    private MeshModel computeAvgMeshModel(MemoryLimiter memoryLimiter) {
        if (lastAvgFaceStrategy != config.avgFaceStrategy()) {
            if (avgFaceVisitor != null && avgFaceVisitor instanceof CLResources clResources) {
                clResources.release();
            }
            avgFaceVisitor = null;
        }
        try {
            while (kdTreeUpdateWorker.isInProgress().get() || !kdTreeUpdateWorker.getOutputQueue().isEmpty()) {
                HumanFace face = kdTreeUpdateWorker.getOutputQueue().poll(pollTimeLimit, TimeUnit.MILLISECONDS);
@@ -128,7 +140,7 @@ public class AsyncBatchFaceRegistrationImpl implements AsyncBatchFaceRegistratio
                        }
                        case NEAREST_NEIGHBOURS_GPU -> {
                            AvgMeshVisitor ret = (avgFaceVisitor == null)
                                    ? (new AvgMeshConfig(templateFace.getMeshModel(), null)).getNearestNeighborsGpuVisitor(clContext)
                                    ? (new AvgMeshConfig(templateFace.getMeshModel(), null)).getNearestNeighborsGpuVisitor(clCommandQueue)
                                    : avgFaceVisitor;
                            face.getLeftBalancedKdTree().accept((LeftBalancedKdTreeVisitor) ret);
                            yield ret;
@@ -149,18 +161,14 @@ public class AsyncBatchFaceRegistrationImpl implements AsyncBatchFaceRegistratio
                            yield ret;
                        }
                    };
                    lastAvgFaceStrategy = config.avgFaceStrategy();
                    memoryLimiter.releaseFaceMemory(face);
                }
            }
        } catch (InterruptedException ignored) {
        }

        MeshModel model = (avgFaceVisitor == null) ? templateFace.getMeshModel() : avgFaceVisitor.getAveragedMeshModel();
        if (avgFaceVisitor instanceof CLResources clResources) {
            clResources.release();
        }
        avgFaceVisitor = null;
        return model;
        return (avgFaceVisitor == null) ? templateFace.getMeshModel() : avgFaceVisitor.getAveragedMeshModel();
    }

    /**
+7 −2
Original line number Diff line number Diff line
package cz.fidentis.analyst.engines.face.batch.registration.impl;

import com.jogamp.opencl.CLCommandQueue;
import com.jogamp.opencl.CLContext;
import cz.fidentis.analyst.data.face.HumanFace;
import cz.fidentis.analyst.data.mesh.MeshModel;
@@ -27,6 +28,7 @@ public class BatchFaceRegistrationImpl implements BatchFaceRegistration {
    private final IcpConfig icpConfig;
    private final HumanFace templateFace;
    private final CLContext clContext;
    private final CLCommandQueue clCommandQueue;

    private FaceRegistrationServicesOpenCL faceRegistrationServicesOpenCL;
    private AvgMeshVisitor avgFaceVisitor = null;
@@ -44,6 +46,9 @@ public class BatchFaceRegistrationImpl implements BatchFaceRegistration {
        this.templateFace = Objects.requireNonNull(templateFace);
        this.config = config;
        this.clContext = OpenCLServices.createContext();
        this.clCommandQueue = clContext != null
                ? clContext.getMaxFlopsDevice().createCommandQueue()
                : null;

        switch (config.regStrategy()) {
            case ICP -> {
@@ -59,7 +64,7 @@ public class BatchFaceRegistrationImpl implements BatchFaceRegistration {
                        config.icpAutoCropSince());
            }
            case ICP_GPU -> {
                this.faceRegistrationServicesOpenCL = new FaceRegistrationServicesOpenCL(clContext);
                this.faceRegistrationServicesOpenCL = new FaceRegistrationServicesOpenCL(clCommandQueue);
                PointSamplingConfig sampling = (config.icpSubsampling() == 0)
                        ? new PointSamplingConfig(PointSamplingConfig.Method.NO_SAMPLING, config.icpSubsampling())
                        : new PointSamplingConfig(PointSamplingConfig.Method.RANDOM, config.icpSubsampling());
@@ -90,7 +95,7 @@ public class BatchFaceRegistrationImpl implements BatchFaceRegistration {
        avgFaceVisitor = switch (config.avgFaceStrategy()) {
            case NONE -> null;
            case NEAREST_NEIGHBOURS -> AverageFaceServices.computeAvgFaceNN(templateFace, face, avgFaceVisitor, config);
            case NEAREST_NEIGHBOURS_GPU -> AverageFaceServices.computeAvgFaceNNGPU(templateFace, face, avgFaceVisitor, config, clContext);
            case NEAREST_NEIGHBOURS_GPU -> AverageFaceServices.computeAvgFaceNNGPU(templateFace, face, avgFaceVisitor, config, clCommandQueue);
            case PROJECTION_CPU -> AverageFaceServices.computeAvgFaceRT(templateFace, face, avgFaceVisitor, config);
            case PROJECTION_GPU -> AverageFaceServices.computeAvgFaceRTGPU(templateFace, face, avgFaceVisitor, config);
        };
Loading