Sordie.co.uk

libsassy/libSassy.OpenCL.pas

Raw

{(
 )) libSassy.OpenCL
((    GPU based calculations
 ))
((  Copyright  Sordie Aranka Solomon-Smith 2015-2016
 ))
((  This work is made available under the terms of the Creative Commons
 )) Attribution-NonCommercial-ShareAlike 3.0 Unported license
((  http://creativecommons.org/licenses/by-nc-sa/3.0/
 )}

unit libSassy.OpenCL;

interface

uses
  libSassy.Interfaces,
  libSassy.Arrays,
  libSassy.Errors,
  libSassy.Streams,
  libSassy.Bitmaps,
  libSassy.Strings;

type
  TOpenCLID = Pointer;
  POpenCLID = ^TOpenCLID;

  TOpenCLPlatform = class;
  TOpenCLDevice   = class;
  TOpenCLProgram  = class;
  TOpenCLKernel   = class;
  TOpenCLBuffer   = class;
  TOpenCLBitmap   = class;

  TOpenCLDeviceType = (dtCPU, dtGPU, dtDefault, dtAccelerator, dtCustom);
  TOpenCLDeviceTypes = set of TOpenCLDeviceType;

  TOpenCLBufferMode = (bmReadWrite = 1, bmWriteOnly = 2, bmReadOnly = 4);

  EOpenCLError = class(TException);

{$REGION 'OpenCL'}
  OpenCL = class abstract
  private
    class var fPlatforms: TArray<TOpenCLPlatform>;

    class var fDefaultDevice: TOpenCLDevice;

    class function GetPlatformCount: Integer; static; inline;
    class function GetPlatform(Index: Integer): TOpenCLPlatform; static; inline;
  public
    class constructor Create;
    class destructor  Destroy;

    class function  RefreshPlatforms: Boolean;
    class procedure ClearPlatforms;

    class function FindDevice(const OfType: TOpenCLDeviceType): TOpenCLDevice;

    class function CreateProgram(const Source: String): TOpenCLProgram; overload; inline;
    class function CreateProgram(Stream: TStream):      TOpenCLProgram; overload;

    class property PlatformCount: Integer read GetPlatformCount;
    class property Platform[Index: Integer]: TOpenCLPlatform read GetPlatform;

    class property DefaultDevice: TOpenCLDevice read fDefaultDevice write fDefaultDevice;
  end;
{$ENDREGION}

{$REGION 'TOpenCLPlatform'}
  TOpenCLPlatform = class(TInterface)
  private
    fID: TOpenCLID;

    fDevices: TArray<TOpenCLDevice>;

    function GetDeviceCount: Integer; inline;
    function GetDevice(Index: Integer): TOpenCLDevice; inline;
  public
    constructor Create(const AID: TOpenCLID);
    destructor  Destroy; override;

    function RefreshDevices: Boolean;
    procedure ClearDevices;

    function GetInfoString (Index: Integer): String;
    function GetInfoInteger(Index: Integer): LongWord; inline;

    property ID: TOpenCLID read fID;

    property Profile:    String index $900 read GetInfoString;
    property Version:    String index $901 read GetInfoString;
    property Name:       String index $902 read GetInfoString;
    property Vendor:     String index $903 read GetInfoString;
    property Extensions: String index $904 read GetInfoString;

    property DeviceCount: Integer read GetDeviceCount;
    property Device[Index: Integer]: TOpenCLDevice read GetDevice;
  end;
{$ENDREGION}

{$REGION 'TOpenCLDevice'}
  TOpenCLDevice = class(TInterface)
  private
    fID:       TOpenCLID;
    fPlatform: TOpenCLPlatform;

    fDeviceType: TOpenCLDeviceTypes;
  public
    constructor Create(const AID: TOpenCLID; const APlatform: TOpenCLPlatform);
    destructor  Destroy; override;

    function CreateProgram(const Source: String): TOpenCLProgram;

    function GetInfoString (Index: Integer): String;
    function GetInfoInteger(Index: Integer): LongWord; inline;

    property ID: TOpenCLID read fID;
    property Platform: TOpenCLPlatform read fPlatform;

    property DeviceType: TOpenCLDeviceTypes read fDeviceType;

    property Name:       String index $102B read GetInfoString;
    property Vendor:     String index $102C read GetInfoString;
    property Driver:     String index $102D read GetInfoString;
    property Version:    String index $102F read GetInfoString;
    property Extensions: String index $1030 read GetInfoString;

    property Units:      LongWord index $1002 read GetInfoInteger;
    property ClockSpeed: LongWord index $100C read GetInfoInteger;
  end;
{$ENDREGION}

{$REGION 'TOpenCLProgram'}
  TOpenCLProgram = class(TInterface)
  private
    fID:     TOpenCLID;
    fDevice: TOpenCLDevice;

    fContext:      TOpenCLID;
    fCommandQueue: TOpenCLID;

    fSource: String;
  protected
    class procedure ContextNotify(ErrorInfo: PAnsiChar; PrivateInfo: Pointer; Size: LongWord; UserData: TOpenCLProgram); stdcall;
    class procedure ProgramNotify(Prog: TOpenCLID; UserData: TOpenCLProgram); stdcall;
  public
    constructor Create(const ADevice: TOpenCLDevice; const ASource: String);
    destructor  Destroy; override;

    function CreateKernel(const Name: String = 'main'): TOpenCLKernel; inline;
    function CreateBuffer(const Mode: TOpenCLBufferMode; const Size: Cardinal; Data: Pointer = nil): TOpenCLBuffer; inline;
    function CreateBitmap(const Mode: TOpenCLBufferMode; const Width, Height: Cardinal): TOpenCLBitmap; inline;

    function Flush:  Boolean; inline;
    function Finish: Boolean; inline;

    property ID:     TOpenCLID     read fID;
    property Device: TOpenCLDevice read fDevice;

    property Context:      TOpenCLID read fContext;
    property CommandQueue: TOpenCLID read fCommandQueue;

    property Source: String read fSource;
  end;
{$ENDREGION}

{$REGION 'TOpenCLKernel'}
  TOpenCLKernelArgType = (
    atChar,
    atUChar,
    atShort,
    atUShort,
    atInt,
    atUInt,
    atLong,
    atULong,
    atFloat,
    atData,
    atBuffer,
    atBitmap
  );

  TOpenCLKernelArg = record
    Size: Integer;
    case ArgType: TOpenCLKernelArgType of
      atChar:   (VChar:   Byte);
      atUChar:  (VUChar:  ShortInt);
      atShort:  (VShort:  SmallInt);
      atUShort: (VUShort: Word);
      atInt:    (VInt:    Integer);
      atUInt:   (VUInt:   Cardinal);
      atLong:   (VLong:   Int64);
      atULong:  (VULong:  UInt64);
      atFloat:  (VFloat:  Single);
      atData:   (VData:   Pointer);
      atBuffer: (VBuffer: TOpenCLBuffer);
      atBitmap: (VBitmap: TOpenCLBitmap);
  end;

  TOpenCLKernel = class(TInterface)
  private
    fID:      TOpenCLID;
    fProgram: TOpenCLProgram;
    fDevice:  TOpenCLDevice;

    fName: String;

    fArguments: array of TOpenCLKernelArg;

    function  GetArgumentCount: Integer;        inline;
    procedure SetArgumentCount(Value: Integer); inline;
  public
    constructor Create(const AProgram: TOpenCLProgram; const AName: String);
    destructor  Destroy; override;

    procedure UpdateArguments;

    procedure Execute(const Width: Integer; const Height: Integer = 0);

    procedure SetArgument(const Index: Integer; const Value: Int64; const UseArgType: TOpenCLKernelArgType = atInt); overload;
    procedure SetArgument(const Index: Integer; const Value: Extended);                                              overload;
    procedure SetArgument(const Index: Integer; const Value: Pointer; const DataSize: Integer);                      overload;
    procedure SetArgument(const Index: Integer; const Value: TOpenCLBuffer);                                         overload;
    procedure SetArgument(const Index: Integer; const Value: TOpenCLBitmap);                                         overload;

    function GetArgument      (const Index: Integer): Int64;
    function GetArgumentFloat (const Index: Integer): Extended;
    function GetArgumentData  (const Index: Integer): Pointer;
    function GetArgumentBuffer(const Index: Integer): TOpenCLBuffer;
    function GetArgumentBitmap(const Index: Integer): TOpenCLBitmap;

    property ArgumentCount: Integer read GetArgumentCount write SetArgumentCount;

    property ID:     TOpenCLID      read fID;
    property Prog:   TOpenCLProgram read fProgram;
    property Device: TOpenCLDevice  read fDevice;

    property Name: String read fName;
  end;
{$ENDREGION}

{$REGION 'TOpenCLBuffer'}
  TOpenCLBuffer = class(TInterface)
  private
    fID:      TOpenCLID;
    fProgram: TOpenCLProgram;

    fSize: Cardinal;
    fData: Pointer;

    fMode: TOpenCLBufferMode;

    fOwnData: Boolean;

    procedure SetSize(Value: Cardinal);
    procedure SetData(Value: Pointer);
  public
    constructor Create(const AProgram: TOpenCLProgram; const AMode: TOpenCLBufferMode; const ASize: Cardinal; AData: Pointer = nil);
    destructor  Destroy; override;

    procedure Allocate(const ASize: Cardinal);
    procedure Deallocate;

    procedure SyncToDevice;
    procedure SyncFromDevice;

    property ID:   TOpenCLID read fID;
    property Prog: TOpenCLProgram read fProgram;

    property Size: Cardinal read fSize write SetSize;
    property Data: Pointer  read fData write SetData;

    property Mode: TOpenCLBufferMode read fMode;

    property OwnData: Boolean read fOwnData;
  end;
{$ENDREGION}

{$REGION 'TOpenCLBitmap'}
  TOpenCLBitmap = class(TInterface)
  private
    fID:      TOpenCLID;
    fProgram: TOpenCLProgram;

    fWidth:  Cardinal;
    fHeight: Cardinal;

    fMode: TOpenCLBufferMode;

    procedure SetWidth (Value: Cardinal); inline;
    procedure SetHeight(Value: Cardinal); inline;
  public
    constructor Create(const AProgram: TOpenCLProgram; const AMode: TOpenCLBufferMode; const AWidth, AHeight: Cardinal);
    destructor  Destroy; override;

    procedure Resize(const AWidth, AHeight: Cardinal);

    procedure ReadFromDevice(Bitmap: TBitmap);
    procedure WriteToDevice (Bitmap: TBitmap);

    property ID:   TOpenCLID      read fID;
    property Prog: TOpenCLProgram read fProgram;

    property Width:  Cardinal read fWidth  write SetWidth;
    property Height: Cardinal read fHeight write SetHeight;

    property Mode: TOpenCLBufferMode read fMode;
  end;
{$ENDREGION}

{$REGION 'OpenCL API'}
type
  TContextNotify = procedure(ErrorInfo: PAnsiChar; PrivateInfo: Pointer; Size: LongWord; UserData: TOpenCLProgram); stdcall;
  TProgramNotify = procedure(Prog: TOpenCLID; UserData: TOpenCLProgram); stdcall;

  POpenCLImageFormat = ^TOpenCLImageFormat;
  TOpenCLImageFormat = packed record
    ChannelOrder: LongWord;
    ChannelType:  LongWord;
  end;

const
  OpenCLDLL = 'OpenCL.dll';

  FormatRGBA: TOpenCLImageFormat = (ChannelOrder:$10B5; ChannelType:$10DA);

function clGetPlatformIDs(NumEntries: LongWord; Platforms: Pointer; NumPlatforms: PLongWord): LongInt; stdcall; external OpenCLDLL;
function clGetPlatformInfo(PlatformID: TOpenCLID; ParamName: LongWord; ParamSize: LongWord; ParamValue: PAnsiChar; ParamRetSize: PLongWord): LongInt; stdcall; external OpenCLDLL;
function clGetDeviceIDs(PlatformID: TOpenCLID; DeviceType: uint64; NumEntries: LongWord; Devices: Pointer; NumDevices: PLongWord): LongInt; stdcall; external OpenCLDLL;
function clGetDeviceInfo(DeviceID: TOpenCLID; ParamName: LongWord; ParamSize: LongWord; ParamValue: Pointer; ParamRetSize: PLongWord): LongInt; stdcall; external OpenCLDLL;
function clCreateContext(Properties: Pointer; NumDevices: LongWord; Devices: Pointer; Notify: TContextNotify; UserData: Pointer; ErrorCode: PLongInt): TOpenCLID; stdcall; external OpenCLDLL;
function clReleaseContext(Context: TOpenCLID): LongInt; stdcall; external OpenCLDLL;
function clCreateCommandQueue(Context: TOpenCLID; Device: TOpenCLID; Properties: uint64; ErrorCode: PLongInt): TOpenCLID; stdcall; external OpenCLDLL;
function clReleaseCommandQueue(CommandQueue: TOpenCLID): LongInt; stdcall; external OpenCLDLL;
function clCreateProgramWithSource(Context: TOpenCLID; Count: LongWord; Strings: PPAnsiChar; Lengths: PLongWord; ErrorCode: PLongInt): TOpenCLID; stdcall; external OpenCLDLL;
function clReleaseProgram(Prog: TOpenCLID): LongInt; stdcall; external OpenCLDLL;
function clBuildProgram(Prog: TOpenCLID; NumDevices: LongWord; DeviceList: Pointer; Options: PAnsiChar; Notify: TProgramNotify; UserData: Pointer): LongInt; stdcall; external OpenCLDLL;
function clCreateKernel(Prog: TOpenCLID; KernelName: PAnsiChar; ErrorCode: PLongInt): TOpenCLID; stdcall; external OpenCLDLL;
function clReleaseKernel(Kernel: TOpenCLID): LongInt; stdcall; external OpenCLDLL;
function clSetKernelArg(Kernel: TOpenCLID; Index: LongWord; Size: LongWord; Value: Pointer): LongInt; stdcall; external OpenCLDLL;
function clGetKernelWorkGroupInfo(Kernel, Device: TOpenCLID; ParamName: LongWord; ParamSize: LongWord; ParamValue: Pointer; ParamRetSize: PLongWord): LongInt; stdcall; external OpenCLDLL;
function clEnqueueNDRangeKernel(CommandQueue, Kernel: TOpenCLID; WorkDim: LongWord; GlobalWorkOffset, GlobalWorkSize, LocalWorkSize: PLongWord; NumWaitEvents: LongWord; WaitEvents: Pointer; Event: Pointer): LongInt; stdcall; external OpenCLDLL;
function clFinish(CommandQueue: TOpenCLID): LongInt; stdcall; external OpenCLDLL;
function clFlush(CommandQueue: TOpenCLID): LongInt; stdcall; external OpenCLDLL;
function clGetProgramBuildInfo(Prog, Device: TOpenCLID; ParamName: LongWord; ParamSize: LongWord; ParamValue: Pointer; ParamRetSize: PLongword): LongInt; stdcall; external OpenCLDLL;
function clCreateBuffer(Context: TOpenCLID; Flags: UInt64; Size: LongWord; HostPtr: Pointer; ErrorCode: PLongInt): TOpenCLID; stdcall external OpenCLDLL;
function clReleaseMemObject(Mem: TOpenCLID): LongInt; stdcall; external OpenCLDLL;
function clEnqueueReadBuffer(CommandQueue, Buffer: TOpenCLID; BlockingRead, Offset, Size: LongWord; Data: Pointer; Events: LongWord; EventList, Event: Pointer): LongInt; stdcall; external OpenCLDLL;
function clEnqueueWriteBuffer(CommandQueue, Buffer: TOpenCLID; BlockingWrite, Offset, Size: LongWord; Data: Pointer; Events: LongWord; EventList, Event: Pointer): LongInt; stdcall; external OpenCLDLL;
function clCreateImage2D(Context: TOpenCLID; Flags: UInt64; Format: POpenCLImageFormat; Width, Height, Pitch: LongWord; HostPtr: Pointer; Error: PLongInt): TOpenCLID; stdcall; external OpenCLDLL;
function clEnqueueReadImage(CommandQueue, Image: TOpenCLID; BlockingRead: LongWord; Origin, Region: PLongWord; RowPitch, SlizePitch: LongWord; Data: Pointer; Events: LongWord; EventList, Event: Pointer): LongInt; stdcall; external OpenCLDLL;
function clEnqueueWriteImage(CommandQueue, Image: TOpenCLID; BlockingWrite: LongWord; Origin, Region: PLongWord; RowPitch, SlizePitch: LongWord; Data: Pointer; Events: LongWord; EventList, Event: Pointer): LongInt; stdcall; external OpenCLDLL;
{$ENDREGION}

implementation

{$REGION 'OpenCL'}
class function OpenCL.GetPlatformCount;
begin
  Result := fPlatforms.Count;
end;

class function OpenCL.GetPlatform;
begin
  Result := fPlatforms[Index];
end;

class constructor OpenCL.Create;
begin
  fPlatforms := TArray<TOpenCLPlatform>.Create;

  RefreshPlatforms;
end;

class destructor OpenCL.Destroy;
begin
  ClearPlatforms;
  fPlatforms.Free;
end;

class function OpenCL.RefreshPlatforms;
var
  n: LongWord;
  i: Integer;
  p: array of TOpenCLID;
begin
  ClearPlatforms;

  if clGetPlatformIDs(0, nil, @n) <> 0 then exit(False);

  SetLength(p, n);
  if clGetPlatformIDs(n, @p[0], nil) <> 0 then exit(False);

  for i := 0 to n - 1 do
    fPlatforms.Add(TOpenCLPlatform.Create(p[i]));

  Result := fPlatforms.Count > 0;

  if Result then
  begin
    fDefaultDevice := FindDevice(dtDefault);

    if fDefaultDevice = nil then fDefaultDevice := FindDevice(dtAccelerator);
    if fDefaultDevice = nil then fDefaultDevice := FindDevice(dtGPU);
    if fDefaultDevice = nil then fDefaultDevice := FindDevice(dtCPU);

    if (fDefaultDevice = nil) and (fPlatforms[0].fDevices.Count > 0) then
      fDefaultDevice := fPlatforms[0].fDevices[0];
  end;
end;

class procedure OpenCL.ClearPlatforms;
var
  p: TOpenCLPlatform;
begin
  for p in fPlatforms do
    p.Free;

  fPlatforms.Clear;

  fDefaultDevice := nil;
end;

class function OpenCL.FindDevice;
var
  p: TOpenCLPlatform;
  d: TOpenCLDevice;
begin
  for p in fPlatforms do
    for d in p.fDevices do
      if OfType in d.DeviceType then
        exit(d);

  Result := nil;
end;

class function OpenCL.CreateProgram(const Source: String): TOpenCLProgram;
begin
  if fDefaultDevice = nil then
    Result := nil
  else
    Result := fDefaultDevice.CreateProgram(Source);
end;

class function OpenCL.CreateProgram(Stream: TStream): TOpenCLProgram;
var
  S: String;
begin
  if fDefaultDevice = nil then
  begin
    if Stream.FreeAfterOp then
      Stream.Free;

    exit(nil);
  end;

  if not S.Load(Stream) then exit(nil);

  Result := CreateProgram(S);
end;
{$ENDREGION}

{$REGION 'TOpenCLPlatform'}
function TOpenCLPlatform.GetDeviceCount;
begin
  Result := fDevices.Count;
end;

function TOpenCLPlatform.GetDevice;
begin
  Result := fDevices[Index];
end;

constructor TOpenCLPlatform.Create;
begin
  inherited Create;

  fID := AID;

  fDevices := TArray<TOpenCLDevice>.Create;

  RefreshDevices;
end;

destructor TOpenCLPlatform.Destroy;
begin
  ClearDevices;
  fDevices.Free;

  inherited;
end;

function TOpenCLPlatform.RefreshDevices;
var
  n: LongWord;
  i: Integer;
  p: array of TOpenCLID;
begin
  ClearDevices;

  if clGetDeviceIDs(fID, $FFFFFFFFFFFFFFFF, 0, nil, @n) <> 0 then exit(False);

  SetLength(p, n);
  if clGetDeviceIDs(fID, $FFFFFFFFFFFFFFFF, n, @p[0], nil) <> 0 then exit(False);

  for i := 0 to n - 1 do
    fDevices.Add(TOpenCLDevice.Create(p[i], Self));

  Result := fDevices.Count > 0;
end;

procedure TOpenCLPlatform.ClearDevices;
var
  d: TOpenCLDevice;
begin
  for d in fDevices do
    d.Free;

  fDevices.Clear;
end;

function TOpenCLPlatform.GetInfoString;
var
  l: LongWord;
  s: AnsiString;
begin
  SetLength(s, 1024);
  if clGetPlatformInfo(fID, LongWord(Index), Length(s), @s[1], @l) <> 0 then exit('');
  Result := String(Copy(s, 1, l - 1));
end;

function TOpenCLPlatform.GetInfoInteger;
begin
  if clGetPlatformInfo(fID, LongWord(Index), sizeof(Result), @Result, nil) <> 0 then Result := 0;
end;
{$ENDREGION}

{$REGION 'TOpenCLDevice'}
constructor TOpenCLDevice.Create;
var
  i: uint64;
begin
  inherited Create;

  fID := AID;
  fPlatform := APlatform;

  fDeviceType := [];

  if clGetDeviceInfo(fID, $1000, sizeof(i), @i, nil) = 0 then
  begin
    if (i and 1) = 1 then fDeviceType := fDeviceType + [dtDefault];
    if (i and 2) = 2 then fDeviceType := fDeviceType + [dtCPU];
    if (i and 4) = 4 then fDeviceType := fDeviceType + [dtGPU];
    if (i and 8) = 8 then fDeviceType := fDeviceType + [dtAccelerator];
  end;
end;

destructor TOpenCLDevice.Destroy;
begin
  inherited;
end;

function TOpenCLDevice.CreateProgram;
begin
  Result := TOpenCLProgram.Create(Self, Source);
end;

function TOpenCLDevice.GetInfoString;
var
  l: LongWord;
  s: AnsiString;
begin
  SetLength(s, 1024);
  if clGetDeviceInfo(fID, LongWord(Index), Length(s), @s[1], @l) <> 0 then exit('');
  Result := String(Copy(s, 1, l - 1));
end;

function TOpenCLDevice.GetInfoInteger;
begin
  if clGetDeviceInfo(fID, LongWord(Index), sizeof(Result), @Result, nil) <> 0 then Result := 0;
end;
{$ENDREGION}

{$REGION 'TOpenCLProgram'}
class procedure TOpenCLProgram.ContextNotify;
begin
  //Writeln('ContextNotify');
end;

class procedure TOpenCLProgram.ProgramNotify;
begin
  //Writeln('ProgramNotify');
end;

constructor TOpenCLProgram.Create;
var
  Error:   LongInt;
  PSource: PAnsiChar;

  BuildErrorBuf: AnsiString;
  BuildErrorLen: LongWord;
begin
  inherited Create;

  fDevice := ADevice;
  fSource := ASource;

  fContext := clCreateContext(nil, 1, @fDevice.ID, nil{@TOpenCLProgram.ContextNotify}, nil{Self}, @Error);
  if fContext = nil then EOpenCLError.RaiseException('Failed to create context (' + String.Int(Error) + ')');

  fCommandQueue := clCreateCommandQueue(fContext, fDevice.ID, 0, @Error);
  if fCommandQueue = nil then EOpenCLError.RaiseException('Failed to create command queue (' + String.Int(Error) + ')');

  PSource := PAnsiChar(AnsiString(fSource));
  fID := clCreateProgramWithSource(fContext, 1, @PSource, nil, @Error);
  if fID = nil then EOpenCLError.RaiseException('Failed to create program (' + String.Int(Error) + ')');

  if clBuildProgram(fID, 0, nil, nil, nil{@TOpenCLProgram.ProgramNotify}, nil{Self}) <> 0 then
  begin
    SetLength(BuildErrorBuf, 2048);
    clGetProgramBuildInfo(fID, fDevice.ID, $1183, length(BuildErrorBuf), @BuildErrorBuf[1], @BuildErrorLen);
    SetLength(BuildErrorBuf, BuildErrorLen - 1);
    EOpenCLError.RaiseException('Failed to build program: ' + String(BuildErrorBuf));
  end;
end;

destructor TOpenCLProgram.Destroy;
begin
  if fCommandQueue <> nil then clReleaseCommandQueue(fCommandQueue);
  if fContext      <> nil then clReleaseContext(fContext);
  if fID           <> nil then clReleaseProgram(fID);

  inherited;
end;

function TOpenCLProgram.CreateKernel;
begin
  Result := TOpenCLKernel.Create(Self, Name);
end;

function TOpenCLProgram.CreateBuffer;
begin
  Result := TOpenCLBuffer.Create(Self, Mode, Size, Data);
end;

function TOpenCLProgram.CreateBitmap;
begin
  Result := TOpenCLBitmap.Create(Self, Mode, Width, Height);
end;

function TOpenCLProgram.Flush;
begin
  Result := clFlush(fCommandQueue) = 0;
end;

function TOpenCLProgram.Finish;
begin
  Result := clFinish(fCommandQueue) = 0;
end;
{$ENDREGION}

{$REGION 'TOpenCLKernel'}
function TOpenCLKernel.GetArgumentCount;
begin
  Result := Length(fArguments);
end;

procedure TOpenCLKernel.SetArgumentCount;
begin
  SetLength(fArguments, Value);
end;

constructor TOpenCLKernel.Create;
var
  Error: LongInt;
begin
  inherited Create;

  fProgram := AProgram;
  fDevice  := fProgram.fDevice;

  fName := AName;

  fID := clCreateKernel(fProgram.ID, PAnsiChar(AnsiString(fName)), @Error);
  if fID = nil then EOpenCLError.RaiseException('Failed to create kernel "' + fName + '" (' + String.Int(Error) + ')');
end;

destructor TOpenCLKernel.Destroy;
begin
  if fID <> nil then clReleaseKernel(fID);

  inherited;
end;

procedure TOpenCLKernel.UpdateArguments;
var
  i:       Integer;
  ArgSize: Integer;
  ArgPtr:  Pointer;
begin
  for i := 0 to high(fArguments) do
  begin
    ArgSize := fArguments[i].Size;

    with fArguments[i] do
      case ArgType of
        atChar:   ArgPtr := @VChar;
        atUChar:  ArgPtr := @VUChar;
        atShort:  ArgPtr := @VShort;
        atUShort: ArgPtr := @VUShort;
        atInt:    ArgPtr := @VInt;
        atUInt:   ArgPtr := @VUInt;
        atLong:   ArgPtr := @VLong;
        atULong:  ArgPtr := @VULong;
        atFloat:  ArgPtr := @VFloat;
        atData:   ArgPtr := @VData;
        atBuffer: ArgPtr := @VBuffer.fID;
        atBitmap: ArgPtr := @VBitmap.fID;
      else
        ArgSize := Size;
        ArgPtr  := @VChar;
      end;

    if clSetKernelArg(fID, i, ArgSize, ArgPtr) <> 0 then
      EOpenCLError.RaiseException('Failed to set kernel argument #' + String.Int(i));
  end;
end;

procedure TOpenCLKernel.Execute;
var
  Global:     array[0..1] of LongWord;
  Dims:       Integer;
begin
  if Height > 0 then
  begin
    Global[1] := Height;

    Dims := 2;
  end
  else
    Dims := 1;

  Global[0] := Width;

  if clEnqueueNDRangeKernel(fProgram.fCommandQueue, fID, Dims, nil, @Global[0], nil, 0, nil, nil) <> 0 then
    EOpenCLError.RaiseException('Failed to enqueue kernel');

  fProgram.Finish;
end;

procedure TOpenCLKernel.SetArgument(const Index: Integer; const Value: Int64; const UseArgType: TOpenCLKernelArgType = atInt);
begin
  with fArguments[Index] do
  begin
    ArgType := UseArgType;

    case UseArgType of
      atChar:   begin VChar   := Value; Size := sizeof(VChar);   end;
      atUChar:  begin VUChar  := Value; Size := sizeof(VUChar);  end;
      atShort:  begin VShort  := Value; Size := sizeof(VShort);  end;
      atUShort: begin VUShort := Value; Size := sizeof(VUShort); end;
      atInt:    begin VInt    := Value; Size := sizeof(VInt);    end;
      atUInt:   begin VUInt   := Value; Size := sizeof(VUInt);   end;
      atLong:   begin VLong   := Value; Size := sizeof(VLong);   end;
      atULong:  begin VULong  := Value; Size := sizeof(VULong);  end;
      atFloat:  begin VFloat  := Value; Size := sizeof(VFloat);  end;
    else
      EOpenCLError.RaiseException('Invalid argument data type');
    end;
  end;
end;

procedure TOpenCLKernel.SetArgument(const Index: Integer; const Value: Extended);
begin
  with fArguments[Index] do
  begin
    ArgType := atFloat;
    VFloat  := Value;
    Size    := sizeof(VFloat);
  end;
end;

procedure TOpenCLKernel.SetArgument(const Index: Integer; const Value: Pointer; const DataSize: Integer);
begin
  with fArguments[Index] do
  begin
    ArgType := atData;
    VData   := Value;
    Size    := DataSize;
  end;
end;

procedure TOpenCLKernel.SetArgument(const Index: Integer; const Value: TOpenCLBuffer);
begin
  with fArguments[Index] do
  begin
    ArgType := atBuffer;
    VBuffer := Value;
    Size    := sizeof(TOpenCLID);
  end;
end;

procedure TOpenCLKernel.SetArgument(const Index: Integer; const Value: TOpenCLBitmap);
begin
  with fArguments[Index] do
  begin
    ArgType := atBitmap;
    VBitmap := Value;
    Size    := sizeof(TOpenCLID);
  end;
end;

function TOpenCLKernel.GetArgument;
begin
  with fArguments[Index] do
    case ArgType of
      atChar:    Result := VChar;
      atUChar:   Result := VUChar;
      atShort:   Result := VShort;
      atUShort:  Result := VUShort;
      atInt:     Result := VInt;
      atUInt:    Result := VUInt;
      atLong:    Result := VLong;
      atULong:   Result := VULong;
      atFloat:   Result := Round(VFloat);
    else
      Result := 0;
      EOpenCLError.RaiseException('Invalid argument type');
    end;
end;

function TOpenCLKernel.GetArgumentFloat;
begin
  with fArguments[Index] do
    case ArgType of
      atChar:    Result := VChar;
      atUChar:   Result := VUChar;
      atShort:   Result := VShort;
      atUShort:  Result := VUShort;
      atInt:     Result := VInt;
      atUInt:    Result := VUInt;
      atLong:    Result := VLong;
      atULong:   Result := VULong;
      atFloat:   Result := VFloat;
    else
      Result := 0;
      EOpenCLError.RaiseException('Invalid argument type');
    end;
end;

function TOpenCLKernel.GetArgumentData;
begin
  with fArguments[Index] do
    case ArgType of
      atData:   Result := VData;
      atBuffer: Result := VBuffer.Data;
    else
      Result := nil;
      EOpenCLError.RaiseException('Invalid argument type');
  end;
end;

function TOpenCLKernel.GetArgumentBuffer;
begin
  with fArguments[Index] do
    case ArgType of
      atBuffer: Result := VBuffer;
    else
      Result := nil;
      EOpenCLError.RaiseException('Invalid argument type');
  end;
end;

function TOpenCLKernel.GetArgumentBitmap;
begin
  with fArguments[Index] do
    case ArgType of
      atBitmap: Result := VBitmap;
    else
      Result := nil;
      EOpenCLError.RaiseException('Invalid argument type');
  end;
end;
{$ENDREGION}

{$REGION 'TOpenCLBuffer'}
procedure TOpenCLBuffer.SetSize;
var
  Error: LongInt;
begin
  if fID <> nil then clReleaseMemObject(fID);

  fSize := Value;

  fID := clCreateBuffer(fProgram.fContext, ord(fMode), fSize, nil, @Error);
  if fID = nil then EOpenCLError.RaiseException('Unable to create buffer (' + String.Int(Error) + ')');

  if fOwnData then
  begin
    if fData = nil then
      GetMem(fData, fSize)
    else
      ReallocMem(fData, fSize);
  end;
end;

procedure TOpenCLBuffer.SetData;
begin
  if fOwnData and (fData <> nil) then
    FreeMem(fData);

  fData := Value;

  fOwnData := False;
end;

constructor TOpenCLBuffer.Create;
begin
  inherited Create;

  fProgram := AProgram;

  fID      := nil;
  fSize    := 0;
  fData    := nil;

  fMode := AMode;

  if AData = nil then
    Allocate(ASize)
  else
  begin
    SetData(AData);
    SetSize(ASize);
  end;
end;

destructor TOpenCLBuffer.Destroy;
begin
  Deallocate;

  inherited;
end;

procedure TOpenCLBuffer.Allocate;
var
  Error: LongInt;
begin
  Deallocate;

  fSize := ASize;

  fID := clCreateBuffer(fProgram.fContext, ord(fMode), fSize, nil, @Error);
  if fID = nil then EOpenCLError.RaiseException('Unable to create buffer (' + String.Int(Error) + ')');

  GetMem(fData, fSize);

  fOwnData := True;
end;

procedure TOpenCLBuffer.Deallocate;
begin
  if fID <> nil then
    clReleaseMemObject(fID);

  if fOwnData and (fData <> nil) then
    FreeMem(fData);

  fID   := nil;
  fData := nil;

  fOwnData := True;
end;

procedure TOpenCLBuffer.SyncToDevice;
begin
  if (fData = nil) or (fID = nil) or (fMode = bmWriteOnly) then exit;

  if clEnqueueWriteBuffer(fProgram.fCommandQueue, fID, 1, 0, fSize, fData, 0, nil, nil) <> 0 then
    EOpenCLError.RaiseException('Failed to write buffer');
end;

procedure TOpenCLBuffer.SyncFromDevice;
begin
  if (fData = nil) or (fID = nil) or (fMode = bmReadOnly) then exit;

  if clEnqueueReadBuffer(fProgram.fCommandQueue, fID, 1, 0, fSize, fData, 0, nil, nil) <> 0 then
    EOpenCLError.RaiseException('Failed to read buffer');
end;
{$ENDREGION}

{$REGION 'TOpenCLBitmap'}
procedure TOpenCLBitmap.SetWidth;
begin
  Resize(Value, fHeight);
end;

procedure TOpenCLBitmap.SetHeight;
begin
  Resize(fWidth, Value);
end;

constructor TOpenCLBitmap.Create;
begin
  inherited Create;

  fProgram := AProgram;
  fMode    := AMode;
  fID      := nil;

  Resize(AWidth, AHeight);
end;

destructor TOpenCLBitmap.Destroy;
begin
  Resize(0, 0);

  inherited;
end;

procedure TOpenCLBitmap.Resize;
var
  Error:  LongInt;
begin
  if fID <> nil then clReleaseMemObject(fID);
  fID := nil;

  fWidth  := AWidth;
  fHeight := AHeight;

  if (AWidth = 0) or (AHeight = 0) then exit;

  fID := clCreateImage2D(fProgram.fContext, ord(fMode), @FormatRGBA, fWidth, fHeight, 0, nil, @Error);
  if fID = nil then EOpenCLError.RaiseException('Failed to create image (' + String.Int(Error) + ')');
end;

procedure TOpenCLBitmap.ReadFromDevice;
const
  Origin: array[0..2] of LongWord = (0, 0, 0);
var
  Region: array[0..2] of LongWord;
begin
  if (fID = nil) or (fWidth <> Bitmap.Width) or (fHeight <> Bitmap.Height) then exit;

  Region[0] := fWidth;
  Region[1] := fHeight;
  Region[2] := 1;

  if clEnqueueReadImage(fProgram.fCommandQueue, fID, 1, @Origin[0], @Region[0], 0, 0, Bitmap.BitmapData.Scan0, 0, nil, nil) <> 0 then
    EOpenCLError.RaiseException('Failed to read image');
end;

procedure TOpenCLBitmap.WriteToDevice;
const
  Origin: array[0..2] of LongWord = (0, 0, 0);
var
  Region: array[0..2] of LongWord;
begin
  if (fID = nil) or (fWidth <> Bitmap.Width) or (fHeight <> Bitmap.Height) then exit;

  Region[0] := fWidth;
  Region[1] := fHeight;
  Region[2] := 1;

  if clEnqueueWriteImage(fProgram.fCommandQueue, fID, 1, @Origin[0], @Region[0], 0, 0, Bitmap.BitmapData.Scan0, 0, nil, nil) <> 0 then
    EOpenCLError.RaiseException('Failed to write image');
end;
{$ENDREGION}

end.