FShade


Compute Shaders

FShade also provides an API for compute shaders which is not part of the effect/composition system. We opted for a very low level of abstraction here, since compute shaders are used for various tasks and cannot easily be represented as pure functions. Here's a little example of a very basic compute shader adding two arrays/buffers.

1: 
2: 
3: 
4: 
5: 
6: 
/// Compute shader that adds buffer `r` onto buffer `l` element-wise, in place.
/// The attribute below sets the local work-group size along X to 64.
[<LocalSize(X = 64)>]
let add (l : float32[]) (r : float32[]) =
    compute {
        // X component of this invocation's global id, used as the element index
        let id = getGlobalId().X
        // accumulate into `l`; `r` is only read
        // NOTE(review): no range check against the array lengths is visible here —
        // presumably the dispatch size has to match the buffer size; confirm with callers
        l.[id] <- l.[id] + r.[id]
    }

Compute shaders can (like Effects) be compiled to a Module, which in turn can be assembled to GLSL using the default pipeline. Note that all array inputs are translated to storage buffers.

1: 
2: 
3: 
4: 
5: 
// Build a ComputeShader from the `add` function, passing the maximum local size
ComputeShader.ofFunction maxLocalSize add
    // turn the ComputeShader into a Module
    |> ComputeShader.toModule
    // compile the Module to a GLSL shader
    |> ModuleCompiler.compileGLSL430
    // extract the generated source code as a string
    |> GLSL.code
    // and print it
    |> printfn "%s"
#version 440

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
layout(std430,binding=0) buffer l_ssb  { float l[]; };
layout(std430,binding=1) buffer r_ssb  { float r[]; };
void main()
{
    int id = ivec3(gl_GlobalInvocationID).x;
    l[id] = (l[id] + r[id]);
}

Special functions

The compute API includes various special functions, such as:

  • getGlobalId()
  • getLocalId()
  • getWorkGroupId()

Here's a shader using some of those built-in functions.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
// GLSL built-ins that are missing from the API can be imported like this:
// the attribute carries the GLSL call template, where {0} and {1} stand for
// the first and second argument of the F# function.
// NOTE(review): the body is only a placeholder; presumably `onlyInShaderCode`
// fails when the function is invoked outside of shader code — confirm.
[<GLSLIntrinsic("atomicAdd({0}, {1})")>]
let atomicAdd (r : ref<int>) (v : int) = onlyInShaderCode "atomicAdd"

/// Example compute shader exercising the built-in functions of the compute API:
/// it queries the invocation/work-group ids, writes to shared memory, synchronizes
/// with a barrier and performs atomic additions on an image and on a buffer.
/// The attribute below sets the local work-group size along X to 64.
[<LocalSize(X = 64)>]
let builtIns (a : int[]) (img : IntImage2d<Formats.r32i>) =
    compute {
        // note that the argument to allocateShared must be a compile-time constant
        let shared      = allocateShared<int> 64

        // query the built-in ids and sizes; groupCount, groupSize, groupId and
        // localIndex are not used further below and only illustrate what is available
        let globalId    = getGlobalId()
        let groupCount  = getWorkGroupCount()
        let groupSize   = getWorkGroupSize()
        let groupId     = getWorkGroupId()
        let localId     = getLocalId()
        let localIndex  = getLocalIndex()

        // copy this invocation's buffer element into the shared array, then synchronize
        shared.[localId.X] <- a.[globalId.X]
        barrier()
        
        // atomic add on the image at this invocation's XY coordinate; the result is discarded
        img.AtomicAdd(globalId.XY, 10) |> ignore
        
        // call the imported `atomicAdd` intrinsic on the buffer element
        atomicAdd &&a.[globalId.X] 10

        // finally overwrite the buffer element with a constant
        a.[globalId.X] <- 123

    }
    
// Print the GLSL code generated for `builtIns`
ComputeShader.printGLSL builtIns
#version 440

layout(binding = 0, r32i)
uniform iimage2D cs_img;

shared int shared_intx64[64];

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
layout(std430,binding=0) buffer a_ssb  { int a[]; };
void main()
{
    ivec3 globalId = ivec3(gl_GlobalInvocationID);
    ivec3 groupCount = ivec3(gl_NumWorkGroups);
    ivec3 groupSize = ivec3(gl_WorkGroupSize);
    ivec3 groupId = ivec3(gl_WorkGroupID);
    ivec3 localId = ivec3(gl_LocalInvocationID);
    int localIndex = int(gl_LocalInvocationIndex);
    shared_intx64[localId.x] = a[globalId.x];
    barrier();
    imageAtomicAdd(cs_img, globalId.xy, 10);
    atomicAdd(a[globalId.x], 10);
    a[globalId.x] = 123;
}

Optimization

Due to the imperative nature of compute shaders, we turned off several optimizations in the compiler by default. For example, reordering statements may actually change the shader's results here. Our optimizations (designed for regular shaders) currently assume that shaders are functionally pure. We'll fix that in future FShade releases, but for the moment users need to perform some optimizations by hand.

namespace Microsoft
namespace Microsoft.FSharp
namespace Microsoft.FSharp.Quotations
namespace Aardvark
namespace Aardvark.Base
namespace FShade
namespace FShade.Imperative
module Utilities
val maxLocalSize : V3i
Multiple items
type V3i =
  struct
    new : v:int -> V3i + 25 overloads
    val X : int
    val Y : int
    val Z : int
    member Abs : V3i
    member AllDifferent : v:V3i -> bool + 1 overload
    member AllEqual : v:V3i -> bool + 1 overload
    member AllGreater : v:V3i -> bool + 1 overload
    member AllGreaterOrEqual : v:V3i -> bool + 1 overload
    member AllSmaller : v:V3i -> bool + 1 overload
    ...
  end

--------------------
V3i ()
   (+0 other overloads)
V3i(v: int) : V3i
   (+0 other overloads)
V3i(a: int []) : V3i
   (+0 other overloads)
V3i(v: int64) : V3i
   (+0 other overloads)
V3i(a: int64 []) : V3i
   (+0 other overloads)
V3i(v: float32) : V3i
   (+0 other overloads)
V3i(a: float32 []) : V3i
   (+0 other overloads)
V3i(v: float) : V3i
   (+0 other overloads)
V3i(a: float []) : V3i
   (+0 other overloads)
V3i(index_fun: System.Func<int,int>) : V3i
   (+0 other overloads)
Multiple items
val LocalSize : V3i

--------------------
type LocalSizeAttribute =
  inherit Attribute
  new : unit -> LocalSizeAttribute
  override ToString : unit -> string
  member X : int
  member Y : int
  member Z : int
  member X : int with set
  member Y : int with set
  member Z : int with set

--------------------
new : unit -> LocalSizeAttribute
property LocalSizeAttribute.X: int
val add : l:float32 [] -> r:float32 [] -> Expr<unit>
val l : float32 []
Multiple items
val float32 : value:'T -> float32 (requires member op_Explicit)

--------------------
type float32 = System.Single

--------------------
type float32<'Measure> = float32
val r : float32 []
val compute : ComputeBuilder
val id : int
val getGlobalId : unit -> V3i
field V3i.X: int
Multiple items
module ComputeShader

from Utilities

--------------------
module ComputeShader

from FShade

--------------------
type ComputeShader =
  private new : id:string * method:MethodBase * localSize:V3i * data:Lazy<ComputeShaderData> -> ComputeShader
  member csBody : Expr
  member csBuffers : Map<string,ComputeBuffer>
  member csId : string
  member csImages : Map<string,ComputeImage>
  member csLocalSize : V3i
  member csMethod : MethodBase
  member csSamplerStates : Map<(string * int),SamplerState>
  member csShared : Map<string,(Type * int)>
  member csTextureNames : Map<(string * int),string>
  ...
val ofFunction : maxLocalSize:V3i -> f:('a -> 'b) -> ComputeShader
val toModule : shader:ComputeShader -> Module
Multiple items
module ModuleCompiler

from Utilities

--------------------
module ModuleCompiler

from FShade.Imperative

--------------------
module ModuleCompiler

from FShade.SpirV Extensions

--------------------
module ModuleCompiler

from FShade.Backends
val compileGLSL430 : module_:Module -> GLSL.GLSLShader
Multiple items
module GLSL

from Utilities

--------------------
namespace FShade.GLSL
val code : glsl:GLSL.GLSLShader -> string
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Multiple items
type GLSLIntrinsicAttribute =
  inherit IntrinsicAttribute
  new : format:string -> GLSLIntrinsicAttribute
  new : format:string * [<ParamArray>] requiredExtensions:string [] -> GLSLIntrinsicAttribute
  private new : format:string * requiredExtensions:Set<string> -> GLSLIntrinsicAttribute
  override Intrinsic : CIntrinsic

--------------------
new : format:string -> GLSLIntrinsicAttribute
new : format:string * [<System.ParamArray>] requiredExtensions:string [] -> GLSLIntrinsicAttribute
val atomicAdd : r:int ref -> v:int -> 'a
val r : int ref
Multiple items
val ref : value:'T -> 'T ref

--------------------
type 'T ref = Ref<'T>
Multiple items
val int : value:'T -> int (requires member op_Explicit)

--------------------
type int = int32

--------------------
type int<'Measure> = int
val v : int
val onlyInShaderCode : name:string -> 'a
val builtIns : a:int [] -> img:IntImage2d<Formats.r32i> -> Expr<unit>
val a : int []
val img : IntImage2d<Formats.r32i>
Multiple items
type IntImage2d<'f (requires 'f :> ISignedFormat)> =
  interface IImage
  new : unit -> IntImage2d<'f>
  member AtomicAdd : coord:V2i * data:int -> int
  member AtomicAnd : coord:V2i * data:int -> int
  member AtomicCompareExchange : coord:V2i * cmp:int * data:int -> int
  member AtomicExchange : coord:V2i * data:int -> int
  member AtomicMax : coord:V2i * data:int -> int
  member AtomicMin : coord:V2i * data:int -> int
  member AtomicOr : coord:V2i * data:int -> int
  member AtomicXor : coord:V2i * data:int -> int
  ...

--------------------
new : unit -> IntImage2d<'f>
module Formats

from FShade
Multiple items
type r32i =
  interface ISignedFormat
  new : unit -> r32i

--------------------
new : unit -> Formats.r32i
val shared : int []
val allocateShared : size:int -> 'a [] (requires unmanaged)
val globalId : V3i
val groupCount : V3i
val getWorkGroupCount : unit -> V3i
val groupSize : V3i
val getWorkGroupSize : unit -> V3i
val groupId : V3i
val getWorkGroupId : unit -> V3i
val localId : V3i
val getLocalId : unit -> V3i
val localIndex : int
val getLocalIndex : unit -> int
val barrier : unit -> unit
val ignore : value:'T -> unit
val printGLSL : f:('a -> 'b) -> unit
Fork me on GitHub