You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;
using ILGPU;
using ILGPU.Runtime;
using ILGPU.Runtime.Cuda;
using ILGPU.Runtime.OpenCL;
using ILGPU.Util;
using ILGPU.Algorithms;
using Microsoft.VisualBasic;
using System.IO;
using ILGPU.Runtime.CPU;
// Device buffer of floats passed to all three kernels as "screen";
// the constructor allocates 4096 * 4096 * 4 elements.
MemoryBuffer1D<float, Stride1D.Dense> osciScreen;
// Device byte buffer handed to Kernel_img and read back in Render;
// sized width * height * 4 (re-allocated by SetImageSize).
MemoryBuffer1D<byte, Stride1D.Dense> osScreenImg;
// Device copy of the 11-int settings array built by setSetings.
MemoryBuffer1D<int, Stride1D.Dense> Setings;
// Device buffer (500000 bytes) that receives the host `bytes` array each frame.
MemoryBuffer1D<byte, Stride1D.Dense> pointdata;
// Compiled launcher for Kernel_osc_Decayi.
Action<Index1D, ArrayView<int>, ArrayView<float>> Kernel_Decayi;
// Compiled launcher for Kernel_emu_osci; the trailing int is a per-frame seed.
Action<Index1D, ArrayView<byte>, ArrayView<int>, ArrayView<float>,int> Kernel_osci;
// Compiled launcher for Kernel_full_img (not visible in this file section).
Action<Index2D, ArrayView<float>, ArrayView<byte>> Kernel_img;
// Host-side input data; replaced wholesale by SetData.
byte[] bytes;
// Current output size; used as the Index2D extent when launching Kernel_img.
int imagewidth = 800;
int imageheight = 600;
// True while Render is launching kernels; the setters poll it before touching shared state.
bool isirending= false;
// True while SetImageSize / setSetings are mutating shared state; Render polls it before starting.
bool isseting= false;
// Incremented once per Render call; reset to 0 when the render thread starts.
int currentframe = 0;
// Length passed to Render by the render loop; SetData sets it to data.Length.
int currentlen = 0;
// Host-side packed settings (11 ints) produced by setSetings and uploaded to Setings.
int[] darwSet;
// Host byte array allocated in the constructor (800 * 600 * 4); no other use is visible here.
byte[] image;
// Supplies the seed argument passed to Kernel_osci every frame.
Random Random = new Random((int)DateTime.Now.Ticks);
/// <summary>
/// Creates the ILGPU context and the CUDA accelerator (device 0), allocates the
/// device buffers, compiles the three kernels and initialises the host-side state.
/// </summary>
public IlgpuRender()
{
    context = Context.Create(cfg =>
    {
        // Default configuration plus the algorithms library.
        cfg.Default();
        cfg.EnableAlgorithms();
    });
    accelerator = context.CreateCudaAccelerator(0);
    Debug.WriteLine(accelerator);

    // Device allocations, in the same order and with the same sizes as before.
    const int screenFloats = 4096 * 4096 * 4;
    const int imageBytes = 800 * 600 * 4;
    const int pointBytes = 500000;
    const int settingsCount = 11;
    osciScreen = accelerator.Allocate1D<float>(screenFloats);
    osScreenImg = accelerator.Allocate1D<byte>(imageBytes);
    pointdata = accelerator.Allocate1D<byte>(pointBytes);
    Setings = accelerator.Allocate1D<int>(settingsCount);

    // Compile the kernels into auto-grouped launchers.
    Kernel_Decayi = accelerator
        .LoadAutoGroupedStreamKernel<Index1D, ArrayView<int>, ArrayView<float>>(
            Kernel_osc_Decayi);
    Kernel_osci = accelerator
        .LoadAutoGroupedStreamKernel<Index1D, ArrayView<byte>, ArrayView<int>, ArrayView<float>, int>(
            Kernel_emu_osci);
    Kernel_img = accelerator
        .LoadAutoGroupedStreamKernel<Index2D, ArrayView<float>, ArrayView<byte>>(
            Kernel_full_img);

    // Host-side defaults.
    currentlen = 3200;
    currentframe = 0;
    bytes = new byte[6400 * 2 * 3];
    image = new byte[imageBytes];
}
/// <summary>
/// Renders one frame: uploads up to <paramref name="len"/> input bytes and the
/// current settings, runs the decay, draw and image kernels in sequence, then
/// reads the resulting image back into <c>image</c>.
/// </summary>
/// <param name="len">
/// Number of bytes from <c>bytes</c> to upload. It is clamped to the length of
/// <c>bytes</c> and to the capacity of the <c>pointdata</c> device buffer.
/// </param>
/// <remarks>
/// The frame is skipped (nothing is launched) while no input data or settings
/// have been provided yet.
/// NOTE(review): the isirending / isseting booleans are a best-effort handshake,
/// not a real lock; a proper fix needs a shared lock object in the class.
/// </remarks>
public void Render(int len=3200)
{
    // Wait until no setter is swapping buffers or settings.
    while (isseting)
    {
        Thread.Sleep(1);
    }

    // Raise the flag before touching any device buffer, and always lower it
    // again: previously an exception left it stuck and the setters spun forever.
    isirending = true;
    try
    {
        // Snapshot the shared arrays once so a concurrent SetData / setSetings
        // cannot swap them in the middle of the frame.
        byte[] samples = bytes;
        int[] settings = darwSet;
        if (samples == null || settings == null)
        {
            // Nothing to draw yet (e.g. setSetings has not been called).
            return;
        }

        Stopwatch sw = Stopwatch.StartNew();

        // Transfer data to GPU. The source array handed to CopyFromCPU must not
        // be longer than the target view, so trim it when the lengths differ.
        int count = Math.Max(0, Math.Min(len, samples.Length));
        count = (int)Math.Min((long)count, pointdata.Length);
        if (count > 0)
        {
            byte[] upload = samples;
            if (upload.Length != count)
            {
                upload = new byte[count];
                Array.Copy(samples, upload, count);
            }
            pointdata.View.SubView(0, count).CopyFromCPU(upload);
        }
        Setings.View.CopyFromCPU(settings);

        Kernel_Decayi(10, Setings.View, osciScreen.View);
        accelerator.Synchronize();
        Kernel_osci(20, pointdata.View, Setings.View, osciScreen.View, Random.Next());
        accelerator.Synchronize();
        Kernel_img(new Index2D(imagewidth, imageheight), osciScreen.View, osScreenImg.View);
        accelerator.Synchronize();

        sw.Stop();
        Debug.WriteLine("GPU Time Cost:"+ sw.ElapsedMilliseconds.ToString());

        // Read the frame back while the buffer is still protected by the flag,
        // and keep it instead of discarding it in an unused local.
        image = osScreenImg.GetAsArray1D();
        currentframe++;
    }
    finally
    {
        isirending = false;
    }
}
/// <summary>
/// Releases the device buffers first, then the accelerator, then the context.
/// </summary>
public void Dispose()
{
    // Order matters: buffers belong to the accelerator, which belongs to the context.
    IDisposable[] owned =
    {
        osciScreen,
        osScreenImg,
        pointdata,
        Setings,
        accelerator,
        context,
    };
    foreach (IDisposable resource in owned)
    {
        resource.Dispose();
    }
}
/// <summary>
/// Starts a dedicated thread that resets the frame counter and then renders
/// continuously, sleeping 16 ms between frames.
/// </summary>
/// <remarks>
/// NOTE(review): the thread is a foreground thread with no exit condition, so it
/// keeps running after Dispose and keeps the process alive — confirm this is intended.
/// </remarks>
public void run()
{
    void RenderLoop()
    {
        currentframe = 0;
        for (;;)
        {
            Render(currentlen);
            Thread.Sleep(16);
        }
    }

    Thread worker = new Thread(RenderLoop);
    worker.Start();
}
/// <summary>
/// Replaces the host-side input data used by the next frames.
/// </summary>
/// <param name="data">New input bytes; must not be null.</param>
/// <param name="len">
/// Currently ignored: the whole array is used and the current length is set to
/// <c>data.Length</c>, which is what Render's upload expects.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public void SetData(byte[] data,int len)
{
    // Validate before mutating: previously a null array was stored in `bytes`
    // before the failure surfaced, which then crashed the render thread.
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    bytes = data;
    currentlen = data.Length;
}
/// <summary>
/// Changes the output image size and re-allocates the device image buffer
/// (width * height * 4 bytes).
/// </summary>
/// <param name="width">New image width; must be positive.</param>
/// <param name="height">New image height; must be positive.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="width"/> or <paramref name="height"/> is not positive.
/// </exception>
public void SetImageSize(int width, int height)
{
    if (width <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(width), width, "Image width must be positive.");
    }
    if (height <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(height), height, "Image height must be positive.");
    }

    // Compute in 64 bits so large dimensions cannot silently overflow int.
    long byteCount = (long)width * height * 4;

    // Wait for the frame in flight to finish before swapping the buffer.
    while (isirending)
    {
        Thread.Sleep(1);
    }

    isseting = true;
    try
    {
        // Allocate the replacement first: if allocation fails, the old buffer
        // and the old dimensions stay valid instead of leaving a disposed buffer.
        MemoryBuffer1D<byte, Stride1D.Dense> replacement = accelerator.Allocate1D<byte>(byteCount);
        MemoryBuffer1D<byte, Stride1D.Dense> previous = osScreenImg;

        osScreenImg = replacement;
        imagewidth = width;
        imageheight = height;

        previous.Dispose();
    }
    finally
    {
        // Always release the flag, otherwise Render would wait forever.
        isseting = false;
    }
}
/// <summary>
/// Packs the drawing settings into the 11-int layout read by the kernels and
/// uploads them to the device settings buffer.
/// </summary>
/// <param name="setings">Source settings to pack.</param>
/// <remarks>
/// Slot layout: 0 ImageHeight, 1 ImageWidth, 2 _bufferStride, 3-5 pen colour R/G/B,
/// 6 PEN_WIDTH, 7 TAU, 8 sigma * 1000, 9 miuscBitDepth, 10 current data length.
/// </remarks>
public void setSetings(darwSetings setings)
{
    // Build the array before entering the guarded section so a failure while
    // reading the settings cannot leave the isseting flag stuck.
    int[] packed = new int[11] {
        (int)setings.ImageHeight,
        (int)setings.ImageWidth,
        (int)setings._bufferStride,
        (int)setings.PEN_COLOR_R,
        (int)setings.PEN_COLOR_G,
        (int)setings.PEN_COLOR_B,
        (int)setings.PEN_WIDTH,
        (int)setings.TAU,
        (int)(setings.sigma*1000),
        (int)setings.miuscBitDepth,
        (int)currentlen
    };

    // Wait for the frame in flight; Sleep(1) matches the other waiters and
    // avoids the busy spin of Sleep(0).
    while (isirending)
    {
        Thread.Sleep(1);
    }

    isseting = true;
    try
    {
        darwSet = packed;
        Setings.View.CopyFromCPU(packed);
    }
    finally
    {
        // Always release the flag, otherwise Render would wait forever.
        isseting = false;
    }
}
/// <summary>
/// Small xorshift128 pseudo-random generator with four 32-bit words of state.
/// It is a mutable value type: copies advance independently.
/// </summary>
public struct XorShift128
{
    // Scale factor 2^-24: maps a 24-bit integer onto [0, 1).
    private const float InverseTwoPow24 = 1.0f / 16777216.0f;

    private uint _state0, _state1, _state2, _state3;

    /// <summary>
    /// Seeds the four state words with seed + 1 .. seed + 4 (wrapping on overflow).
    /// </summary>
    /// <param name="seed">Any value; words that wrap to zero are replaced by 1.</param>
    public XorShift128(uint seed)
    {
        _state0 = unchecked(seed + 1);
        _state1 = unchecked(seed + 2);
        _state2 = unchecked(seed + 3);
        _state3 = unchecked(seed + 4);
        // The state words must be non-zero.
        if (_state0 == 0) _state0 = 1;
        if (_state1 == 0) _state1 = 1;
        if (_state2 == 0) _state2 = 1;
        if (_state3 == 0) _state3 = 1;
    }

    /// <summary>Advances the generator and returns the next 32-bit value.</summary>
    public uint NextUInt()
    {
        uint t = _state3;
        t ^= t << 11;
        t ^= t >> 8;
        // Rotate the state words; _state0 still holds the previous newest word here.
        _state3 = _state2;
        _state2 = _state1;
        _state1 = _state0;
        t ^= _state0;
        t ^= _state0 >> 19;
        _state0 = t;
        return t;
    }

    /// <summary>
    /// Returns the next value as a float in the half-open range [0, 1).
    /// </summary>
    /// <remarks>
    /// Uses only the top 24 bits so every result is exactly representable and
    /// strictly below 1. Dividing the full 32-bit value by uint.MaxValue (as a
    /// float) rounds large draws up to exactly 1.0f.
    /// </remarks>
    public float NextFloat()
    {
        return (NextUInt() >> 8) * InverseTwoPow24;
    }
}
/// <summary>
/// Decay kernel: each thread walks 4096 * 4 consecutive floats of
/// <paramref name="screen"/> starting at index * 4096 * 4. For every group of
/// four it updates the first three values: values below 1 become 0, all others
/// are multiplied by exp(-16 / TAU). The fourth value of each group is untouched.
/// </summary>
/// <param name="index">Thread index; selects which 4096 * 4 float span to process.</param>
/// <param name="setings">Settings array; slot 7 is read as TAU.</param>
/// <param name="screen">Float buffer that is updated in place.</param>
public static void Kernel_osc_Decayi(Index1D index, ArrayView<int> setings,ArrayView<float> screen)
{
    const int floatsPerThread = 4096 * 4;

    int tau = setings[7];
    float decayFactor = (float)Math.Exp(-16.0f / tau);
    int spanStart = (index * 4096 * 4);

    for (int group = 0; group < floatsPerThread; group += 4)
    {
        for (int channel = 0; channel < 3; channel++)
        {
            int slot = spanStart + group + channel;
            float value = screen[slot];
            if (value < 1)
            {
                screen[slot] = 0;
            }
            else
            {
                screen[slot] = value * decayFactor;
            }
        }
    }
}
public static void Kernel_emu_osci(
Index1D index,
ArrayView<byte> miuscData,
ArrayView<int> setings,
ArrayView<float> screen,
int seed=278943941)
{
int miuscBitDepth = setings[9];
int len = setings[10];
if (index >= len)
return;
int stride = (2048 * 4);
int PEN_COLOR_R = setings[3];
int PEN_COLOR_G = setings[4];
int PEN_COLOR_B = setings[5];
int PEN_WIDTH = setings[6];
int TAU = setings[7];
float sigma = setings[8] / 1000;
float pathLiting = 3.0f;
float pointliting = 0.04f;
float ftameTime = 16.0f;
float pointTilme = (float)ftameTime / len; // pointTilme is invalid because len is invalid
float decay_time = (index * pointTilme); // index is invalid here, pointTilme is also invalid
float decay = (float)Math.Exp(-decay_time / TAU); // decay is invalid due to previous calculation
/*"I might have misidentified the error. The values of index and len are not invalid, but Math.Exp seems to cause issues. When I modify the three lines above to:
float pointTilme = (float)ftameTime / 3200;
float decay_time = (200 * pointTilme);
float decay = (float)Math.Exp(-decay_time / 10);
the program doesn’t crash. I’m unclear why this fixes the issue."*/
I encountered illegal memory access errors in CUDA kernels when using Math.Exp with certain inputs. After replacing Math.Exp with a custom implementation that explicitly handles edge cases, the errors disappeared. This suggests that the ILGPU implementation of Math.Exp may not properly handle invalid inputs (e.g., extremely large/small values leading to NaNs or Infinity).
public static float FastExp(float x)
{
// 1. Manually clamp input range to prevent overflow
const float MAX_INPUT = 88.0f;
const float MIN_INPUT = -88.0f;
if (x > MAX_INPUT) return float.MaxValue;
if (x < MIN_INPUT) return 0.0f;
// 2. Compute integer and fractional parts without Math.Floor
int n = (x >= 0 || x == (int)x) ? (int)x : (int)x - 1;
float r = x - n;
// 3. Polynomial approximation for e^r
float er = 1.0f + r * (1.0f + r * (0.5f + r * (0.1666667f + r * 0.041666667f)));
// 4. Fast exponentiation via bit operations
float en = 1.0f;
if (n != 0)
{
uint exponent = (uint)(n > 0 ? n : -n);
float baseE = 2.718281828459045f;
float result = 1.0f;
while (exponent > 0)
{
if ((exponent & 1) == 1)
result *= baseE;
baseE *= baseE;
exponent >>= 1;
}
en = (n > 0) ? result : 1.0f / result;
}
return en * er;
The code you provided does not compile - it is missing some definitions.
And even if I got it to compile, it would require additional changes, such as adding a Main method.
Are you able to provide a smaller sample project that reproduces the issue?
Question
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;
using ILGPU;
using ILGPU.Runtime;
using ILGPU.Runtime.Cuda;
using ILGPU.Runtime.OpenCL;
using ILGPU.Util;
using ILGPU.Algorithms;
using Microsoft.VisualBasic;
using System.IO;
using ILGPU.Runtime.CPU;
namespace SharpOsci
{
/// <summary>
/// GPU renderer built on ILGPU; owns the context and accelerator declared below.
/// </summary>
internal class IlgpuRender
{
// ILGPU context; created in the constructor via Context.Create and released in Dispose.
Context context;
// Accelerator created from the context with CreateCudaAccelerator(0); used for
// all buffer allocations, kernel loading and Synchronize calls.
Accelerator accelerator;
/*"I might have misidentified the error. The values of index and len are not invalid, but Math.Exp seems to cause issues. When I modify the three lines above to:
float pointTilme = (float)ftameTime / 3200;
float decay_time = (200 * pointTilme);
float decay = (float)Math.Exp(-decay_time / 10);
the program doesn’t crash. I’m unclear why this fixes the issue."*/
}
Please help me
Environment
Additional context
No response
The text was updated successfully, but these errors were encountered: