Skip to content

[QUESTION]: <title>value is invalid #1328

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
l278943941 opened this issue May 9, 2025 · 3 comments
Open

[QUESTION]: <title>value is invalid #1328

l278943941 opened this issue May 9, 2025 · 3 comments

Comments

@l278943941
Copy link

l278943941 commented May 9, 2025

Question

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading.Tasks;
using ILGPU;
using ILGPU.Runtime;
using ILGPU.Runtime.Cuda;
using ILGPU.Runtime.OpenCL;
using ILGPU.Util;
using ILGPU.Algorithms;
using Microsoft.VisualBasic;
using System.IO;
using ILGPU.Runtime.CPU;

namespace SharpOsci
{
internal class IlgpuRender
{
Context context;
Accelerator accelerator;

    MemoryBuffer1D<float, Stride1D.Dense> osciScreen;
    MemoryBuffer1D<byte, Stride1D.Dense> osScreenImg;
    MemoryBuffer1D<int, Stride1D.Dense> Setings;
    MemoryBuffer1D<byte, Stride1D.Dense> pointdata;

    Action<Index1D, ArrayView<int>, ArrayView<float>> Kernel_Decayi;
    Action<Index1D, ArrayView<byte>, ArrayView<int>, ArrayView<float>,int> Kernel_osci;
    Action<Index2D, ArrayView<float>, ArrayView<byte>> Kernel_img;

    byte[] bytes;
    int imagewidth = 800;
    int imageheight = 600;
    bool isirending= false;
    bool isseting= false;
    int currentframe = 0;
    int currentlen = 0;
    int[] darwSet;
    byte[] image;
    Random Random = new Random((int)DateTime.Now.Ticks);
    public IlgpuRender()
    {
        context = Context.Create(builder =>
        {
            builder.Default(); // Use default configuration
            builder.EnableAlgorithms(); // Critical: Enable algorithm library
        });
        accelerator = context.CreateCudaAccelerator(0);

        Debug.WriteLine(accelerator);

        osciScreen = accelerator.Allocate1D<float>(4096 * 4096 * 4);
        osScreenImg = accelerator.Allocate1D<byte>(800 * 600 * 4);
        pointdata = accelerator.Allocate1D<byte>(500000);
        Setings = accelerator.Allocate1D<int>(11);

        Kernel_Decayi = accelerator.LoadAutoGroupedStreamKernel<
            Index1D,
            ArrayView<int>,
            ArrayView<float>>(Kernel_osc_Decayi);

        Kernel_osci = accelerator.LoadAutoGroupedStreamKernel<
            Index1D,
            ArrayView<byte>,
            ArrayView<int>,
            ArrayView<float>,
            int>(Kernel_emu_osci);

        Kernel_img = accelerator.LoadAutoGroupedStreamKernel<
            Index2D,
            ArrayView<float>,
            ArrayView<byte>>(Kernel_full_img);

        currentlen = 3200; currentframe = 0;

        bytes = new byte[6400 * 2 * 3];
        image = new byte[800 * 600 * 4];
    }

    public void Render(int len=3200)
    {
        while (isseting)
        {
            Thread.Sleep(1);
        }
        
        Stopwatch sw = Stopwatch.StartNew();

        // Transfer data to GPU
        pointdata.View.SubView(0, len).CopyFromCPU(bytes);
        Setings.View.CopyFromCPU(darwSet);
        isirending = true;
        Kernel_Decayi(10, Setings.View, osciScreen.View);
        accelerator.Synchronize();
        Setings.View.CopyFromCPU(darwSet);
        Kernel_osci(20, pointdata.View, Setings.View, osciScreen.View, Random.Next());
        accelerator.Synchronize();
        Kernel_img(new Index2D(imagewidth, imageheight), osciScreen.View, osScreenImg.View);
        accelerator.Synchronize();
        isirending = false;
        
        sw.Stop();
        Debug.WriteLine("GPU Time Cost:"+ sw.ElapsedMilliseconds.ToString());

        var result = osScreenImg.GetAsArray1D();
        currentframe++;
    }

    public void Dispose()
    {
        osciScreen.Dispose();
        osScreenImg.Dispose();
        pointdata.Dispose();
        Setings.Dispose();
        accelerator.Dispose();
        context.Dispose();
    }

    public void run()
    {
        Thread thread = new Thread(()=>{
            currentframe = 0;
            while (true)
            {
                Render(currentlen);
                Thread.Sleep(16);
            }
        });
        thread.Start();
    }

    public void SetData(byte[] data,int len)
    {
        bytes = data;
        currentlen = data.Length;
    }

    public void SetImageSize(int width, int height)
    {
        while (isirending)
        {
            Thread.Sleep(1);
        }
        isseting = true;
        imagewidth = width;
        imageheight = height;
        osScreenImg.Dispose();
        osScreenImg = accelerator.Allocate1D<byte>(imagewidth * imageheight * 4);
        isseting = false;
    }

    public void setSetings(darwSetings setings)
    {
        while (isirending)
        {
            Thread.Sleep(0);
        }
        isseting = true;
        darwSet = new int[11] {
            (int)setings.ImageHeight,
            (int)setings.ImageWidth,
            (int)setings._bufferStride,
            (int) setings.PEN_COLOR_R,
            (int)setings.PEN_COLOR_G,
            (int)setings.PEN_COLOR_B,
            (int)setings.PEN_WIDTH,
            (int)setings.TAU,
            (int)(setings.sigma*1000),
            (int)setings.miuscBitDepth,
            (int) currentlen
        };
        Setings.View.CopyFromCPU(darwSet);
        isseting = false;
    }

    public struct XorShift128
    {
        private uint _state0, _state1, _state2, _state3;

        public XorShift128(uint seed)
        {
            _state0 = (uint)(seed + 1);
            _state1 = (uint)(seed + 2);
            _state2 = (uint)(seed + 3);
            _state3 = (uint)(seed + 4);
            // Initialize states (must be non-zero)
            if (_state0 == 0) _state0 = 1;
            if (_state1 == 0) _state1 = 1;
            if (_state2 == 0) _state2 = 1;
            if (_state3 == 0) _state3 = 1;
        }

        public uint NextUInt()
        {
            uint t = _state3;
            t ^= t << 11;
            t ^= t >> 8;
            _state3 = _state2;
            _state2 = _state1;
            _state1 = _state0;
            t ^= _state0;
            t ^= _state0 >> 19;
            _state0 = t;
            return t;
        }

        public float NextFloat()
        {
            return NextUInt() / (float)uint.MaxValue; // [0,1)
        }
    }

    public static void Kernel_osc_Decayi(Index1D index, ArrayView<int> setings,ArrayView<float> screen) 
    {
        int TAU = setings[7];
        float brightness = (float)Math.Exp(-16.0f / TAU);
        int screenindex = (index * 4096 * 4);

        for (int i = 0; i < (4096 * 4); i += 4)
        {
            screen[screenindex + i] = screen[screenindex + i] < 1 ? 0 : screen[screenindex + i] * brightness;
            screen[screenindex + i + 1] = screen[screenindex + i + 1] < 1 ? 0 : screen[screenindex + i + 1] * brightness;
            screen[screenindex + i + 2] = screen[screenindex + i + 2] < 1 ? 0 : screen[screenindex + i + 2] * brightness;
        }
    }

    public static void Kernel_emu_osci(
        Index1D index, 
        ArrayView<byte> miuscData, 
        ArrayView<int> setings, 
        ArrayView<float> screen,
        int seed=278943941)
    {
        int miuscBitDepth = setings[9];
        int len = setings[10];

        if (index >= len) 
            return;

        int stride = (2048 * 4);
        int PEN_COLOR_R = setings[3];
        int PEN_COLOR_G = setings[4];
        int PEN_COLOR_B = setings[5];
        int PEN_WIDTH = setings[6];
        int TAU = setings[7];
        float sigma = setings[8] / 1000;
        float pathLiting = 3.0f;
        float pointliting = 0.04f;

        float ftameTime = 16.0f;
        float pointTilme = (float)ftameTime / len; // pointTilme is invalid because len is invalid
        float decay_time = (index * pointTilme); // index is invalid here, pointTilme is also invalid
        float decay = (float)Math.Exp(-decay_time / TAU); // decay is invalid due to previous calculation

/*"I might have misidentified the error. The values of index and len are not invalid, but Math.Exp seems to cause issues. When I modify the three lines above to:

float pointTilme = (float)ftameTime / 3200;
float decay_time = (200 * pointTilme);
float decay = (float)Math.Exp(-decay_time / 10);
the program doesn’t crash. I’m unclear why this fixes the issue."*/

        int originX = 4096 / 2;
        int originY = 4096 / 2;

        float scalex = originX * 0.95f;
        float scaley = originY * 0.95f;

        float lastx = 0;
        float lasty = 0;

        int i = index * miuscBitDepth * 2;

        float x = 0;
        float y = 0;
        switch (miuscBitDepth)
        {
            case 3:
                x = ((miuscData[i] << 8 | miuscData[i + 1] << 16 | miuscData[i + 2] << 24) >> 8) / 8388608f;
                y = ((miuscData[i + 3] << 8 | miuscData[i + 4] << 16 | miuscData[i + 5] << 24) >> 8) / 8388608f;
                lastx = i == 0 ? 0 : ((miuscData[i - 6] << 8 | miuscData[i - 5] << 16 | miuscData[i - 4] << 24) >> 8) / 8388608f;
                lasty = i == 0 ? 0 : ((miuscData[i - 3] << 8 | miuscData[i - 2] << 16 | miuscData[i - 4] << 24) >> 8) / 8388608f;
                break;
            default:
                break;
        }

        int[] planxs = new int[81];
        int[] planxy = new int[81];
        float[] planatten = new float[81];
        int planlen = 0;

        for (int i1 = -(PEN_WIDTH / 2); i1 < PEN_WIDTH / 2; i1++)
        {
            for (int j = -(PEN_WIDTH / 2); j < PEN_WIDTH / 2; j++)
            {
                float normeist = (float)Math.Sqrt(i1 * i1 + j * j);
                if (normeist <= PEN_WIDTH / 2)
                {
                    planxs[planlen] = j;
                    planxy[planlen] = i1;
                    planatten[planlen] = (float)Math.Exp(-(normeist * normeist) / (2.0f * sigma * sigma));
                    planlen++;
                }
            }
        }

        x = x * scalex + originX;
        y = y * scaley + originY;

        lastx = lastx * scalex + originX;
        lasty = lasty * scaley + originY;

        float pathlen = (float)Math.Sqrt((x - lastx) * (x - lastx) + (y - lasty) * (y - lasty));
        float lenadd = 1.0f / pathlen;
        float atten = decay * pathLiting * lenadd;

        int lastdarwx = -1;
        int lastdarwy = -1;
        for (float path_i = 0; path_i < pathlen; path_i++)
        {
            int posix = (int)(lastx + (x - lastx) * path_i * lenadd);
            int posiy = (int)(lasty + (y - lasty) * path_i * lenadd);
            if (posix == lastdarwx && posiy == lastdarwy)
                continue;
            else
            {
                lastdarwx = posix;
                lastdarwy = posiy;
            }
            for (int plan_i = 0; plan_i < planlen; plan_i++)
            {
                int darwx = posix + planxs[plan_i];
                int darwy = posiy + planxy[plan_i];
                float darwatten = planatten[plan_i];
                if (darwx < 0 || darwx >= 4096 || darwy < 0 || darwy >= 4096)
                    continue;
                int screenindex = ((darwy * (4096 * 4)) + (darwx * 4));
                if (screenindex < 0 || screenindex >= 67108860)
                    continue;

                // Error occurs here because decay is invalid
                float temp = decay;
                float p = temp < 255f ? 255f : temp; 
            }
        }
    }

    public static void Kernel_full_img(Index2D index, ArrayView<float> screen, ArrayView<byte> retimg)
    {
        // Image processing logic to be implemented
    }
}

}

Please help me

Environment

  • ILGPU version: [1.5.2]
  • .NET version: [ .NET 8]
  • Operating system: [Windows 10]
  • Hardware : [NVIDIA GeForce GTX 1050]

Additional context

No response

@l278943941
Copy link
Author

I encountered illegal memory access errors in CUDA kernels when using Math.Exp with certain inputs. After replacing Math.Exp with a custom implementation that explicitly handles edge cases, the errors disappeared. This suggests that the ILGPU implementation of Math.Exp may not properly handle invalid inputs (e.g., extremely large/small values leading to NaNs or Infinity). public static float FastExp(float x)
{
// 1. Manually clamp input range to prevent overflow
const float MAX_INPUT = 88.0f;
const float MIN_INPUT = -88.0f;
if (x > MAX_INPUT) return float.MaxValue;
if (x < MIN_INPUT) return 0.0f;

// 2. Compute integer and fractional parts without Math.Floor
int n = (x >= 0 || x == (int)x) ? (int)x : (int)x - 1;
float r = x - n;

// 3. Polynomial approximation for e^r
float er = 1.0f + r * (1.0f + r * (0.5f + r * (0.1666667f + r * 0.041666667f)));

// 4. Fast exponentiation via bit operations
float en = 1.0f;
if (n != 0)
{
    uint exponent = (uint)(n > 0 ? n : -n);
    float baseE = 2.718281828459045f;
    float result = 1.0f;
    while (exponent > 0)
    {
        if ((exponent & 1) == 1)
            result *= baseE;
        baseE *= baseE;
        exponent >>= 1;
    }
    en = (n > 0) ? result : 1.0f / result;
}

return en * er;

}

@MoFtZ
Copy link
Collaborator

MoFtZ commented May 14, 2025

hi @l278943941

The code you provided does not compile - it missing some definitions.
And even if I got it to compile, it would require additional changes, such as adding a Main method.

Are you able to provide a smaller sample project that reproduces the issue?

Thanks.

@l278943941
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants