#version 460

#extension GL_NV_gpu_shader5 : require
//#extension GL_NV_gpu_program5_mem_extended  : require
//#extension GL_NV_gpu_program5  : require
//#extension GL_NV_shader_buffer_load : require
//#extension GL_NV_shader_buffer_store : require
//#extension GL_EXT_bindable_uniform : require
#extension GL_ARB_bindless_texture : require
#extension GL_NV_shader_texture_footprint : disable
#extension GL_EXT_shader_image_load_formatted : require
#extension GL_NV_shader_atomic_float : enable
//#extension GL_EXT_shader_atomic_float2 : require




			
// Length of every bindless sampler / image handle array declared in the binding-0 buffer blocks below.
#define MAX_IMG_COUNT 256
// Placeholder put in front of image and buffer declarations. It currently expands to nothing;
// presumably it is switched to the `coherent` memory qualifier when needed — TODO confirm.
#define COHERENT  
//#define COHERENT  

			



// Shorthand for the window-space xy coordinate of the current fragment (fragment stage only).
#define U gl_FragCoord.xy



// Global per-frame state shared by all shaders, bound at SSBO binding 0.
// The block starts with two handle arrays (samplers, then images) followed by camera and
// input state. Member order defines the memory layout, so it must match the host-side struct.
layout(std430, binding = 0) buffer GGG {
    // Bindless 2D sampler handles, indexed by texture id (see the tex*/texSz macros).
    sampler2D textures[MAX_IMG_COUNT];
    // Bindless 2D image handles with no format qualifier (see imgLoad).
    COHERENT image2D f_images[MAX_IMG_COUNT];
    // V is passed as the view matrix and P as the projection matrix to world_to_view().
    mat4 V;
    mat4 P;
    // Presumably the previous frame's view / projection matrices — TODO confirm.
    mat4 V_prev;
    mat4 P_prev;
    // Presumably the inverses of V and P — TODO confirm against the host code.
    mat4 V_inv;
    mat4 P_inv;
    // Ray origin used by get_ro_and_rd().
    vec3 cam_pos;
    // Presumably the frame delta time — TODO confirm.
    float dT;
    // Camera forward direction; normalized before use in get_ro_and_rd().
    vec3 cam_dir;
    // Passed straight to tan() in get_ro_and_rd(), so it is an angle in radians.
    float cam_fov;

    // Offset added to the ray uv in get_ro_and_rd() when jitter is requested.
    vec2 curr_jitt;
    vec2 prev_jitt;
    // Render-target resolution in pixels (used to scale [0,1] uv to pixel coordinates).
    vec2 R;
    vec2 mouse_ndc;
    vec2 mouse_uv;

    // Time value; only its fractional part is used in this file (debug line plotting).
    float T;
    // Near / far planes used by depth_to_linear(float) and linear_to_depth().
    float cam_near;
    float cam_far;
    // Scale applied to every environment-map sample in sample_env_map().
    float ambient_light_intensity;
    // Frame counter; F % 10 selects the active debug-print buffer (print_idx).
    uint F;
    bool lmb_down;

    bool lmb_just_pressed;
    bool rmb_down;
    bool rmb_just_pressed;
    bool mmb_just_pressed;
//    bool padasdgaaaaaaa;

//    int* amogu;
    // Raw buffer pointers (NVIDIA pointer syntax). Presumably mesh index / vertex / normal /
    // texcoord arrays — they are not dereferenced in this file, TODO confirm.
    int* indices_buff;
    vec3* verts_buff;
    vec3* normals_buff;
    vec2* texcoords_buff;
};

// The blocks below are all declared at binding 0, the same binding as GGG. Each one repeats the
// "sampler array, then image array" prefix with a different sampler type or image format, which
// gives differently-typed views of the same handle arrays. Arrays named DELETEME* are not used in
// this file; presumably they only keep the image array at the same offset — TODO confirm.

// Unsigned-integer samplers and r32ui images (target of the imageAtomicAdd debug/splat helpers).
layout(std430, binding = 0) buffer asdg {
    usampler2D textures_u[MAX_IMG_COUNT];
    layout(r32ui) COHERENT uimage2D u32_images[MAX_IMG_COUNT];
};

// r32i signed-integer image view.
layout(std430, binding = 0) buffer aasdgdasggggsdg {
    usampler2D DELETEME3[MAX_IMG_COUNT];
    layout(r32i) COHERENT iimage2D i32r_images[MAX_IMG_COUNT];
};

// r32f single-channel float image view.
layout(std430, binding = 0) buffer asdggggg {
    usampler2D DELETEME2[MAX_IMG_COUNT];
    layout(r32f) COHERENT image2D f32r_images[MAX_IMG_COUNT];
};

// r16f single-channel half-float image view.
layout(std430, binding = 0) buffer gdsasdggggg {
    usampler2D DELETEME5123[MAX_IMG_COUNT];
    layout(r16f) COHERENT image2D f16r_images[MAX_IMG_COUNT];
};

// Cube-map sampler view and rgba32f 3D image view.
layout(std430, binding = 0) buffer asdga {
    samplerCube textures_cube[MAX_IMG_COUNT];
    layout(rgba32f) COHERENT image3D f_images_3d[MAX_IMG_COUNT];
};
// 3D sampler view and rgba16f 2D image view.
layout(std430, binding = 0) buffer asdgaasd {
    sampler3D textures_3d[MAX_IMG_COUNT];
    layout(rgba16f) COHERENT image2D f16_images[MAX_IMG_COUNT];
};
// rgba16f 3D image view.
layout(std430, binding = 0) buffer aaaasdgaasd {
    sampler3D DELETEME[MAX_IMG_COUNT];
    layout(rgba16f) COHERENT image3D f16_images_3d[MAX_IMG_COUNT];
};
// rgba16ui unsigned-integer 2D image view.
layout(std430, binding = 0) buffer aaaasdgaasdgasd {
    sampler3D DELETEMEaa[MAX_IMG_COUNT];
    layout(rgba16ui) COHERENT uimage2D rgba16ui_images[MAX_IMG_COUNT];
};

//layout(std430, binding = 0) buffer aasdgasdgaaasdgaasdgasd {
//    sampler3D DELETEMEaaaa[64];
//    layout(rgba8) COHERENT image2D rgba8_images[64];
//};



// Index of the debug-print buffer used this frame: the frame counter F cycles through 10 buffers.
// The macro body is not parenthesized, so only use it where precedence cannot matter (e.g. inside []).
#define print_idx F%10

// Capacity (in records) of one debug-print buffer.
#define MAX_PRINT_VALS 150

// One printed value: a type tag plus up to 16 raw 32-bit words (enough for a mat4).
// The tag values are the ones written by the print() overloads (0 = float ... 14 = ivec4).
struct DbgPrintVal{
    uint type;
    uint data[16];
};
// One debug-print buffer: a record counter, bumped atomically by print(), and the records.
struct DbgPrintBuff{
    uint cnt;
    DbgPrintVal dbg_print_values[MAX_PRINT_VALS];
};

// Storage for the 10 debug-print buffers, bound at SSBO binding 1.
layout(std430, binding = 1) COHERENT restrict buffer bbb {
    DbgPrintBuff dbg_print_buff[10];
};

// Values stored in Light.type.
#define LIGHT_TYPE_DIRECTIONAL 0
#define LIGHT_TYPE_POINT 1


// Description of one light. Field order defines the std430 layout and must match the host struct.
struct Light{
    vec3 pos;
    int type;               // one of the LIGHT_TYPE_* values
    float near;
    float far;
    vec3 col;
    vec3 dir;
    mat4 view_mat;
    mat4 proj_mat;
    mat4 view_mats_point[6]; // presumably one view matrix per cube face for point lights — TODO confirm
};

// Light list, bound at SSBO binding 2.
layout(std430, binding = 2) COHERENT restrict buffer gsdlights {
    int light_cnt;
    // Three floats of padding after light_cnt.
    float pad_a;
    float pad_b;
    float pad_c;
    // Presumably a texture index (e.g. shadow map) per light — not used in this file, TODO confirm.
    int light_textures[64];
    Light lights[64];
};

// Predicates for restricting debug output to one invocation: true only when the integer
// coordinate u equals the current fragment coordinate (dbg_frag) or the current compute
// global invocation id in 2D (dbg_comp) or 3D (dbg_comp_3d).
#define dbg_frag(u) all(equal(uvec2(u), uvec2(U)))
#define dbg_comp(u) all(equal(uvec2(u), gl_GlobalInvocationID.xy))
#define dbg_comp_3d(u) all(equal(uvec3(u), gl_GlobalInvocationID.xyz))


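// Size of dbg_print_buff in bytes: 10 * (150 * 17 + 1) * 4
// (10 buffers, each with 150 records of 17 uints plus 1 counter uint, 4 bytes per uint).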


// Reserves one record in this frame's debug-print buffer and writes its type tag.
//   type_tag : value stored in DbgPrintVal.type (identifies the printed GLSL type)
//   slot     : receives the index of the reserved record (valid only when true is returned)
// Returns false when the buffer is full; the caller must then write nothing.
bool dbg_print_reserve(uint type_tag, out uint slot){
    slot = 0u;
    // Cheap early-out so the counter of an already full buffer stops growing.
    if(atomicAdd(dbg_print_buff[print_idx].cnt, 0) >= MAX_PRINT_VALS){ return false; }
    slot = atomicAdd(dbg_print_buff[print_idx].cnt, 1);
    // The check above and the increment are two separate atomics: other invocations may take
    // the last free records in between, so the reserved index has to be validated as well.
    // Without this the writes below could land past the end of dbg_print_values.
    if(slot >= MAX_PRINT_VALS){ return false; }
    dbg_print_buff[print_idx].dbg_print_values[slot].type = type_tag;
    return true;
}

// print(x): appends x to the debug-print buffer of the current frame. One overload per type;
// floats are stored as raw bits, signed ints are stored via uint(), matrices are written
// column by column (f[column][row]). Calls are silently dropped once the buffer is full.

// type tag 0
void print(float f){
    uint slot;
    if(!dbg_print_reserve(0u, slot)){ return; }
    dbg_print_buff[print_idx].dbg_print_values[slot].data[0] = floatBitsToUint(f);
}
// type tag 1
void print(vec2 f){
    uint slot;
    if(!dbg_print_reserve(1u, slot)){ return; }
    for(int i = 0; i < 2; i++){
        dbg_print_buff[print_idx].dbg_print_values[slot].data[i] = floatBitsToUint(f[i]);
    }
}
// type tag 2
void print(vec3 f){
    uint slot;
    if(!dbg_print_reserve(2u, slot)){ return; }
    for(int i = 0; i < 3; i++){
        dbg_print_buff[print_idx].dbg_print_values[slot].data[i] = floatBitsToUint(f[i]);
    }
}
// type tag 3
void print(vec4 f){
    uint slot;
    if(!dbg_print_reserve(3u, slot)){ return; }
    for(int i = 0; i < 4; i++){
        dbg_print_buff[print_idx].dbg_print_values[slot].data[i] = floatBitsToUint(f[i]);
    }
}

// type tag 4
void print(mat4 f){
    uint slot;
    if(!dbg_print_reserve(4u, slot)){ return; }
    for(int k = 0; k < 4; k++){
        for(int i = 0; i < 4; i++){
            dbg_print_buff[print_idx].dbg_print_values[slot].data[k*4 + i] = floatBitsToUint(f[k][i]);
        }
    }
}
// type tag 5
void print(mat3 f){
    uint slot;
    if(!dbg_print_reserve(5u, slot)){ return; }
    for(int k = 0; k < 3; k++){
        for(int i = 0; i < 3; i++){
            dbg_print_buff[print_idx].dbg_print_values[slot].data[k*3 + i] = floatBitsToUint(f[k][i]);
        }
    }
}
// type tag 6
void print(mat2 f){
    uint slot;
    if(!dbg_print_reserve(6u, slot)){ return; }
    for(int k = 0; k < 2; k++){
        for(int i = 0; i < 2; i++){
            dbg_print_buff[print_idx].dbg_print_values[slot].data[k*2 + i] = floatBitsToUint(f[k][i]);
        }
    }
}
// type tag 7
void print(uint f){
    uint slot;
    if(!dbg_print_reserve(7u, slot)){ return; }
    dbg_print_buff[print_idx].dbg_print_values[slot].data[0] = f;
}
// type tag 8
void print(int f){
    uint slot;
    if(!dbg_print_reserve(8u, slot)){ return; }
    dbg_print_buff[print_idx].dbg_print_values[slot].data[0] = uint(f);
}

// type tag 9
void print(uvec2 f){
    uint slot;
    if(!dbg_print_reserve(9u, slot)){ return; }
    for(int i = 0; i < 2; i++){
        dbg_print_buff[print_idx].dbg_print_values[slot].data[i] = f[i];
    }
}
// type tag 10
void print(uvec3 f){
    uint slot;
    if(!dbg_print_reserve(10u, slot)){ return; }
    for(int i = 0; i < 3; i++){
        dbg_print_buff[print_idx].dbg_print_values[slot].data[i] = f[i];
    }
}
// type tag 11
void print(uvec4 f){
    uint slot;
    if(!dbg_print_reserve(11u, slot)){ return; }
    for(int i = 0; i < 4; i++){
        dbg_print_buff[print_idx].dbg_print_values[slot].data[i] = f[i];
    }
}

// type tag 12
void print(ivec2 f){
    uint slot;
    if(!dbg_print_reserve(12u, slot)){ return; }
    for(int i = 0; i < 2; i++){
        dbg_print_buff[print_idx].dbg_print_values[slot].data[i] = uint(f[i]);
    }
}
// type tag 13
void print(ivec3 f){
    uint slot;
    if(!dbg_print_reserve(13u, slot)){ return; }
    for(int i = 0; i < 3; i++){
        dbg_print_buff[print_idx].dbg_print_values[slot].data[i] = uint(f[i]);
    }
}
// type tag 14
void print(ivec4 f){
    uint slot;
    if(!dbg_print_reserve(14u, slot)){ return; }
    for(int i = 0; i < 4; i++){
        dbg_print_buff[print_idx].dbg_print_values[slot].data[i] = uint(f[i]);
    }
}


//void print_vec3(vec3 f){
//    uint idx = atomicAdd(dbg_print_buff[print_idx].cnt, 3);
//    dbg_print_buff[print_idx].values[idx] = floatBitsToUint(f.x);
//    dbg_print_buff[print_idx].values[idx+1] = floatBitsToUint(f.y);
//    dbg_print_buff[print_idx].values[idx+2] = floatBitsToUint(f.z);
//}
//void print_vec4(vec4 f){
//    uint idx = atomicAdd(dbg_print_buff[print_idx].cnt, 4);
//    dbg_print_buff[print_idx].values[idx] = floatBitsToUint(f.x);
//    dbg_print_buff[print_idx].values[idx+1] = floatBitsToUint(f.y);
//    dbg_print_buff[print_idx].values[idx+2] = floatBitsToUint(f.z);
//    dbg_print_buff[print_idx].values[idx+3] = floatBitsToUint(f.w);
//}



// Bindless texture helpers; t is an index into the textures[] handle array.
#define tex(t,u) texture(textures[t],u)
// NOTE(review): despite the name, the third argument of texture() is a LOD *bias*, not an
// explicit LOD. Left unchanged because callers may rely on it; use textureLod() for explicit LOD.
#define texLod(t,u,l) texture(textures[t],u,l)
#define texFetch(t,u) texelFetch(textures[t],ivec2(u),0)
#define texSz(t) textureSize(textures[t],0)

// Loads a texel from the unformatted image f_images[t] at integer coordinate u.
#define imgLoad(t,u) imageLoad(f_images[t],ivec2(u))
#define pi acos(-1.)
#define tau (2.*pi)
// Repeats p with period a and recentres it to [-a/2, a/2). The argument a is parenthesized
// so that expressions such as pmod(p, x + y) expand with the intended precedence.
#define pmod(p,a) (mod(p,a) - 0.5 * (a))
// 2x2 rotation matrix for angle a (radians).
#define rot(a) mat2(cos(a),-sin(a),sin(a),cos(a))
// Height / width ratio of the render target. Parenthesized so that `x / aspect` divides by the ratio.
#define aspect (R.y/R.x)

// Index into u32_images[] of the image that dbg_plot_ss() accumulates into.
#define dbg_tex 0

// Indices into u32_images[] of the three per-channel accumulation images used by splat_ss().
#define splat_tex_r 1
#define splat_tex_g 2
#define splat_tex_b 3


// Scene selector; it is only referenced by commented-out code in this file.
#define SCENE 2
// Sentinel distance meaning "ray hit nothing"; only referenced by commented-out code in this file.
#define NO_INTERS -1


// Per-invocation RNG state advanced by the hash_f() family of macros.
uint seed;
// 32-bit integer mixing hash: three xor-shift steps interleaved with two odd multiplies.
// Maps 0 to 0; all arithmetic wraps modulo 2^32.
uint hashi( uint x) {
    x = (x ^ (x >> 16)) * 0x7feb352dU;
    x = (x ^ (x >> 15)) * 0x846ca68bU;
    return x ^ (x >> 16);
}
// Wang-style 32-bit integer hash: xor/shift, multiply by 9, xor/shift, multiply, xor/shift.
uint wang(uint a) {
    uint v = (a ^ 61U) ^ (a >> 16U);
    v *= 9U;
    v ^= v >> 4;
    v *= 0x27d4eb2dU;
    v ^= v >> 15;
    return v;
}


// Stateless: hashes the value s to a float in [0,1].
#define hash_f_s(s)  ( float( hashi(uint(s)) ) / float( 0xffffffffU ) )
// Stateful: advances the global `seed` and returns it as a float in [0,1].
#define hash_f()  ( float( seed = hashi(seed) ) / float( 0xffffffffU ) )
// Vectors of consecutive stateful samples (each component advances `seed` once).
#define hash_v2()  vec2(hash_f(),hash_f())
#define hash_v3()  vec3(hash_f(),hash_f(),hash_f())
// Four samples need a vec4; a vec3 constructor with four scalars does not compile.
#define hash_v4()  vec4(hash_f(),hash_f(),hash_f(),hash_f())

// Stateless vec3 built from the three consecutive values s, s + 1, s + 2.
#define hash_v3_s(s)  vec3(hash_f_s(s),hash_f_s(s + 1),hash_f_s(s + 2))


// Random point in the unit disc: the angle comes from the first sample, the radius is the
// square root of the second sample. Advances `seed` twice.
vec2 sample_circ(){
    vec2 xi = hash_v2();
    float angle = xi.x*tau;
    float radius = sqrt(xi.y);
    return vec2(sin(angle),cos(angle)) * radius;
}

// Random point inside the axis-aligned rectangle of size w centred on the origin.
// Advances `seed` twice.
vec2 sample_rect(vec2 w){
    vec2 xi = hash_v2();
    vec2 half_size = 0.5*w;
    return xi * w - half_size;
}

// Random point on the outline of the axis-aligned rectangle of size w centred on the origin.
// One axis is pinned to either its negative or positive edge, the other axis is sampled
// uniformly along that edge. Advances `seed` three times.
vec2 sample_rect_outline(vec2 w){
    vec2 r = hash_v2();
    // r.x picks which axis is pinned to an edge (each axis with probability 1/2).
    int pinned = (r.x < 0.5) ? 1 : 0;
    int along = 1 - pinned;

    vec2 offs = vec2(0);
    // r.y picks the negative or positive edge. The threshold is 0.5 so that both opposite
    // edges are equally likely; a threshold of 0.25 would hit one edge three times as often.
    offs[pinned] = (r.y < 0.5) ? -w[pinned]/2.0 : w[pinned]/2.0;
    // Uniform position along the chosen edge.
    offs[along] = -w[along]/2.0 + w[along]*hash_f();
    return offs;
}

// Converts HSV (c.x = hue, c.y = saturation, c.z = value) to RGB.
// Hue is taken modulo 1 via fract().
vec3 hsv2rgb(vec3 c)
{
    const vec3 hue_shift = vec3(1.0, 2.0 / 3.0, 1.0 / 3.0);
    // Per-channel triangle wave of the hue, in [0,3].
    vec3 ramp = abs(fract(vec3(c.x) + hue_shift) * 6.0 - vec3(3.0));
    vec3 pure = clamp(ramp - vec3(1.0), 0.0, 1.0);
    // Saturation blends from white to the pure hue; value scales the result.
    return c.z * mix(vec3(1.0), pure, c.y);
}

// Converts RGB to HSV (returns hue, saturation, value) without branching:
// two step()/mix() selections sort the channels so that q.x ends up holding the maximum.
vec3 rgb2hsv(vec3 c)
{
    const vec4 K = vec4(0.0, -1.0 / 3.0, 2.0 / 3.0, -1.0);
    vec4 b_then_g = vec4(c.bg, K.wz);
    vec4 g_then_b = vec4(c.gb, K.xy);
    vec4 p = mix(b_then_g, g_then_b, step(c.b, c.g));

    vec4 r_last = vec4(p.xyw, c.r);
    vec4 r_first = vec4(c.r, p.yzx);
    vec4 q = mix(r_last, r_first, step(p.x, c.r));

    float chroma = q.x - min(q.w, q.y);
    // Small epsilon keeps the divisions finite for greys and black.
    float eps = 1.0e-10;
    float hue = abs(q.z + (q.w - q.y) / (6.0 * chroma + eps));
    float sat = chroma / (q.x + eps);
    return vec3(hue, sat, q.x);
}

// Result of a ray trace. Only constructed by commented-out code in this file.
struct TraceRes {
    float dist;     // distance along the ray to the hit
    float mat_id;   // presumably an index into materials[], stored as float — TODO confirm
    bool success;   // false when nothing was hit
};

// Surface material; currently just a base colour.
struct Mat{
    vec3 albedo;
};

// Material table looked up by get_mat(): index 0 is a light warm grey (scaled by 1.4),
// index 1 a red and index 2 a blue.
Mat materials[] = Mat[](
    Mat(vec3(0.55,0.525,0.5)*1.4),
    Mat(vec3(1.0,0.1,0.05)),
    Mat(vec3(0.05,0.1,1))
);

// Returns the material with the given id. Ids past the end of the table are clamped to the
// last entry, because indexing an array out of bounds is undefined in GLSL.
Mat get_mat(uint id){
    uint last = uint(materials.length() - 1);
    return materials[min(id, last)];
}

//vec3 get_ball_pos(float i, float time){
//    float t = time*abs(sin(i));
//    vec3 sp = vec3(abs(sin((i + 1.)*vec3(
//    1.5,0.6,6.67
//    ))));
//    vec3 ball_pos = sin(vec3(
//    t*0.6,t*0.8,t*1.1
//    )*4.*sp)*0.2*vec3(2.,0.5,2.) + vec3(0,0.5,0);
//    return ball_pos;
//}

// Distance-field union that carries a material id: a is the current vec2(distance, id),
// b a candidate distance and c its id. Keeps a when it is closer, otherwise yields vec2(b, c).
// Arguments are parenthesized so compound expressions expand safely; note that a and b are
// evaluated twice.
#define dmin(a,b,c) ((a).x < (b).x ? (a) : vec2((b),(c)))

//vec2 scene_c(vec3 p){
//    vec3 Q;
//    float i,d=1.,a,b=sqrt(3.);
//    Q=mod(p,b*2.)-b;
//    a=1.; d=9.;
//    for(int j=0;j++<7;){
//        Q=abs(Q);
//        d=min(d,(dot(Q,vec3(1)/b)-1.)/a);
//        Q=Q*3.-6./b;a*=3.;
//    }
//    return vec2(d,0);
//}
//vec2 scene_3(vec3 p){
//    vec3 q = p;
//    p.xz=abs(.5-mod(p.xz,1.))+.01;
//    float DEfactor=1.;
//    for (int i=0; i<14; i++) {
//        p = abs(p)-vec3(0.,2.,0.);
//        float r2 = dot(p, p);
//        float sc=2./clamp(r2,0.4,1.);
//        p*=sc;
//        DEfactor*=sc;
//        p = p - vec3(0.5,1.,0.5);
//    }
//    float d = length(p)/DEfactor-.0005;
//    d = max(d,q.y - 2.0);
//    return vec2(d,0);
//}
//
//vec2 scene_a(vec3 p){
//    //	p.xz *= rot(sin(log(length(p.xz))*0.45 + T*0.5) );
//    float time = T;
//    vec3 j = p;
//    float dr = length(j.xz);
//    p.xz = vec2(
//    atan(j.x,j.z),
//    log(length(j.xz))
//    );
//    p.y /= dr;
//
//    vec3 q = p;
//
//    q.xz = pmod(q.xz,0.5);
//    float d = length(q) - 0.18;
//
//    d = min(d,p.y);
//    d *=dr;
//
//    p = j;
//    p.y -= 2.4;
//    //	ball_id = -1.;
//    for(float i = 1.; i < 4.; i++){
//        vec3 ball_pos = get_ball_pos(i,time);
//        float db = length(p - ball_pos );
//        db -= 0.1;
//        if(db < d){
//            d = db;
//            //			ball_id = i;
//        }
//    }
//
//    return vec2(d,1);
//}
//
//float sdBox(vec3 p, vec3 s){
//    p = abs(p) - s;
//    return max(p.z,max(p.y,p.x));
//}
//vec2 scene_b(vec3 p){
//    vec2 d = vec2(10000);
//
//    float s = 3.;
//
//    d = dmin(d, sdBox(p - vec3(0,0,s), vec3(s,s,0.01)),2);
//    d = dmin(d, sdBox(p + vec3(0,0,s), vec3(s,s,0.01)),1);
//
//    d = dmin(d, sdBox(p + vec3(s,0,0), vec3(0.01,s,s)),0);
//    d = dmin(d, sdBox(p + vec3(0,s,0), vec3(s,0.01,s)),0);
//    d = dmin(d, sdBox(p + vec3(0,-s,0), vec3(s,0.01,s)),0);
//
//    d = dmin(d, sdBox(p + vec3(0,3.,0), vec3(0.2,1.5,0.5)),0);
//
//    d = dmin(d, sdBox(p + vec3(0,3.,0), vec3(0.2,1.5,0.5)),0);
//
//
//    return d;
//}
//
//vec2 scene_4(vec3 p0){
//    vec4 p = vec4(p0, 1.);
//    p.xyz=abs(p.xyz);
//    if(p.x < p.z)p.xz = p.zx;
//    if(p.z < p.y)p.zy = p.yz;
//    if(p.y < p.x)p.yx = p.xy;
//    for(int i = 0; i < 4; i++){
//        p.xyz = abs(p.xyz);
//        uint seed = uint(p.x+p.y+p.z);
//        p*=(2./clamp(dot(p.xyz,p.xyz),0.,1.));
//        p.xyz-=vec3(.6,.9,2.2);
//    }
//    float m = 1.0;
//    p.xyz-=clamp(p.xyz,-m,m);
//    p/= p.w;
//    float d = length(p.xyz);
//    return vec2(d, 0);
//}
//
//vec2 scene_5(vec3 _p){
//    vec3 p = _p;
//    p.z -= 1.4;
//    float width=.22;
//    float scale=8.;
//    float t=0.2 + T;
//    t = 0.;
//    float dotp= dot(p,p);
//    // p.x+=sin(t*40.)*.007;
//    p=p/dotp*scale;
//    p.x += t*0.4;
//    p = mod(p+ 1000,vec3(3.0) )  - 1.5;
//    // p=sin(p+vec3f(sin(1.+t)*2.,-t,-t*2.));
//    float d=length(p.yz)-width;
//    d=min(d,length(p.xz)-width);
//    d=min(d,length(p.xy)-width);
//    // d=min(d,length(p*p*p)-width*.3);
//    d = d*dotp/scale*0.7;
//    return vec2(d, 0);
//}
//
//vec2 sdf(vec3 p){
//    if(SCENE == 0){
//        return scene_a(p);
//    } else if(SCENE == 1){
//        return scene_b(p);
//    } else if(SCENE == 2){
//        return scene_c(p);
//    } else if(SCENE == 3){
//        return scene_3(p);
//    } else if(SCENE == 4){
//        return scene_4(p);
//    } else if(SCENE == 5){
//        return scene_5(p);
//    } else {
//        return vec2(0);
//    }
//}
//
//
//vec3 get_norm(vec3 p){
//    vec2 t = vec2(0.001,0);
//    return normalize(vec3(
//        sdf(p + t.xyy).x - sdf(p - t.xyy).x,
//        sdf(p + t.yxy).x - sdf(p - t.yxy).x,
//        sdf(p + t.yyx).x - sdf(p - t.yyx).x
//    ));
//}
//
//vec2 march_scene(
//    vec3 ro,
//    vec3 rd,
//    out vec3 norm
//){
//    vec2 d = vec2(NO_INTERS);
//    float t = 0.;
//    vec3 p = ro;
//    bool hit = false;
//    float max_d = 15.0;
//    for(int i = 0; i < 350 + min(max(F,0),0); i++){
//        vec2 s = sdf(p);
//        if(s.x < 0.0001){
//            hit = true;
//            d = s;
//            break;
//        } else if (t > max_d){
//            break;
//        }
//        p = ro + rd * (t += s.x);
//    }
//    if(hit){
//        d = vec2(t, d.y);
//        norm = get_norm(p);
//    }
//    return d;
//}
//
//TraceRes trace( vec3 ro, vec3 rd ) {
//    vec3 norm;
//    vec2 d = march_scene(ro, rd, norm);
//
//    TraceRes trace_res = TraceRes(
//        d.x,
//        d.y,
////        intersects(d.x)
//        d.x != NO_INTERS
//    );
//
//    return trace_res;
//}


// Ray parameter t at which the ray (_ro, _rd) meets the plane through the origin with normal _n.
// If the origin lies on the positive side of the plane, it is first mirrored to the negative
// side and the ray direction is negated. The result is not finite when the direction is
// parallel to the plane.
float intersectPlane(vec3 _ro, vec3 _rd, vec3 _n) {
    vec3 plane_n = normalize(_n);
    vec3 origin = _ro;
    vec3 dir = _rd;

    float side = dot(origin, plane_n);
    if(side > 0.){
        // Reflect the origin through the plane and flip the direction.
        origin -= plane_n * side*2.;
        dir = -dir;
    }

    return -dot(origin, plane_n) / dot(dir, plane_n);
}

// Look-at basis from origin towards tar with world up (0,1,0).
// Columns are (right, up, forward). Degenerate when the view direction is parallel to world up.
mat3 get_orth_mat(vec3 origin, vec3 tar) {
    vec3 forward = normalize(tar - origin);
    vec3 side = normalize(cross(vec3(0,1,0),forward));
    vec3 upward = normalize(cross(forward, side));
    return mat3(side, upward, forward);
}

// Same basis as get_orth_mat(), but with the forward vector in the second (y) column:
// columns are (up, forward, right).
mat3 get_orth_mat_forw_y(vec3 origin, vec3 tar){
    vec3 forward = normalize(tar - origin );
    vec3 side = normalize(cross(vec3(0,1,0),forward));
    vec3 upward = normalize(cross(forward, side));
    return mat3(upward, forward, side);
}


// Reciprocal of pi.
const float invPi = 1.0 / pi;

// Tuning constants for the analytic sky model in getAtmosphericScattering().
const float zenithOffset = 0.1;        // subtracted from the height inside zenithDensity()
const float multiScatterPhase = 0.1;   // added to the sun height before the density lookups
const float density = 0.7;             // numerator of zenithDensity()

const float anisotropicIntensity = 0.0; // Higher numbers result in more anisotropic scattering

const vec3 skyColor = vec3(0.39, 0.57, 1.0) * (1.0 + anisotropicIntensity); // Make sure none of the components is ever 0.0

// Cubic smoothstep polynomial 3x^2 - 2x^3. Body and argument are parenthesized so the macro
// is safe inside larger expressions and with compound arguments.
#define smooth(x) ((x)*(x)*(3.0-2.0*(x)))
// Atmospheric density for a height x; the max() keeps the pow() base positive.
#define zenithDensity(x) (density / pow(max((x) - zenithOffset, 0.35e-2), 0.75))

// Absorption term of the sky model: 2 * 2^(-x * y), evaluated per colour channel.
vec3 getSkyAbsorption(vec3 x, float y){
    vec3 exponent = x * -y;
    return exp2(exponent) * 2.0;
}

// Sun disc intensity: 50 inside a radius of 0.026 around lp, fading to 0 at radius 0.03.
float getSunPoint(vec2 p, vec2 lp){
    float dist_to_sun = distance(p, lp);
    float disc = smoothstep(0.03, 0.026, dist_to_sun);
    return disc * 50.0;
}

// Brightening factor near the sun: 1 at distance >= 1 from lp, rising to 1 + pi/2 at the sun.
float getRayleigMultiplier(vec2 p, vec2 lp){
    float closeness = 1.0 - clamp(distance(p, lp), 0.0, 1.0);
    return 1.0 + pow(closeness, 2.0) * pi * 0.5;
}

// Mie-style glow around the sun: a sharply peaked falloff (distance^0.1) pushed through the
// cubic smoothstep polynomial and scaled by 2*pi.
float getMie(vec2 p, vec2 lp){
    float falloff = pow(distance(p, lp), 0.1);
    float disk = clamp(1.0 - falloff, 0.0, 1.0);
    float eased = disk*disk*(3.0 - 2.0 * disk);
    return eased * 2.0 * pi;
}

// Analytic sky colour for view coordinate p with the sun at lp (both 2D; y is treated as height).
// Combines a Rayleigh-like sky term, the sun disc and a Mie glow, each tinted by absorption.
vec3 getAtmosphericScattering(vec2 p, vec2 lp){
    float view_density = zenithDensity(p.y);
    // 0 when the sun is low, up to 1 when it is high; blends the two sky tonemaps below.
    float sun_height_mix = clamp(length(max(lp.y + multiScatterPhase - zenithOffset, 0.0)), 0.0, 1.0);

    float rayleigh = getRayleigMultiplier(p, lp);

    vec3 view_absorption = getSkyAbsorption(skyColor, view_density);
    vec3 sun_absorption = getSkyAbsorption(skyColor, zenithDensity(lp.y + multiScatterPhase));
    vec3 sky = skyColor * view_density * rayleigh;
    vec3 sun_disc = getSunPoint(p, lp) * view_absorption;
    vec3 glow = getMie(p, lp) * sun_absorption;

    vec3 result = mix(sky * view_absorption, sky / (sky + 0.5), sun_height_mix);
    result += sun_disc + glow;
    // Tint by the sun's absorption, half coloured and half by its magnitude.
    result *= sun_absorption * 0.5 + 0.5 * length(sun_absorption);

    return result;
}
// Converts a direction to equirectangular-style coordinates with +Y as the pole axis.
// Returns (azimuth / (2*pi), polar / pi): x is in [-0.5, 0.5], y is in [0, 1].
vec2 cartesian_to_spherical(vec3 p){
    vec3 d = normalize(p);
    // Azimuth around the Y axis, measured in the XZ plane.
    float azimuth = atan(d.z, d.x);
    // Angle from the +Y axis.
    float polar = atan(sqrt(d.x*d.x + d.z*d.z), d.y);
    return vec2(azimuth/pi/2.0, polar/pi);
}



// Index into textures[] of the environment (HDRI) map.
uniform int hdri_tex;
// Samples the environment map in direction p and scales it by ambient_light_intensity.
//   quant : currently ignored — it is forced to false below, so the quantisation never runs
//   is_gi : when true, reads the fixed mip level 8 (heavily blurred) instead of the
//           automatically selected level
vec3 sample_env_map(vec3 p, bool quant, bool is_gi){
    vec2 uv = cartesian_to_spherical(p);

    // Debug override: quantisation is disabled regardless of the argument.
    quant = false;
    if(quant){
        float step_size = 0.01;
        uv = round(uv/step_size) * step_size;
    }

    // Wrap the coordinates into [0,1) before the lookup.
    vec2 wrapped = fract(uv);
    vec3 radiance;
    if(is_gi){
        radiance = textureLod(textures[hdri_tex], wrapped, 8).rgb;
    } else {
        radiance = texture(textures[hdri_tex], wrapped).rgb;
    }
    return radiance * ambient_light_intensity;
}

//vec3 sample_env_map(vec3 p){
//    vec2 s = cartesian_to_spherical(p);
//
//    s.y = 1.-s.y;
////    s.y -= 0.5;
//    s.x *= 2.;
//    s.y -= 0.37;
//
//    vec3 scatt = getAtmosphericScattering(s, vec2(0.,0.1));
//    return scatt*4.0;
////    return vec3(0.4);
//}

// Interleaved gradient noise: a cheap per-pixel pseudo-random value in [0,1) built from two
// nested fract() calls.
float IGN(vec2 p) {
    const vec2 weights = vec2(0.06711056, 0.00583715);
    const float scale = 52.9829189;
    float inner = fract(dot(p, weights));
    return fract( scale * inner );
}


// Converts a [0,1] depth-buffer value d to a linear distance: returns zNear for d = 0 and
// zFar for d = 1.
float linearize_depth(float d, float zNear,float zFar) {
    float numer = zNear * zFar;
    float denom = zFar + d * (zNear - zFar);
    return numer / denom;
}
// Converts a [0,1] depth-buffer value to a linear distance for the given near / far planes.
// The depth is first remapped to NDC [-1,1]; the result is near at depth 0 and far at depth 1.
float depth_to_linear(float depth, float near, float far) {
    depth = 2.0 * depth - 1.0;
    return 2.0 * near * far / (far + near - depth * (far - near));
}

// Convenience overload: linearises a [0,1] depth value using the camera's own
// cam_near / cam_far planes.
float depth_to_linear(float depth) {
    return depth_to_linear(depth, cam_near, cam_far);
}

// Inverse of depth_to_linear(float): maps a linear distance back to a [0,1] depth-buffer
// value using cam_near / cam_far.
float linear_to_depth(float depth) {
    float n = cam_near;
    float f = cam_far;
    // NDC depth in [-1,1] ...
    float ndc_z = (f + n - 2.0 * n * f / depth) / (f - n);
    // ... remapped to [0,1].
    return (ndc_z + 1.0) / 2.0;
}

// Reconstructs a world-space position from a [0,1] depth value and a [0,1] screen uv.
//   view_inv, proj_inv : inverse view and inverse projection matrices
vec3 depth_to_world(float depth, vec2 uv, mat4 view_inv, mat4 proj_inv){
    // Both uv and depth are remapped from [0,1] to NDC [-1,1].
    vec4 ndc = vec4( uv * 2. - 1.0, depth*2.0 - 1.0, 1.);

    vec4 view_pos = proj_inv * ndc;
    // Undo the perspective divide.
    view_pos /= view_pos.w;

    return (view_inv * view_pos).xyz;
}

// Projects a world-space point to the screen.
// Returns: xy = screen uv ([0,1] inside the view), z = view-space z (before projection),
//          w = clip-space w.
vec4 world_to_view(vec3 world, mat4 proj, mat4 view){
    vec4 view_pos = view * vec4(world.xyz, 1);
    vec4 clip = proj * view_pos;
    vec3 ndc = clip.xyz / clip.w;
    // NDC [-1,1] -> uv [0,1].
    vec2 uv = (ndc.xy + 1.) / 2.;
    return vec4(uv, view_pos.z, clip.w);
}
// Weighted sum of the RGB channels (weights 0.2126729, 0.7151522, 0.0721750).
float luma(vec3 col) {
    const vec3 weights = vec3(0.2126729, 0.7151522, 0.0721750);
    return dot(col, weights);
}

// Float hash: vec3 in, vec3 out, each output component in [0,1).
vec3 h33(vec3 p3) {
    vec3 q = fract(p3 * vec3(.1031, .1030, .0973));
    q += dot(q, q.yxz+33.33);
    vec3 mixed = q.xxy + q.yxx;
    return fract(mixed*q.zyx);
}

// Packs two values from [-1,1] into 12 bits each (x in bits 0-11, y in bits 12-23).
// Inputs are not clamped.
uint packSnorm2x12(vec2 v) {
    uvec2 q = uvec2(round(2047.5 + v*2047.5));
    return (q.y << 12u) | q.x;
}
// Packs two values from [-1,1] into 8 bits each (x in bits 0-7, y in bits 8-15).
// Inputs are not clamped.
uint packSnorm2x8(vec2 v) {
    uvec2 q = uvec2(round(127.5 + v*127.5));
    return (q.y << 8u) | q.x;
}
// Inverse of packSnorm2x8(): extracts two 8-bit fields and maps them back to [-1,1].
vec2 unpackSnorm2x8(uint d) {
    uvec2 q = uvec2(d, d >> 8) & 255u;
    return vec2(q)/127.5 - 1.0;
}
// Inverse of packSnorm2x12(): extracts two 12-bit fields and maps them back to [-1,1].
vec2 unpackSnorm2x12(uint d) {
    uvec2 q = uvec2(d, d >> 12) & 4095u;
    return vec2(q)/2047.5 - 1.0;
}
// Per-component sign that never returns 0: +1 for components >= 0, otherwise -1.
vec2 msign( vec2 v ) {
    vec2 s = vec2(-1.0);
    if(v.x >= 0.0){ s.x = 1.0; }
    if(v.y >= 0.0){ s.y = 1.0; }
    return s;
}
// Octahedral encoding of a direction into 16 bits (2 x 8-bit snorm).
// The vector is projected onto the octahedron |x|+|y|+|z| = 1; the lower hemisphere is
// folded over the diagonals before packing.
uint octahedral_16( in vec3 nor ) {
    vec3 n = nor / ( abs( nor.x ) + abs( nor.y ) + abs( nor.z ) );
    vec2 oct = n.xy;
    if(!(n.z >= 0.0)){
        oct = (1.0-abs(n.yx))*msign(n.xy);
    }
    return packSnorm2x8(oct);
}
// Decodes a 16-bit octahedral normal produced by octahedral_16().
// Uses Rune Stubbe's branch-light unfolding, much faster than the original formulation.
vec3 i_octahedral_16( uint data ) {
    vec2 v = unpackSnorm2x8(data);
    vec3 n = vec3(v, 1.0 - abs(v.x) - abs(v.y));
    // For the lower hemisphere (n.z < 0) push xy back towards the origin by -n.z.
    float fold = max(-n.z, 0.0);
    n.xy += mix(vec2(fold), vec2(-fold), greaterThan(n.xy, vec2(0.0)));
    return normalize( n );
}


// Debug plot in screen space: z is a [0,1] uv. Adds 1 to the matching pixel of the r32ui
// debug image; points outside the screen are ignored.
void dbg_plot_ss(vec2 z){
    vec2 px = z * R;
    bool on_screen =
        all(greaterThanEqual(px, vec2(0))) &&
        all(lessThan(px, R));
    if(!on_screen){
        return;
    }
    imageAtomicAdd(u32_images[dbg_tex], ivec2(px), (1u));
}


// Additively splats a colour at pixel coordinate q. The colour is quantised to integers
// (x1000) and accumulated with atomics into three separate r32ui images, one per channel.
void splat_ss(vec2 q, vec3 col){
    ivec2 px = ivec2(q);
    ivec3 fixed_col = ivec3(col*1000);
    imageAtomicAdd(u32_images[splat_tex_r], px, fixed_col.r);
    imageAtomicAdd(u32_images[splat_tex_g], px, fixed_col.g);
    imageAtomicAdd(u32_images[splat_tex_b], px, fixed_col.b);
}

// Projects a world-space point (with its x mirrored) to pixel coordinates.
// Returns vec3(pixel.xy, |view-space z|) when the point has negative view-space z and lands
// on screen, otherwise vec3(-1).
vec3 proj_p(vec3 z){
    vec3 mirrored = vec3(-z.x, z.yz);
    vec3 q = world_to_view(mirrored, P, V).xyz;
    q.xy *= R;

    bool in_front = q.z < 0;
    bool on_screen =
        all(greaterThanEqual(q.xy, vec2(0))) &&
        all(lessThan(q.xy, R));
    if(in_front && on_screen){
        return vec3(q.xy, abs(q.z));
    }
    return vec3(-1);
}


// Splats colour col at the screen position of world-space point z (x mirrored first).
// Points with non-negative view-space z are skipped.
void plot_ws(vec3 z, vec3 col){
    vec3 mirrored = vec3(-z.x, z.yz);
    vec3 q = world_to_view(mirrored, P, V).xyz;
    if( q.z < 0 ){
        splat_ss(q.xy * R, col);
    }
}
// Debug-plots world-space point z. Unlike plot_ws(), the x coordinate is NOT mirrored.
// Points with non-negative view-space z are skipped.
void dbg_plot_ws(vec3 z){
    vec3 projected = world_to_view(z, P, V).xyz;
    bool in_front = projected.z < 0;
    if(in_front){
        // dbg_plot_ss() expects [0,1] uv and does the scaling to pixels itself.
        dbg_plot_ss(projected.xy);
    }
}

// Debug-plots a dotted world-space line from z to j using 150 points.
// The points are shifted along the line by fract(T), so the dots crawl over time.
void dbg_plot_ws_line(vec3 z, vec3 j){
    const int POINT_CNT = 150;
    for(int i = 0; i < POINT_CNT; i++){
        float t = (float(i) + fract(T))/150.0;
        // Same projection and depth test as a single dbg_plot_ws() call.
        dbg_plot_ws(mix(z, j, t));
    }
}

// Debug-plots 150 points in screen space ([0,1] uv) starting at z.
// NOTE(review): the interpolation weight is negative, so the points run from z AWAY from j
// (towards 2*z - j) rather than from z to j. Kept as-is; confirm whether this is intended.
void dbg_plot_ss_line(vec2 z, vec2 j){
    const int POINT_CNT = 150;
    for(int i = 0; i < POINT_CNT; i++){
        float t = fract((float(i) + fract(T))/150.0);
        dbg_plot_ss(mix(z, j, -t));
    }
}


// Builds the primary camera ray for a screen coordinate.
//   ro      : receives the ray origin (cam_pos)
//   rd      : receives the normalized ray direction
//   uv      : screen coordinate centred on 0; x is scaled by the width/height ratio here
//   do_jitt : when true, curr_jitt is added to the scaled uv
// The direction is built from cam_dir and world up (0,1,0). An earlier computation based on
// mat3(V) was removed: its result was always overwritten before being used.
void get_ro_and_rd(out vec3 ro, out vec3 rd, vec2 uv, bool do_jitt){
    ro = cam_pos;

    uv.x *= R.x/R.y;
    // NOTE(review): the full cam_fov goes into tan(); a half-angle would be more usual — confirm.
    uv *= tan(cam_fov);

    if(do_jitt){
        uv += curr_jitt;
    }

    vec3 dir = normalize(cam_dir);
    vec3 right = normalize(cross(dir, vec3(0,1,0)));
    vec3 up = normalize(cross(right, dir));

    rd = normalize(dir + right * uv.x + up * uv.y);
}

// Maps a [0,1] screen uv to centred coordinates normalised by the screen height:
// y spans [-0.5, 0.5], x spans +-0.5 * width/height.
vec2 p_ss_to_norm_uv(vec2 _p){
    vec2 px = _p*R.xy;
    vec2 centred = px - 0.5*R;
    return centred/R.y;
}



// --------------------- //
// --------- SH -------- //
// --------------------- //

// RGB spherical-harmonics coefficients for bands 0..2 (9 coefficients).
// Field lXY holds band l = X, order m = Y, where "m1" / "m2" mean m = -1 / -2.
struct SH {
    // band 0
    vec3 l00;

    // band 1
    vec3 l1m1;
    vec3 l10;
    vec3 l11;

    // band 2
    vec3 l2m2;
    vec3 l2m1;
    vec3 l20;
    vec3 l21;
    vec3 l22;
};
// Highest band evaluated by decode_sh() / decode_sh_coeff().
const int ORDER = 2;

// basis fn
// Real spherical-harmonics basis function for band l (0..2) and order m (-l..l),
// evaluated for direction p (expected to be normalized). Returns 0 for any other (l, m).
float Y(vec3 p, int l, int m){
    switch(l){
        case 0:
            return sqrt(1./pi) * 1./2.;
        case 1:
            switch(m){
                case -1: return p.y * sqrt(3./pi) * 1./2.;
                case 0:  return p.z * sqrt(3./pi) * 1./2.;
                case 1:  return p.x * sqrt(3./pi) * 1./2.;
            }
            break;
        case 2:
            switch(m){
                case -2: return p.y * p.x * sqrt(15./pi) * 1./2.;
                case -1: return p.z * p.y * sqrt(15./pi) * 1./2.;
                case 0:  return (3. * p.z * p.z - 1.0) * sqrt(5./pi) * 1./4.;
                case 1:  return p.z * p.x * sqrt(15./pi) * 1./2.;
                case 2:  return (p.x*p.x - p.y*p.y) * sqrt(15./pi) * 1./4.;
            }
            break;
    }
    return 0.0;
}


// Coefficient-wise product of two SH sets. Each coefficient is multiplied with its
// counterpart; this is not the SH product of the two represented functions.
SH mul_sh(SH a, SH b){
    return SH(
        a.l00 * b.l00,
        a.l1m1 * b.l1m1,
        a.l10 * b.l10,
        a.l11 * b.l11,
        a.l2m2 * b.l2m2,
        a.l2m1 * b.l2m1,
        a.l20 * b.l20,
        a.l21 * b.l21,
        a.l22 * b.l22
    );
}

// Accumulates one radiance sample into an SH set.
//   C  : sample colour; its absolute value is used
//   r  : sample direction
//   sh : coefficients to add to (all 9, regardless of ORDER)
void encode_sh(vec3 C, vec3 r, inout SH sh){
    vec3 radiance = abs(C);

    // band 0
    sh.l00 += radiance*Y(r,0,0);

    // band 1
    sh.l1m1 += radiance*Y(r,1,-1);
    sh.l10 += radiance*Y(r,1,0);
    sh.l11 += radiance*Y(r,1,1);

    // band 2
    sh.l2m2 += radiance*Y(r,2,-2);
    sh.l2m1 += radiance*Y(r,2,-1);
    sh.l20 += radiance*Y(r,2,0);
    sh.l21 += radiance*Y(r,2,1);
    sh.l22 += radiance*Y(r,2,2);
}


// Evaluates every SH term separately for direction r: each returned coefficient is
// sh.coefficient * Y(r, l, m). Bands above ORDER are returned as zero. Summing all nine
// fields of the result gives decode_sh(r, sh).
// The permanently disabled per-band rescaling experiment and its unused local were removed;
// they never executed.
SH decode_sh_coeff(vec3 r, SH sh){
    const vec3 zero = vec3(0);
    SH term = SH(zero, zero, zero, zero, zero, zero, zero, zero, zero);

    if(ORDER >= 0){
        term.l00 += sh.l00 * Y(r,0,0);
    }

    if(ORDER >= 1){
        term.l1m1 += sh.l1m1 * Y(r,1,-1);
        term.l10 += sh.l10 * Y(r,1,0);
        term.l11 += sh.l11 * Y(r,1,1);
    }

    if(ORDER >= 2){
        term.l2m2 += sh.l2m2 * Y(r,2,-2);
        term.l2m1 += sh.l2m1 * Y(r,2,-1);
        term.l20 += sh.l20 * Y(r,2,0);
        term.l21 += sh.l21 * Y(r,2,1);
        term.l22 += sh.l22 * Y(r,2,2);
    }

    return term;
}
// Reconstructs the colour stored in an SH set for direction r: the sum over all bands up to
// ORDER of coefficient * Y(r, l, m). The per-term evaluation is shared with
// decode_sh_coeff() instead of being duplicated here.
vec3 decode_sh(vec3 r, SH sh){
    SH term = decode_sh_coeff(r, sh);

    vec3 C = vec3(0);
    C += term.l00
       + term.l1m1 + term.l10 + term.l11
       + term.l2m2 + term.l2m1 + term.l20 + term.l21 + term.l22;
    return C;
}

// SH coefficients built from direction dir, bands 0 and 1 only (band 2 is zero).
// Presumably a clamped-cosine lobe around dir, going by the name — TODO confirm the constants.
// Note the negative sign on the m = -1 and m = 1 terms.
SH sh_cos_lobe(vec3 dir){
    const float band0 = 0.8862269254527580137;
    const float band1 = 1.0233267079464884885;
    const vec3 none = vec3(0);
    return SH(
        vec3(band0),            // L=0 , M= 0
        vec3(-band1) * dir.y,   // L=1 , M=-1
        vec3(band1) * dir.z,    // L=1 , M= 0
        vec3(-band1) * dir.x,   // L=1 , M= 1
        none, none, none, none, none
    );
}

// Evaluates a quadratic polynomial of the SH coefficients in the components of nor,
// scaled by sca. Going by the name and the constants this presumably computes irradiance
// from 9 SH lighting coefficients — TODO confirm.
//   nor : surface normal
//   sca : scale applied to the result
//   c   : SH coefficients
vec3 calcIrradiance(vec3 nor, float sca, SH c) {
    const float c1 = 0.429043;
    const float c2 = 0.511664;
    const float c3 = 0.743125;
    const float c4 = 0.886227;
    const float c5 = 0.247708;

    vec3 acc = c1 * c.l22 * (nor.x * nor.x - nor.y * nor.y);
    acc += c3 * c.l20 * nor.z * nor.z;
    acc += c4 * c.l00;
    acc -= c5 * c.l20;
    // band 2 cross terms
    acc += 2.0 * c1 * c.l2m2 * nor.x * nor.y;
    acc += 2.0 * c1 * c.l21  * nor.x * nor.z;
    acc += 2.0 * c1 * c.l2m1 * nor.y * nor.z;
    // band 1 linear terms
    acc += 2.0 * c2 * c.l11  * nor.x;
    acc += 2.0 * c2 * c.l1m1 * nor.y;
    acc += 2.0 * c2 * c.l10  * nor.z;
    return acc * sca;
}
// Variant of calcIrradiance() with three differences: the direction's x is mirrored first,
// the result is clamped to >= 0, and it is scaled by a fixed 0.6.
vec3 Evaluate(vec3 dir, SH sh) {
    const float c1 = 0.42904276540489171563379376569857;
    const float c2 = 0.51166335397324424423977581244463;
    const float c3 = 0.24770795610037568833406429782001;
    const float c4 = 0.88622692545275801364908374167057;
    vec3 d = vec3(-1,1,1)*dir.xyz;

    vec3 total =
          c4 * sh.l00                                                                   //   c4  L00
        + c2 * 2.0 * (sh.l11 * d.x + sh.l1m1 * d.y + sh.l10 * d.z)                      // 2 c2 (L11 x + L1-1 y + L10 z)
        + c1 * 2.0 * (sh.l2m2 * d.x * d.y + sh.l21 * d.x * d.z + sh.l2m1 * d.y * d.z)   // 2 c1 (L2-2 xy + L21 xz + L2-1 yz)
        + (c1 * (d.x * d.x - d.y * d.y)) * sh.l22                                       //   c1 L22 (x^2 - y^2)
        + (c3 * (3.0 * d.z * d.z - 1.0)) * sh.l20;                                      //   c3 L20 (3 z^2 - 1)

    return 0.6*max(vec3(0.0), total);
}

// 4 out, 3 in...
// Float hash: vec3 in, vec4 out, each output component in [0,1).
// The input is offset by 15 before hashing.
vec4 hash43(vec3 p) {
    vec3 shifted = p + 15.;
    vec4 q = fract(vec4(shifted.xyzx)  * vec4(.1031, .1030, .0973, .1099));
    q += dot(q, q.wzxy+33.33);
    vec4 mixed = q.xxyz+q.yzzw;
    return fract(mixed*q.zywx);
}



// Uniformly distributed random unit vector. The first sample picks the angle around the
// Y axis, the second the height along it. Advances `seed` twice.
vec3 rand_sphere_dir() {
    float angle = 2.0f * pi * hash_f();
    float height = 1.0f - 2.0f * hash_f();
    // Radius of the circle at this height; max() guards against a tiny negative from rounding.
    float ring = sqrt(max(0.0f, 1.0f - height * height));
    return vec3(ring * cos(angle), height, ring * sin(angle));
}

// Random direction in the hemisphere around n, produced by normalising n plus a random unit
// vector (cosine-weighted when n is unit length). Advances `seed` twice.
vec3 rand_cos_hemi(vec3 n){
    vec3 offset = rand_sphere_dir();
    return normalize(n + offset);
}


// Uniform random direction in the hemisphere around n: a uniform sphere direction, flipped
// when it points away from n. Advances `seed` twice.
vec3 randHemi(vec3 n){
    vec3 v = rand_sphere_dir();
    return (dot(v,n) < 0.) ? -v : v;
}

//float henyey_g = .8; // anisotropy
// Samples cos(theta) from the Henyey-Greenstein phase function.
//   henyey_g : anisotropy in (-1, 1); 0 means isotropic scattering
// Returns a value in [-1, 1]. Advances `seed` exactly once for every value of henyey_g.
float henyey_greenstein( float henyey_g){
    float g = henyey_g;
    float xi = hash_f();
    // The closed form below divides by 2*g; for g close to 0 use its limit,
    // which is the isotropic distribution cos(theta) = 2*xi - 1.
    if(abs(g) < 1e-3f){
        return 2.0f * xi - 1.0f;
    }
    float sqrTerm = (1.0f - g * g) /
    (1.0f - g + 2.0f * g * xi);
    float cosTheta = (1.0f + g * g - sqrTerm * sqrTerm) / (2.0f * g);
    return cosTheta;
}

// Direction from spherical angles with +Z as the pole.
//   th  : polar angle from +Z, in radians
//   phi : azimuth as a fraction of a full turn (multiplied by tau internally)
vec3 sample_hemisphere(float th, float phi){
    float azimuth = phi*tau;
    float sin_th = sin(th);
    return vec3(
        sin(azimuth) * sin_th,
        cos(azimuth) * sin_th,
        cos(th)
    );
}



// Builds two tangent vectors f and r that, together with the unit vector n, form an
// orthonormal basis. No square roots or normalisation are needed; the singular case
// n ~ (0,0,-1) gets a fixed basis.
void basis(in vec3 n, out vec3 f, out vec3 r) {
    if(n.z < -0.999999) {
        // 1/(1 + n.z) would blow up here.
        f = vec3(0 , -1, 0);
        r = vec3(-1, 0, 0);
        return;
    }
    float inv = 1./(1. + n.z);
    float cross_term = -n.x*n.y*inv;
    f = vec3(1. - n.x*n.x*inv, cross_term, -n.x);
    r = vec3(cross_term, 1. - n.y*n.y*inv , -n.y);
}

// Orthonormal basis with n as the third column; the first two columns are the tangents
// produced by basis().
mat3 get_orth_basis(in vec3 n) {
    vec3 tangent;
    vec3 bitangent;
    basis(n, tangent, bitangent);
    return mat3(tangent, bitangent, n);
}


#line -4
//#line 0
