From 57640dd806a7823319421ad74281ce243f6cd275 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Thu, 24 Apr 2025 17:51:03 +0200 Subject: [PATCH] embree: Update to 4.4.0 --- modules/raycast/godot_update_embree.py | 2 +- thirdparty/README.md | 3 +- .../embree/common/lexers/stringstream.cpp | 5 +- thirdparty/embree/common/math/vec2fa_sycl.h | 4 +- thirdparty/embree/common/math/vec3fa_sycl.h | 12 +- thirdparty/embree/common/sys/alloc.cpp | 196 +++------ thirdparty/embree/common/sys/alloc.h | 62 +-- thirdparty/embree/common/sys/filename.cpp | 12 - thirdparty/embree/common/sys/filename.h | 3 - thirdparty/embree/common/sys/platform.h | 7 +- thirdparty/embree/common/sys/sycl.h | 21 +- thirdparty/embree/common/sys/sysinfo.cpp | 13 + thirdparty/embree/common/sys/sysinfo.h | 2 +- thirdparty/embree/common/sys/vector.h | 4 +- .../common/tasking/taskschedulerinternal.h | 14 +- .../embree/include/embree4/rtcore_buffer.h | 20 + .../embree/include/embree4/rtcore_config.h | 30 +- .../embree/include/embree4/rtcore_device.h | 40 +- .../embree/include/embree4/rtcore_geometry.h | 12 +- .../embree/include/embree4/rtcore_ray.h | 8 +- .../embree/include/embree4/rtcore_scene.h | 129 +++++- .../embree/kernels/builders/bvh_builder_sah.h | 3 +- .../builders/heuristic_spatial_array.h | 8 +- .../kernels/bvh/bvh_builder_sah_spatial.cpp | 6 +- .../embree/kernels/bvh/bvh_statistics.cpp | 3 +- thirdparty/embree/kernels/common/alloc.h | 14 +- thirdparty/embree/kernels/common/buffer.h | 303 +++++++++---- thirdparty/embree/kernels/common/device.cpp | 144 ++++-- thirdparty/embree/kernels/common/device.h | 52 ++- thirdparty/embree/kernels/common/geometry.cpp | 2 +- thirdparty/embree/kernels/common/geometry.h | 19 +- thirdparty/embree/kernels/common/ray.h | 2 +- thirdparty/embree/kernels/common/rtcore.cpp | 414 ++++++++++++++++-- thirdparty/embree/kernels/common/scene.cpp | 90 +++- thirdparty/embree/kernels/common/scene.h | 86 +++- .../embree/kernels/common/scene_curves.h | 4 +- .../embree/kernels/common/scene_grid_mesh.h | 9 +- .../embree/kernels/common/scene_instance.h | 3 +- .../kernels/common/scene_instance_array.h | 5 +- .../kernels/common/scene_line_segments.h | 4 +- .../embree/kernels/common/scene_points.h | 4 +- .../embree/kernels/common/scene_quad_mesh.h | 4 +- .../embree/kernels/common/scene_subdiv_mesh.h | 2 +- .../kernels/common/scene_triangle_mesh.cpp | 40 +- .../kernels/common/scene_triangle_mesh.h | 30 +- .../kernels/common/scene_user_geometry.h | 14 +- thirdparty/embree/kernels/common/state.cpp | 18 +- thirdparty/embree/kernels/common/state.h | 13 +- thirdparty/embree/kernels/hash.h | 2 +- .../patches/0001-disable-exceptions.patch | 268 +++--------- .../embree/patches/0002-godot-config.patch | 8 +- .../patches/0003-emscripten-nthreads.patch | 8 +- .../0006-include-order-dllexport.patch | 20 - 53 files changed, 1448 insertions(+), 753 deletions(-) delete mode 100644 thirdparty/embree/patches/0006-include-order-dllexport.patch diff --git a/modules/raycast/godot_update_embree.py b/modules/raycast/godot_update_embree.py index c4fff330c93..b52613d8385 100644 --- a/modules/raycast/godot_update_embree.py +++ b/modules/raycast/godot_update_embree.py @@ -7,7 +7,7 @@ import subprocess import sys from typing import Any, Callable -git_tag = "v4.3.1" +git_tag = "v4.4.0" include_dirs = [ "common/tasking", diff --git a/thirdparty/README.md b/thirdparty/README.md index b55df0bab4f..23704252574 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -199,7 +199,7 @@ Files extracted from upstream source: ## embree - Upstream: https://github.com/embree/embree -- Version: 4.3.1 (daa8de0e714e18ad5e5c9841b67c1950d9c91c51, 2024) +- Version: 4.4.0 (ff9381774dc99fea81a932ad276677aad6a3d4dd, 2025) - License: Apache 2.0 Files extracted from upstream: @@ -216,7 +216,6 @@ Patches: - `0003-emscripten-nthreads.patch` (GH-69799) - `0004-mingw-no-cpuidex.patch` (GH-92488) - `0005-mingw-llvm-arm64.patch` (GH-93364) -- `0006-include-order-dllexport.patch` (GH-94256) The `modules/raycast/godot_update_embree.py` script can be used to pull the relevant files from the latest Embree release and apply patches automatically. diff --git a/thirdparty/embree/common/lexers/stringstream.cpp b/thirdparty/embree/common/lexers/stringstream.cpp index fa4266d0b9d..9779fc74c78 100644 --- a/thirdparty/embree/common/lexers/stringstream.cpp +++ b/thirdparty/embree/common/lexers/stringstream.cpp @@ -39,10 +39,7 @@ namespace embree std::vector str; str.reserve(64); while (cin->peek() != EOF && !isSeparator(cin->peek())) { int c = cin->get(); - //if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input"); - if (!isValidChar(c)) { - abort(); - } + if (!isValidChar(c)) abort(); //throw std::runtime_error("invalid character "+std::string(1,c)+" in input"); str.push_back((char)c); } str.push_back(0); diff --git a/thirdparty/embree/common/math/vec2fa_sycl.h b/thirdparty/embree/common/math/vec2fa_sycl.h index 62d62bdd01d..9408e9cf86f 100644 --- a/thirdparty/embree/common/math/vec2fa_sycl.h +++ b/thirdparty/embree/common/math/vec2fa_sycl.h @@ -95,8 +95,8 @@ namespace embree __forceinline Vec2fa abs ( const Vec2fa& a ) { return Vec2fa(sycl::fabs(a.x),sycl::fabs(a.y)); } __forceinline Vec2fa sign ( const Vec2fa& a ) { return Vec2fa(sycl::sign(a.x),sycl::sign(a.y)); } - //__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::recip(a.x),sycl::recip(a.y)); } - __forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(__sycl_std::__invoke_native_recip(a.x),__sycl_std::__invoke_native_recip(a.y)); } + //__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::native::recip(a.x),sycl::native::recip(a.y)); } + __forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::native::recip(a.x),sycl::native::recip(a.y)); } __forceinline Vec2fa sqrt ( const Vec2fa& a ) { return Vec2fa(sycl::sqrt(a.x),sycl::sqrt(a.y)); } __forceinline Vec2fa sqr ( const Vec2fa& a ) { return Vec2fa(a.x*a.x,a.y*a.y); } diff --git a/thirdparty/embree/common/math/vec3fa_sycl.h b/thirdparty/embree/common/math/vec3fa_sycl.h index 5fdb00ab999..642a63052a2 100644 --- a/thirdparty/embree/common/math/vec3fa_sycl.h +++ b/thirdparty/embree/common/math/vec3fa_sycl.h @@ -92,7 +92,7 @@ namespace embree __forceinline Vec3fa sign ( const Vec3fa& a ) { return Vec3fa(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z)); } //__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); } - __forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(__sycl_std::__invoke_native_recip(a.x),__sycl_std::__invoke_native_recip(a.y),__sycl_std::__invoke_native_recip(a.z)); } + __forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::native::recip(a.x),sycl::native::recip(a.y),sycl::native::recip(a.z)); } __forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); } __forceinline Vec3fa sqr ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); } @@ -393,7 +393,7 @@ namespace embree __forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.z)); } //__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); } - __forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(__sycl_std::__invoke_native_recip(a.x),__sycl_std::__invoke_native_recip(a.y),__sycl_std::__invoke_native_recip(a.z),__sycl_std::__invoke_native_recip(a.w)); } + __forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::native::recip(a.x),sycl::native::recip(a.y),sycl::native::recip(a.z),sycl::native::recip(a.w)); } __forceinline Vec3fx sqrt ( const Vec3fx& a ) { return Vec3fx(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z),sycl::sqrt(a.w)); } __forceinline Vec3fx sqr ( const Vec3fx& a ) { return Vec3fx(a.x*a.x,a.y*a.y,a.z*a.z,a.w*a.w); } @@ -614,4 +614,12 @@ namespace embree //__forceinline Vec3ia::Vec3ia( const Vec3fx& a ) // : x((int)a.x), y((int)a.y), z((int)a.z) {} + } + +#if __SYCL_COMPILER_VERSION >= 20210801 +namespace sycl { + template<> struct is_device_copyable : std::true_type {}; + template<> struct is_device_copyable : std::true_type {}; +} +#endif \ No newline at end of file diff --git a/thirdparty/embree/common/sys/alloc.cpp b/thirdparty/embree/common/sys/alloc.cpp index 8e836460314..2288df76ef7 100644 --- a/thirdparty/embree/common/sys/alloc.cpp +++ b/thirdparty/embree/common/sys/alloc.cpp @@ -12,69 +12,6 @@ namespace embree { - size_t total_allocations = 0; - -#if defined(EMBREE_SYCL_SUPPORT) - - __thread sycl::context* tls_context_tutorial = nullptr; - __thread sycl::device* tls_device_tutorial = nullptr; - - __thread sycl::context* tls_context_embree = nullptr; - __thread sycl::device* tls_device_embree = nullptr; - - void enableUSMAllocEmbree(sycl::context* context, sycl::device* device) - { - //if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled"); - //if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled"); - if (tls_context_embree != nullptr) { - abort(); - } - if (tls_device_embree != nullptr) { - abort(); - } - tls_context_embree = context; - tls_device_embree = device; - } - - void disableUSMAllocEmbree() - { - //if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled"); - //if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled"); - if (tls_context_embree == nullptr) { - abort(); - } - if (tls_device_embree == nullptr) { - abort(); - } - tls_context_embree = nullptr; - tls_device_embree = nullptr; - } - - void enableUSMAllocTutorial(sycl::context* context, sycl::device* device) - { - //if (tls_context_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled"); - //if (tls_device_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled"); - tls_context_tutorial = context; - tls_device_tutorial = device; - } - - void disableUSMAllocTutorial() - { - //if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled"); - //if (tls_device_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled"); - if (tls_context_tutorial == nullptr) { - abort(); - } - if (tls_device_tutorial == nullptr) { - abort(); - } - - tls_context_tutorial = nullptr; - tls_device_tutorial = nullptr; - } - -#endif - void* alignedMalloc(size_t size, size_t align) { if (size == 0) @@ -82,18 +19,16 @@ namespace embree assert((align & (align-1)) == 0); void* ptr = _mm_malloc(size,align); - //if (size != 0 && ptr == nullptr) - // throw std::bad_alloc(); - if (size != 0 && ptr == nullptr) { - abort(); - } + if (size != 0 && ptr == nullptr) + abort(); //throw std::bad_alloc(); return ptr; } void alignedFree(void* ptr) { - if (ptr) + if (ptr) { _mm_free(ptr); + } } #if defined(EMBREE_SYCL_SUPPORT) @@ -107,68 +42,67 @@ namespace embree return nullptr; assert((align & (align-1)) == 0); - total_allocations++; void* ptr = nullptr; - if (mode == EMBREE_USM_SHARED_DEVICE_READ_ONLY) + if (mode == EmbreeUSMMode::DEVICE_READ_ONLY) ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only()); else ptr = sycl::aligned_alloc_shared(align,size,*device,*context); - - //if (size != 0 && ptr == nullptr) - // throw std::bad_alloc(); - if (size != 0 && ptr == nullptr) { - abort(); + + if (size != 0 && ptr == nullptr) + abort(); //throw std::bad_alloc(); + + return ptr; + } + + void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type) + { + assert(context); + assert(device); + + if (size == 0) + return nullptr; + + assert((align & (align-1)) == 0); + + void* ptr = nullptr; + if (type == EmbreeMemoryType::USM_SHARED) { + if (mode == EmbreeUSMMode::DEVICE_READ_ONLY) + ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only()); + else + ptr = sycl::aligned_alloc_shared(align,size,*device,*context); } + else if (type == EmbreeMemoryType::USM_HOST) { + ptr = sycl::aligned_alloc_host(align,size,*context); + } + else if (type == EmbreeMemoryType::USM_DEVICE) { + ptr = sycl::aligned_alloc_device(align,size,*device,*context); + } + else { + ptr = alignedMalloc(size,align); + } + + if (size != 0 && ptr == nullptr) + abort(); //throw std::bad_alloc(); return ptr; } - static MutexSys g_alloc_mutex; - - void* alignedSYCLMalloc(size_t size, size_t align, EmbreeUSMMode mode) - { - if (tls_context_tutorial) return alignedSYCLMalloc(tls_context_tutorial, tls_device_tutorial, size, align, mode); - if (tls_context_embree ) return alignedSYCLMalloc(tls_context_embree, tls_device_embree, size, align, mode); - return nullptr; - } - void alignedSYCLFree(sycl::context* context, void* ptr) { assert(context); if (ptr) { - sycl::free(ptr,*context); + sycl::usm::alloc type = sycl::get_pointer_type(ptr, *context); + if (type == sycl::usm::alloc::host || type == sycl::usm::alloc::device || type == sycl::usm::alloc::shared) + sycl::free(ptr,*context); + else { + alignedFree(ptr); + } } } - void alignedSYCLFree(void* ptr) - { - if (tls_context_tutorial) return alignedSYCLFree(tls_context_tutorial, ptr); - if (tls_context_embree ) return alignedSYCLFree(tls_context_embree, ptr); - } - #endif - void* alignedUSMMalloc(size_t size, size_t align, EmbreeUSMMode mode) - { -#if defined(EMBREE_SYCL_SUPPORT) - if (tls_context_embree || tls_context_tutorial) - return alignedSYCLMalloc(size,align,mode); - else -#endif - return alignedMalloc(size,align); - } - - void alignedUSMFree(void* ptr) - { -#if defined(EMBREE_SYCL_SUPPORT) - if (tls_context_embree || tls_context_tutorial) - return alignedSYCLFree(ptr); - else -#endif - return alignedFree(ptr); - } - static bool huge_pages_enabled = false; static MutexSys os_init_mutex; @@ -265,10 +199,7 @@ namespace embree /* fall back to 4k pages */ int flags = MEM_COMMIT | MEM_RESERVE; char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE); - //if (ptr == nullptr) throw std::bad_alloc(); - if (ptr == nullptr) { - abort(); - } + if (ptr == nullptr) abort(); //throw std::bad_alloc(); hugepages = false; return ptr; } @@ -284,11 +215,8 @@ namespace embree if (bytesNew >= bytesOld) return bytesOld; - //if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) - // throw std::bad_alloc(); - if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) { - abort(); - } + if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) + abort(); //throw std::bad_alloc(); return bytesNew; } @@ -298,11 +226,8 @@ namespace embree if (bytes == 0) return; - //if (!VirtualFree(ptr,0,MEM_RELEASE)) - // throw std::bad_alloc(); - if (!VirtualFree(ptr,0,MEM_RELEASE)) { - abort(); - } + if (!VirtualFree(ptr,0,MEM_RELEASE)) + abort(); //throw std::bad_alloc(); } void os_advise(void *ptr, size_t bytes) @@ -406,10 +331,7 @@ namespace embree /* fallback to 4k pages */ void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); - //if (ptr == MAP_FAILED) throw std::bad_alloc(); - if (ptr == MAP_FAILED) { - abort(); - } + if (ptr == MAP_FAILED) abort(); //throw std::bad_alloc(); hugepages = false; /* advise huge page hint for THP */ @@ -425,11 +347,8 @@ namespace embree if (bytesNew >= bytesOld) return bytesOld; - //if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) - // throw std::bad_alloc(); - if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) { - abort(); - } + if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) + abort(); //throw std::bad_alloc(); return bytesNew; } @@ -442,11 +361,8 @@ namespace embree /* for hugepages we need to also align the size */ const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K; bytes = (bytes+pageSize-1) & ~(pageSize-1); - //if (munmap(ptr,bytes) == -1) - // throw std::bad_alloc(); - if (munmap(ptr,bytes) == -1) { - abort(); - } + if (munmap(ptr,bytes) == -1) + abort(); //throw std::bad_alloc(); } /* hint for transparent huge pages (THP) */ diff --git a/thirdparty/embree/common/sys/alloc.h b/thirdparty/embree/common/sys/alloc.h index e2c942049a3..1f6f230ed3e 100644 --- a/thirdparty/embree/common/sys/alloc.h +++ b/thirdparty/embree/common/sys/alloc.h @@ -9,71 +9,42 @@ namespace embree { -#if defined(EMBREE_SYCL_SUPPORT) - - /* enables SYCL USM allocation */ - void enableUSMAllocEmbree(sycl::context* context, sycl::device* device); - void enableUSMAllocTutorial(sycl::context* context, sycl::device* device); - - /* disables SYCL USM allocation */ - void disableUSMAllocEmbree(); - void disableUSMAllocTutorial(); - -#endif - #define ALIGNED_STRUCT_(align) \ void* operator new(size_t size) { return alignedMalloc(size,align); } \ void operator delete(void* ptr) { alignedFree(ptr); } \ void* operator new[](size_t size) { return alignedMalloc(size,align); } \ void operator delete[](void* ptr) { alignedFree(ptr); } -#define ALIGNED_STRUCT_USM_(align) \ - void* operator new(size_t size) { return alignedUSMMalloc(size,align); } \ - void operator delete(void* ptr) { alignedUSMFree(ptr); } \ - void* operator new[](size_t size) { return alignedUSMMalloc(size,align); } \ - void operator delete[](void* ptr) { alignedUSMFree(ptr); } - #define ALIGNED_CLASS_(align) \ public: \ ALIGNED_STRUCT_(align) \ private: -#define ALIGNED_CLASS_USM_(align) \ - public: \ - ALIGNED_STRUCT_USM_(align) \ - private: - - enum EmbreeUSMMode { - EMBREE_USM_SHARED = 0, - EMBREE_USM_SHARED_DEVICE_READ_WRITE = 0, - EMBREE_USM_SHARED_DEVICE_READ_ONLY = 1 - }; - /*! aligned allocation */ void* alignedMalloc(size_t size, size_t align); void alignedFree(void* ptr); - /*! aligned allocation using SYCL USM */ - void* alignedUSMMalloc(size_t size, size_t align = 16, EmbreeUSMMode mode = EMBREE_USM_SHARED_DEVICE_READ_ONLY); - void alignedUSMFree(void* ptr); + + enum class EmbreeUSMMode { + DEFAULT = 0, + DEVICE_READ_WRITE = 0, + DEVICE_READ_ONLY = 1 + }; + + enum class EmbreeMemoryType { + USM_HOST = 0, + USM_DEVICE = 1, + USM_SHARED = 2, + MALLOC = 3 + }; #if defined(EMBREE_SYCL_SUPPORT) - + /*! aligned allocation using SYCL USM */ void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode); + void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type); void alignedSYCLFree(sycl::context* context, void* ptr); - // deleter functor to use as deleter in std unique or shared pointers that - // capture raw pointers created by sycl::malloc and it's variants - template - struct sycl_deleter - { - void operator()(T const* ptr) - { - alignedUSMFree((void*)ptr); - } - }; - #endif /*! allocator that performs aligned allocations */ @@ -160,8 +131,7 @@ namespace embree typedef std::ptrdiff_t difference_type; __forceinline pointer allocate( size_type n ) { - //throw std::runtime_error("no allocation supported"); - abort(); + abort(); //throw std::runtime_error("no allocation supported"); } __forceinline void deallocate( pointer p, size_type n ) { diff --git a/thirdparty/embree/common/sys/filename.cpp b/thirdparty/embree/common/sys/filename.cpp index f55b2243029..862edfba084 100644 --- a/thirdparty/embree/common/sys/filename.cpp +++ b/thirdparty/embree/common/sys/filename.cpp @@ -35,18 +35,6 @@ namespace embree filename.resize(filename.size()-1); } - /*! returns path to home folder */ - FileName FileName::homeFolder() - { -#ifdef __WIN32__ - const char* home = getenv("UserProfile"); -#else - const char* home = getenv("HOME"); -#endif - if (home) return home; - return ""; - } - /*! returns path to executable */ FileName FileName::executableFolder() { return FileName(getExecutableFileName()).path(); diff --git a/thirdparty/embree/common/sys/filename.h b/thirdparty/embree/common/sys/filename.h index d5929cd8369..986fb87c392 100644 --- a/thirdparty/embree/common/sys/filename.h +++ b/thirdparty/embree/common/sys/filename.h @@ -20,9 +20,6 @@ namespace embree /*! create a valid filename from a string */ FileName (const std::string& filename); - - /*! returns path to home folder */ - static FileName homeFolder(); /*! returns path to executable */ static FileName executableFolder(); diff --git a/thirdparty/embree/common/sys/platform.h b/thirdparty/embree/common/sys/platform.h index 9f08cd1516b..1e5b02550ec 100644 --- a/thirdparty/embree/common/sys/platform.h +++ b/thirdparty/embree/common/sys/platform.h @@ -213,15 +213,12 @@ #define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl #if defined(DEBUG) // only report file and line in debug mode - //#define THROW_RUNTIME_ERROR(str) \ - // throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #define THROW_RUNTIME_ERROR(str) \ printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort(); + //throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #else - //#define THROW_RUNTIME_ERROR(str) \ - // throw std::runtime_error(str); #define THROW_RUNTIME_ERROR(str) \ - abort(); + abort(); //throw std::runtime_error(str); #endif #define FATAL(x) THROW_RUNTIME_ERROR(x) diff --git a/thirdparty/embree/common/sys/sycl.h b/thirdparty/embree/common/sys/sycl.h index 2558eb052f7..6bef829fc23 100644 --- a/thirdparty/embree/common/sys/sycl.h +++ b/thirdparty/embree/common/sys/sycl.h @@ -181,8 +181,15 @@ namespace embree return sycl::select(b,a,mask); } +#define XSTR(x) STR(x) +#define STR(x) #x + __forceinline const sycl::sub_group this_sub_group() { - return sycl::ext::oneapi::experimental::this_sub_group(); +#if __LIBSYCL_MAJOR_VERSION >= 8 + return sycl::ext::oneapi::this_work_item::get_sub_group(); +#else + return sycl::ext::oneapi::experimental::this_sub_group(); +#endif } __forceinline const uint32_t get_sub_group_local_id() { @@ -275,18 +282,6 @@ namespace embree return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op,init); } - template __forceinline T sub_group_shuffle(T x, sycl::id<1> local_id) { - return this_sub_group().shuffle(x, local_id); - } - - template __forceinline T sub_group_shuffle_down(T x, uint32_t delta) { - return this_sub_group().shuffle_down(x, delta); - } - - template __forceinline T sub_group_shuffle_up(T x, uint32_t delta) { - return this_sub_group().shuffle_up(x, delta); - } - template __forceinline T sub_group_load(const void* src) { return this_sub_group().load(sycl::multi_ptr((T*)src)); } diff --git a/thirdparty/embree/common/sys/sysinfo.cpp b/thirdparty/embree/common/sys/sysinfo.cpp index 4583e49b1ca..f3046d89f27 100644 --- a/thirdparty/embree/common/sys/sysinfo.cpp +++ b/thirdparty/embree/common/sys/sysinfo.cpp @@ -344,6 +344,19 @@ namespace embree if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL ) cpu_features |= CPU_FEATURE_AVX512VL; if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI; +#if defined(__MACOSX__) + if ( (cpu_features & CPU_FEATURE_AVX512F) + || (cpu_features & CPU_FEATURE_AVX512DQ) + || (cpu_features & CPU_FEATURE_AVX512CD) + || (cpu_features & CPU_FEATURE_AVX512BW) + || (cpu_features & CPU_FEATURE_AVX512VL) ) + { + // on macOS AVX512 will be enabled automatically by the kernel when the first AVX512 instruction is called + // see https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176 + // therefore we ignore the state of XCR0 + cpu_features |= CPU_FEATURE_ZMM_ENABLED; + } +#endif return cpu_features; #elif defined(__ARM_NEON) || defined(__EMSCRIPTEN__) diff --git a/thirdparty/embree/common/sys/sysinfo.h b/thirdparty/embree/common/sys/sysinfo.h index cefd39a0f6b..02eacbe7172 100644 --- a/thirdparty/embree/common/sys/sysinfo.h +++ b/thirdparty/embree/common/sys/sysinfo.h @@ -158,7 +158,7 @@ namespace embree static const int SSE41 = SSSE3 | CPU_FEATURE_SSE41; static const int SSE42 = SSE41 | CPU_FEATURE_SSE42 | CPU_FEATURE_POPCNT; static const int AVX = SSE42 | CPU_FEATURE_AVX | CPU_FEATURE_YMM_ENABLED; - static const int AVXI = AVX | CPU_FEATURE_F16C | CPU_FEATURE_RDRAND; + static const int AVXI = AVX | CPU_FEATURE_F16C; static const int AVX2 = AVXI | CPU_FEATURE_AVX2 | CPU_FEATURE_FMA3 | CPU_FEATURE_BMI1 | CPU_FEATURE_BMI2 | CPU_FEATURE_LZCNT; static const int AVX512 = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512DQ | CPU_FEATURE_AVX512CD | CPU_FEATURE_AVX512BW | CPU_FEATURE_AVX512VL | CPU_FEATURE_ZMM_ENABLED; static const int NEON = CPU_FEATURE_NEON | CPU_FEATURE_SSE | CPU_FEATURE_SSE2; diff --git a/thirdparty/embree/common/sys/vector.h b/thirdparty/embree/common/sys/vector.h index 226cd34c213..2d30d6725b6 100644 --- a/thirdparty/embree/common/sys/vector.h +++ b/thirdparty/embree/common/sys/vector.h @@ -124,8 +124,10 @@ namespace embree __forceinline T* data() { return items; }; __forceinline const T* data() const { return items; }; + + /* dangerous only use if you know what you're doing */ + __forceinline void setDataPtr(T* data) { items = data; } - /******************** Modifiers **************************/ __forceinline void push_back(const T& nt) diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.h b/thirdparty/embree/common/tasking/taskschedulerinternal.h index 4a04323b80a..d4e0c7386bf 100644 --- a/thirdparty/embree/common/tasking/taskschedulerinternal.h +++ b/thirdparty/embree/common/tasking/taskschedulerinternal.h @@ -130,11 +130,8 @@ namespace embree __forceinline void* alloc(size_t bytes, size_t align = 64) { size_t ofs = bytes + ((align - stackPtr) & (align-1)); - //if (stackPtr + ofs > CLOSURE_STACK_SIZE) - // throw std::runtime_error("closure stack overflow"); - if (stackPtr + ofs > CLOSURE_STACK_SIZE) { - abort(); - } + if (stackPtr + ofs > CLOSURE_STACK_SIZE) + abort(); //throw std::runtime_error("closure stack overflow"); stackPtr += ofs; return &stack[stackPtr-bytes]; } @@ -142,11 +139,8 @@ namespace embree template __forceinline void push_right(Thread& thread, const size_t size, const Closure& closure, TaskGroupContext* context) { - //if (right >= TASK_STACK_SIZE) - // throw std::runtime_error("task stack overflow"); - if (right >= TASK_STACK_SIZE) { - abort(); - } + if (right >= TASK_STACK_SIZE) + abort(); //throw std::runtime_error("task stack overflow"); /* allocate new task on right side of stack */ size_t oldStackPtr = stackPtr; diff --git a/thirdparty/embree/include/embree4/rtcore_buffer.h b/thirdparty/embree/include/embree4/rtcore_buffer.h index 8721ce4a441..eb7deca26c1 100644 --- a/thirdparty/embree/include/embree4/rtcore_buffer.h +++ b/thirdparty/embree/include/embree4/rtcore_buffer.h @@ -38,12 +38,32 @@ typedef struct RTCBufferTy* RTCBuffer; /* Creates a new buffer. */ RTC_API RTCBuffer rtcNewBuffer(RTCDevice device, size_t byteSize); +/* Creates a new buffer using explicit host device memory. */ +RTC_API RTCBuffer rtcNewBufferHostDevice(RTCDevice device, size_t byteSize); + /* Creates a new shared buffer. */ RTC_API RTCBuffer rtcNewSharedBuffer(RTCDevice device, void* ptr, size_t byteSize); +/* Creates a new shared buffer using explicit host device memory. */ +RTC_API RTCBuffer rtcNewSharedBufferHostDevice(RTCDevice device, void* ptr, size_t byteSize); + +/* Synchronize host and device memory by copying data from host to device. */ +RTC_API void rtcCommitBuffer(RTCBuffer buffer); + +#if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION) + +RTC_API_CPP sycl::event rtcCommitBufferWithQueue(RTCBuffer buffer, sycl::queue queue); + +#endif + /* Returns a pointer to the buffer data. */ RTC_API void* rtcGetBufferData(RTCBuffer buffer); +/* Returns a pointer to the buffer data on the device. Returns the same pointer as + rtcGetBufferData if the device is no SYCL device or if Embree is executed on a + system with unified memory (e.g., iGPUs). */ +RTC_API void* rtcGetBufferDataDevice(RTCBuffer buffer); + /* Retains the buffer (increments the reference count). */ RTC_API void rtcRetainBuffer(RTCBuffer buffer); diff --git a/thirdparty/embree/include/embree4/rtcore_config.h b/thirdparty/embree/include/embree4/rtcore_config.h index cb3a8678a70..91cf129dc65 100644 --- a/thirdparty/embree/include/embree4/rtcore_config.h +++ b/thirdparty/embree/include/embree4/rtcore_config.h @@ -8,10 +8,10 @@ #endif #define RTC_VERSION_MAJOR 4 -#define RTC_VERSION_MINOR 3 -#define RTC_VERSION_PATCH 1 -#define RTC_VERSION 40301 -#define RTC_VERSION_STRING "4.3.1" +#define RTC_VERSION_MINOR 4 +#define RTC_VERSION_PATCH 0 +#define RTC_VERSION 40400 +#define RTC_VERSION_STRING "4.4.0" #define RTC_MAX_INSTANCE_LEVEL_COUNT 1 @@ -36,6 +36,7 @@ # define RTC_NAMESPACE_END } # define RTC_NAMESPACE_USE using namespace; # define RTC_API_EXTERN_C +# define RTC_API_EXTERN_CPP # undef EMBREE_API_NAMESPACE #else # define RTC_NAMESPACE_BEGIN @@ -43,6 +44,7 @@ # define RTC_NAMESPACE_USE # if defined(__cplusplus) # define RTC_API_EXTERN_C extern "C" +# define RTC_API_EXTERN_CPP extern "C++" # else # define RTC_API_EXTERN_C # endif @@ -62,12 +64,32 @@ # define RTC_API_EXPORT RTC_API_EXTERN_C __attribute__ ((visibility ("default"))) #endif +#if defined(ISPC) +# define RTC_API_IMPORT_CPP extern "C++" unmasked +# define RTC_API_EXPORT_CPP extern "C++" unmasked +#elif defined(EMBREE_STATIC_LIB) +# define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP +# define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP +#elif defined(_WIN32) +# define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP __declspec(dllimport) +# define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP __declspec(dllexport) +#else +# define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP +# define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP __attribute__ ((visibility ("default"))) +#endif + #if defined(RTC_EXPORT_API) # define RTC_API RTC_API_EXPORT #else # define RTC_API RTC_API_IMPORT #endif +#if defined(RTC_EXPORT_API) +# define RTC_API_CPP RTC_API_EXPORT_CPP +#else +# define RTC_API_CPP RTC_API_IMPORT_CPP +#endif + #if defined(ISPC) # define RTC_SYCL_INDIRECTLY_CALLABLE #elif defined(__SYCL_DEVICE_ONLY__) diff --git a/thirdparty/embree/include/embree4/rtcore_device.h b/thirdparty/embree/include/embree4/rtcore_device.h index 5ca99bca0a7..9762f43e4ac 100644 --- a/thirdparty/embree/include/embree4/rtcore_device.h +++ b/thirdparty/embree/include/embree4/rtcore_device.h @@ -9,14 +9,18 @@ RTC_NAMESPACE_BEGIN /* Opaque device type */ typedef struct RTCDeviceTy* RTCDevice; +typedef struct RTCSceneTy* RTCScene; /* Creates a new Embree device. */ RTC_API RTCDevice rtcNewDevice(const char* config); #if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION) - -/* Creates a new Embree SYCL device. */ +/* + Creates a new Embree SYCL device. It will internally select the first SYCL device of + the SYCL context as the default device for memory allocations. You can set a specific + SYCL device that's part of the SYCL context by calling rtcSetDeviceSYCLDevice. +*/ RTC_API_EXTERN_C RTCDevice rtcNewSYCLDevice(sycl::context context, const char* config); /* Checks if SYCL device is supported by Embree. */ @@ -28,6 +32,10 @@ RTC_API int rtcSYCLDeviceSelector(const sycl::device sycl_device); /* Set the SYCL device to be used to allocate data */ RTC_API void rtcSetDeviceSYCLDevice(RTCDevice device, const sycl::device sycl_device); +/* rtcCommitSceneWithQueue is asynchronous, user has to call queue.wait() + for synchronization. rtcCommitScene is blocking. */ +RTC_API_CPP sycl::event rtcCommitSceneWithQueue(RTCScene scene, sycl::queue queue); + #endif @@ -66,7 +74,10 @@ enum RTCDeviceProperty RTC_DEVICE_PROPERTY_TASKING_SYSTEM = 128, RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED = 129, - RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED = 130 + RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED = 130, + + RTC_DEVICE_PROPERTY_CPU_DEVICE = 140, + RTC_DEVICE_PROPERTY_SYCL_DEVICE = 141 }; /* Gets a device property. */ @@ -78,18 +89,27 @@ RTC_API void rtcSetDeviceProperty(RTCDevice device, const enum RTCDeviceProperty /* Error codes */ enum RTCError { - RTC_ERROR_NONE = 0, - RTC_ERROR_UNKNOWN = 1, - RTC_ERROR_INVALID_ARGUMENT = 2, - RTC_ERROR_INVALID_OPERATION = 3, - RTC_ERROR_OUT_OF_MEMORY = 4, - RTC_ERROR_UNSUPPORTED_CPU = 5, - RTC_ERROR_CANCELLED = 6, + RTC_ERROR_NONE = 0, + RTC_ERROR_UNKNOWN = 1, + RTC_ERROR_INVALID_ARGUMENT = 2, + RTC_ERROR_INVALID_OPERATION = 3, + RTC_ERROR_OUT_OF_MEMORY = 4, + RTC_ERROR_UNSUPPORTED_CPU = 5, + RTC_ERROR_CANCELLED = 6, + RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING = 7, }; +/* Returns the string representation for the error code. For example, for RTC_ERROR_UNKNOWN the string "RTC_ERROR_UNKNOWN" will be returned. */ +RTC_API const char* rtcGetErrorString(enum RTCError error); + /* Returns the error code. */ RTC_API enum RTCError rtcGetDeviceError(RTCDevice device); +/* Returns a message corresponding to the last error code (returned by rtcGetDeviceError) which provides details about the error that happened. + The same message will be written to console when verbosity is > 0 or when an error callback function is set for the device. + However, when device creation itself fails this is the only way to get additional information about the error. */ +RTC_API const char* rtcGetDeviceLastErrorMessage(RTCDevice device); + /* Error callback function */ typedef void (*RTCErrorFunction)(void* userPtr, enum RTCError code, const char* str); diff --git a/thirdparty/embree/include/embree4/rtcore_geometry.h b/thirdparty/embree/include/embree4/rtcore_geometry.h index 140d6721d5c..aa75708508a 100644 --- a/thirdparty/embree/include/embree4/rtcore_geometry.h +++ b/thirdparty/embree/include/embree4/rtcore_geometry.h @@ -171,16 +171,26 @@ RTC_API void rtcSetGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, /* Sets a shared geometry buffer. */ RTC_API void rtcSetSharedGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot, enum RTCFormat format, const void* ptr, size_t byteOffset, size_t byteStride, size_t itemCount); +/* Sets a shared host/device geometry buffer pair. */ +RTC_API void rtcSetSharedGeometryBufferHostDevice(RTCGeometry geometry, enum RTCBufferType bufferType, unsigned int slot, enum RTCFormat format, const void* ptr, const void* dptr, size_t byteOffset, size_t byteStride, size_t itemCount); + /* Creates and sets a new geometry buffer. */ RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot, enum RTCFormat format, size_t byteStride, size_t itemCount); +/* Creates and sets a new host/device geometry buffer pair. */ +RTC_API void rtcSetNewGeometryBufferHostDevice(RTCGeometry geometry, enum RTCBufferType bufferType, unsigned int slot, enum RTCFormat format, size_t byteStride, size_t itemCount, void** ptr, void** dptr); + /* Returns the pointer to the data of a buffer. */ RTC_API void* rtcGetGeometryBufferData(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot); +/* Returns a pointer to the buffer data on the device. Returns the same pointer as + rtcGetGeometryBufferData if the device is no SYCL device or if Embree is executed on a + system with unified memory (e.g., iGPUs). */ +RTC_API void* rtcGetGeometryBufferDataDevice(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot); + /* Updates a geometry buffer. */ RTC_API void rtcUpdateGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot); - /* Sets the intersection filter callback function of the geometry. */ RTC_API void rtcSetGeometryIntersectFilterFunction(RTCGeometry geometry, RTCFilterFunctionN filter); diff --git a/thirdparty/embree/include/embree4/rtcore_ray.h b/thirdparty/embree/include/embree4/rtcore_ray.h index 7fc3f00cf94..73e95632aa3 100644 --- a/thirdparty/embree/include/embree4/rtcore_ray.h +++ b/thirdparty/embree/include/embree4/rtcore_ray.h @@ -225,8 +225,8 @@ RTC_FORCEINLINE RTCRayN* RTCRayHitN_RayN(RTCRayHitN* rayhit, unsigned int N) { r RTC_FORCEINLINE RTCHitN* RTCRayHitN_HitN(RTCRayHitN* rayhit, unsigned int N) { return (RTCHitN*)&((float*)rayhit)[12*N]; } /* Helper structure for a ray packet of compile-time size N */ -template -struct RTCRayNt +template +struct RTC_ALIGN((N && !(N & (N - 1)) ? (N * 4 > 16 ? N * 4 : 16) : 16)) RTCRayNt { float org_x[N]; float org_y[N]; @@ -245,8 +245,8 @@ struct RTCRayNt }; /* Helper structure for a hit packet of compile-time size N */ -template -struct RTCHitNt +template +struct RTC_ALIGN((N && !(N & (N - 1)) ? (N * 4 > 16 ? N * 4 : 16) : 16)) RTCHitNt { float Ng_x[N]; float Ng_y[N]; diff --git a/thirdparty/embree/include/embree4/rtcore_scene.h b/thirdparty/embree/include/embree4/rtcore_scene.h index e37af4f9622..74f7ccff2bb 100644 --- a/thirdparty/embree/include/embree4/rtcore_scene.h +++ b/thirdparty/embree/include/embree4/rtcore_scene.h @@ -6,7 +6,10 @@ #include "rtcore_device.h" RTC_NAMESPACE_BEGIN - + +/* Opaque traversable type */ +typedef struct RTCTraversableTy* RTCTraversable; + /* Forward declarations for ray structures */ struct RTCRayHit; struct RTCRayHit4; @@ -16,11 +19,12 @@ struct RTCRayHit16; /* Scene flags */ enum RTCSceneFlags { - RTC_SCENE_FLAG_NONE = 0, - RTC_SCENE_FLAG_DYNAMIC = (1 << 0), - RTC_SCENE_FLAG_COMPACT = (1 << 1), - RTC_SCENE_FLAG_ROBUST = (1 << 2), - RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS = (1 << 3) + RTC_SCENE_FLAG_NONE = 0, + RTC_SCENE_FLAG_DYNAMIC = (1 << 0), + RTC_SCENE_FLAG_COMPACT = (1 << 1), + RTC_SCENE_FLAG_ROBUST = (1 << 2), + RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS = (1 << 3), + RTC_SCENE_FLAG_PREFETCH_USM_SHARED_ON_GPU = (1 << 4), }; /* Additional arguments for rtcIntersect1/4/8/16 calls */ @@ -91,6 +95,8 @@ RTC_API void rtcRetainScene(RTCScene scene); /* Releases the scene (decrements the reference count). */ RTC_API void rtcReleaseScene(RTCScene scene); +/* Returns the traversable object of the scene which can be passed to ray queries. */ +RTC_API RTCTraversable rtcGetSceneTraversable(RTCScene scene); /* Attaches the geometry to a scene. */ RTC_API unsigned int rtcAttachGeometry(RTCScene scene, RTCGeometry geometry); @@ -107,12 +113,6 @@ RTC_API RTCGeometry rtcGetGeometry(RTCScene scene, unsigned int geomID); /* Gets a geometry handle from the scene. This function is thread safe and should NOT get used during rendering. */ RTC_API RTCGeometry rtcGetGeometryThreadSafe(RTCScene scene, unsigned int geomID); -/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */ -RTC_SYCL_API void* rtcGetGeometryUserDataFromScene(RTCScene scene, unsigned int geomID); - -/* Returns the interpolated transformation of an instance for the specified time. */ -RTC_SYCL_API void rtcGetGeometryTransformFromScene(RTCScene scene, unsigned int geomID, float time, enum RTCFormat format, void* xfm); - /* Commits the scene. */ RTC_API void rtcCommitScene(RTCScene scene); @@ -142,6 +142,13 @@ RTC_API void rtcGetSceneBounds(RTCScene scene, struct RTCBounds* bounds_o); /* Returns the linear axis-aligned bounds of the scene. */ RTC_API void rtcGetSceneLinearBounds(RTCScene scene, struct RTCLinearBounds* bounds_o); +#if !defined(__SYCL_DEVICE_ONLY__) + +/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */ +RTC_SYCL_API void* rtcGetGeometryUserDataFromScene(RTCScene scene, unsigned int geomID); + +/* Returns the interpolated transformation of an instance for the specified time. */ +RTC_SYCL_API void rtcGetGeometryTransformFromScene(RTCScene scene, unsigned int geomID, float time, enum RTCFormat format, void* xfm); /* Perform a closest point query of the scene. */ RTC_API bool rtcPointQuery(RTCScene scene, struct RTCPointQuery* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void* userPtr); @@ -231,6 +238,102 @@ RTC_API void rtcForwardOccluded16(const int* valid, const struct RTCOccludedFunc /* Forwards occlusion ray packet of size 16 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */ RTC_API void rtcForwardOccluded16Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID, unsigned int instPrimID); +#endif + +/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */ +RTC_SYCL_API void* rtcGetGeometryUserDataFromTraversable(RTCTraversable traversable, unsigned int geomID); + +/* Returns the interpolated transformation of an instance for the specified time. */ +RTC_SYCL_API void rtcGetGeometryTransformFromTraversable(RTCTraversable traversable, unsigned int geomID, float time, enum RTCFormat format, void* xfm); + +/* Perform a closest point query of the scene. */ +RTC_API bool rtcTraversablePointQuery(RTCTraversable traversable, struct RTCPointQuery* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void* userPtr); + +/* Perform a closest point query with a packet of 4 points with the scene. */ +RTC_API bool rtcTraversablePointQuery4(const int* valid, RTCTraversable traversable, struct RTCPointQuery4* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr); + +/* Perform a closest point query with a packet of 4 points with the scene. */ +RTC_API bool rtcTraversablePointQuery8(const int* valid, RTCTraversable traversable, struct RTCPointQuery8* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr); + +/* Perform a closest point query with a packet of 4 points with the scene. */ +RTC_API bool rtcTraversablePointQuery16(const int* valid, RTCTraversable traversable, struct RTCPointQuery16* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr); + + +/* Intersects a single ray with the scene. */ +RTC_SYCL_API void rtcTraversableIntersect1(RTCTraversable traversable, struct RTCRayHit* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT); + +/* Intersects a packet of 4 rays with the scene. */ +RTC_API void rtcTraversableIntersect4(const int* valid, RTCTraversable traversable, struct RTCRayHit4* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT); + +/* Intersects a packet of 8 rays with the scene. */ +RTC_API void rtcTraversableIntersect8(const int* valid, RTCTraversable traversable, struct RTCRayHit8* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT); + +/* Intersects a packet of 16 rays with the scene. */ +RTC_API void rtcTraversableIntersect16(const int* valid, RTCTraversable traversable, struct RTCRayHit16* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT); + + +/* Forwards ray inside user geometry callback. */ +RTC_SYCL_API void rtcTraversableForwardIntersect1(const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID); + +/* Forwards ray inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */ +RTC_SYCL_API void rtcTraversableForwardIntersect1Ex(const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID, unsigned int instPrimID); + +/* Forwards ray packet of size 4 inside user geometry callback. */ +RTC_API void rtcTraversableForwardIntersect4(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID); + +/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */ +RTC_API void rtcTraversableForwardIntersect4Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID, unsigned int primInstID); + +/* Forwards ray packet of size 8 inside user geometry callback. */ +RTC_API void rtcTraversableForwardIntersect8(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID); + +/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */ +RTC_API void rtcTraversableForwardIntersect8Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID, unsigned int primInstID); + +/* Forwards ray packet of size 16 inside user geometry callback. */ +RTC_API void rtcTraversableForwardIntersect16(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID); + +/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */ +RTC_API void rtcTraversableForwardIntersect16Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID, unsigned int primInstID); + + +/* Tests a single ray for occlusion with the scene. */ +RTC_SYCL_API void rtcTraversableOccluded1(RTCTraversable traversable, struct RTCRay* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT); + +/* Tests a packet of 4 rays for occlusion occluded with the scene. */ +RTC_API void rtcTraversableOccluded4(const int* valid, RTCTraversable traversable, struct RTCRay4* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT); + +/* Tests a packet of 8 rays for occlusion with the scene. */ +RTC_API void rtcTraversableOccluded8(const int* valid, RTCTraversable traversable, struct RTCRay8* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT); + +/* Tests a packet of 16 rays for occlusion with the scene. */ +RTC_API void rtcTraversableOccluded16(const int* valid, RTCTraversable traversable, struct RTCRay16* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT); + + +/* Forwards single occlusion ray inside user geometry callback. */ +RTC_SYCL_API void rtcTraversableForwardOccluded1(const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID); + +/* Forwards single occlusion ray inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */ +RTC_SYCL_API void rtcTraversableForwardOccluded1Ex(const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID, unsigned int instPrimID); + +/* Forwards occlusion ray packet of size 4 inside user geometry callback. */ +RTC_API void rtcTraversableForwardOccluded4(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID); + +/* Forwards occlusion ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */ +RTC_API void rtcTraversableForwardOccluded4Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID, unsigned int instPrimID); + +/* Forwards occlusion ray packet of size 8 inside user geometry callback. */ +RTC_API void rtcTraversableForwardOccluded8(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID); + +/* Forwards occlusion ray packet of size 8 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */ +RTC_API void rtcTraversableForwardOccluded8Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID, unsigned int instPrimID); + +/* Forwards occlusion ray packet of size 16 inside user geometry callback. */ +RTC_API void rtcTraversableForwardOccluded16(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID); + +/* Forwards occlusion ray packet of size 16 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */ +RTC_API void rtcTraversableForwardOccluded16Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID, unsigned int instPrimID); + /*! collision callback */ struct RTCCollision { unsigned int geomID0; unsigned int primID0; unsigned int geomID1; unsigned int primID1; }; @@ -238,7 +341,7 @@ typedef void (*RTCCollideFunc) (void* userPtr, struct RTCCollision* collisions, /*! Performs collision detection of two scenes */ RTC_API void rtcCollide (RTCScene scene0, RTCScene scene1, RTCCollideFunc callback, void* userPtr); - + #if defined(__cplusplus) /* Helper for easily combining scene flags */ diff --git a/thirdparty/embree/kernels/builders/bvh_builder_sah.h b/thirdparty/embree/kernels/builders/bvh_builder_sah.h index 7a0c0bdb8ec..034e067d1a7 100644 --- a/thirdparty/embree/kernels/builders/bvh_builder_sah.h +++ b/thirdparty/embree/kernels/builders/bvh_builder_sah.h @@ -48,10 +48,9 @@ namespace embree } Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf) - : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize), + : branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(min(minLeafSize,maxLeafSize)), maxLeafSize(maxLeafSize), travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc) { - minLeafSize = min(minLeafSize,maxLeafSize); } public: diff --git a/thirdparty/embree/kernels/builders/heuristic_spatial_array.h b/thirdparty/embree/kernels/builders/heuristic_spatial_array.h index 2584c19bda1..161595ec791 100644 --- a/thirdparty/embree/kernels/builders/heuristic_spatial_array.h +++ b/thirdparty/embree/kernels/builders/heuristic_spatial_array.h @@ -283,11 +283,9 @@ namespace embree if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */ - //int bin0 = split.mapping.bin(prims0[i].lower)[split.dim]; - //int bin1 = split.mapping.bin(prims0[i].upper)[split.dim]; - //if (unlikely(bin0 < split.pos && bin1 >= split.pos)) - - if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos)) + const int bin0 = split.mapping.bin(prims0[i].lower)[split.dim]; + const int bin1 = split.mapping.bin(prims0[i].upper)[split.dim]; + if (unlikely(bin0 < split.pos && bin1 >= split.pos)) { assert(splits > 1); diff --git a/thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp b/thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp index a4e55d74842..1621993d287 100644 --- a/thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_builder_sah_spatial.cpp @@ -179,9 +179,9 @@ namespace embree #if defined(EMBREE_GEOMETRY_TRIANGLE) - Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); } - Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); } - Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); } + Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); } + Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); } + Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); } #if defined(__AVX__) Builder* BVH8Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); } diff --git a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp index 7ea9736c5c8..f3b93e59251 100644 --- a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp @@ -150,8 +150,7 @@ namespace embree } } else { - //throw std::runtime_error("not supported node type in bvh_statistics"); - abort(); + abort(); //throw std::runtime_error("not supported node type in bvh_statistics"); } return s; } diff --git a/thirdparty/embree/kernels/common/alloc.h b/thirdparty/embree/kernels/common/alloc.h index 8ac22e53ec7..10f629a244d 100644 --- a/thirdparty/embree/kernels/common/alloc.h +++ b/thirdparty/embree/kernels/common/alloc.h @@ -189,11 +189,8 @@ namespace embree , atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC) , primrefarray(device,0) { - //if (osAllocation && useUSM) - // throw std::runtime_error("USM allocation cannot be combined with OS allocation."); - if (osAllocation && useUSM) { - abort(); - } + if (osAllocation && useUSM) + abort(); //throw std::runtime_error("USM allocation cannot be combined with OS allocation."); for (size_t i=0; imalloc(device,bytes,align,partial); - //if (ptr == nullptr && !blockAllocation) - // throw std::bad_alloc(); - if (ptr == nullptr && !blockAllocation) { - abort(); - } + if (ptr == nullptr && !blockAllocation) + abort(); //throw std::bad_alloc(); if (ptr) return ptr; } diff --git a/thirdparty/embree/kernels/common/buffer.h b/thirdparty/embree/kernels/common/buffer.h index 831f5815e8a..2306757f295 100644 --- a/thirdparty/embree/kernels/common/buffer.h +++ b/thirdparty/embree/kernels/common/buffer.h @@ -8,120 +8,232 @@ namespace embree { + enum class BufferDataPointerType { + HOST = 0, + DEVICE = 1, + UNKNOWN = 2 + }; + /*! Implements an API data buffer object. This class may or may not own the data. */ class Buffer : public RefCount { - public: - /*! Buffer construction */ - //Buffer() - //: device(nullptr), ptr(nullptr), numBytes(0), shared(false) {} - - /*! Buffer construction */ - Buffer(Device* device, size_t numBytes_in, void* ptr_in = nullptr) - : device(device), numBytes(numBytes_in) + private: + char* alloc(void* ptr_in, bool &shared, EmbreeMemoryType memoryType) { - device->refInc(); - if (ptr_in) { shared = true; - ptr = (char*)ptr_in; + return (char*)ptr_in; } else { shared = false; - alloc(); + device->memoryMonitor(this->bytes(), false); + size_t b = (this->bytes()+15) & ssize_t(-16); + return (char*)device->malloc(b,16,memoryType); } } - + + public: + Buffer(Device* device, size_t numBytes_in, void* ptr_in) + : device(device), numBytes(numBytes_in) + { + device->refInc(); + + ptr = alloc(ptr_in, shared, EmbreeMemoryType::USM_SHARED); +#if defined(EMBREE_SYCL_SUPPORT) + dshared = true; + dptr = ptr; + modified = true; +#endif + } + + Buffer(Device* device, size_t numBytes_in, void* ptr_in, void* dptr_in) + : device(device), numBytes(numBytes_in) + { + device->refInc(); + +#if defined(EMBREE_SYCL_SUPPORT) + modified = true; + if (device->is_gpu() && !device->has_unified_memory()) + { + ptr = alloc( ptr_in, shared, EmbreeMemoryType::MALLOC); + dptr = alloc(dptr_in, dshared, EmbreeMemoryType::USM_DEVICE); + } + else if (device->is_gpu() && device->has_unified_memory()) + { + ptr = alloc(ptr_in, shared, EmbreeMemoryType::USM_SHARED); + + if (device->get_memory_type(ptr) != EmbreeMemoryType::USM_SHARED) + { + dptr = alloc(dptr_in, dshared, EmbreeMemoryType::USM_DEVICE); + } + else + { + dshared = true; + dptr = ptr; + } + } + else +#endif + { + ptr = alloc(ptr_in, shared, EmbreeMemoryType::MALLOC); +#if defined(EMBREE_SYCL_SUPPORT) + dshared = true; + dptr = ptr; +#endif + } + } + /*! Buffer destruction */ - ~Buffer() { + virtual ~Buffer() { free(); device->refDec(); } - + /*! this class is not copyable */ private: Buffer(const Buffer& other) DELETED; // do not implement Buffer& operator =(const Buffer& other) DELETED; // do not implement - + public: - /* inits and allocates the buffer */ - void create(Device* device_in, size_t numBytes_in) + + /*! frees the buffer */ + virtual void free() { - init(device_in, numBytes_in); - alloc(); - } - - /* inits the buffer */ - void init(Device* device_in, size_t numBytes_in) - { - free(); - device = device_in; - ptr = nullptr; - numBytes = numBytes_in; - shared = false; + if (!shared && ptr) { +#if defined(EMBREE_SYCL_SUPPORT) + if (dptr == ptr) { + dptr = nullptr; + } +#endif + device->free(ptr); + device->memoryMonitor(-ssize_t(this->bytes()), true); + ptr = nullptr; + } +#if defined(EMBREE_SYCL_SUPPORT) + if (!dshared && dptr) { + device->free(dptr); + device->memoryMonitor(-ssize_t(this->bytes()), true); + dptr = nullptr; + } +#endif } - /*! sets shared buffer */ - void set(Device* device_in, void* ptr_in, size_t numBytes_in) - { - free(); - device = device_in; - ptr = (char*)ptr_in; - if (numBytes_in != (size_t)-1) - numBytes = numBytes_in; - shared = true; - } - - /*! allocated buffer */ - void alloc() - { - device->memoryMonitor(this->bytes(), false); - size_t b = (this->bytes()+15) & ssize_t(-16); - ptr = (char*)device->malloc(b,16); - } - - /*! frees the buffer */ - void free() - { - if (shared) return; - device->free(ptr); - device->memoryMonitor(-ssize_t(this->bytes()), true); - ptr = nullptr; - } - /*! gets buffer pointer */ void* data() { /* report error if buffer is not existing */ if (!device) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified"); - + /* return buffer */ return ptr; } + /*! gets buffer pointer */ + void* dataDevice() + { + /* report error if buffer is not existing */ + if (!device) + throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified"); + + /* return buffer */ +#if defined(EMBREE_SYCL_SUPPORT) + return dptr; +#else + return ptr; +#endif + } + /*! returns pointer to first element */ - __forceinline char* getPtr() const { + __forceinline char* getPtr(BufferDataPointerType type) const + { + if (type == BufferDataPointerType::HOST) return getHostPtr(); + else if (type == BufferDataPointerType::DEVICE) return getDevicePtr(); + + throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer data pointer type specified"); + return nullptr; + } + + /*! returns pointer to first element */ + __forceinline virtual char* getHostPtr() const { return ptr; } + /*! returns pointer to first element */ + __forceinline virtual char* getDevicePtr() const { +#if defined(EMBREE_SYCL_SUPPORT) + return dptr; +#else + return ptr; +#endif + } + /*! returns the number of bytes of the buffer */ - __forceinline size_t bytes() const { + __forceinline size_t bytes() const { return numBytes; } - + /*! returns true of the buffer is not empty */ - __forceinline operator bool() const { - return ptr; + __forceinline operator bool() const { + return ptr; + } + + __forceinline void commit() { +#if defined(EMBREE_SYCL_SUPPORT) + DeviceGPU* gpu_device = dynamic_cast(device); + if (gpu_device) { + sycl::queue queue(gpu_device->getGPUDevice()); + commit(queue); + queue.wait_and_throw(); + } + modified = false; +#endif + } + +#if defined(EMBREE_SYCL_SUPPORT) + __forceinline sycl::event commit(sycl::queue queue) { + if (dptr == ptr) + return sycl::event(); + + modified = false; + return queue.memcpy(dptr, ptr, numBytes); + } +#endif + + __forceinline bool needsCommit() const { +#if defined(EMBREE_SYCL_SUPPORT) + return (dptr == ptr) ? false : modified; +#else + return false; +#endif + } + + __forceinline void setNeedsCommit(bool isModified = true) { +#if defined(EMBREE_SYCL_SUPPORT) + modified = isModified; +#endif + } + + __forceinline void commitIfNeeded() { + if (needsCommit()) { + commit(); + } } public: - Device* device; //!< device to report memory usage to - char* ptr; //!< pointer to buffer data - size_t numBytes; //!< number of bytes in the buffer - bool shared; //!< set if memory is shared with application + Device* device; //!< device to report memory usage to + size_t numBytes; //!< number of bytes in the buffer + char* ptr; //!< pointer to buffer data +#if defined(EMBREE_SYCL_SUPPORT) + char* dptr; //!< pointer to buffer data on device +#endif + bool shared; //!< set if memory is shared with application +#if defined(EMBREE_SYCL_SUPPORT) + bool dshared; //!< set if device memory is shared with application + bool modified; //!< to be set when host memory has been modified and dev needs update +#endif }; /*! An untyped contiguous range of a buffer. This class does not own the buffer content. */ @@ -130,7 +242,7 @@ namespace embree public: /*! Buffer construction */ RawBufferView() - : ptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {} + : ptr_ofs(nullptr), dptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {} public: /*! sets the buffer view */ @@ -139,7 +251,8 @@ namespace embree if ((offset_in + stride_in * num_in) > (stride_in * buffer_in->numBytes)) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds"); - ptr_ofs = buffer_in->ptr + offset_in; + ptr_ofs = buffer_in->getHostPtr() + offset_in; + dptr_ofs = buffer_in->getDevicePtr() + offset_in; stride = stride_in; num = num_in; format = format_in; @@ -148,28 +261,48 @@ namespace embree buffer = buffer_in; } + /*! returns pointer to the i'th element */ + __forceinline char* getPtr(BufferDataPointerType pointerType) const + { + if (pointerType == BufferDataPointerType::HOST) + return ptr_ofs; + else if (pointerType == BufferDataPointerType::DEVICE) + return dptr_ofs; + + throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer data pointer type specified"); + return nullptr; + } + /*! returns pointer to the first element */ __forceinline char* getPtr() const { - return ptr_ofs; + #if defined(__SYCL_DEVICE_ONLY__) + return dptr_ofs; + #else + return ptr_ofs; + #endif } /*! returns pointer to the i'th element */ __forceinline char* getPtr(size_t i) const { - assert(isetNeedsCommit(); } /*! mark buffer as modified or unmodified */ @@ -205,7 +339,7 @@ namespace embree /*! returns true of the buffer is not empty */ __forceinline operator bool() const { - return ptr_ofs; + return ptr_ofs; } /*! checks padding to 16 byte check, fails hard */ @@ -217,6 +351,7 @@ namespace embree public: char* ptr_ofs; //!< base pointer plus offset + char* dptr_ofs; //!< base pointer plus offset in device memory size_t stride; //!< stride of the buffer in bytes size_t num; //!< number of elements in the buffer RTCFormat format; //!< format of the buffer @@ -233,9 +368,15 @@ namespace embree public: typedef T value_type; +#if defined(__SYCL_DEVICE_ONLY__) + /*! access to the ith element of the buffer */ + __forceinline T& operator [](size_t i) { assert(i @@ -250,14 +391,14 @@ namespace embree __forceinline const Vec3fa operator [](size_t i) const { assert(ierror == RTC_ERROR_NONE) { + stored_error->error = error; + if (msg != "") + stored_error->msg = msg; + } } RTCError Device::getDeviceErrorCode() { - RTCError* stored_error = errorHandler.error(); - RTCError error = *stored_error; - *stored_error = RTC_ERROR_NONE; - return error; + RTCErrorMessage* stored_error = errorHandler.error(); + RTCErrorMessage error = *stored_error; + stored_error->error = RTC_ERROR_NONE; + return error.error; } - void Device::setThreadErrorCode(RTCError error) + const char* Device::getDeviceLastErrorMessage() { - RTCError* stored_error = g_errorHandler.error(); - if (*stored_error == RTC_ERROR_NONE) - *stored_error = error; + RTCErrorMessage* stored_error = errorHandler.error(); + return stored_error->msg.c_str(); + } + + void Device::setThreadErrorCode(RTCError error, std::string const& msg) + { + RTCErrorMessage* stored_error = g_errorHandler.error(); + if (stored_error->error == RTC_ERROR_NONE) { + stored_error->error = error; + if (msg != "") + stored_error->msg = msg; + } } RTCError Device::getThreadErrorCode() { - RTCError* stored_error = g_errorHandler.error(); - RTCError error = *stored_error; - *stored_error = RTC_ERROR_NONE; - return error; + RTCErrorMessage* stored_error = g_errorHandler.error(); + RTCErrorMessage error = *stored_error; + stored_error->error = RTC_ERROR_NONE; + return error.error; + } + + const char* Device::getThreadLastErrorMessage() + { + RTCErrorMessage* stored_error = g_errorHandler.error(); + return stored_error->msg.c_str(); } void Device::process_error(Device* device, RTCError error, const char* str) - { + { /* store global error code when device construction failed */ if (!device) - return setThreadErrorCode(error); + return setThreadErrorCode(error, str ? std::string(str) : std::string()); /* print error when in verbose mode */ - if (device->verbosity(1)) + if (device->verbosity(1)) { - switch (error) { - case RTC_ERROR_NONE : std::cerr << "Embree: No error"; break; - case RTC_ERROR_UNKNOWN : std::cerr << "Embree: Unknown error"; break; - case RTC_ERROR_INVALID_ARGUMENT : std::cerr << "Embree: Invalid argument"; break; - case RTC_ERROR_INVALID_OPERATION: std::cerr << "Embree: Invalid operation"; break; - case RTC_ERROR_OUT_OF_MEMORY : std::cerr << "Embree: Out of memory"; break; - case RTC_ERROR_UNSUPPORTED_CPU : std::cerr << "Embree: Unsupported CPU"; break; - default : std::cerr << "Embree: Invalid error code"; break; - }; + std::cerr << "Embree: " << getErrorString(error); if (str) std::cerr << ", (" << str << ")"; std::cerr << std::endl; } @@ -312,7 +324,7 @@ namespace embree device->error_function(device->error_function_userptr,error,str); /* record error code */ - device->setDeviceErrorCode(error); + device->setDeviceErrorCode(error, str ? std::string(str) : std::string()); } void Device::memoryMonitor(ssize_t bytes, bool post) @@ -570,6 +582,22 @@ namespace embree case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0; #endif +#if defined(EMBREE_SYCL_SUPPORT) + case RTC_DEVICE_PROPERTY_CPU_DEVICE: { + if (!dynamic_cast(this)) + return 1; + return 0; + }; + case RTC_DEVICE_PROPERTY_SYCL_DEVICE: { + if (!dynamic_cast(this)) + return 0; + return 1; + }; +#else + case RTC_DEVICE_PROPERTY_CPU_DEVICE: return 1; + case RTC_DEVICE_PROPERTY_SYCL_DEVICE: return 0; +#endif + default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break; }; } @@ -578,10 +606,31 @@ namespace embree return alignedMalloc(size,align); } + void* Device::malloc(size_t size, size_t align, EmbreeMemoryType type) { + return alignedMalloc(size,align); + } + void Device::free(void* ptr) { alignedFree(ptr); } + const std::vector Device::error_strings = { + "No Error", + "Unknown error", + "Invalid argument", + "Invalid operation", + "Out of Memory", + "Unsupported CPU", + "Build cancelled", + "Level Zero raytracing support missing" + }; + + const char* Device::getErrorString(RTCError error) { + if (error >= 0 && error < error_strings.size()) { + return error_strings.at(error).c_str(); + } + return "Invalid error code"; + } #if defined(EMBREE_SYCL_SUPPORT) @@ -613,7 +662,6 @@ namespace embree if (result != ZE_RESULT_SUCCESS) throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed"); -#if defined(EMBREE_SYCL_L0_RTAS_BUILDER) bool ze_rtas_builder = false; for (uint32_t i=0; i(); + } +#pragma GCC diagnostic pop + #endif DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice) diff --git a/thirdparty/embree/kernels/common/device.h b/thirdparty/embree/kernels/common/device.h index c9e8888a5af..ab3d773225f 100644 --- a/thirdparty/embree/kernels/common/device.h +++ b/thirdparty/embree/kernels/common/device.h @@ -38,7 +38,7 @@ namespace embree __forceinline pointer allocate( size_type n ) { assert(device); - return (pointer) device->malloc(n*sizeof(T),alignment); + return (pointer) device->malloc(n*sizeof(T),alignment,EmbreeMemoryType::MALLOC); } __forceinline void deallocate( pointer p, size_type n ) { @@ -75,17 +75,27 @@ namespace embree void print(); /*! sets the error code */ - void setDeviceErrorCode(RTCError error); + void setDeviceErrorCode(RTCError error, std::string const& msg = ""); /*! returns and clears the error code */ RTCError getDeviceErrorCode(); + /*! Returns the string representation for the error code. For example, for RTC_ERROR_UNKNOWN the string "RTC_ERROR_UNKNOWN" will be returned. */ + static char* getDeviceErrorString(); + + /*! returns the last error message */ + const char* getDeviceLastErrorMessage(); + /*! sets the error code */ - static void setThreadErrorCode(RTCError error); + static void setThreadErrorCode(RTCError error, std::string const& msg = ""); /*! returns and clears the error code */ static RTCError getThreadErrorCode(); + + /*! returns the last error message */ + static const char* getThreadLastErrorMessage(); + /*! processes error codes, do not call directly */ static void process_error(Device* device, RTCError error, const char* str); @@ -107,12 +117,23 @@ namespace embree /*! leave device by setting up some global state */ virtual void leave() {} - /*! buffer allocation */ + /*! buffer allocation - using USM shared */ virtual void* malloc(size_t size, size_t align); + /*! buffer allocation */ + virtual void* malloc(size_t size, size_t align, EmbreeMemoryType type); + /*! buffer deallocation */ virtual void free(void* ptr); + /*! returns true if device is of type DeviceGPU */ + virtual bool is_gpu() const { return false; } + + /*! returns true if device and host have shared memory system (e.g., integrated GPU) */ + virtual bool has_unified_memory() const { return true; } + + virtual EmbreeMemoryType get_memory_type(void* ptr) const { return EmbreeMemoryType::MALLOC; } + private: /*! initializes the tasking system */ @@ -140,6 +161,13 @@ namespace embree #if defined(EMBREE_TARGET_SIMD8) std::unique_ptr bvh8_factory; #endif + + private: + static const std::vector error_strings; + + public: + static const char* getErrorString(RTCError error); + }; #if defined(EMBREE_SYCL_SUPPORT) @@ -154,11 +182,27 @@ namespace embree virtual void enter() override; virtual void leave() override; virtual void* malloc(size_t size, size_t align) override; + virtual void* malloc(size_t size, size_t align, EmbreeMemoryType type) override; virtual void free(void* ptr) override; /* set SYCL device */ void setSYCLDevice(const sycl::device sycl_device); + /*! returns true if device is of type DeviceGPU */ + virtual bool is_gpu() const override { return true; } + + /*! returns true if device and host have shared memory system (e.g., integrated GPU) */ + virtual bool has_unified_memory() const override; + + virtual EmbreeMemoryType get_memory_type(void* ptr) const override { + switch(sycl::get_pointer_type(ptr, gpu_context)) { + case sycl::usm::alloc::host: return EmbreeMemoryType::USM_HOST; + case sycl::usm::alloc::device: return EmbreeMemoryType::USM_DEVICE; + case sycl::usm::alloc::shared: return EmbreeMemoryType::USM_SHARED; + default: return EmbreeMemoryType::MALLOC; + } + } + private: sycl::context gpu_context; sycl::device gpu_device; diff --git a/thirdparty/embree/kernels/common/geometry.cpp b/thirdparty/embree/kernels/common/geometry.cpp index 79a6eb00d70..b2a604fd453 100644 --- a/thirdparty/embree/kernels/common/geometry.cpp +++ b/thirdparty/embree/kernels/common/geometry.cpp @@ -116,7 +116,7 @@ namespace embree { } - void Geometry::enable () + void Geometry::enable () { if (isEnabled()) return; diff --git a/thirdparty/embree/kernels/common/geometry.h b/thirdparty/embree/kernels/common/geometry.h index 00e3c5ede3d..3c7ce99564f 100644 --- a/thirdparty/embree/kernels/common/geometry.h +++ b/thirdparty/embree/kernels/common/geometry.h @@ -126,10 +126,8 @@ namespace embree }; /*! Base class all geometries are derived from */ - class Geometry : public RefCount + class __aligned(16) Geometry : public RefCount { - ALIGNED_CLASS_USM_(16); - friend class Scene; public: @@ -372,7 +370,7 @@ namespace embree /*! called before every build */ virtual void preCommit(); - + /*! called after every build */ virtual void postCommit(); @@ -469,7 +467,7 @@ namespace embree } /*! Gets specified buffer. */ - virtual void* getBuffer(RTCBufferType type, unsigned int slot) { + virtual void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType) { throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry"); } @@ -543,6 +541,17 @@ namespace embree return numTimeSteps-1; } + public: + + /*! methods for converting host geometry data to device geometry data */ + virtual size_t getGeometryDataDeviceByteSize() const { + throw_RTCError(RTC_ERROR_INVALID_OPERATION,"getGeometryDataDeviceByteSize not implemented for this geometry"); + } + + virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const { + throw_RTCError(RTC_ERROR_INVALID_OPERATION,"convertToDeviceRepresentation not implemented for this geometry"); + } + public: virtual PrimInfo createPrimRefArray(PrimRef* prims, const range& r, size_t k, unsigned int geomID) const { diff --git a/thirdparty/embree/kernels/common/ray.h b/thirdparty/embree/kernels/common/ray.h index c8860133542..a7b3bf33d06 100644 --- a/thirdparty/embree/kernels/common/ray.h +++ b/thirdparty/embree/kernels/common/ray.h @@ -143,7 +143,7 @@ namespace embree vbool valid = valid0 & geomID != vuint(RTC_INVALID_GEOMETRY_ID); const vbool vt = (abs(tfar) <= vfloat(FLT_LARGE)) | (tfar == vfloat(neg_inf)); const vbool vu = (abs(u) <= vfloat(FLT_LARGE)); - const vbool vv = (abs(u) <= vfloat(FLT_LARGE)); + const vbool vv = (abs(v) <= vfloat(FLT_LARGE)); const vbool vnx = abs(Ng.x) <= vfloat(FLT_LARGE); const vbool vny = abs(Ng.y) <= vfloat(FLT_LARGE); const vbool vnz = abs(Ng.z) <= vfloat(FLT_LARGE); diff --git a/thirdparty/embree/kernels/common/rtcore.cpp b/thirdparty/embree/kernels/common/rtcore.cpp index e19c243bf68..a34eb2a0c41 100644 --- a/thirdparty/embree/kernels/common/rtcore.cpp +++ b/thirdparty/embree/kernels/common/rtcore.cpp @@ -46,19 +46,23 @@ RTC_NAMESPACE_BEGIN; RTC_API bool rtcIsSYCLDeviceSupported(const sycl::device device) { - RTC_CATCH_BEGIN; - RTC_TRACE(rtcIsSYCLDeviceSupported); - return rthwifIsSYCLDeviceSupported(device) > 0; - RTC_CATCH_END(nullptr); + try { + RTC_TRACE(rtcIsSYCLDeviceSupported); + return rthwifIsSYCLDeviceSupported(device) > 0; + } catch (...) { + return false; + } return false; } RTC_API int rtcSYCLDeviceSelector(const sycl::device device) { - RTC_CATCH_BEGIN; - RTC_TRACE(rtcSYCLDeviceSelector); - return rthwifIsSYCLDeviceSupported(device); - RTC_CATCH_END(nullptr); + try { + RTC_TRACE(rtcSYCLDeviceSelector); + return rthwifIsSYCLDeviceSupported(device); + } catch (...) { + return -1; + } return -1; } @@ -79,6 +83,29 @@ RTC_NAMESPACE_BEGIN; RTC_CATCH_END(nullptr); } + RTC_API_CPP sycl::event rtcCommitSceneWithQueue (RTCScene hscene, sycl::queue queue) + { + Scene* scene = (Scene*) hscene; + RTC_CATCH_BEGIN; + RTC_TRACE(rtcCommitSceneWithQueue); + RTC_VERIFY_HANDLE(hscene); + RTC_ENTER_DEVICE(hscene); + return scene->commit(false, queue); + RTC_CATCH_END2(scene); + return sycl::event(); + } + + RTC_API_CPP sycl::event rtcCommitBufferWithQueue(RTCBuffer hbuffer, sycl::queue queue) { + Buffer* buffer = (Buffer*)hbuffer; + RTC_CATCH_BEGIN; + RTC_TRACE(rtcCommitBufferWithQueue); + RTC_VERIFY_HANDLE(hbuffer); + RTC_ENTER_DEVICE(hbuffer); + return buffer->commit(queue); + RTC_CATCH_END2(buffer); + return sycl::event(); + } + #endif RTC_API void rtcRetainDevice(RTCDevice hdevice) @@ -138,6 +165,17 @@ RTC_NAMESPACE_BEGIN; return RTC_ERROR_UNKNOWN; } + RTC_API const char* rtcGetDeviceLastErrorMessage(RTCDevice hdevice) + { + Device* device = (Device*) hdevice; + RTC_CATCH_BEGIN; + RTC_TRACE(rtcGetDeviceLastErrorMessage); + if (device == nullptr) return Device::getThreadLastErrorMessage(); + else return device->getDeviceLastErrorMessage(); + RTC_CATCH_END(device); + return ""; + } + RTC_API void rtcSetDeviceErrorFunction(RTCDevice hdevice, RTCErrorFunction error, void* userPtr) { Device* device = (Device*) hdevice; @@ -163,7 +201,19 @@ RTC_NAMESPACE_BEGIN; RTC_TRACE(rtcNewBuffer); RTC_VERIFY_HANDLE(hdevice); RTC_ENTER_DEVICE(hdevice); - Buffer* buffer = new Buffer((Device*)hdevice, byteSize); + Buffer* buffer = new Buffer((Device*)hdevice, byteSize, nullptr); + return (RTCBuffer)buffer->refInc(); + RTC_CATCH_END((Device*)hdevice); + return nullptr; + } + + RTC_API RTCBuffer rtcNewBufferHostDevice(RTCDevice hdevice, size_t byteSize) + { + RTC_CATCH_BEGIN; + RTC_TRACE(rtcNewBufferHostDevice); + RTC_VERIFY_HANDLE(hdevice); + RTC_ENTER_DEVICE(hdevice); + Buffer* buffer = new Buffer((Device*)hdevice, byteSize, nullptr, nullptr); return (RTCBuffer)buffer->refInc(); RTC_CATCH_END((Device*)hdevice); return nullptr; @@ -181,6 +231,30 @@ RTC_NAMESPACE_BEGIN; return nullptr; } + RTC_API RTCBuffer rtcNewSharedBufferHostDevice(RTCDevice hdevice, void* ptr, size_t byteSize) + { + RTC_CATCH_BEGIN; + RTC_TRACE(rtcNewSharedBufferHostDevice); + RTC_VERIFY_HANDLE(hdevice); + RTC_ENTER_DEVICE(hdevice); + Buffer* buffer = new Buffer((Device*)hdevice, byteSize, ptr, nullptr); + return (RTCBuffer)buffer->refInc(); + RTC_CATCH_END((Device*)hdevice); + return nullptr; + } + + RTC_API void* rtcGetBufferDataDevice(RTCBuffer hbuffer) + { + Buffer* buffer = (Buffer*)hbuffer; + RTC_CATCH_BEGIN; + RTC_TRACE(rtcGetBufferDataDevice); + RTC_VERIFY_HANDLE(hbuffer); + RTC_ENTER_DEVICE(hbuffer); + return buffer->dataDevice(); + RTC_CATCH_END2(buffer); + return nullptr; + } + RTC_API void* rtcGetBufferData(RTCBuffer hbuffer) { Buffer* buffer = (Buffer*)hbuffer; @@ -215,6 +289,16 @@ RTC_NAMESPACE_BEGIN; RTC_CATCH_END2(buffer); } + RTC_API void rtcCommitBuffer(RTCBuffer hbuffer) { + Buffer* buffer = (Buffer*)hbuffer; + RTC_CATCH_BEGIN; + RTC_TRACE(rtcCommitBuffer); + RTC_VERIFY_HANDLE(hbuffer); + RTC_ENTER_DEVICE(hbuffer); + buffer->commit(); + RTC_CATCH_END2(buffer); + } + RTC_API RTCScene rtcNewScene (RTCDevice hdevice) { RTC_CATCH_BEGIN; @@ -238,6 +322,20 @@ RTC_NAMESPACE_BEGIN; return (RTCDevice)nullptr; } + RTC_API RTCTraversable rtcGetSceneTraversable(RTCScene hscene) + { + Scene* scene = (Scene*) hscene; + RTC_CATCH_BEGIN; + RTC_TRACE(rtcGetSceneTraversable); + RTC_VERIFY_HANDLE(hscene); + RTCTraversable traversable = (RTCTraversable)scene->getTraversable(); + if (!traversable) + throw_RTCError(RTC_ERROR_INVALID_OPERATION,"Traversable is NULL. The scene has to be committed first."); + return traversable; + RTC_CATCH_END2(scene); + return (RTCTraversable)nullptr; + } + RTC_API void rtcSetSceneProgressMonitorFunction(RTCScene hscene, RTCProgressMonitorFunction progress, void* ptr) { Scene* scene = (Scene*) hscene; @@ -257,15 +355,10 @@ RTC_NAMESPACE_BEGIN; RTC_TRACE(rtcSetSceneBuildQuality); RTC_VERIFY_HANDLE(hscene); RTC_ENTER_DEVICE(hscene); - //if (quality != RTC_BUILD_QUALITY_LOW && - // quality != RTC_BUILD_QUALITY_MEDIUM && - // quality != RTC_BUILD_QUALITY_HIGH) - // throw std::runtime_error("invalid build quality"); if (quality != RTC_BUILD_QUALITY_LOW && quality != RTC_BUILD_QUALITY_MEDIUM && - quality != RTC_BUILD_QUALITY_HIGH) { - abort(); - } + quality != RTC_BUILD_QUALITY_HIGH) + abort(); //throw std::runtime_error("invalid build quality"); scene->setBuildQuality(quality); RTC_CATCH_END2(scene); } @@ -287,20 +380,28 @@ RTC_NAMESPACE_BEGIN; RTC_CATCH_BEGIN; RTC_TRACE(rtcGetSceneFlags); RTC_VERIFY_HANDLE(hscene); - RTC_ENTER_DEVICE(hscene); + //RTC_ENTER_DEVICE(hscene); return scene->getSceneFlags(); RTC_CATCH_END2(scene); return RTC_SCENE_FLAG_NONE; } - - RTC_API void rtcCommitScene (RTCScene hscene) + + RTC_API_EXTERN_C bool prefetchUSMSharedOnGPU(RTCScene scene); + + RTC_API void rtcCommitScene (RTCScene hscene) { Scene* scene = (Scene*) hscene; RTC_CATCH_BEGIN; RTC_TRACE(rtcCommitScene); RTC_VERIFY_HANDLE(hscene); RTC_ENTER_DEVICE(hscene); + scene->commit(false); + +#if defined(EMBREE_SYCL_SUPPORT) + //prefetchUSMSharedOnGPU(hscene); +#endif + RTC_CATCH_END2(scene); } @@ -311,6 +412,7 @@ RTC_NAMESPACE_BEGIN; RTC_TRACE(rtcJoinCommitScene); RTC_VERIFY_HANDLE(hscene); RTC_ENTER_DEVICE(hscene); + scene->commit(true); RTC_CATCH_END2(scene); } @@ -1130,7 +1232,159 @@ RTC_NAMESPACE_BEGIN; rtcForwardOccludedN(valid, args, hscene, iray, instID, instPrimID); RTC_CATCH_END2(scene); } - + + RTC_API bool rtcTraversablePointQuery(RTCTraversable htraversable, RTCPointQuery* query, RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void* userPtr) + { + return rtcPointQuery((RTCScene)htraversable, query, userContext, queryFunc, userPtr); + } + + RTC_API bool rtcTraversablePointQuery4 (const int* valid, RTCTraversable htraversable, RTCPointQuery4* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN) + { + return rtcPointQuery4(valid, (RTCScene)htraversable, query, userContext, queryFunc, userPtrN); + } + + RTC_API bool rtcTraversablePointQuery8 (const int* valid, RTCTraversable htraversable, RTCPointQuery8* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN) + { + return rtcPointQuery8(valid, (RTCScene)htraversable, query, userContext, queryFunc, userPtrN); + } + + RTC_API bool rtcTraversablePointQuery16 (const int* valid, RTCTraversable htraversable, RTCPointQuery16* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN) + { + return rtcPointQuery16(valid, (RTCScene)htraversable, query, userContext, queryFunc, userPtrN); + } + + RTC_API void rtcTraversableIntersect1 (RTCTraversable htraversable, RTCRayHit* rayhit, RTCIntersectArguments* args) + { + rtcIntersect1((RTCScene)htraversable, rayhit, args); + } + + RTC_API void rtcTraversableForwardIntersect1 (const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID) + { + rtcForwardIntersect1(args, (RTCScene)htraversable, iray_, instID); + } + + RTC_API void rtcTraversableForwardIntersect1Ex(const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID, unsigned int instPrimID) + { + rtcForwardIntersect1Ex(args, (RTCScene)htraversable, iray_, instID, instPrimID); + } + + RTC_API void rtcTraversableIntersect4 (const int* valid, RTCTraversable htraversable, RTCRayHit4* rayhit, RTCIntersectArguments* args) + { + rtcIntersect4(valid, (RTCScene)htraversable, rayhit, args); + } + + template + __forceinline void rtcTraversableForwardIntersectN(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray, unsigned int instID, unsigned int instPrimID) + { + rtcForwardIntersetN(valid, args, (RTCScene)htraversable, iray, instID, instPrimID); + } + + RTC_API void rtcTraversableForwardIntersect4(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID) + { + rtcForwardIntersect4(valid, args, (RTCScene)htraversable, iray, instID); + } + + RTC_API void rtcTraversableForwardIntersect4Ex(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID, unsigned int instPrimID) + { + rtcForwardIntersect4Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID); + } + + RTC_API void rtcTraversableIntersect8 (const int* valid, RTCTraversable htraversable, RTCRayHit8* rayhit, RTCIntersectArguments* args) + { + rtcIntersect8(valid, (RTCScene)htraversable, rayhit, args); + } + + RTC_API void rtcTraversableForwardIntersect8(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID) + { + rtcForwardIntersect8(valid, args, (RTCScene)htraversable, iray, instID); + } + + RTC_API void rtcTraversableForwardIntersect8Ex(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID, unsigned int instPrimID) + { + rtcForwardIntersect8Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID); + } + + RTC_API void rtcTraversableIntersect16 (const int* valid, RTCTraversable htraversable, RTCRayHit16* rayhit, RTCIntersectArguments* args) + { + rtcIntersect16(valid, (RTCScene)htraversable, rayhit, args); + } + + RTC_API void rtcTraversableForwardIntersect16(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID) + { + rtcForwardIntersect16(valid, args, (RTCScene)htraversable, iray, instID); + } + + RTC_API void rtcTraversableForwardIntersect16Ex(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID, unsigned int instPrimID) + { + rtcForwardIntersect16Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID); + } + + RTC_API void rtcTraversableOccluded1 (RTCTraversable htraversable, RTCRay* ray, RTCOccludedArguments* args) + { + rtcOccluded1((RTCScene)htraversable, ray, args); + } + + RTC_API void rtcTraversableForwardOccluded1 (const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID) + { + rtcForwardOccluded1(args, (RTCScene)htraversable, iray_, instID); + } + + RTC_API void rtcTraversableForwardOccluded1Ex(const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID, unsigned int instPrimID) + { + rtcForwardOccluded1Ex(args, (RTCScene)htraversable, iray_, instID, instPrimID); + } + + RTC_API void rtcTraversableOccluded4 (const int* valid, RTCTraversable htraversable, RTCRay4* ray, RTCOccludedArguments* args) + { + rtcOccluded4(valid, (RTCScene)htraversable, ray, args); + } + + template + __forceinline void rtcTraversableForwardOccludedN (const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray, unsigned int instID, unsigned int instPrimID) + { + rtcForwardOccludedN(valid, args, (RTCScene)htraversable, iray, instID, instPrimID); + } + + RTC_API void rtcTraversableForwardOccluded4(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID) + { + rtcForwardOccluded4(valid, args, (RTCScene)htraversable, iray, instID); + } + + RTC_API void rtcTraversableForwardOccluded4Ex(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID, unsigned int instPrimID) + { + rtcForwardOccluded4Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID); + } + + RTC_API void rtcTraversableOccluded8 (const int* valid, RTCTraversable htraversable, RTCRay8* ray, RTCOccludedArguments* args) + { + rtcOccluded8(valid, (RTCScene)htraversable, ray, args); + } + + RTC_API void rtcTraversableForwardOccluded8(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID) + { + rtcForwardOccluded8(valid, args, (RTCScene)htraversable, iray, instID); + } + + RTC_API void rtcTraversableForwardOccluded8Ex(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID, unsigned int instPrimID) + { + rtcForwardOccluded8Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID); + } + + RTC_API void rtcTraversableOccluded16 (const int* valid, RTCTraversable htraversable, RTCRay16* ray, RTCOccludedArguments* args) + { + rtcOccluded16(valid, (RTCScene)htraversable, ray, args); + } + + RTC_API void rtcTraversableForwardOccluded16(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID) + { + rtcForwardOccluded16(valid, args, (RTCScene)htraversable, iray, instID); + } + + RTC_API void rtcTraversableForwardOccluded16Ex(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID, unsigned int instPrimID) + { + rtcForwardOccluded16Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID); + } + RTC_API void rtcRetainScene (RTCScene hscene) { Scene* scene = (Scene*) hscene; @@ -1292,6 +1546,11 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte RTC_CATCH_END2(scene); } + RTC_API void rtcGetGeometryTransformFromTraversable(RTCTraversable htraversable, unsigned int geomID, float time, RTCFormat format, void* xfm) + { + rtcGetGeometryTransformFromScene((RTCScene)htraversable, geomID, time, format, xfm); + } + RTC_API void rtcInvokeIntersectFilterFromGeometry(const struct RTCIntersectFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args) { IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_i; @@ -1568,17 +1827,11 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte RTC_TRACE(rtcSetGeometryBuildQuality); RTC_VERIFY_HANDLE(hgeometry); RTC_ENTER_DEVICE(hgeometry); - //if (quality != RTC_BUILD_QUALITY_LOW && - // quality != RTC_BUILD_QUALITY_MEDIUM && - // quality != RTC_BUILD_QUALITY_HIGH && - // quality != RTC_BUILD_QUALITY_REFIT) - // throw std::runtime_error("invalid build quality"); if (quality != RTC_BUILD_QUALITY_LOW && quality != RTC_BUILD_QUALITY_MEDIUM && quality != RTC_BUILD_QUALITY_HIGH && - quality != RTC_BUILD_QUALITY_REFIT) { - abort(); - } + quality != RTC_BUILD_QUALITY_REFIT) + abort(); //throw std::runtime_error("invalid build quality"); geometry->setBuildQuality(quality); RTC_CATCH_END2(geometry); } @@ -1667,6 +1920,37 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte RTC_CATCH_END2(geometry); } + RTC_API void rtcSetSharedGeometryBufferHostDevice(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, const void* ptr, const void* dptr, size_t byteOffset, size_t byteStride, size_t itemCount) + { + Geometry* geometry = (Geometry*) hgeometry; + RTC_CATCH_BEGIN; + RTC_TRACE(rtcSetSharedGeometryBufferHostDevice); + RTC_VERIFY_HANDLE(hgeometry); + RTC_ENTER_DEVICE(hgeometry); + +#if defined(EMBREE_SYCL_SUPPORT) + if (geometry->device->is_gpu()) + { + if (itemCount > 0xFFFFFFFFu) + throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large"); + + if ((ptr == nullptr) || (dptr == nullptr)) + throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"host and device pointer may not be NULL pointers when using SYCL devices"); + + Ref buffer = new Buffer(geometry->device, itemCount*byteStride, (char*)ptr + byteOffset, (char*)dptr + byteOffset); + geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount); + } + else +#endif + { + if (dptr != nullptr) + throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"Embree device is no SYCL device. Device pointer argument must not be valid, pass NULL instead"); + rtcSetSharedGeometryBuffer(hgeometry, type, slot, format, ptr, byteOffset, byteStride, itemCount); + } + + RTC_CATCH_END2(geometry); + } + RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount) { Geometry* geometry = (Geometry*) hgeometry; @@ -1683,13 +1967,54 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte if (type == RTC_BUFFER_TYPE_VERTEX || type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) bytes += (16 - (byteStride%16))%16; - Ref buffer = new Buffer(geometry->device, bytes); + Ref buffer = new Buffer(geometry->device, bytes, nullptr); geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount); return buffer->data(); RTC_CATCH_END2(geometry); return nullptr; } + RTC_API void rtcSetNewGeometryBufferHostDevice(RTCGeometry hgeometry, RTCBufferType bufferType, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount, void** ptr, void** dptr) + { + Geometry* geometry = (Geometry*) hgeometry; + RTC_CATCH_BEGIN; + RTC_TRACE(rtcSetNewGeometryBufferHostDevice); + RTC_VERIFY_HANDLE(hgeometry); + RTC_ENTER_DEVICE(hgeometry); + +#if defined(EMBREE_SYCL_SUPPORT) + if (geometry->device->is_gpu()) + { + if (itemCount > 0xFFFFFFFFu) + throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large"); + + /* vertex buffers need to get overallocated slightly as elements are accessed using SSE loads */ + size_t bytes = itemCount*byteStride; + if (bufferType == RTC_BUFFER_TYPE_VERTEX || bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) + bytes += (16 - (byteStride%16))%16; + + Ref buffer = new Buffer(geometry->device, bytes, nullptr, nullptr); + geometry->setBuffer(bufferType, slot, format, buffer, 0, byteStride, (unsigned int)itemCount); + + if(ptr) + *ptr = buffer->getHostPtr(); + if (dptr) + *dptr = buffer->getDevicePtr(); + } + else +#endif + { + void* tmp = rtcSetNewGeometryBuffer(hgeometry, bufferType, slot, format, byteStride, itemCount); + if(ptr) + *ptr = tmp; + if (dptr) { + *dptr = tmp; + } + } + + RTC_CATCH_END2(geometry); + } + RTC_API void* rtcGetGeometryBufferData(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot) { Geometry* geometry = (Geometry*) hgeometry; @@ -1697,7 +2022,19 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte RTC_TRACE(rtcGetGeometryBufferData); RTC_VERIFY_HANDLE(hgeometry); RTC_ENTER_DEVICE(hgeometry); - return geometry->getBuffer(type, slot); + return geometry->getBufferData(type, slot, BufferDataPointerType::HOST); + RTC_CATCH_END2(geometry); + return nullptr; + } + + RTC_API void* rtcGetGeometryBufferDataDevice(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot) + { + Geometry* geometry = (Geometry*) hgeometry; + RTC_CATCH_BEGIN; + RTC_TRACE(rtcGetGeometryBufferDataDevice); + RTC_VERIFY_HANDLE(hgeometry); + RTC_ENTER_DEVICE(hgeometry); + return geometry->getBufferData(type, slot, BufferDataPointerType::DEVICE); RTC_CATCH_END2(geometry); return nullptr; } @@ -1713,7 +2050,7 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte RTC_CATCH_END2(geometry); } - RTC_API void rtcUpdateGeometryBuffer (RTCGeometry hgeometry, RTCBufferType type, unsigned int slot) + RTC_API void rtcUpdateGeometryBuffer (RTCGeometry hgeometry, RTCBufferType type, unsigned int slot) { Geometry* geometry = (Geometry*) hgeometry; RTC_CATCH_BEGIN; @@ -1784,6 +2121,11 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte return nullptr; } + RTC_API void* rtcGetGeometryUserDataFromTraversable (RTCTraversable htraversable, unsigned int geomID) + { + return rtcGetGeometryUserDataFromScene((RTCScene)htraversable, geomID); + } + RTC_API void rtcSetGeometryBoundsFunction (RTCGeometry hgeometry, RTCBoundsFunction bounds, void* userPtr) { Geometry* geometry = (Geometry*) hgeometry; @@ -2060,4 +2402,14 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte return nullptr; } + RTC_API const char* rtcGetErrorString(RTCError error) + { + RTC_CATCH_BEGIN; + RTC_TRACE(rtcGetErrorString); + return Device::getErrorString(error); + RTC_CATCH_END(nullptr); + return nullptr; + } + + RTC_NAMESPACE_END diff --git a/thirdparty/embree/kernels/common/scene.cpp b/thirdparty/embree/kernels/common/scene.cpp index 706cc512dfb..3bfcebd2986 100644 --- a/thirdparty/embree/kernels/common/scene.cpp +++ b/thirdparty/embree/kernels/common/scene.cpp @@ -42,10 +42,21 @@ namespace embree Scene::Scene (Device* device) : device(device), + scene_device(nullptr), flags_modified(true), enabled_geometry_types(0), scene_flags(RTC_SCENE_FLAG_NONE), quality_flags(RTC_BUILD_QUALITY_MEDIUM), modified(true), + maxTimeSegments(0), +#if defined(EMBREE_SYCL_SUPPORT) + geometries_device(nullptr), + geometry_data_device(nullptr), + num_geometries(0), + geometry_data_byte_size(0), + offsets(nullptr), + geometries_host(nullptr), + geometry_data_host(nullptr), +#endif taskGroup(new TaskGroup()), progressInterface(this), progress_monitor_function(nullptr), progress_monitor_ptr(nullptr), progress_monitor_counter(0) { @@ -55,8 +66,8 @@ namespace embree /* use proper device and context for SYCL allocations */ #if defined(EMBREE_SYCL_SUPPORT) - if (DeviceGPU* gpu_device = dynamic_cast(device)) - hwaccel = AccelBuffer(AccelAllocator(device,gpu_device->getGPUDevice(),gpu_device->getGPUContext()),0); + if (dynamic_cast(device)) + accelBuffer = AccelBuffer(device); #endif /* one can overwrite flags through device for debugging */ @@ -68,6 +79,27 @@ namespace embree Scene::~Scene() noexcept { +#if defined(EMBREE_SYCL_SUPPORT) + if (geometry_data_device) { + device->free(geometry_data_device); + } + if (geometries_device) { + device->free(geometries_device); + } + if (scene_device) { + device->free(scene_device); + } + if (offsets) { + device->free(offsets); + } + if (geometries_host) { + device->free(geometries_host); + } + if (geometry_data_host) { + device->free(geometry_data_host); + } +#endif + device->refDec(); } @@ -788,9 +820,8 @@ namespace embree void Scene::build_gpu_accels() { #if defined(EMBREE_SYCL_SUPPORT) - const BBox3f aabb = rthwifBuild(this,hwaccel); - bounds = LBBox(aabb); - hwaccel_bounds = aabb; + accelBuffer.build(this); + bounds = LBBox(accelBuffer.getBounds()); #endif } @@ -799,6 +830,7 @@ namespace embree checkIfModifiedAndSet(); if (!isModified()) return; + /* print scene statistics */ if (device->verbosity(2)) printStatistics(); @@ -825,8 +857,18 @@ namespace embree std::plus() ); + /* calculate maximal number of motion blur time segments in scene */ + maxTimeSegments = 1; + for (size_t geomID=0; geomIDnumTimeSegments()); + } + #if defined(EMBREE_SYCL_SUPPORT) - if (DeviceGPU* gpu_device = dynamic_cast(device)) + DeviceGPU* gpu_device = dynamic_cast(device); + if (gpu_device) build_gpu_accels(); else #endif @@ -865,10 +907,36 @@ namespace embree RTCSceneFlags Scene::getSceneFlags() const { return scene_flags; } - + +#if defined(EMBREE_SYCL_SUPPORT) + sycl::event Scene::commit (bool join, sycl::queue queue) + { + commit_internal(join); + return syncWithDevice(queue); + } +#endif + + void Scene::commit (bool join) + { + commit_internal(join); + +#if defined(EMBREE_SYCL_SUPPORT) + syncWithDevice(); +#endif + } + + Scene* Scene::getTraversable() { +#if defined(EMBREE_SYCL_SUPPORT) + if(device->is_gpu()) { + return scene_device; + } +#endif + return this; + } + #if defined(TASKING_INTERNAL) - void Scene::commit (bool join) + void Scene::commit_internal (bool join) { Lock buildLock(buildMutex,false); @@ -910,7 +978,7 @@ namespace embree #if defined(TASKING_TBB) - void Scene::commit (bool join) + void Scene::commit_internal (bool join) { #if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8) if (join) @@ -974,7 +1042,7 @@ namespace embree #if defined(TASKING_PPL) - void Scene::commit (bool join) + void Scene::commit_internal (bool join) { #if defined(TASKING_PPL) if (join) @@ -1011,6 +1079,7 @@ namespace embree accels_clear(); throw; } + } #endif @@ -1029,4 +1098,5 @@ namespace embree } } } + } diff --git a/thirdparty/embree/kernels/common/scene.h b/thirdparty/embree/kernels/common/scene.h index d9acca1065d..9ac1dd5eebd 100644 --- a/thirdparty/embree/kernels/common/scene.h +++ b/thirdparty/embree/kernels/common/scene.h @@ -25,15 +25,18 @@ #include "../sycl/rthwif_embree_builder.h" #endif +#if !defined(EMBREE_SYCL_SUPPORT) +namespace sycl { + struct queue; +} +#endif namespace embree { struct TaskGroup; /*! Base class all scenes are derived from */ - class Scene : public AccelN + class __aligned(16) Scene : public AccelN { - ALIGNED_CLASS_USM_(std::alignment_of::value); - public: template class Iterator @@ -59,10 +62,6 @@ namespace embree return at(i); } - __forceinline size_t size() const { - return scene->size(); - } - __forceinline size_t numPrimitives() const { return scene->getNumPrimitives(Ty::geom_type,mblur); } @@ -99,6 +98,11 @@ namespace embree } return ret; } + + __forceinline size_t size() const { + return scene->size(); + } + private: Scene* scene; @@ -188,12 +192,22 @@ namespace embree void build_cpu_accels(); void build_gpu_accels(); + void commit_internal (bool join); +#if defined(EMBREE_SYCL_SUPPORT) + sycl::event commit (bool join, sycl::queue queue); +#endif void commit (bool join); void commit_task (); void build () {} + Scene* getTraversable(); + /* return number of geometries */ +#if defined(__SYCL_DEVICE_ONLY__) + __forceinline size_t size() const { return num_geometries; } +#else __forceinline size_t size() const { return geometries.size(); } +#endif /* bind geometry to the scene */ unsigned int bind (unsigned geomID, Ref geometry); @@ -206,12 +220,18 @@ namespace embree modified = f; } + __forceinline bool hasMotionBlur() const { return maxTimeSegments > 1; }; + + __forceinline uint32_t getMaxTimeSegments() const { return maxTimeSegments; }; + + #if !defined(__SYCL_DEVICE_ONLY__) __forceinline bool isGeometryModified(size_t geomID) { Ref& g = geometries[geomID]; if (!g) return false; return g->getModCounter() > geometryModCounters_[geomID]; } + #endif protected: @@ -219,6 +239,27 @@ namespace embree public: +#if defined(__SYCL_DEVICE_ONLY__) + /* get mesh by ID */ + __forceinline Geometry* get(size_t i) { return geometries_device[i]; } + __forceinline const Geometry* get(size_t i) const { return geometries_device[i]; } + + template + __forceinline Mesh* get(size_t i) { + return (Mesh*)geometries_device[i]; + } + template + __forceinline const Mesh* get(size_t i) const { + return (Mesh*)geometries_device[i]; + } + + template + __forceinline Mesh* getSafe(size_t i) { + if (geometries_device[i] == nullptr) return nullptr; + if (!(geometries_device[i]->getTypeMask() & Mesh::geom_type)) return nullptr; + else return (Mesh*) geometries_device[i]; + } +#else /* get mesh by ID */ __forceinline Geometry* get(size_t i) { assert(i < geometries.size()); return geometries[i].ptr; } __forceinline const Geometry* get(size_t i) const { assert(i < geometries.size()); return geometries[i].ptr; } @@ -243,12 +284,16 @@ namespace embree if (!(geometries[i]->getTypeMask() & Mesh::geom_type)) return nullptr; else return (Mesh*) geometries[i].ptr; } +#endif + + #if !defined(__SYCL_DEVICE_ONLY__) __forceinline Ref get_locked(size_t i) { Lock lock(geometriesMutex); assert(i < geometries.size()); return geometries[i]; } + #endif /* flag decoding */ __forceinline bool isFastAccel() const { return !isCompactAccel() && !isRobustAccel(); } @@ -270,9 +315,16 @@ namespace embree } void* createQBVH6Accel(); + +#if defined(EMBREE_SYCL_SUPPORT) + private: + void syncWithDevice(); + sycl::event syncWithDevice(sycl::queue queue); +#endif public: Device* device; + Scene* scene_device; public: IDPool id_pool; @@ -292,12 +344,26 @@ namespace embree #if defined(EMBREE_SYCL_SUPPORT) public: - BBox3f hwaccel_bounds = empty; - AccelBuffer hwaccel; + AccelBuffer accelBuffer; #endif private: - bool modified; //!< true if scene got modified + bool modified; //!< true if scene got modified + uint32_t maxTimeSegments; //!< maximal number of motion blur time segments in scene + +#if defined(EMBREE_SYCL_SUPPORT) + Geometry** geometries_device; //!< list of all geometries on device + char* geometry_data_device; //!< data buffer of all geometries on device + size_t num_geometries; + size_t geometry_data_byte_size; + + // host buffers used for creating representation of scene/geometry for device + // will be freed after scene commit if the scene is static, otherwise the + // buffers will stay for quicker rebuild. + size_t *offsets; + Geometry **geometries_host; + char *geometry_data_host; +#endif public: diff --git a/thirdparty/embree/kernels/common/scene_curves.h b/thirdparty/embree/kernels/common/scene_curves.h index fd6ed81d7de..2b74b320b53 100644 --- a/thirdparty/embree/kernels/common/scene_curves.h +++ b/thirdparty/embree/kernels/common/scene_curves.h @@ -31,13 +31,15 @@ namespace embree void setNumTimeSteps (unsigned int numTimeSteps); void setVertexAttributeCount (unsigned int N); void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); + void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType); void updateBuffer(RTCBufferType type, unsigned int slot); void commit(); bool verify(); void setTessellationRate(float N); void setMaxRadiusScale(float s); void addElementsToCount (GeometryCounts & counts) const; + size_t getGeometryDataDeviceByteSize() const; + void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const; public: diff --git a/thirdparty/embree/kernels/common/scene_grid_mesh.h b/thirdparty/embree/kernels/common/scene_grid_mesh.h index eb2048b2865..c4fbab8930a 100644 --- a/thirdparty/embree/kernels/common/scene_grid_mesh.h +++ b/thirdparty/embree/kernels/common/scene_grid_mesh.h @@ -50,12 +50,19 @@ namespace embree void setNumTimeSteps (unsigned int numTimeSteps); void setVertexAttributeCount (unsigned int N); void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); + void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType); void updateBuffer(RTCBufferType type, unsigned int slot); void commit(); bool verify(); void interpolate(const RTCInterpolateArguments* const args); +#if defined(EMBREE_SYCL_SUPPORT) + + size_t getGeometryDataDeviceByteSize() const; + void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const; + +#endif + template void interpolate_impl(const RTCInterpolateArguments* const args) { diff --git a/thirdparty/embree/kernels/common/scene_instance.h b/thirdparty/embree/kernels/common/scene_instance.h index 1176018777a..7d3633d7bd3 100644 --- a/thirdparty/embree/kernels/common/scene_instance.h +++ b/thirdparty/embree/kernels/common/scene_instance.h @@ -13,7 +13,6 @@ namespace embree /*! Instanced acceleration structure */ struct Instance : public Geometry { - //ALIGNED_STRUCT_(16); static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE; public: @@ -55,6 +54,8 @@ namespace embree virtual void build() {} virtual void addElementsToCount (GeometryCounts & counts) const override; virtual void commit() override; + virtual size_t getGeometryDataDeviceByteSize() const override; + virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override; public: diff --git a/thirdparty/embree/kernels/common/scene_instance_array.h b/thirdparty/embree/kernels/common/scene_instance_array.h index 3cf4d68feb6..f3caa06e87a 100644 --- a/thirdparty/embree/kernels/common/scene_instance_array.h +++ b/thirdparty/embree/kernels/common/scene_instance_array.h @@ -13,7 +13,6 @@ namespace embree /*! Instanced acceleration structure */ struct InstanceArray : public Geometry { - //ALIGNED_STRUCT_(16); static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE_ARRAY; public: @@ -48,7 +47,7 @@ namespace embree public: virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref& buffer, size_t offset, size_t stride, unsigned int num) override; - virtual void* getBuffer(RTCBufferType type, unsigned int slot) override; + virtual void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType) override; virtual void updateBuffer(RTCBufferType type, unsigned int slot) override; virtual void setNumTimeSteps (unsigned int numTimeSteps) override; @@ -59,6 +58,8 @@ namespace embree virtual void build() {} virtual void addElementsToCount (GeometryCounts & counts) const override; virtual void commit() override; + size_t getGeometryDataDeviceByteSize() const override; + void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override; public: diff --git a/thirdparty/embree/kernels/common/scene_line_segments.h b/thirdparty/embree/kernels/common/scene_line_segments.h index e58fd1b7eb4..065d68c2c43 100644 --- a/thirdparty/embree/kernels/common/scene_line_segments.h +++ b/thirdparty/embree/kernels/common/scene_line_segments.h @@ -25,7 +25,7 @@ namespace embree void setNumTimeSteps (unsigned int numTimeSteps); void setVertexAttributeCount (unsigned int N); void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); + void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType); void updateBuffer(RTCBufferType type, unsigned int slot); void commit(); bool verify (); @@ -33,6 +33,8 @@ namespace embree void setTessellationRate(float N); void setMaxRadiusScale(float s); void addElementsToCount (GeometryCounts & counts) const; + size_t getGeometryDataDeviceByteSize() const; + void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const; template void interpolate_impl(const RTCInterpolateArguments* const args) diff --git a/thirdparty/embree/kernels/common/scene_points.h b/thirdparty/embree/kernels/common/scene_points.h index 937a8f1806f..ea24277bc97 100644 --- a/thirdparty/embree/kernels/common/scene_points.h +++ b/thirdparty/embree/kernels/common/scene_points.h @@ -30,12 +30,14 @@ namespace embree size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); + void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType); void updateBuffer(RTCBufferType type, unsigned int slot); void commit(); bool verify(); void setMaxRadiusScale(float s); void addElementsToCount (GeometryCounts & counts) const; + size_t getGeometryDataDeviceByteSize() const; + void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const; public: /*! returns the number of vertices */ diff --git a/thirdparty/embree/kernels/common/scene_quad_mesh.h b/thirdparty/embree/kernels/common/scene_quad_mesh.h index 09a8b8ddd95..646b08c1abd 100644 --- a/thirdparty/embree/kernels/common/scene_quad_mesh.h +++ b/thirdparty/embree/kernels/common/scene_quad_mesh.h @@ -42,12 +42,14 @@ namespace embree void setNumTimeSteps (unsigned int numTimeSteps); void setVertexAttributeCount (unsigned int N); void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); + void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType); void updateBuffer(RTCBufferType type, unsigned int slot); void commit(); bool verify(); void interpolate(const RTCInterpolateArguments* const args); void addElementsToCount (GeometryCounts & counts) const; + size_t getGeometryDataDeviceByteSize() const; + void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const; template void interpolate_impl(const RTCInterpolateArguments* const args) diff --git a/thirdparty/embree/kernels/common/scene_subdiv_mesh.h b/thirdparty/embree/kernels/common/scene_subdiv_mesh.h index b213a9b7baf..49bb8e769a6 100644 --- a/thirdparty/embree/kernels/common/scene_subdiv_mesh.h +++ b/thirdparty/embree/kernels/common/scene_subdiv_mesh.h @@ -61,7 +61,7 @@ namespace embree void setVertexAttributeCount (unsigned int N); void setTopologyCount (unsigned int N); void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); + void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType); void updateBuffer(RTCBufferType type, unsigned int slot); void setTessellationRate(float N); bool verify(); diff --git a/thirdparty/embree/kernels/common/scene_triangle_mesh.cpp b/thirdparty/embree/kernels/common/scene_triangle_mesh.cpp index 6cdd542a65d..8b6e7dce054 100644 --- a/thirdparty/embree/kernels/common/scene_triangle_mesh.cpp +++ b/thirdparty/embree/kernels/common/scene_triangle_mesh.cpp @@ -35,7 +35,7 @@ namespace embree void TriangleMesh::setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref& buffer, size_t offset, size_t stride, unsigned int num) { /* verify that all accesses are 4 bytes aligned */ - if (((size_t(buffer->getPtr()) + offset) & 0x3) || (stride & 0x3)) + if (((size_t(buffer->getHostPtr()) + offset) & 0x3) || (stride & 0x3)) throw_RTCError(RTC_ERROR_INVALID_OPERATION, "data must be 4 bytes aligned"); if (type == RTC_BUFFER_TYPE_VERTEX) @@ -79,25 +79,25 @@ namespace embree throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type"); } - void* TriangleMesh::getBuffer(RTCBufferType type, unsigned int slot) + void* TriangleMesh::getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType) { if (type == RTC_BUFFER_TYPE_INDEX) { if (slot != 0) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot"); - return triangles.getPtr(); + return triangles.getPtr(pointerType); } else if (type == RTC_BUFFER_TYPE_VERTEX) { if (slot >= vertices.size()) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot"); - return vertices[slot].getPtr(); + return vertices[slot].getPtr(pointerType); } else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE) { if (slot >= vertexAttribs.size()) throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot"); - return vertexAttribs[slot].getPtr(); + return vertexAttribs[slot].getPtr(pointerType); } else { @@ -137,10 +137,12 @@ namespace embree void TriangleMesh::commit() { /* verify that stride of all time steps are identical */ - for (unsigned int t=0; tcommitIfNeeded(); + } + if (triangles) triangles.buffer->commitIfNeeded(); Geometry::commit(); } @@ -182,7 +184,29 @@ namespace embree void TriangleMesh::interpolate(const RTCInterpolateArguments* const args) { interpolate_impl<4>(args); } - + + size_t TriangleMesh::getGeometryDataDeviceByteSize() const { + size_t byte_size = sizeof(TriangleMesh); + byte_size += numTimeSteps * sizeof(BufferView); + return 16 * ((byte_size + 15) / 16); + } + + void TriangleMesh::convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const { + TriangleMesh* mesh = (TriangleMesh*)(data_host + offset); + std::memcpy(data_host + offset, (void*)this, sizeof(TriangleMesh)); + offset += sizeof(TriangleMesh); + + // store offset for overriding vertices pointer with device pointer after copying + const size_t offsetVertices = offset; + // copy vertices BufferViews for each time step + for (size_t t = 0; t < numTimeSteps; ++t) { + std::memcpy(data_host + offset, &(vertices[t]), sizeof(BufferView)); + offset += sizeof(BufferView); + } + // override vertices pointer with device ptr + mesh->vertices.setDataPtr((BufferView*)(data_device + offsetVertices)); + } + #endif namespace isa diff --git a/thirdparty/embree/kernels/common/scene_triangle_mesh.h b/thirdparty/embree/kernels/common/scene_triangle_mesh.h index 0d28219b966..3f014d85a25 100644 --- a/thirdparty/embree/kernels/common/scene_triangle_mesh.h +++ b/thirdparty/embree/kernels/common/scene_triangle_mesh.h @@ -32,16 +32,18 @@ namespace embree /* geometry interface */ public: - void setMask(unsigned mask); - void setNumTimeSteps (unsigned int numTimeSteps); - void setVertexAttributeCount (unsigned int N); - void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref& buffer, size_t offset, size_t stride, unsigned int num); - void* getBuffer(RTCBufferType type, unsigned int slot); - void updateBuffer(RTCBufferType type, unsigned int slot); - void commit(); - bool verify(); - void interpolate(const RTCInterpolateArguments* const args); - void addElementsToCount (GeometryCounts & counts) const; + virtual void setMask(unsigned mask) override; + virtual void setNumTimeSteps (unsigned int numTimeSteps) override; + virtual void setVertexAttributeCount (unsigned int N) override; + virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref& buffer, size_t offset, size_t stride, unsigned int num) override; + virtual void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType) override; + virtual void updateBuffer(RTCBufferType type, unsigned int slot) override; + virtual void commit() override; + virtual bool verify() override; + virtual void interpolate(const RTCInterpolateArguments* const args) override; + virtual void addElementsToCount (GeometryCounts & counts) const override; + virtual size_t getGeometryDataDeviceByteSize() const override; + virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override; template void interpolate_impl(const RTCInterpolateArguments* const args) @@ -98,12 +100,12 @@ namespace embree } public: - + /*! returns number of vertices */ __forceinline size_t numVertices() const { return vertices[0].size(); } - + /*! returns i'th triangle*/ __forceinline const Triangle& triangle(size_t i) const { return triangles[i]; @@ -246,7 +248,7 @@ namespace embree } /*! get fast access to first vertex buffer */ - __forceinline float * getCompactVertexArray () const { + __forceinline float * getCompactVertexArray () const override { return (float*) vertices0.getPtr(); } @@ -283,6 +285,7 @@ namespace embree TriangleMeshISA (Device* device) : TriangleMesh(device) {} +#if !defined(__SYCL_DEVICE_ONLY__) LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const { return linearBounds(primID,time_range); } @@ -344,6 +347,7 @@ namespace embree } return pinfo; } +#endif }; } diff --git a/thirdparty/embree/kernels/common/scene_user_geometry.h b/thirdparty/embree/kernels/common/scene_user_geometry.h index 033476f658d..b1376cdf7f3 100644 --- a/thirdparty/embree/kernels/common/scene_user_geometry.h +++ b/thirdparty/embree/kernels/common/scene_user_geometry.h @@ -15,12 +15,14 @@ namespace embree public: UserGeometry (Device* device, unsigned int items = 0, unsigned int numTimeSteps = 1); - virtual void setMask (unsigned mask); - virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr); - virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect); - virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded); - virtual void build() {} - virtual void addElementsToCount (GeometryCounts & counts) const; + virtual void setMask (unsigned mask) override; + virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr) override; + virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect) override; + virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded) override; + virtual void build() override {} + virtual void addElementsToCount (GeometryCounts & counts) const override; + virtual size_t getGeometryDataDeviceByteSize() const override; + virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override; __forceinline float projectedPrimitiveArea(const size_t i) const { return 0.0f; } }; diff --git a/thirdparty/embree/kernels/common/state.cpp b/thirdparty/embree/kernels/common/state.cpp index 8e83c95bd7e..509dbc7120f 100644 --- a/thirdparty/embree/kernels/common/state.cpp +++ b/thirdparty/embree/kernels/common/state.cpp @@ -16,19 +16,22 @@ namespace embree State::ErrorHandler::~ErrorHandler() { Lock lock(errors_mutex); - for (size_t i=0; i lock(errors_mutex); - stored_error = new RTCError(RTC_ERROR_NONE); + stored_error = new RTCErrorMessage(RTC_ERROR_NONE, ""); thread_errors.push_back(stored_error); setTls(thread_error,stored_error); return stored_error; @@ -84,6 +87,8 @@ namespace embree max_spatial_split_replications = 1.2f; useSpatialPreSplits = false; + max_triangles_per_leaf = inf; + tessellation_cache_size = 128*1024*1024; subdiv_accel = "default"; @@ -428,6 +433,9 @@ namespace embree else if (tok == Token::Id("max_spatial_split_replications") && cin->trySymbol("=")) max_spatial_split_replications = cin->get().Float(); + else if (tok == Token::Id("max_triangles_per_leaf") && cin->trySymbol("=")) + max_triangles_per_leaf = cin->get().Float(); + else if (tok == Token::Id("presplits") && cin->trySymbol("=")) useSpatialPreSplits = cin->get().Int() != 0 ? true : false; diff --git a/thirdparty/embree/kernels/common/state.h b/thirdparty/embree/kernels/common/state.h index 8c346141858..2617a783c28 100644 --- a/thirdparty/embree/kernels/common/state.h +++ b/thirdparty/embree/kernels/common/state.h @@ -9,6 +9,14 @@ namespace embree { /* mutex to make printing to cout thread safe */ extern MutexSys g_printMutex; + struct RTCErrorMessage + { + RTCErrorMessage(RTCError error, std::string const& msg) + : error(error), msg(msg) {} + + RTCError error; + std::string msg; + }; struct State : public RefCount { @@ -109,6 +117,7 @@ namespace embree float max_spatial_split_replications; //!< maximally replications*N many primitives in accel for spatial splits bool useSpatialPreSplits; //!< use spatial pre-splits instead of the full spatial split builder size_t tessellation_cache_size; //!< size of the shared tessellation cache + size_t max_triangles_per_leaf; public: size_t instancing_open_min; //!< instancing opens tree to minimally that number of subtrees @@ -163,11 +172,11 @@ namespace embree public: ErrorHandler(); ~ErrorHandler(); - RTCError* error(); + RTCErrorMessage* error(); public: tls_t thread_error; - std::vector thread_errors; + std::vector thread_errors; MutexSys errors_mutex; }; ErrorHandler errorHandler; diff --git a/thirdparty/embree/kernels/hash.h b/thirdparty/embree/kernels/hash.h index ec9759ee352..51abaa1eeb3 100644 --- a/thirdparty/embree/kernels/hash.h +++ b/thirdparty/embree/kernels/hash.h @@ -1,4 +1,4 @@ // Copyright 2009-2021 Intel Corporation // SPDX-License-Identifier: Apache-2.0 -#define RTC_HASH "daa8de0e714e18ad5e5c9841b67c1950d9c91c51" +#define RTC_HASH "ff9381774dc99fea81a932ad276677aad6a3d4dd" diff --git a/thirdparty/embree/patches/0001-disable-exceptions.patch b/thirdparty/embree/patches/0001-disable-exceptions.patch index df6ad86da8e..b2285f33679 100644 --- a/thirdparty/embree/patches/0001-disable-exceptions.patch +++ b/thirdparty/embree/patches/0001-disable-exceptions.patch @@ -23,216 +23,131 @@ index b52b1e2e13..fbff38f660 100644 #endif #else // TASKING_PPL diff --git a/thirdparty/embree/common/lexers/stringstream.cpp b/thirdparty/embree/common/lexers/stringstream.cpp -index 42ffb10176..fa4266d0b9 100644 +index 42ffb10176..9779fc74c7 100644 --- a/thirdparty/embree/common/lexers/stringstream.cpp +++ b/thirdparty/embree/common/lexers/stringstream.cpp -@@ -39,7 +39,10 @@ namespace embree +@@ -39,7 +39,7 @@ namespace embree std::vector str; str.reserve(64); while (cin->peek() != EOF && !isSeparator(cin->peek())) { int c = cin->get(); - if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input"); -+ //if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input"); -+ if (!isValidChar(c)) { -+ abort(); -+ } ++ if (!isValidChar(c)) abort(); //throw std::runtime_error("invalid character "+std::string(1,c)+" in input"); str.push_back((char)c); } str.push_back(0); diff --git a/thirdparty/embree/common/sys/alloc.cpp b/thirdparty/embree/common/sys/alloc.cpp -index de225fafc6..8e83646031 100644 +index c92bb99ece..2288df76ef 100644 --- a/thirdparty/embree/common/sys/alloc.cpp +++ b/thirdparty/embree/common/sys/alloc.cpp -@@ -24,16 +24,28 @@ namespace embree - - void enableUSMAllocEmbree(sycl::context* context, sycl::device* device) - { -- if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled"); -- if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled"); -+ //if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled"); -+ //if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled"); -+ if (tls_context_embree != nullptr) { -+ abort(); -+ } -+ if (tls_device_embree != nullptr) { -+ abort(); -+ } - tls_context_embree = context; - tls_device_embree = device; - } - - void disableUSMAllocEmbree() - { -- if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled"); -- if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled"); -+ //if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled"); -+ //if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled"); -+ if (tls_context_embree == nullptr) { -+ abort(); -+ } -+ if (tls_device_embree == nullptr) { -+ abort(); -+ } - tls_context_embree = nullptr; - tls_device_embree = nullptr; - } -@@ -48,8 +60,14 @@ namespace embree - - void disableUSMAllocTutorial() - { -- if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled"); -- if (tls_device_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled"); -+ //if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled"); -+ //if (tls_device_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled"); -+ if (tls_context_tutorial == nullptr) { -+ abort(); -+ } -+ if (tls_device_tutorial == nullptr) { -+ abort(); -+ } - - tls_context_tutorial = nullptr; - tls_device_tutorial = nullptr; -@@ -64,8 +82,11 @@ namespace embree - +@@ -20,7 +20,7 @@ namespace embree assert((align & (align-1)) == 0); void* ptr = _mm_malloc(size,align); -- if (size != 0 && ptr == nullptr) + if (size != 0 && ptr == nullptr) - throw std::bad_alloc(); -+ //if (size != 0 && ptr == nullptr) -+ // throw std::bad_alloc(); -+ if (size != 0 && ptr == nullptr) { -+ abort(); -+ } ++ abort(); //throw std::bad_alloc(); return ptr; } -@@ -94,8 +115,11 @@ namespace embree - else +@@ -50,7 +50,7 @@ namespace embree ptr = sycl::aligned_alloc_shared(align,size,*device,*context); - -- if (size != 0 && ptr == nullptr) + + if (size != 0 && ptr == nullptr) - throw std::bad_alloc(); -+ //if (size != 0 && ptr == nullptr) -+ // throw std::bad_alloc(); -+ if (size != 0 && ptr == nullptr) { -+ abort(); -+ } ++ abort(); //throw std::bad_alloc(); return ptr; } -@@ -241,7 +265,10 @@ namespace embree +@@ -83,7 +83,7 @@ namespace embree + } + + if (size != 0 && ptr == nullptr) +- throw std::bad_alloc(); ++ abort(); //throw std::bad_alloc(); + + return ptr; + } +@@ -199,7 +199,7 @@ namespace embree /* fall back to 4k pages */ int flags = MEM_COMMIT | MEM_RESERVE; char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE); - if (ptr == nullptr) throw std::bad_alloc(); -+ //if (ptr == nullptr) throw std::bad_alloc(); -+ if (ptr == nullptr) { -+ abort(); -+ } ++ if (ptr == nullptr) abort(); //throw std::bad_alloc(); hugepages = false; return ptr; } -@@ -257,8 +284,11 @@ namespace embree - if (bytesNew >= bytesOld) +@@ -216,7 +216,7 @@ namespace embree return bytesOld; -- if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) + if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) - throw std::bad_alloc(); -+ //if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) -+ // throw std::bad_alloc(); -+ if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) { -+ abort(); -+ } ++ abort(); //throw std::bad_alloc(); return bytesNew; } -@@ -268,8 +298,11 @@ namespace embree - if (bytes == 0) +@@ -227,7 +227,7 @@ namespace embree return; -- if (!VirtualFree(ptr,0,MEM_RELEASE)) + if (!VirtualFree(ptr,0,MEM_RELEASE)) - throw std::bad_alloc(); -+ //if (!VirtualFree(ptr,0,MEM_RELEASE)) -+ // throw std::bad_alloc(); -+ if (!VirtualFree(ptr,0,MEM_RELEASE)) { -+ abort(); -+ } ++ abort(); //throw std::bad_alloc(); } void os_advise(void *ptr, size_t bytes) -@@ -373,7 +406,10 @@ namespace embree +@@ -331,7 +331,7 @@ namespace embree /* fallback to 4k pages */ void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); - if (ptr == MAP_FAILED) throw std::bad_alloc(); -+ //if (ptr == MAP_FAILED) throw std::bad_alloc(); -+ if (ptr == MAP_FAILED) { -+ abort(); -+ } ++ if (ptr == MAP_FAILED) abort(); //throw std::bad_alloc(); hugepages = false; /* advise huge page hint for THP */ -@@ -389,8 +425,11 @@ namespace embree - if (bytesNew >= bytesOld) +@@ -348,7 +348,7 @@ namespace embree return bytesOld; -- if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) + if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) - throw std::bad_alloc(); -+ //if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) -+ // throw std::bad_alloc(); -+ if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) { -+ abort(); -+ } ++ abort(); //throw std::bad_alloc(); return bytesNew; } -@@ -403,8 +442,11 @@ namespace embree - /* for hugepages we need to also align the size */ +@@ -362,7 +362,7 @@ namespace embree const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K; bytes = (bytes+pageSize-1) & ~(pageSize-1); -- if (munmap(ptr,bytes) == -1) + if (munmap(ptr,bytes) == -1) - throw std::bad_alloc(); -+ //if (munmap(ptr,bytes) == -1) -+ // throw std::bad_alloc(); -+ if (munmap(ptr,bytes) == -1) { -+ abort(); -+ } ++ abort(); //throw std::bad_alloc(); } /* hint for transparent huge pages (THP) */ diff --git a/thirdparty/embree/common/sys/alloc.h b/thirdparty/embree/common/sys/alloc.h -index e19c2c221a..e2c942049a 100644 +index 5c63d0bfaf..1f6f230ed3 100644 --- a/thirdparty/embree/common/sys/alloc.h +++ b/thirdparty/embree/common/sys/alloc.h -@@ -160,7 +160,8 @@ namespace embree +@@ -131,7 +131,7 @@ namespace embree typedef std::ptrdiff_t difference_type; __forceinline pointer allocate( size_type n ) { - throw std::runtime_error("no allocation supported"); -+ //throw std::runtime_error("no allocation supported"); -+ abort(); ++ abort(); //throw std::runtime_error("no allocation supported"); } __forceinline void deallocate( pointer p, size_type n ) { diff --git a/thirdparty/embree/common/sys/platform.h b/thirdparty/embree/common/sys/platform.h -index 6dc0cf3318..9f08cd1516 100644 +index 6dc0cf3318..1e5b02550e 100644 --- a/thirdparty/embree/common/sys/platform.h +++ b/thirdparty/embree/common/sys/platform.h -@@ -213,11 +213,15 @@ - #define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl +@@ -214,10 +214,11 @@ #if defined(DEBUG) // only report file and line in debug mode -+ //#define THROW_RUNTIME_ERROR(str) \ -+ // throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #define THROW_RUNTIME_ERROR(str) \ - throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); + printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort(); ++ //throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str)); #else -+ //#define THROW_RUNTIME_ERROR(str) \ -+ // throw std::runtime_error(str); #define THROW_RUNTIME_ERROR(str) \ - throw std::runtime_error(str); -+ abort(); ++ abort(); //throw std::runtime_error(str); #endif #define FATAL(x) THROW_RUNTIME_ERROR(x) @@ -261,122 +176,81 @@ index 83ead95122..e89ae04f8b 100644 add_dependencies(-1); } diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.h b/thirdparty/embree/common/tasking/taskschedulerinternal.h -index b01bebf7c3..4a04323b80 100644 +index b01bebf7c3..d4e0c7386b 100644 --- a/thirdparty/embree/common/tasking/taskschedulerinternal.h +++ b/thirdparty/embree/common/tasking/taskschedulerinternal.h -@@ -130,8 +130,11 @@ namespace embree - __forceinline void* alloc(size_t bytes, size_t align = 64) +@@ -131,7 +131,7 @@ namespace embree { size_t ofs = bytes + ((align - stackPtr) & (align-1)); -- if (stackPtr + ofs > CLOSURE_STACK_SIZE) + if (stackPtr + ofs > CLOSURE_STACK_SIZE) - throw std::runtime_error("closure stack overflow"); -+ //if (stackPtr + ofs > CLOSURE_STACK_SIZE) -+ // throw std::runtime_error("closure stack overflow"); -+ if (stackPtr + ofs > CLOSURE_STACK_SIZE) { -+ abort(); -+ } ++ abort(); //throw std::runtime_error("closure stack overflow"); stackPtr += ofs; return &stack[stackPtr-bytes]; } -@@ -139,8 +142,11 @@ namespace embree - template +@@ -140,7 +140,7 @@ namespace embree __forceinline void push_right(Thread& thread, const size_t size, const Closure& closure, TaskGroupContext* context) { -- if (right >= TASK_STACK_SIZE) + if (right >= TASK_STACK_SIZE) - throw std::runtime_error("task stack overflow"); -+ //if (right >= TASK_STACK_SIZE) -+ // throw std::runtime_error("task stack overflow"); -+ if (right >= TASK_STACK_SIZE) { -+ abort(); -+ } ++ abort(); //throw std::runtime_error("task stack overflow"); /* allocate new task on right side of stack */ size_t oldStackPtr = stackPtr; diff --git a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp -index 40f9043736..7ea9736c5c 100644 +index 40f9043736..f3b93e5925 100644 --- a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp +++ b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp -@@ -150,7 +150,8 @@ namespace embree +@@ -150,7 +150,7 @@ namespace embree } } else { - throw std::runtime_error("not supported node type in bvh_statistics"); -+ //throw std::runtime_error("not supported node type in bvh_statistics"); -+ abort(); ++ abort(); //throw std::runtime_error("not supported node type in bvh_statistics"); } return s; } diff --git a/thirdparty/embree/kernels/common/alloc.h b/thirdparty/embree/kernels/common/alloc.h -index 2bd292de4d..8ac22e53ec 100644 +index 2bd292de4d..10f629a244 100644 --- a/thirdparty/embree/kernels/common/alloc.h +++ b/thirdparty/embree/kernels/common/alloc.h -@@ -189,8 +189,11 @@ namespace embree - , atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC) +@@ -190,7 +190,7 @@ namespace embree , primrefarray(device,0) { -- if (osAllocation && useUSM) + if (osAllocation && useUSM) - throw std::runtime_error("USM allocation cannot be combined with OS allocation."); -+ //if (osAllocation && useUSM) -+ // throw std::runtime_error("USM allocation cannot be combined with OS allocation."); -+ if (osAllocation && useUSM) { -+ abort(); -+ } ++ abort(); //throw std::runtime_error("USM allocation cannot be combined with OS allocation."); for (size_t i=0; imalloc(device,bytes,align,partial); -- if (ptr == nullptr && !blockAllocation) + if (ptr == nullptr && !blockAllocation) - throw std::bad_alloc(); -+ //if (ptr == nullptr && !blockAllocation) -+ // throw std::bad_alloc(); -+ if (ptr == nullptr && !blockAllocation) { -+ abort(); -+ } ++ abort(); //throw std::bad_alloc(); if (ptr) return ptr; } diff --git a/thirdparty/embree/kernels/common/rtcore.cpp b/thirdparty/embree/kernels/common/rtcore.cpp -index 8dc5d7045b..e19c243bf6 100644 +index 8da5c9d86c..a34eb2a0c4 100644 --- a/thirdparty/embree/kernels/common/rtcore.cpp +++ b/thirdparty/embree/kernels/common/rtcore.cpp -@@ -257,10 +257,15 @@ RTC_NAMESPACE_BEGIN; - RTC_TRACE(rtcSetSceneBuildQuality); - RTC_VERIFY_HANDLE(hscene); - RTC_ENTER_DEVICE(hscene); -+ //if (quality != RTC_BUILD_QUALITY_LOW && -+ // quality != RTC_BUILD_QUALITY_MEDIUM && -+ // quality != RTC_BUILD_QUALITY_HIGH) -+ // throw std::runtime_error("invalid build quality"); +@@ -358,7 +358,7 @@ RTC_NAMESPACE_BEGIN; if (quality != RTC_BUILD_QUALITY_LOW && quality != RTC_BUILD_QUALITY_MEDIUM && -- quality != RTC_BUILD_QUALITY_HIGH) + quality != RTC_BUILD_QUALITY_HIGH) - throw std::runtime_error("invalid build quality"); -+ quality != RTC_BUILD_QUALITY_HIGH) { -+ abort(); -+ } ++ abort(); //throw std::runtime_error("invalid build quality"); scene->setBuildQuality(quality); RTC_CATCH_END2(scene); } -@@ -1563,11 +1568,17 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte - RTC_TRACE(rtcSetGeometryBuildQuality); - RTC_VERIFY_HANDLE(hgeometry); - RTC_ENTER_DEVICE(hgeometry); -+ //if (quality != RTC_BUILD_QUALITY_LOW && -+ // quality != RTC_BUILD_QUALITY_MEDIUM && -+ // quality != RTC_BUILD_QUALITY_HIGH && -+ // quality != RTC_BUILD_QUALITY_REFIT) -+ // throw std::runtime_error("invalid build quality"); - if (quality != RTC_BUILD_QUALITY_LOW && +@@ -1831,7 +1831,7 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte quality != RTC_BUILD_QUALITY_MEDIUM && quality != RTC_BUILD_QUALITY_HIGH && -- quality != RTC_BUILD_QUALITY_REFIT) + quality != RTC_BUILD_QUALITY_REFIT) - throw std::runtime_error("invalid build quality"); -+ quality != RTC_BUILD_QUALITY_REFIT) { -+ abort(); -+ } ++ abort(); //throw std::runtime_error("invalid build quality"); geometry->setBuildQuality(quality); RTC_CATCH_END2(geometry); } @@ -421,10 +295,10 @@ index 73a061de11..cd7a6f4395 100644 #define RTC_BUILD_ARGUMENTS_HAS(settings,member) \ diff --git a/thirdparty/embree/kernels/common/scene.cpp b/thirdparty/embree/kernels/common/scene.cpp -index fda8dd938a..706cc512df 100644 +index 84a84f8c69..3bfcebd298 100644 --- a/thirdparty/embree/kernels/common/scene.cpp +++ b/thirdparty/embree/kernels/common/scene.cpp -@@ -894,16 +894,16 @@ namespace embree +@@ -962,16 +962,16 @@ namespace embree } /* initiate build */ @@ -450,10 +324,10 @@ index fda8dd938a..706cc512df 100644 #endif diff --git a/thirdparty/embree/kernels/common/state.cpp b/thirdparty/embree/kernels/common/state.cpp -index 4e3ab6ddfb..8e83c95bd7 100644 +index cce5eafce1..509dbc7120 100644 --- a/thirdparty/embree/kernels/common/state.cpp +++ b/thirdparty/embree/kernels/common/state.cpp -@@ -194,13 +194,13 @@ namespace embree +@@ -199,13 +199,13 @@ namespace embree bool State::parseFile(const FileName& fileName) { Ref > file; diff --git a/thirdparty/embree/patches/0002-godot-config.patch b/thirdparty/embree/patches/0002-godot-config.patch index 88e15c282ca..f93519566b1 100644 --- a/thirdparty/embree/patches/0002-godot-config.patch +++ b/thirdparty/embree/patches/0002-godot-config.patch @@ -1,5 +1,5 @@ diff --git a/thirdparty/embree/include/embree4/rtcore_config.h b/thirdparty/embree/include/embree4/rtcore_config.h -index 8abd6954c3..cb3a8678a7 100644 +index eba966989e..91cf129dc6 100644 --- a/thirdparty/embree/include/embree4/rtcore_config.h +++ b/thirdparty/embree/include/embree4/rtcore_config.h @@ -4,7 +4,7 @@ @@ -12,8 +12,8 @@ index 8abd6954c3..cb3a8678a7 100644 #define RTC_VERSION_MAJOR 4 @@ -13,28 +13,28 @@ - #define RTC_VERSION 40301 - #define RTC_VERSION_STRING "4.3.1" + #define RTC_VERSION 40400 + #define RTC_VERSION_STRING "4.4.0" -#define RTC_MAX_INSTANCE_LEVEL_COUNT @EMBREE_MAX_INSTANCE_LEVEL_COUNT@ +#define RTC_MAX_INSTANCE_LEVEL_COUNT 1 @@ -47,8 +47,8 @@ index 8abd6954c3..cb3a8678a7 100644 -# define RTC_NAMESPACE_USE using namespace @EMBREE_API_NAMESPACE@; +# define RTC_NAMESPACE_USE using namespace; # define RTC_API_EXTERN_C + # define RTC_API_EXTERN_CPP # undef EMBREE_API_NAMESPACE - #else diff --git a/thirdparty/embree/kernels/config.h b/thirdparty/embree/kernels/config.h index 1669c4af72..5979b543c9 100644 --- a/thirdparty/embree/kernels/config.h diff --git a/thirdparty/embree/patches/0003-emscripten-nthreads.patch b/thirdparty/embree/patches/0003-emscripten-nthreads.patch index c12ea6f973d..691e64ae8b2 100644 --- a/thirdparty/embree/patches/0003-emscripten-nthreads.patch +++ b/thirdparty/embree/patches/0003-emscripten-nthreads.patch @@ -1,8 +1,8 @@ diff --git a/thirdparty/embree/common/sys/sysinfo.cpp b/thirdparty/embree/common/sys/sysinfo.cpp -index 61256b34f2..4583e49b1c 100644 +index 5f375cd95c..3c23fb1610 100644 --- a/thirdparty/embree/common/sys/sysinfo.cpp +++ b/thirdparty/embree/common/sys/sysinfo.cpp -@@ -646,6 +646,10 @@ namespace embree +@@ -659,6 +659,10 @@ namespace embree #if defined(__EMSCRIPTEN__) #include @@ -13,7 +13,7 @@ index 61256b34f2..4583e49b1c 100644 #endif namespace embree -@@ -659,6 +663,8 @@ namespace embree +@@ -672,6 +676,8 @@ namespace embree nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container assert(nThreads); #elif defined(__EMSCRIPTEN__) @@ -22,7 +22,7 @@ index 61256b34f2..4583e49b1c 100644 // WebAssembly supports pthreads, but not pthread_getaffinity_np. Get the number of logical // threads from the browser or Node.js using JavaScript. nThreads = MAIN_THREAD_EM_ASM_INT({ -@@ -674,6 +680,7 @@ namespace embree +@@ -687,6 +693,7 @@ namespace embree return 1; } }); diff --git a/thirdparty/embree/patches/0006-include-order-dllexport.patch b/thirdparty/embree/patches/0006-include-order-dllexport.patch deleted file mode 100644 index ece57a8daf9..00000000000 --- a/thirdparty/embree/patches/0006-include-order-dllexport.patch +++ /dev/null @@ -1,20 +0,0 @@ -diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.h b/thirdparty/embree/common/tasking/taskschedulerinternal.h -index e72d3b72ba..8e3befb739 100644 ---- a/thirdparty/embree/common/tasking/taskschedulerinternal.h -+++ b/thirdparty/embree/common/tasking/taskschedulerinternal.h -@@ -3,6 +3,7 @@ - - #pragma once - -+#include "../../include/embree4/rtcore.h" - #include "../sys/platform.h" - #include "../sys/alloc.h" - #include "../sys/barrier.h" -@@ -12,7 +13,6 @@ - #include "../sys/ref.h" - #include "../sys/atomic.h" - #include "../math/range.h" --#include "../../include/embree4/rtcore.h" - - #include -