--- origsrc/pocl-3.1/CMakeLists.txt 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/CMakeLists.txt 2024-01-04 03:05:15.035161600 +0900 @@ -817,9 +817,9 @@ endif() # constant addrspace variables, and stack protector likely slows # down the kernels, so it needs to be determined whether it's worth # the trouble. -set(DEFAULT_KERNEL_CL_FLAGS "-xcl -fno-stack-protector -fPIC ${FLOATCONV_FLAG} ${OPAQUE_PTR_FLAGS}") -set(DEFAULT_KERNEL_C_FLAGS "-xc -std=c11 -D__CBUILD__ -fno-math-errno -fno-stack-protector -fPIC ${FLOATCONV_FLAG} ${OPAQUE_PTR_FLAGS}") -set(DEFAULT_KERNEL_CXX_FLAGS "-xc++ -std=c++11 -fno-stack-protector -fPIC ${FLOATCONV_FLAG} ${OPAQUE_PTR_FLAGS}") +set(DEFAULT_KERNEL_CL_FLAGS "-xcl -fno-stack-protector ${FLOATCONV_FLAG} ${OPAQUE_PTR_FLAGS}") +set(DEFAULT_KERNEL_C_FLAGS "-xc -std=c11 -D__CBUILD__ -fno-math-errno -fno-stack-protector ${FLOATCONV_FLAG} ${OPAQUE_PTR_FLAGS}") +set(DEFAULT_KERNEL_CXX_FLAGS "-xc++ -std=c++11 -fno-stack-protector ${FLOATCONV_FLAG} ${OPAQUE_PTR_FLAGS}") set(EXTRA_KERNEL_FLAGS "" CACHE STRING "Extra arguments to all kernel compilation commands (defaults to empty)") @@ -883,11 +883,12 @@ if(ENABLE_SPIR) endif() # required for the wrapper generator - if(CMAKE_VERSION VERSION_LESS 3.12.0) - find_program(Python3_EXECUTABLE NAMES "python3" REQUIRED) - else() - find_package(Python3 REQUIRED COMPONENTS Interpreter) - endif() + # Cygwin port: the upstream find_program/find_package(Python3) lookup is dropped + # and the system interpreter is used by default; a user-supplied + # Python3_EXECUTABLE still takes precedence. NOTE(review): reason for dropping the lookup is not recorded here. + if(NOT Python3_EXECUTABLE) + set(Python3_EXECUTABLE "/usr/bin/python3") + endif() endif() --- origsrc/pocl-3.1/cmake/LLVM.cmake 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/cmake/LLVM.cmake 2024-01-04 03:05:15.037160500 +0900 @@ -287,7 +287,7 @@ endforeach() macro(find_program_or_die OUTPUT_VAR PROG_NAME DOCSTRING) find_program(${OUTPUT_VAR} - NAMES "${PROG_NAME}${LLVM_BINARY_SUFFIX}${CMAKE_EXECUTABLE_SUFFIX}" "${PROG_NAME}${CMAKE_EXECUTABLE_SUFFIX}" + NAMES 
"${PROG_NAME}${LLVM_BINARY_SUFFIX}" "${PROG_NAME}" HINTS "${LLVM_BINDIR}" "${LLVM_CONFIG_LOCATION}" "${LLVM_PREFIX}" "${LLVM_PREFIX_BIN}" DOC "${DOCSTRING}" NO_CMAKE_PATH --- origsrc/pocl-3.1/lib/CL/CMakeLists.txt 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/CL/CMakeLists.txt 2024-01-04 03:05:15.040159200 +0900 @@ -294,9 +294,12 @@ set_target_properties("${POCL_LIBRARY_NA target_link_libraries("${POCL_LIBRARY_NAME}" PRIVATE ${POCL_PRIVATE_LINK_LIST}) install(TARGETS "${POCL_LIBRARY_NAME}" - ARCHIVE DESTINATION ${POCL_INSTALL_PUBLIC_LIBDIR}/static + ARCHIVE DESTINATION ${POCL_INSTALL_PUBLIC_LIBDIR} COMPONENT "dev" LIBRARY DESTINATION ${POCL_INSTALL_PUBLIC_LIBDIR} + # COMPONENT binds only to the artifact kind that precedes it, so LIBRARY keeps its own copy + COMPONENT "lib" + RUNTIME DESTINATION ${POCL_INSTALL_PUBLIC_BINDIR} COMPONENT "lib" ) --- origsrc/pocl-3.1/lib/CL/devices/cuda/CMakeLists.txt 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/CL/devices/cuda/CMakeLists.txt 2024-01-04 03:05:15.042157700 +0900 @@ -23,16 +23,13 @@ # #============================================================================= -find_package(CUDA REQUIRED) -message(STATUS "CUDA_TOOLKIT_ROOT_DIR = ${CUDA_TOOLKIT_ROOT_DIR}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${LLVM_CXXFLAGS}") include_directories(${LLVM_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/../../../llvmopencl) -add_pocl_device_library("pocl-devices-cuda" pocl-cuda.c pocl-cuda.h pocl-ptx-gen.cc pocl-ptx-gen.h) -target_compile_definitions("pocl-devices-cuda" PRIVATE "-DCUDA_TOOLKIT_ROOT_DIR=\"${CUDA_TOOLKIT_ROOT_DIR}\"") +add_pocl_device_library("pocl-devices-cuda" pocl-cuda.c pocl-cuda.h pocl-ptx-gen.cc pocl-ptx-gen.h cuda.h) +add_custom_command(TARGET pocl-devices-cuda PRE_LINK COMMAND dlltool -d "${CMAKE_CURRENT_SOURCE_DIR}/nvcuda.def" -D nvcuda.dll -l "${CMAKE_CURRENT_BINARY_DIR}/libcuda.a" COMMENT "Generating libcuda.a import library from nvcuda.def" VERBATIM) if(ENABLE_LOADABLE_DRIVERS) - target_link_libraries(pocl-devices-cuda PRIVATE cuda ${POCL_LLVM_LIBS} ${PTHREAD_LIBRARY}) + target_link_libraries(pocl-devices-cuda PRIVATE -L${CMAKE_CURRENT_BINARY_DIR} cuda ${POCL_LLVM_LIBS} 
${PTHREAD_LIBRARY}) endif() if(ENABLE_CUDNN) --- origsrc/pocl-3.1/lib/CL/devices/cuda/cuda.h 1970-01-01 09:00:00.000000000 +0900 +++ src/pocl-3.1/lib/CL/devices/cuda/cuda.h 2024-01-04 03:05:15.046155400 +0900 @@ -0,0 +1,133 @@ +/* Minimal hand-written subset of the CUDA driver API for building the pocl */ +/* CUDA driver without the toolkit headers (pocl-cuda.c includes it as "cuda.h"). */ +/* The build links these symbols through a libcuda.a generated from nvcuda.def. */ +#ifndef _CUDA_H_ +#define _CUDA_H_ +#include <stddef.h> /* size_t */ +#include <stdint.h> /* uint32_t */ +typedef unsigned int CUdevice; +typedef void *CUcontext; +typedef void *CUevent; +typedef void *CUstream; +typedef void *CUmodule; +typedef void *CUfunction; +typedef unsigned int CUresult; +typedef unsigned int CUdevice_attribute; +typedef CUdevice_attribute CUdevice_attirbute; /* misspelled name from the first revision, kept as an alias */ +typedef unsigned long long CUdeviceptr; +typedef unsigned int CUmemorytype; +typedef void *CUarray; +typedef unsigned int CUjit_option; +typedef struct { + size_t srcXInBytes; + size_t srcY; + size_t srcZ; + size_t srcLOD; + CUmemorytype srcMemoryType; + const void *srcHost; + CUdeviceptr srcDevice; + CUarray srcArray; + void *reserved0; + size_t srcPitch; + size_t srcHeight; + size_t dstXInBytes; + size_t dstY; + size_t dstZ; + size_t dstLOD; + CUmemorytype dstMemoryType; + void *dstHost; + CUdeviceptr dstDevice; + CUarray dstArray; + void *reserved1; + size_t dstPitch; + size_t dstHeight; + size_t WidthInBytes; + size_t Height; + size_t Depth; +} CUDA_MEMCPY3D; +#define CUDA_SUCCESS 0 +#define CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED 712 +#define CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK 1 +#define CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X 2 +#define CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y 3 +#define CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z 4 +#define CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY 9 +#define CU_DEVICE_ATTRIBUTE_WARP_SIZE 10 +#define CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK 12 +#define CU_DEVICE_ATTRIBUTE_CLOCK_RATE 13 +#define CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT 14 +#define CU_DEVICE_ATTRIBUTE_GPU_OVERLAP 15 +#define CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT 16 +#define CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT 17 +#define CU_DEVICE_ATTRIBUTE_INTEGRATED 18 +#define CU_DEVICE_ATTRIBUTE_ECC_ENABLED 32 +#define CU_DEVICE_ATTRIBUTE_PCI_BUS_ID 33 +#define 
CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID 34 +#define CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT 40 +#define CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID 50 +#define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR 75 +#define CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR 76 +#define CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK 97 +#define CU_CTX_MAP_HOST 8 +#define CU_EVENT_DEFAULT 0 +#define CU_EVENT_DISABLE_TIMING 2 +#define CU_STREAM_WAIT_VALUE_GEQ 0 +#define CU_STREAM_NON_BLOCKING 1 +#define CU_MEMHOSTREGISTER_DEVICEMAP 2 +#define CU_MEMORYTYPE_HOST 1 +#define CU_MEMORYTYPE_DEVICE 2 +#define CU_MEMHOSTALLOC_DEVICEMAP 2 +#define CU_JIT_ERROR_LOG_BUFFER 5 +#define CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES 6 +/* NOTE(review): these are the unsuffixed entry points. NVIDIA's own cuda.h */ +/* redirects several of them (cuCtxCreate, cuMemAlloc, ...) to _v2 exports; */ +/* confirm the unsuffixed ones accept the 64-bit CUdeviceptr declared above. */ +#ifdef __cplusplus +extern "C" { +#endif +CUresult cuCtxCreate(CUcontext *, unsigned int, CUdevice); +CUresult cuCtxDestroy(CUcontext); +CUresult cuCtxSetCurrent(CUcontext); +CUresult cuDeviceGet(CUdevice *, int); +CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice); +CUresult cuDeviceGetCount(int *); +CUresult cuDeviceGetName(char *, int, CUdevice); +CUresult cuEventCreate(CUevent *, unsigned int); +CUresult cuEventDestroy(CUevent); +CUresult cuEventElapsedTime(float *, CUevent, CUevent); +CUresult cuEventRecord(CUevent, CUstream); +CUresult cuEventSynchronize(CUevent); +CUresult cuGetErrorName(CUresult error, const char **); +CUresult cuGetErrorString(CUresult error, const char **); +CUresult cuInit(int); +CUresult cuLaunchKernel(CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **); +CUresult cuMemAlloc(CUdeviceptr *, size_t); +CUresult cuMemcpy3DAsync(const CUDA_MEMCPY3D *, CUstream); +CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream); +CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t); +CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream); +CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t); +CUresult 
cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream); +CUresult cuMemFree(CUdeviceptr); +CUresult cuMemFreeHost(void *); +CUresult cuMemGetInfo(size_t *, size_t *); +CUresult cuMemHostAlloc(void **, size_t, unsigned int); +CUresult cuMemHostGetDevicePointer(CUdeviceptr *, void *, unsigned int); +CUresult cuMemHostRegister(void *, size_t, unsigned int); +CUresult cuMemHostUnregister(void *); +CUresult cuMemsetD16Async(CUdeviceptr, unsigned short, size_t, CUstream); +CUresult cuMemsetD32Async(CUdeviceptr, unsigned int, size_t, CUstream); +CUresult cuMemsetD8Async(CUdeviceptr, unsigned char, size_t, CUstream); +CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *); +CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *); +CUresult cuModuleLoadData(CUmodule *, const void *); +CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned int, CUjit_option *, void **); +CUresult cuStreamCreate(CUstream *, unsigned int); +CUresult cuStreamDestroy(CUstream); +CUresult cuStreamSynchronize(CUstream); +CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int); +CUresult cuStreamWaitValue32(CUstream, CUdeviceptr, uint32_t, unsigned int); +#ifdef __cplusplus +} +#endif +#endif /* _CUDA_H_ */ --- origsrc/pocl-3.1/lib/CL/devices/cuda/nvcuda.def 1970-01-01 09:00:00.000000000 +0900 +++ src/pocl-3.1/lib/CL/devices/cuda/nvcuda.def 2024-01-04 03:05:15.049153700 +0900 @@ -0,0 +1,657 @@ +; +; Definition file of nvcuda_loader.dll +; Automatic generated by gendef +; written by Kai Tietz 2008 +; +LIBRARY "nvcuda_loader.dll" +EXPORTS +cuArray3DCreate +cuArray3DCreate_v2 +cuArray3DGetDescriptor +cuArray3DGetDescriptor_v2 +cuArrayCreate +cuArrayCreate_v2 +cuArrayDestroy +cuArrayGetDescriptor +cuArrayGetDescriptor_v2 +cuArrayGetMemoryRequirements +cuArrayGetPlane +cuArrayGetSparseProperties +cuCoredumpGetAttribute +cuCoredumpGetAttributeGlobal +cuCoredumpSetAttribute +cuCoredumpSetAttributeGlobal +cuCtxAttach +cuCtxCreate +cuCtxCreate_v2 +cuCtxCreate_v3 +cuCtxDestroy 
+cuCtxDestroy_v2 +cuCtxDetach +cuCtxDisablePeerAccess +cuCtxEnablePeerAccess +cuCtxGetApiVersion +cuCtxGetCacheConfig +cuCtxGetCurrent +cuCtxGetDevice +cuCtxGetExecAffinity +cuCtxGetFlags +cuCtxGetId +cuCtxGetLimit +cuCtxGetSharedMemConfig +cuCtxGetStreamPriorityRange +cuCtxPopCurrent +cuCtxPopCurrent_v2 +cuCtxPushCurrent +cuCtxPushCurrent_v2 +cuCtxResetPersistingL2Cache +cuCtxSetCacheConfig +cuCtxSetCurrent +cuCtxSetFlags +cuCtxSetLimit +cuCtxSetSharedMemConfig +cuCtxSynchronize +cuD3D10CtxCreate +cuD3D10CtxCreateOnDevice +cuD3D10CtxCreate_v2 +cuD3D10GetDevice +cuD3D10GetDevices +cuD3D10GetDirect3DDevice +cuD3D10MapResources +cuD3D10RegisterResource +cuD3D10ResourceGetMappedArray +cuD3D10ResourceGetMappedPitch +cuD3D10ResourceGetMappedPitch_v2 +cuD3D10ResourceGetMappedPointer +cuD3D10ResourceGetMappedPointer_v2 +cuD3D10ResourceGetMappedSize +cuD3D10ResourceGetMappedSize_v2 +cuD3D10ResourceGetSurfaceDimensions +cuD3D10ResourceGetSurfaceDimensions_v2 +cuD3D10ResourceSetMapFlags +cuD3D10UnmapResources +cuD3D10UnregisterResource +cuD3D11CtxCreate +cuD3D11CtxCreateOnDevice +cuD3D11CtxCreate_v2 +cuD3D11GetDevice +cuD3D11GetDevices +cuD3D11GetDirect3DDevice +cuD3D9Begin +cuD3D9CtxCreate +cuD3D9CtxCreateOnDevice +cuD3D9CtxCreate_v2 +cuD3D9End +cuD3D9GetDevice +cuD3D9GetDevices +cuD3D9GetDirect3DDevice +cuD3D9MapResources +cuD3D9MapVertexBuffer +cuD3D9MapVertexBuffer_v2 +cuD3D9RegisterResource +cuD3D9RegisterVertexBuffer +cuD3D9ResourceGetMappedArray +cuD3D9ResourceGetMappedPitch +cuD3D9ResourceGetMappedPitch_v2 +cuD3D9ResourceGetMappedPointer +cuD3D9ResourceGetMappedPointer_v2 +cuD3D9ResourceGetMappedSize +cuD3D9ResourceGetMappedSize_v2 +cuD3D9ResourceGetSurfaceDimensions +cuD3D9ResourceGetSurfaceDimensions_v2 +cuD3D9ResourceSetMapFlags +cuD3D9UnmapResources +cuD3D9UnmapVertexBuffer +cuD3D9UnregisterResource +cuD3D9UnregisterVertexBuffer +cuDestroyExternalMemory +cuDestroyExternalSemaphore +cuDeviceCanAccessPeer +cuDeviceComputeCapability +cuDeviceGet 
+cuDeviceGetAttribute +cuDeviceGetByPCIBusId +cuDeviceGetCount +cuDeviceGetDefaultMemPool +cuDeviceGetExecAffinitySupport +cuDeviceGetGraphMemAttribute +cuDeviceGetLuid +cuDeviceGetMemPool +cuDeviceGetName +cuDeviceGetP2PAttribute +cuDeviceGetPCIBusId +cuDeviceGetProperties +cuDeviceGetTexture1DLinearMaxWidth +cuDeviceGetUuid +cuDeviceGetUuid_v2 +cuDeviceGraphMemTrim +cuDevicePrimaryCtxGetState +cuDevicePrimaryCtxRelease +cuDevicePrimaryCtxRelease_v2 +cuDevicePrimaryCtxReset +cuDevicePrimaryCtxReset_v2 +cuDevicePrimaryCtxRetain +cuDevicePrimaryCtxSetFlags +cuDevicePrimaryCtxSetFlags_v2 +cuDeviceSetGraphMemAttribute +cuDeviceSetMemPool +cuDeviceTotalMem +cuDeviceTotalMem_v2 +cuDriverGetVersion +cuEventCreate +cuEventDestroy +cuEventDestroy_v2 +cuEventElapsedTime +cuEventQuery +cuEventRecord +cuEventRecordWithFlags +cuEventRecordWithFlags_ptsz +cuEventRecord_ptsz +cuEventSynchronize +cuExternalMemoryGetMappedBuffer +cuExternalMemoryGetMappedMipmappedArray +cuFlushGPUDirectRDMAWrites +cuFuncGetAttribute +cuFuncGetModule +cuFuncGetName +cuFuncSetAttribute +cuFuncSetBlockShape +cuFuncSetCacheConfig +cuFuncSetSharedMemConfig +cuFuncSetSharedSize +cuGLCtxCreate +cuGLCtxCreate_v2 +cuGLGetDevices +cuGLGetDevices_v2 +cuGLInit +cuGLMapBufferObject +cuGLMapBufferObjectAsync +cuGLMapBufferObjectAsync_v2 +cuGLMapBufferObjectAsync_v2_ptsz +cuGLMapBufferObject_v2 +cuGLMapBufferObject_v2_ptds +cuGLRegisterBufferObject +cuGLSetBufferObjectMapFlags +cuGLUnmapBufferObject +cuGLUnmapBufferObjectAsync +cuGLUnregisterBufferObject +cuGetErrorName +cuGetErrorString +cuGetExportTable +cuGetProcAddress +cuGetProcAddress_v2 +cuGraphAddBatchMemOpNode +cuGraphAddChildGraphNode +cuGraphAddDependencies +cuGraphAddDependencies_v2 +cuGraphAddEmptyNode +cuGraphAddEventRecordNode +cuGraphAddEventWaitNode +cuGraphAddExternalSemaphoresSignalNode +cuGraphAddExternalSemaphoresWaitNode +cuGraphAddHostNode +cuGraphAddKernelNode +cuGraphAddKernelNode_v2 +cuGraphAddMemAllocNode +cuGraphAddMemFreeNode 
+cuGraphAddMemcpyNode +cuGraphAddMemsetNode +cuGraphAddNode +cuGraphAddNode_v2 +cuGraphBatchMemOpNodeGetParams +cuGraphBatchMemOpNodeSetParams +cuGraphChildGraphNodeGetGraph +cuGraphClone +cuGraphConditionalHandleCreate +cuGraphCreate +cuGraphDebugDotPrint +cuGraphDestroy +cuGraphDestroyNode +cuGraphEventRecordNodeGetEvent +cuGraphEventRecordNodeSetEvent +cuGraphEventWaitNodeGetEvent +cuGraphEventWaitNodeSetEvent +cuGraphExecBatchMemOpNodeSetParams +cuGraphExecChildGraphNodeSetParams +cuGraphExecDestroy +cuGraphExecEventRecordNodeSetEvent +cuGraphExecEventWaitNodeSetEvent +cuGraphExecExternalSemaphoresSignalNodeSetParams +cuGraphExecExternalSemaphoresWaitNodeSetParams +cuGraphExecGetFlags +cuGraphExecHostNodeSetParams +cuGraphExecKernelNodeSetParams +cuGraphExecKernelNodeSetParams_v2 +cuGraphExecMemcpyNodeSetParams +cuGraphExecMemsetNodeSetParams +cuGraphExecNodeSetParams +cuGraphExecUpdate +cuGraphExecUpdate_v2 +cuGraphExternalSemaphoresSignalNodeGetParams +cuGraphExternalSemaphoresSignalNodeSetParams +cuGraphExternalSemaphoresWaitNodeGetParams +cuGraphExternalSemaphoresWaitNodeSetParams +cuGraphGetEdges +cuGraphGetEdges_v2 +cuGraphGetNodes +cuGraphGetRootNodes +cuGraphHostNodeGetParams +cuGraphHostNodeSetParams +cuGraphInstantiate +cuGraphInstantiateWithFlags +cuGraphInstantiateWithParams +cuGraphInstantiateWithParams_ptsz +cuGraphInstantiate_v2 +cuGraphKernelNodeCopyAttributes +cuGraphKernelNodeGetAttribute +cuGraphKernelNodeGetParams +cuGraphKernelNodeGetParams_v2 +cuGraphKernelNodeSetAttribute +cuGraphKernelNodeSetParams +cuGraphKernelNodeSetParams_v2 +cuGraphLaunch +cuGraphLaunch_ptsz +cuGraphMemAllocNodeGetParams +cuGraphMemFreeNodeGetParams +cuGraphMemcpyNodeGetParams +cuGraphMemcpyNodeSetParams +cuGraphMemsetNodeGetParams +cuGraphMemsetNodeSetParams +cuGraphNodeFindInClone +cuGraphNodeGetDependencies +cuGraphNodeGetDependencies_v2 +cuGraphNodeGetDependentNodes +cuGraphNodeGetDependentNodes_v2 +cuGraphNodeGetEnabled +cuGraphNodeGetType 
+cuGraphNodeSetEnabled +cuGraphNodeSetParams +cuGraphReleaseUserObject +cuGraphRemoveDependencies +cuGraphRemoveDependencies_v2 +cuGraphRetainUserObject +cuGraphUpload +cuGraphUpload_ptsz +cuGraphicsD3D10RegisterResource +cuGraphicsD3D11RegisterResource +cuGraphicsD3D9RegisterResource +cuGraphicsGLRegisterBuffer +cuGraphicsGLRegisterImage +cuGraphicsMapResources +cuGraphicsMapResources_ptsz +cuGraphicsResourceGetMappedMipmappedArray +cuGraphicsResourceGetMappedPointer +cuGraphicsResourceGetMappedPointer_v2 +cuGraphicsResourceSetMapFlags +cuGraphicsResourceSetMapFlags_v2 +cuGraphicsSubResourceGetMappedArray +cuGraphicsUnmapResources +cuGraphicsUnmapResources_ptsz +cuGraphicsUnregisterResource +cuImportExternalMemory +cuImportExternalSemaphore +cuInit +cuIpcCloseMemHandle +cuIpcGetEventHandle +cuIpcGetMemHandle +cuIpcOpenEventHandle +cuIpcOpenMemHandle +cuIpcOpenMemHandle_v2 +cuKernelGetAttribute +cuKernelGetFunction +cuKernelGetName +cuKernelSetAttribute +cuKernelSetCacheConfig +cuLaunch +cuLaunchCooperativeKernel +cuLaunchCooperativeKernelMultiDevice +cuLaunchCooperativeKernel_ptsz +cuLaunchGrid +cuLaunchGridAsync +cuLaunchHostFunc +cuLaunchHostFunc_ptsz +cuLaunchKernel +cuLaunchKernelEx +cuLaunchKernelEx_ptsz +cuLaunchKernel_ptsz +cuLibraryGetGlobal +cuLibraryGetKernel +cuLibraryGetManaged +cuLibraryGetModule +cuLibraryGetUnifiedFunction +cuLibraryLoadData +cuLibraryLoadFromFile +cuLibraryUnload +cuLinkAddData +cuLinkAddData_v2 +cuLinkAddFile +cuLinkAddFile_v2 +cuLinkComplete +cuLinkCreate +cuLinkCreate_v2 +cuLinkDestroy +cuMemAddressFree +cuMemAddressReserve +cuMemAdvise +cuMemAdvise_v2 +cuMemAlloc +cuMemAllocAsync +cuMemAllocAsync_ptsz +cuMemAllocFromPoolAsync +cuMemAllocFromPoolAsync_ptsz +cuMemAllocHost +cuMemAllocHost_v2 +cuMemAllocManaged +cuMemAllocPitch +cuMemAllocPitch_v2 +cuMemAlloc_v2 +cuMemCreate +cuMemExportToShareableHandle +cuMemFree +cuMemFreeAsync +cuMemFreeAsync_ptsz +cuMemFreeHost +cuMemFree_v2 +cuMemGetAccess +cuMemGetAddressRange 
+cuMemGetAddressRange_v2 +cuMemGetAllocationGranularity +cuMemGetAllocationPropertiesFromHandle +cuMemGetHandleForAddressRange +cuMemGetInfo +cuMemGetInfo_v2 +cuMemHostAlloc +cuMemHostGetDevicePointer +cuMemHostGetDevicePointer_v2 +cuMemHostGetFlags +cuMemHostRegister +cuMemHostRegister_v2 +cuMemHostUnregister +cuMemImportFromShareableHandle +cuMemMap +cuMemMapArrayAsync +cuMemMapArrayAsync_ptsz +cuMemPoolCreate +cuMemPoolDestroy +cuMemPoolExportPointer +cuMemPoolExportToShareableHandle +cuMemPoolGetAccess +cuMemPoolGetAttribute +cuMemPoolImportFromShareableHandle +cuMemPoolImportPointer +cuMemPoolSetAccess +cuMemPoolSetAttribute +cuMemPoolTrimTo +cuMemPrefetchAsync +cuMemPrefetchAsync_ptsz +cuMemPrefetchAsync_v2 +cuMemPrefetchAsync_v2_ptsz +cuMemRangeGetAttribute +cuMemRangeGetAttributes +cuMemRelease +cuMemRetainAllocationHandle +cuMemSetAccess +cuMemUnmap +cuMemcpy +cuMemcpy2D +cuMemcpy2DAsync +cuMemcpy2DAsync_v2 +cuMemcpy2DAsync_v2_ptsz +cuMemcpy2DUnaligned +cuMemcpy2DUnaligned_v2 +cuMemcpy2DUnaligned_v2_ptds +cuMemcpy2D_v2 +cuMemcpy2D_v2_ptds +cuMemcpy3D +cuMemcpy3DAsync +cuMemcpy3DAsync_v2 +cuMemcpy3DAsync_v2_ptsz +cuMemcpy3DPeer +cuMemcpy3DPeerAsync +cuMemcpy3DPeerAsync_ptsz +cuMemcpy3DPeer_ptds +cuMemcpy3D_v2 +cuMemcpy3D_v2_ptds +cuMemcpyAsync +cuMemcpyAsync_ptsz +cuMemcpyAtoA +cuMemcpyAtoA_v2 +cuMemcpyAtoA_v2_ptds +cuMemcpyAtoD +cuMemcpyAtoD_v2 +cuMemcpyAtoD_v2_ptds +cuMemcpyAtoH +cuMemcpyAtoHAsync +cuMemcpyAtoHAsync_v2 +cuMemcpyAtoHAsync_v2_ptsz +cuMemcpyAtoH_v2 +cuMemcpyAtoH_v2_ptds +cuMemcpyDtoA +cuMemcpyDtoA_v2 +cuMemcpyDtoA_v2_ptds +cuMemcpyDtoD +cuMemcpyDtoDAsync +cuMemcpyDtoDAsync_v2 +cuMemcpyDtoDAsync_v2_ptsz +cuMemcpyDtoD_v2 +cuMemcpyDtoD_v2_ptds +cuMemcpyDtoH +cuMemcpyDtoHAsync +cuMemcpyDtoHAsync_v2 +cuMemcpyDtoHAsync_v2_ptsz +cuMemcpyDtoH_v2 +cuMemcpyDtoH_v2_ptds +cuMemcpyHtoA +cuMemcpyHtoAAsync +cuMemcpyHtoAAsync_v2 +cuMemcpyHtoAAsync_v2_ptsz +cuMemcpyHtoA_v2 +cuMemcpyHtoA_v2_ptds +cuMemcpyHtoD +cuMemcpyHtoDAsync +cuMemcpyHtoDAsync_v2 
+cuMemcpyHtoDAsync_v2_ptsz +cuMemcpyHtoD_v2 +cuMemcpyHtoD_v2_ptds +cuMemcpyPeer +cuMemcpyPeerAsync +cuMemcpyPeerAsync_ptsz +cuMemcpyPeer_ptds +cuMemcpy_ptds +cuMemsetD16 +cuMemsetD16Async +cuMemsetD16Async_ptsz +cuMemsetD16_v2 +cuMemsetD16_v2_ptds +cuMemsetD2D16 +cuMemsetD2D16Async +cuMemsetD2D16Async_ptsz +cuMemsetD2D16_v2 +cuMemsetD2D16_v2_ptds +cuMemsetD2D32 +cuMemsetD2D32Async +cuMemsetD2D32Async_ptsz +cuMemsetD2D32_v2 +cuMemsetD2D32_v2_ptds +cuMemsetD2D8 +cuMemsetD2D8Async +cuMemsetD2D8Async_ptsz +cuMemsetD2D8_v2 +cuMemsetD2D8_v2_ptds +cuMemsetD32 +cuMemsetD32Async +cuMemsetD32Async_ptsz +cuMemsetD32_v2 +cuMemsetD32_v2_ptds +cuMemsetD8 +cuMemsetD8Async +cuMemsetD8Async_ptsz +cuMemsetD8_v2 +cuMemsetD8_v2_ptds +cuMipmappedArrayCreate +cuMipmappedArrayDestroy +cuMipmappedArrayGetLevel +cuMipmappedArrayGetMemoryRequirements +cuMipmappedArrayGetSparseProperties +cuModuleGetFunction +cuModuleGetGlobal +cuModuleGetGlobal_v2 +cuModuleGetLoadingMode +cuModuleGetSurfRef +cuModuleGetTexRef +cuModuleLoad +cuModuleLoadData +cuModuleLoadDataEx +cuModuleLoadFatBinary +cuModuleUnload +cuMulticastAddDevice +cuMulticastBindAddr +cuMulticastBindMem +cuMulticastCreate +cuMulticastGetGranularity +cuMulticastUnbind +cuOccupancyAvailableDynamicSMemPerBlock +cuOccupancyMaxActiveBlocksPerMultiprocessor +cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags +cuOccupancyMaxActiveClusters +cuOccupancyMaxPotentialBlockSize +cuOccupancyMaxPotentialBlockSizeWithFlags +cuOccupancyMaxPotentialClusterSize +cuParamSetSize +cuParamSetTexRef +cuParamSetf +cuParamSeti +cuParamSetv +cuPointerGetAttribute +cuPointerGetAttributes +cuPointerSetAttribute +cuProfilerInitialize +cuProfilerStart +cuProfilerStop +cuSignalExternalSemaphoresAsync +cuSignalExternalSemaphoresAsync_ptsz +cuStreamAddCallback +cuStreamAddCallback_ptsz +cuStreamAttachMemAsync +cuStreamAttachMemAsync_ptsz +cuStreamBatchMemOp +cuStreamBatchMemOp_ptsz +cuStreamBatchMemOp_v2 +cuStreamBatchMemOp_v2_ptsz +cuStreamBeginCapture 
+cuStreamBeginCaptureToGraph +cuStreamBeginCaptureToGraph_ptsz +cuStreamBeginCapture_ptsz +cuStreamBeginCapture_v2 +cuStreamBeginCapture_v2_ptsz +cuStreamCopyAttributes +cuStreamCopyAttributes_ptsz +cuStreamCreate +cuStreamCreateWithPriority +cuStreamDestroy +cuStreamDestroy_v2 +cuStreamEndCapture +cuStreamEndCapture_ptsz +cuStreamGetAttribute +cuStreamGetAttribute_ptsz +cuStreamGetCaptureInfo +cuStreamGetCaptureInfo_ptsz +cuStreamGetCaptureInfo_v2 +cuStreamGetCaptureInfo_v2_ptsz +cuStreamGetCaptureInfo_v3 +cuStreamGetCaptureInfo_v3_ptsz +cuStreamGetCtx +cuStreamGetCtx_ptsz +cuStreamGetFlags +cuStreamGetFlags_ptsz +cuStreamGetId +cuStreamGetId_ptsz +cuStreamGetPriority +cuStreamGetPriority_ptsz +cuStreamIsCapturing +cuStreamIsCapturing_ptsz +cuStreamQuery +cuStreamQuery_ptsz +cuStreamSetAttribute +cuStreamSetAttribute_ptsz +cuStreamSynchronize +cuStreamSynchronize_ptsz +cuStreamUpdateCaptureDependencies +cuStreamUpdateCaptureDependencies_ptsz +cuStreamUpdateCaptureDependencies_v2 +cuStreamUpdateCaptureDependencies_v2_ptsz +cuStreamWaitEvent +cuStreamWaitEvent_ptsz +cuStreamWaitValue32 +cuStreamWaitValue32_ptsz +cuStreamWaitValue32_v2 +cuStreamWaitValue32_v2_ptsz +cuStreamWaitValue64 +cuStreamWaitValue64_ptsz +cuStreamWaitValue64_v2 +cuStreamWaitValue64_v2_ptsz +cuStreamWriteValue32 +cuStreamWriteValue32_ptsz +cuStreamWriteValue32_v2 +cuStreamWriteValue32_v2_ptsz +cuStreamWriteValue64 +cuStreamWriteValue64_ptsz +cuStreamWriteValue64_v2 +cuStreamWriteValue64_v2_ptsz +cuSurfObjectCreate +cuSurfObjectDestroy +cuSurfObjectGetResourceDesc +cuSurfRefGetArray +cuSurfRefSetArray +cuTensorMapEncodeIm2col +cuTensorMapEncodeTiled +cuTensorMapReplaceAddress +cuTexObjectCreate +cuTexObjectDestroy +cuTexObjectGetResourceDesc +cuTexObjectGetResourceViewDesc +cuTexObjectGetTextureDesc +cuTexRefCreate +cuTexRefDestroy +cuTexRefGetAddress +cuTexRefGetAddressMode +cuTexRefGetAddress_v2 +cuTexRefGetArray +cuTexRefGetBorderColor +cuTexRefGetFilterMode +cuTexRefGetFlags 
+cuTexRefGetFormat +cuTexRefGetMaxAnisotropy +cuTexRefGetMipmapFilterMode +cuTexRefGetMipmapLevelBias +cuTexRefGetMipmapLevelClamp +cuTexRefGetMipmappedArray +cuTexRefSetAddress +cuTexRefSetAddress2D +cuTexRefSetAddress2D_v2 +cuTexRefSetAddress2D_v3 +cuTexRefSetAddressMode +cuTexRefSetAddress_v2 +cuTexRefSetArray +cuTexRefSetBorderColor +cuTexRefSetFilterMode +cuTexRefSetFlags +cuTexRefSetFormat +cuTexRefSetMaxAnisotropy +cuTexRefSetMipmapFilterMode +cuTexRefSetMipmapLevelBias +cuTexRefSetMipmapLevelClamp +cuTexRefSetMipmappedArray +cuThreadExchangeStreamCaptureMode +cuUserObjectCreate +cuUserObjectRelease +cuUserObjectRetain +cuWGLGetDevice +cuWaitExternalSemaphoresAsync +cuWaitExternalSemaphoresAsync_ptsz +cudbgApiAttach +cudbgApiDetach +cudbgApiInit +cudbgGetAPI +cudbgGetAPIVersion +cudbgMain --- origsrc/pocl-3.1/lib/CL/devices/cuda/pocl-cuda.c 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/CL/devices/cuda/pocl-cuda.c 2024-01-04 03:05:15.052152000 +0900 @@ -47,8 +47,7 @@ #include #include -#include -#include +#include "cuda.h" #ifdef ENABLE_CUDNN #include @@ -73,6 +72,10 @@ cudnnHandle_t cudnn; } \ } +#ifdef __CYGWIN__ +#undef POCL_DEBUG_MESSAGES +#endif + typedef struct pocl_cuda_device_data_s { CUdevice device; @@ -2107,7 +2113,12 @@ pocl_cuda_free_event_data (cl_event even { pocl_cuda_event_data_t *event_data = (pocl_cuda_event_data_t *)event->data; - PTHREAD_CHECK (pthread_cond_destroy (&event_data->event_cond)); +#ifdef __CYGWIN__ + /* pthread_cond_t is opaque per POSIX, so this truth-test only compiles where it is a scalar handle; keep it Cygwin-only like the other port-specific hunks. */ + /* NOTE(review): presumably skips a condition variable that was never initialised - confirm against the allocation path. */ + if (event_data->event_cond) +#endif + PTHREAD_CHECK (pthread_cond_destroy (&event_data->event_cond)); if (event->queue->properties & CL_QUEUE_PROFILING_ENABLE) cuEventDestroy (event_data->start); cuEventDestroy (event_data->end); --- origsrc/pocl-3.1/lib/CL/devices/cuda/pocl-ptx-gen.cc 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/CL/devices/cuda/pocl-ptx-gen.cc 2024-01-04 03:18:37.421667500 +0900 @@ -555,8 +555,9 @@ int findLibDevice(char LibDevicePath[PAT LibDeviceSM = 30; const char *BasePath[] = { - 
pocl_get_string_option("POCL_CUDA_TOOLKIT_PATH", CUDA_TOOLKIT_ROOT_DIR), + pocl_get_string_option("POCL_CUDA_TOOLKIT_PATH", "/usr/local/cuda"), pocl_get_string_option("CUDA_HOME", "/usr/local/cuda"), + pocl_get_string_option("CUDA_PATH", "/usr/local/cuda"), "/usr/local/lib/cuda", "/usr/local/lib", "/usr/lib", @@ -613,7 +614,7 @@ void linkLibDevice(llvm::Module *Module, const char *LibDevicePath) { auto Buffer = llvm::MemoryBuffer::getFile(LibDevicePath); if (!Buffer) - POCL_ABORT("[CUDA] failed to open libdevice library file\n"); + POCL_ABORT("[CUDA] failed to open libdevice library file. Most likely, you do not have NVIDIA CUDA Toolkit. Please consider to install it. If you have it installed, set CUDA_PATH properly.\n"); POCL_MSG_PRINT_INFO("loading libdevice from '%s'\n", LibDevicePath); @@ -960,11 +961,19 @@ int pocl_cuda_get_ptr_arg_alignment(cons llvm::dyn_cast(U)) { for (auto UU : GEP->users()) { if (llvm::StoreInst *SI = llvm::dyn_cast(UU)) { +#ifdef LLVM_OLDER_THAN_15_0 + Alignments[i] = SI->getAlignment(); +#else Alignments[i] = SI->getAlign().value(); +#endif break; } if (llvm::LoadInst *LI = llvm::dyn_cast(UU)) { +#ifdef LLVM_OLDER_THAN_15_0 + Alignments[i] = LI->getAlignment(); +#else Alignments[i] = LI->getAlign().value(); +#endif break; } } --- origsrc/pocl-3.1/lib/CL/devices/topology/pocl_topology.c 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/CL/devices/topology/pocl_topology.c 2024-01-04 03:05:15.058148600 +0900 @@ -205,7 +205,7 @@ exit_destroy: } // #ifdef HWLOC -#elif defined(__linux__) || defined(__ANDROID__) +#elif defined(__linux__) || defined(__ANDROID__) || defined(__CYGWIN__) #define L3_CACHE_SIZE "/sys/devices/system/cpu/cpu0/cache/index3/size" #define L2_CACHE_SIZE "/sys/devices/system/cpu/cpu0/cache/index2/size" --- origsrc/pocl-3.1/lib/CL/pocl_build.c 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/CL/pocl_build.c 2024-01-04 03:05:15.061147000 +0900 @@ -690,6 +690,13 @@ compile_and_link_program(int compile_pro goto 
ERROR_CLEAN_OPTIONS; } +#ifdef __CYGWIN__ + if (program->compiler_options == NULL) + { + /* calloc yields an empty string in a 512-byte buffer and, unlike malloc followed by a store, cannot dereference NULL when the allocation fails */ + program->compiler_options = (char *)calloc (512, 1); + } +#endif POCL_MSG_PRINT_LLVM ("building program with options %s\n", program->compiler_options); --- origsrc/pocl-3.1/lib/CL/pocl_util.c 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/CL/pocl_util.c 2024-01-04 03:05:15.064145100 +0900 @@ -59,8 +59,10 @@ #if defined(__APPLE__) #define _DARWIN_C_SOURCE #endif -#ifdef __linux__ +#if defined(__linux__) || defined(__CYGWIN__) #define _GNU_SOURCE +#undef __GNU_VISIBLE +#define __GNU_VISIBLE 1 #endif #include #endif --- origsrc/pocl-3.1/lib/kernel/host/spir_wrapper32.ll 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/kernel/host/spir_wrapper32.ll 2024-01-04 03:05:15.066143900 +0900 @@ -2663,7 +2663,7 @@ attributes #0 = { nounwind "correctly-ro !0 = !{i32 1, i32 2} !1 = !{!"clang version 6.0.0"} -!2 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 1, !"wchar_size", i32 2} !3 = !{i32 7, !"PIC Level", i32 2} --- origsrc/pocl-3.1/lib/kernel/host/spir_wrapper64.ll 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/kernel/host/spir_wrapper64.ll 2024-01-04 03:05:15.071141100 +0900 @@ -14731,7 +14731,7 @@ attributes #0 = { nounwind "correctly-ro !0 = !{i32 1, i32 2} !1 = !{!"clang version 6.0.0"} -!2 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 1, !"wchar_size", i32 2} !3 = !{i32 7, !"PIC Level", i32 2} --- origsrc/pocl-3.1/lib/llvmopencl/CMakeLists.txt 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/llvmopencl/CMakeLists.txt 2024-01-04 03:05:15.075138700 +0900 @@ -115,9 +115,10 @@ harden("llvmopencl") # not sure where other platforms get their library linkage list, probably there is # way to make this more consistent on different platforms -if(MSVC) +if(WIN32 OR CYGWIN) + target_link_libraries("llvmopencl" "${POCL_LIBRARY_NAME}") target_link_libraries("llvmopencl" ${POCL_LLVM_LIBS}) -endif(MSVC) +endif(WIN32 OR CYGWIN) 
#target_link_libraries("llvmopencl" "${LLVM_SYSLIBS}") endif(ENABLE_LIBLLVMOPENCL) --- origsrc/pocl-3.1/lib/llvmopencl/DebugHelpers.h 2022-12-05 21:36:08.000000000 +0900 +++ src/pocl-3.1/lib/llvmopencl/DebugHelpers.h 2024-01-04 03:05:15.077137600 +0900 @@ -24,7 +24,7 @@ #define _POCL_DEBUG_HELPERS_H #include -#if _MSC_VER +#if _MSC_VER || defined(__CYGWIN__) # include #endif