Initial change to nvenc
parent
21cc797dd3
commit
ccae6d174b
@ -0,0 +1,423 @@
|
|||||||
|
/*
|
||||||
|
* This copyright notice applies to this header file only:
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the software, and to permit persons to whom the
|
||||||
|
* software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if !defined(FFNV_DYNLINK_CUDA_H) && !defined(CUDA_VERSION)
|
||||||
|
#define FFNV_DYNLINK_CUDA_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
/* Version number advertised by this stand-in header. */
#define CUDA_VERSION 7050

/*
 * Calling convention applied to every driver entry point declared below:
 * __stdcall on Windows/Cygwin, nothing elsewhere.
 */
#if defined(_WIN32) || defined(__CYGWIN__)
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif

/* Context creation flag. */
#define CU_CTX_SCHED_BLOCKING_SYNC 4
|
||||||
|
|
||||||
|
/* Integer device handle. */
typedef int CUdevice;

/*
 * Device pointer: an unsigned integer that is 64 bits wide on the 64-bit
 * targets listed here and 32 bits wide otherwise.  _WIN64 is tested too so
 * that any 64-bit Windows compiler selects the wide variant even when it
 * defines none of the other macros.
 */
#if defined(_WIN64) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined(__LP64__) || defined(__aarch64__)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif

/* Texture object handle (always a 64-bit integer). */
typedef unsigned long long CUtexObject;
|
||||||
|
|
||||||
|
/*
 * Opaque handles.  The pointed-to struct types are never defined in this
 * header, so the handles can only be passed around, not dereferenced.
 */
typedef struct CUarray_st            *CUarray;
typedef struct CUctx_st              *CUcontext;
typedef struct CUstream_st           *CUstream;
typedef struct CUevent_st            *CUevent;
typedef struct CUfunc_st             *CUfunction;
typedef struct CUmod_st              *CUmodule;
typedef struct CUmipmappedArray_st   *CUmipmappedArray;
typedef struct CUgraphicsResource_st *CUgraphicsResource;
typedef struct CUextMemory_st        *CUexternalMemory;
typedef struct CUextSemaphore_st     *CUexternalSemaphore;

typedef struct CUlinkState_st        *CUlinkState;
|
||||||
|
|
||||||
|
/*
 * Result code returned by every driver entry point declared in this header.
 * Only the two values listed here are named.
 */
typedef enum cudaError_enum {
    CUDA_SUCCESS         = 0,
    CUDA_ERROR_NOT_READY = 600
} CUresult;
|
||||||
|
|
||||||
|
/**
 * Device properties (subset)
 *
 * Selector passed to cuDeviceGetAttribute().  The numbering is sparse
 * because only part of the attribute list is reproduced here.
 */
typedef enum CUdevice_attribute_enum {
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE               = 13,
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT        = 14,
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT     = 16,
    CU_DEVICE_ATTRIBUTE_INTEGRATED               = 18,
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY      = 19,
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE             = 20,
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS       = 31,
    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID               = 33,
    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID            = 34,
    CU_DEVICE_ATTRIBUTE_TCC_DRIVER               = 35,
    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE        = 36,
    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH  = 37,
    CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT       = 40,
    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING       = 41,
    CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID            = 50,
    CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT  = 51,
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
    CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY           = 83,
    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD          = 84,
    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85
} CUdevice_attribute;
|
||||||
|
|
||||||
|
/* Element format selector used by the array and resource descriptors. */
typedef enum CUarray_format_enum {
    CU_AD_FORMAT_UNSIGNED_INT8  = 0x01,
    CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
    CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
    CU_AD_FORMAT_SIGNED_INT8    = 0x08,
    CU_AD_FORMAT_SIGNED_INT16   = 0x09,
    CU_AD_FORMAT_SIGNED_INT32   = 0x0a,
    CU_AD_FORMAT_HALF           = 0x10,
    CU_AD_FORMAT_FLOAT          = 0x20
} CUarray_format;
|
||||||
|
|
||||||
|
/* Kind of memory named by the source/destination of a CUDA_MEMCPY2D. */
typedef enum CUmemorytype_enum {
    CU_MEMORYTYPE_HOST   = 1,
    CU_MEMORYTYPE_DEVICE = 2,
    CU_MEMORYTYPE_ARRAY  = 3
} CUmemorytype;
|
||||||
|
|
||||||
|
/* Limit selector passed to cuCtxSetLimit(). */
typedef enum CUlimit_enum {
    CU_LIMIT_STACK_SIZE                       = 0,
    CU_LIMIT_PRINTF_FIFO_SIZE                 = 1,
    CU_LIMIT_MALLOC_HEAP_SIZE                 = 2,
    CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH           = 3,
    CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4
} CUlimit;
|
||||||
|
|
||||||
|
/* Discriminator stored in CUDA_RESOURCE_DESC::resType. */
typedef enum CUresourcetype_enum {
    CU_RESOURCE_TYPE_ARRAY           = 0x00,
    CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01,
    CU_RESOURCE_TYPE_LINEAR          = 0x02,
    CU_RESOURCE_TYPE_PITCH2D         = 0x03
} CUresourcetype;
|
||||||
|
|
||||||
|
/* Addressing mode stored per dimension in CUDA_TEXTURE_DESC::addressMode. */
typedef enum CUaddress_mode_enum {
    CU_TR_ADDRESS_MODE_WRAP   = 0,
    CU_TR_ADDRESS_MODE_CLAMP  = 1,
    CU_TR_ADDRESS_MODE_MIRROR = 2,
    CU_TR_ADDRESS_MODE_BORDER = 3
} CUaddress_mode;
|
||||||
|
|
||||||
|
/* Filter mode used by CUDA_TEXTURE_DESC (filterMode and mipmapFilterMode). */
typedef enum CUfilter_mode_enum {
    CU_TR_FILTER_MODE_POINT  = 0,
    CU_TR_FILTER_MODE_LINEAR = 1
} CUfilter_mode;
|
||||||
|
|
||||||
|
/*
 * Flags for registering a graphics resource.  Apart from NONE, each value
 * is a distinct single bit.
 */
typedef enum CUgraphicsRegisterFlags_enum {
    CU_GRAPHICS_REGISTER_FLAGS_NONE           = 0,
    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY      = 1,
    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD  = 2,
    CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST   = 4,
    CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 8
} CUgraphicsRegisterFlags;
|
||||||
|
|
||||||
|
/* Discriminator stored in CUDA_EXTERNAL_MEMORY_HANDLE_DESC::type. */
typedef enum CUexternalMemoryHandleType_enum {
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD        = 1,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32     = 2,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP       = 4,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE   = 5
} CUexternalMemoryHandleType;
|
||||||
|
|
||||||
|
/* Discriminator stored in CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC::type. */
typedef enum CUexternalSemaphoreHandleType_enum {
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD        = 1,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32     = 2,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE      = 4
} CUexternalSemaphoreHandleType;
|
||||||
|
|
||||||
|
/*
 * Option keys accepted by cuLinkCreate() and cuLinkAddData().
 * CU_JIT_NUM_OPTIONS carries no explicit value and therefore evaluates to
 * one past the last explicit entry (20).
 */
typedef enum CUjit_option_enum
{
    CU_JIT_MAX_REGISTERS               = 0,
    CU_JIT_THREADS_PER_BLOCK           = 1,
    CU_JIT_WALL_TIME                   = 2,
    CU_JIT_INFO_LOG_BUFFER             = 3,
    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES  = 4,
    CU_JIT_ERROR_LOG_BUFFER            = 5,
    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,
    CU_JIT_OPTIMIZATION_LEVEL          = 7,
    CU_JIT_TARGET_FROM_CUCONTEXT       = 8,
    CU_JIT_TARGET                      = 9,
    CU_JIT_FALLBACK_STRATEGY           = 10,
    CU_JIT_GENERATE_DEBUG_INFO         = 11,
    CU_JIT_LOG_VERBOSE                 = 12,
    CU_JIT_GENERATE_LINE_INFO          = 13,
    CU_JIT_CACHE_MODE                  = 14,
    CU_JIT_NEW_SM3X_OPT                = 15,
    CU_JIT_FAST_COMPILE                = 16,
    CU_JIT_GLOBAL_SYMBOL_NAMES         = 17,
    CU_JIT_GLOBAL_SYMBOL_ADDRESSES     = 18,
    CU_JIT_GLOBAL_SYMBOL_COUNT         = 19,
    CU_JIT_NUM_OPTIONS
} CUjit_option;
|
||||||
|
|
||||||
|
/*
 * Kind of input handed to cuLinkAddData().
 * CU_JIT_NUM_INPUT_TYPES is implicit and evaluates to 5.
 */
typedef enum CUjitInputType_enum
{
    CU_JIT_INPUT_CUBIN     = 0,
    CU_JIT_INPUT_PTX       = 1,
    CU_JIT_INPUT_FATBINARY = 2,
    CU_JIT_INPUT_OBJECT    = 3,
    CU_JIT_INPUT_LIBRARY   = 4,
    CU_JIT_NUM_INPUT_TYPES
} CUjitInputType;
|
||||||
|
|
||||||
|
/*
 * 16-byte identifier filled in by cuDeviceGetUuid().  The
 * CU_UUID_HAS_BEEN_DEFINED guard skips this definition when another header
 * has already provided CUuuid.
 */
#ifndef CU_UUID_HAS_BEEN_DEFINED
#define CU_UUID_HAS_BEEN_DEFINED
typedef struct CUuuid_st {
    char bytes[16];
} CUuuid;
#endif
|
||||||
|
|
||||||
|
typedef struct CUDA_MEMCPY2D_st {
|
||||||
|
size_t srcXInBytes;
|
||||||
|
size_t srcY;
|
||||||
|
CUmemorytype srcMemoryType;
|
||||||
|
const void *srcHost;
|
||||||
|
CUdeviceptr srcDevice;
|
||||||
|
CUarray srcArray;
|
||||||
|
size_t srcPitch;
|
||||||
|
|
||||||
|
size_t dstXInBytes;
|
||||||
|
size_t dstY;
|
||||||
|
CUmemorytype dstMemoryType;
|
||||||
|
void *dstHost;
|
||||||
|
CUdeviceptr dstDevice;
|
||||||
|
CUarray dstArray;
|
||||||
|
size_t dstPitch;
|
||||||
|
|
||||||
|
size_t WidthInBytes;
|
||||||
|
size_t Height;
|
||||||
|
} CUDA_MEMCPY2D;
|
||||||
|
|
||||||
|
typedef struct CUDA_RESOURCE_DESC_st {
|
||||||
|
CUresourcetype resType;
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
CUarray hArray;
|
||||||
|
} array;
|
||||||
|
struct {
|
||||||
|
CUmipmappedArray hMipmappedArray;
|
||||||
|
} mipmap;
|
||||||
|
struct {
|
||||||
|
CUdeviceptr devPtr;
|
||||||
|
CUarray_format format;
|
||||||
|
unsigned int numChannels;
|
||||||
|
size_t sizeInBytes;
|
||||||
|
} linear;
|
||||||
|
struct {
|
||||||
|
CUdeviceptr devPtr;
|
||||||
|
CUarray_format format;
|
||||||
|
unsigned int numChannels;
|
||||||
|
size_t width;
|
||||||
|
size_t height;
|
||||||
|
size_t pitchInBytes;
|
||||||
|
} pitch2D;
|
||||||
|
struct {
|
||||||
|
int reserved[32];
|
||||||
|
} reserved;
|
||||||
|
} res;
|
||||||
|
unsigned int flags;
|
||||||
|
} CUDA_RESOURCE_DESC;
|
||||||
|
|
||||||
|
typedef struct CUDA_TEXTURE_DESC_st {
|
||||||
|
CUaddress_mode addressMode[3];
|
||||||
|
CUfilter_mode filterMode;
|
||||||
|
unsigned int flags;
|
||||||
|
unsigned int maxAnisotropy;
|
||||||
|
CUfilter_mode mipmapFilterMode;
|
||||||
|
float mipmapLevelBias;
|
||||||
|
float minMipmapLevelClamp;
|
||||||
|
float maxMipmapLevelClamp;
|
||||||
|
float borderColor[4];
|
||||||
|
int reserved[12];
|
||||||
|
} CUDA_TEXTURE_DESC;
|
||||||
|
|
||||||
|
/*
 * Unused type: declared incomplete on purpose, it only appears as a pointer
 * parameter of cuTexObjectCreate().
 */
typedef struct CUDA_RESOURCE_VIEW_DESC_st CUDA_RESOURCE_VIEW_DESC;

/* Minimal OpenGL scalar types needed by the GL interop prototypes below. */
typedef unsigned int GLenum;
typedef unsigned int GLuint;
|
||||||
|
|
||||||
|
/* Device-list selector passed to cuGLGetDevices(). */
typedef enum CUGLDeviceList_enum {
    CU_GL_DEVICE_LIST_ALL           = 1,
    CU_GL_DEVICE_LIST_CURRENT_FRAME = 2,
    CU_GL_DEVICE_LIST_NEXT_FRAME    = 3
} CUGLDeviceList;
|
||||||
|
|
||||||
|
typedef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
|
||||||
|
CUexternalMemoryHandleType type;
|
||||||
|
union {
|
||||||
|
int fd;
|
||||||
|
struct {
|
||||||
|
void *handle;
|
||||||
|
const void *name;
|
||||||
|
} win32;
|
||||||
|
} handle;
|
||||||
|
unsigned long long size;
|
||||||
|
unsigned int flags;
|
||||||
|
unsigned int reserved[16];
|
||||||
|
} CUDA_EXTERNAL_MEMORY_HANDLE_DESC;
|
||||||
|
|
||||||
|
/*
 * Range description passed to cuExternalMemoryGetMappedBuffer(): an offset
 * and a size, plus flags and reserved padding.
 */
typedef struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st {
    unsigned long long offset;
    unsigned long long size;
    unsigned int       flags;
    unsigned int       reserved[16];
} CUDA_EXTERNAL_MEMORY_BUFFER_DESC;
|
||||||
|
|
||||||
|
typedef struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
|
||||||
|
CUexternalSemaphoreHandleType type;
|
||||||
|
union {
|
||||||
|
int fd;
|
||||||
|
struct {
|
||||||
|
void *handle;
|
||||||
|
const void *name;
|
||||||
|
} win32;
|
||||||
|
} handle;
|
||||||
|
unsigned int flags;
|
||||||
|
unsigned int reserved[16];
|
||||||
|
} CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC;
|
||||||
|
|
||||||
|
/*
 * Per-semaphore parameters for cuSignalExternalSemaphoresAsync().
 * `params.fence.value` is the only named payload; both `reserved` arrays
 * are padding.
 */
typedef struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st {
    struct {
        struct {
            unsigned long long value;
        } fence;
        unsigned int reserved[16];
    } params;

    unsigned int flags;
    unsigned int reserved[16];
} CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS;

/* Wait parameters reuse the signal layout (used by cuWaitExternalSemaphoresAsync()). */
typedef CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS;
|
||||||
|
|
||||||
|
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st {
|
||||||
|
size_t Width;
|
||||||
|
size_t Height;
|
||||||
|
size_t Depth;
|
||||||
|
|
||||||
|
CUarray_format Format;
|
||||||
|
unsigned int NumChannels;
|
||||||
|
unsigned int Flags;
|
||||||
|
} CUDA_ARRAY3D_DESCRIPTOR;
|
||||||
|
|
||||||
|
typedef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st {
|
||||||
|
unsigned long long offset;
|
||||||
|
CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
|
||||||
|
unsigned int numLevels;
|
||||||
|
unsigned int reserved[16];
|
||||||
|
} CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC;
|
||||||
|
|
||||||
|
/* Flag constants for stream creation, event creation and texture reads. */
#define CU_STREAM_NON_BLOCKING  1
#define CU_EVENT_BLOCKING_SYNC  1
#define CU_EVENT_DISABLE_TIMING 2
#define CU_TRSF_READ_AS_INTEGER 1
|
||||||
|
|
||||||
|
typedef void CUDAAPI CUstreamCallback(CUstream hStream, CUresult status, void *userdata);
|
||||||
|
|
||||||
|
typedef CUresult CUDAAPI tcuInit(unsigned int Flags);
|
||||||
|
typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
|
||||||
|
typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
|
||||||
|
typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
|
||||||
|
typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
|
||||||
|
typedef CUresult CUDAAPI tcuDeviceGetUuid(CUuuid *uuid, CUdevice dev);
|
||||||
|
typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
|
||||||
|
typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
|
||||||
|
typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
|
||||||
|
typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext pctx);
|
||||||
|
typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx);
|
||||||
|
typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx);
|
||||||
|
typedef CUresult CUDAAPI tcuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
|
||||||
|
typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
|
||||||
|
typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
|
||||||
|
typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
|
||||||
|
typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t bytesize);
|
||||||
|
typedef CUresult CUDAAPI tcuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t bytesize, CUstream hStream);
|
||||||
|
typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
|
||||||
|
typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D *pcopy, CUstream hStream);
|
||||||
|
typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
|
||||||
|
typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr);
|
||||||
|
typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice *device);
|
||||||
|
|
||||||
|
typedef CUresult CUDAAPI tcuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev);
|
||||||
|
typedef CUresult CUDAAPI tcuDevicePrimaryCtxRelease(CUdevice dev);
|
||||||
|
typedef CUresult CUDAAPI tcuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags);
|
||||||
|
typedef CUresult CUDAAPI tcuDevicePrimaryCtxGetState(CUdevice dev, unsigned int *flags, int *active);
|
||||||
|
typedef CUresult CUDAAPI tcuDevicePrimaryCtxReset(CUdevice dev);
|
||||||
|
|
||||||
|
typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int flags);
|
||||||
|
typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
|
||||||
|
typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
|
||||||
|
typedef CUresult CUDAAPI tcuStreamDestroy_v2(CUstream hStream);
|
||||||
|
typedef CUresult CUDAAPI tcuStreamAddCallback(CUstream hStream, CUstreamCallback *callback, void *userdata, unsigned int flags);
|
||||||
|
typedef CUresult CUDAAPI tcuEventCreate(CUevent *phEvent, unsigned int flags);
|
||||||
|
typedef CUresult CUDAAPI tcuEventDestroy_v2(CUevent hEvent);
|
||||||
|
typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
|
||||||
|
typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
|
||||||
|
typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
|
||||||
|
|
||||||
|
typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void** kernelParams, void** extra);
|
||||||
|
typedef CUresult CUDAAPI tcuLinkCreate(unsigned int numOptions, CUjit_option* options, void** optionValues, CUlinkState* stateOut);
|
||||||
|
typedef CUresult CUDAAPI tcuLinkAddData(CUlinkState state, CUjitInputType type, void* data, size_t size, const char* name, unsigned int numOptions, CUjit_option* options, void** optionValues);
|
||||||
|
typedef CUresult CUDAAPI tcuLinkComplete(CUlinkState state, void** cubinOut, size_t* sizeOut);
|
||||||
|
typedef CUresult CUDAAPI tcuLinkDestroy(CUlinkState state);
|
||||||
|
typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule* module, const void* image);
|
||||||
|
typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod);
|
||||||
|
typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction* hfunc, CUmodule hmod, const char* name);
|
||||||
|
typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char* name);
|
||||||
|
typedef CUresult CUDAAPI tcuTexObjectCreate(CUtexObject* pTexObject, const CUDA_RESOURCE_DESC* pResDesc, const CUDA_TEXTURE_DESC* pTexDesc, const CUDA_RESOURCE_VIEW_DESC* pResViewDesc);
|
||||||
|
typedef CUresult CUDAAPI tcuTexObjectDestroy(CUtexObject texObject);
|
||||||
|
|
||||||
|
typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
|
||||||
|
typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags);
|
||||||
|
typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
|
||||||
|
typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
|
||||||
|
typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream);
|
||||||
|
typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
|
||||||
|
|
||||||
|
typedef CUresult CUDAAPI tcuImportExternalMemory(CUexternalMemory* extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC* memHandleDesc);
|
||||||
|
typedef CUresult CUDAAPI tcuDestroyExternalMemory(CUexternalMemory extMem);
|
||||||
|
typedef CUresult CUDAAPI tcuExternalMemoryGetMappedBuffer(CUdeviceptr* devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC* bufferDesc);
|
||||||
|
typedef CUresult CUDAAPI tcuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray* mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* mipmapDesc);
|
||||||
|
typedef CUresult CUDAAPI tcuMipmappedArrayGetLevel(CUarray* pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level);
|
||||||
|
typedef CUresult CUDAAPI tcuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
|
||||||
|
|
||||||
|
typedef CUresult CUDAAPI tcuImportExternalSemaphore(CUexternalSemaphore* extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* semHandleDesc);
|
||||||
|
typedef CUresult CUDAAPI tcuDestroyExternalSemaphore(CUexternalSemaphore extSem);
|
||||||
|
typedef CUresult CUDAAPI tcuSignalExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream);
|
||||||
|
typedef CUresult CUDAAPI tcuWaitExternalSemaphoresAsync(const CUexternalSemaphore* extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray, unsigned int numExtSems, CUstream stream);
|
||||||
|
#endif
|
@ -0,0 +1,340 @@
|
|||||||
|
/*
|
||||||
|
* This copyright notice applies to this header file only:
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the software, and to permit persons to whom the
|
||||||
|
* software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef FFNV_CUDA_DYNLINK_LOADER_H
|
||||||
|
#define FFNV_CUDA_DYNLINK_LOADER_H
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "dynlink_cuda.h"
|
||||||
|
#include "nvEncodeAPI.h"
|
||||||
|
|
||||||
|
/*
 * <windows.h> is only needed when at least one of the Win32 defaults below
 * (loader function, symbol lookup or handle type) is going to be used.
 */
#if defined(_WIN32) && (!defined(FFNV_LOAD_FUNC) || !defined(FFNV_SYM_FUNC) || !defined(FFNV_LIB_HANDLE))
# include <windows.h>
#endif

/*
 * Type of the library handle stored in the function tables.  May be
 * pre-defined by the includer; defaults to HMODULE on Windows and void*
 * elsewhere.
 */
#ifndef FFNV_LIB_HANDLE
# if defined(_WIN32)
#  define FFNV_LIB_HANDLE HMODULE
# else
#  define FFNV_LIB_HANDLE void*
# endif
#endif
|
||||||
|
|
||||||
|
/*
 * File names of the shared libraries to load.  On Windows/Cygwin the NVENC
 * library name additionally depends on whether the build is 64-bit.
 */
#if defined(_WIN32) || defined(__CYGWIN__)
# define CUDA_LIBNAME "nvcuda.dll"
# define NVCUVID_LIBNAME "nvcuvid.dll"
# if defined(_WIN64) || defined(__CYGWIN64__)
#  define NVENC_LIBNAME "nvEncodeAPI64.dll"
# else
#  define NVENC_LIBNAME "nvEncodeAPI.dll"
# endif
#else
# define CUDA_LIBNAME "libcuda.so.1"
# define NVCUVID_LIBNAME "libnvcuvid.so.1"
# define NVENC_LIBNAME "libnvidia-encode.so.1"
#endif
|
||||||
|
|
||||||
|
/*
 * Library loading primitives.  The includer may pre-define FFNV_LOAD_FUNC,
 * FFNV_SYM_FUNC and FFNV_FREE_FUNC; otherwise the platform defaults
 * (LoadLibrary/GetProcAddress/FreeLibrary or dlopen/dlsym/dlclose) are used.
 *
 * On Windows the default FFNV_LOAD_FUNC wraps its argument in TEXT(), so
 * `path` has to be a string literal there.
 */
#if !defined(FFNV_LOAD_FUNC) || !defined(FFNV_SYM_FUNC)
# ifdef _WIN32
#  define FFNV_LOAD_FUNC(path) LoadLibrary(TEXT(path))
#  define FFNV_SYM_FUNC(lib, sym) GetProcAddress((lib), (sym))
#  define FFNV_FREE_FUNC(lib) FreeLibrary(lib)
# else
#  include <dlfcn.h>
#  define FFNV_LOAD_FUNC(path) dlopen((path), RTLD_LAZY)
#  define FFNV_SYM_FUNC(lib, sym) dlsym((lib), (sym))
#  define FFNV_FREE_FUNC(lib) dlclose(lib)
# endif
#endif

/*
 * The defaults above are skipped as a group when the includer supplies both
 * FFNV_LOAD_FUNC and FFNV_SYM_FUNC.  In that case FFNV_FREE_FUNC must be
 * supplied too, otherwise GENERIC_FREE_FUNC() would expand to a call of an
 * undeclared identifier; fail early with a readable message instead.
 */
#ifndef FFNV_FREE_FUNC
# error "FFNV_FREE_FUNC must be defined when FFNV_LOAD_FUNC and FFNV_SYM_FUNC are overridden"
#endif
|
||||||
|
|
||||||
|
/*
 * Logging hooks.  Each macro is defaulted independently, so an includer may
 * override just one of them without triggering a macro redefinition.
 *
 * Defaults: errors go to stderr via fprintf(); debug messages are discarded.
 * Both defaults ignore `logctx`.  The default FFNV_LOG_FUNC needs at least
 * one argument after `msg`, because __VA_ARGS__ follows a comma.
 */
#ifndef FFNV_LOG_FUNC
# include <stdio.h>
# define FFNV_LOG_FUNC(logctx, msg, ...) fprintf(stderr, (msg), __VA_ARGS__)
#endif

#ifndef FFNV_DEBUG_LOG_FUNC
# define FFNV_DEBUG_LOG_FUNC(logctx, msg, ...)
#endif
|
||||||
|
|
||||||
|
/*
 * LOAD_LIBRARY(l, path): load `path` with FFNV_LOAD_FUNC and store the
 * handle in `l`.
 *
 * On failure it logs through FFNV_LOG_FUNC, sets `ret` to -1 and jumps to
 * the `error` label.  The expansion site therefore has to provide an
 * `int ret` and an `error:` label.  The identifier `logctx` is handed to the
 * log macros; the default log macros drop it, but a custom one that uses it
 * needs `logctx` in scope.
 */
#define LOAD_LIBRARY(l, path)                                  \
    do {                                                       \
        if (!((l) = FFNV_LOAD_FUNC(path))) {                   \
            FFNV_LOG_FUNC(logctx, "Cannot load %s\n", path);   \
            ret = -1;                                          \
            goto error;                                        \
        }                                                      \
        FFNV_DEBUG_LOG_FUNC(logctx, "Loaded lib: %s\n", path); \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * LOAD_SYMBOL(fun, tp, symbol): resolve the mandatory `symbol` from f->lib
 * and store it, cast to `tp *`, in f->fun.
 *
 * A missing symbol is an error: it is logged, `ret` is set to -1 and
 * control jumps to `error`.  Requires `f`, `ret` and the `error` label at
 * the expansion site (plus `logctx` if a custom log macro uses it).
 */
#define LOAD_SYMBOL(fun, tp, symbol)                             \
    do {                                                         \
        if (!((f->fun) = (tp*)FFNV_SYM_FUNC(f->lib, symbol))) {  \
            FFNV_LOG_FUNC(logctx, "Cannot load %s\n", symbol);   \
            ret = -1;                                            \
            goto error;                                          \
        }                                                        \
        FFNV_DEBUG_LOG_FUNC(logctx, "Loaded sym: %s\n", symbol); \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * LOAD_SYMBOL_OPT(fun, tp, symbol): like LOAD_SYMBOL, but for optional
 * entry points.  A missing symbol only produces a debug message and leaves
 * f->fun NULL; loading continues.  Callers must check the pointer before
 * using it.
 */
#define LOAD_SYMBOL_OPT(fun, tp, symbol)                                      \
    do {                                                                      \
        if (!((f->fun) = (tp*)FFNV_SYM_FUNC(f->lib, symbol))) {               \
            FFNV_DEBUG_LOG_FUNC(logctx, "Cannot load optional %s\n", symbol); \
        } else {                                                              \
            FFNV_DEBUG_LOG_FUNC(logctx, "Loaded sym: %s\n", symbol);          \
        }                                                                     \
    } while (0)
|
||||||
|
|
||||||
|
/*
 * GENERIC_LOAD_FUNC_PREAMBLE(T, n, N): opening part of an
 * `int n_load_functions(T **functions, ...)` body.
 *
 *   T - function table type (must have a `lib` member)
 *   n - prefix of the matching n_free_functions() helper
 *   N - library name passed to LOAD_LIBRARY
 *
 * It declares `f` and `ret`, releases any table already stored in
 * *functions, allocates a zeroed replacement and loads the library.
 * Because the old table is freed first, *functions must be NULL or a valid
 * table on entry.  A NULL `functions` is rejected up front: the free helper
 * tolerates it, but the assignment through *functions below would not.
 */
#define GENERIC_LOAD_FUNC_PREAMBLE(T, n, N)     \
    T *f;                                       \
    int ret;                                    \
                                                \
    if (!functions)                             \
        return -1;                              \
                                                \
    n##_free_functions(functions);              \
                                                \
    f = *functions = (T*)calloc(1, sizeof(*f)); \
    if (!f)                                     \
        return -1;                              \
                                                \
    LOAD_LIBRARY(f->lib, N);
|
||||||
|
|
||||||
|
/*
 * GENERIC_LOAD_FUNC_FINALE(n): closing part of a load function.
 *
 * The success path returns 0.  It also provides the `error` label targeted
 * by LOAD_LIBRARY/LOAD_SYMBOL: the partially filled table is released via
 * n_free_functions() and the stored `ret` is returned.
 */
#define GENERIC_LOAD_FUNC_FINALE(n) \
    return 0;                       \
error:                              \
    n##_free_functions(functions);  \
    return ret;
|
||||||
|
|
||||||
|
/*
 * GENERIC_FREE_FUNC(): body shared by the n_free_functions() helpers.
 *
 * Does nothing for a NULL `functions`.  Otherwise it unloads the library if
 * one was loaded, frees the table and resets *functions to NULL, which makes
 * repeated calls harmless.  Note that the expansion contains a `return`, so
 * it may only be used inside a function returning void.
 */
#define GENERIC_FREE_FUNC()                \
    if (!functions)                        \
        return;                            \
    if (*functions && (*functions)->lib)   \
        FFNV_FREE_FUNC((*functions)->lib); \
    free(*functions);                      \
    *functions = NULL;
|
||||||
|
|
||||||
|
/*
 * Table of dynamically resolved CUDA driver entry points.
 *
 * Members are pointers to the tcuXxx function types; `lib` holds the handle
 * of the loaded library.  The full definition is only available when this
 * project's dynlink_cuda.h supplied those types (FFNV_DYNLINK_CUDA_H);
 * otherwise CudaFunctions is left as an incomplete type.
 */
#ifdef FFNV_DYNLINK_CUDA_H
typedef struct CudaFunctions {
    /* Initialisation, devices, contexts, memory, error strings */
    tcuInit *cuInit;
    tcuDeviceGetCount *cuDeviceGetCount;
    tcuDeviceGet *cuDeviceGet;
    tcuDeviceGetAttribute *cuDeviceGetAttribute;
    tcuDeviceGetName *cuDeviceGetName;
    tcuDeviceGetUuid *cuDeviceGetUuid;
    tcuDeviceComputeCapability *cuDeviceComputeCapability;
    tcuCtxCreate_v2 *cuCtxCreate;
    tcuCtxSetLimit *cuCtxSetLimit;
    tcuCtxPushCurrent_v2 *cuCtxPushCurrent;
    tcuCtxPopCurrent_v2 *cuCtxPopCurrent;
    tcuCtxDestroy_v2 *cuCtxDestroy;
    tcuMemAlloc_v2 *cuMemAlloc;
    tcuMemAllocPitch_v2 *cuMemAllocPitch;
    tcuMemsetD8Async *cuMemsetD8Async;
    tcuMemFree_v2 *cuMemFree;
    tcuMemcpy *cuMemcpy;
    tcuMemcpyAsync *cuMemcpyAsync;
    tcuMemcpy2D_v2 *cuMemcpy2D;
    tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync;
    tcuGetErrorName *cuGetErrorName;
    tcuGetErrorString *cuGetErrorString;
    tcuCtxGetDevice *cuCtxGetDevice;

    /* Primary context */
    tcuDevicePrimaryCtxRetain *cuDevicePrimaryCtxRetain;
    tcuDevicePrimaryCtxRelease *cuDevicePrimaryCtxRelease;
    tcuDevicePrimaryCtxSetFlags *cuDevicePrimaryCtxSetFlags;
    tcuDevicePrimaryCtxGetState *cuDevicePrimaryCtxGetState;
    tcuDevicePrimaryCtxReset *cuDevicePrimaryCtxReset;

    /* Streams and events */
    tcuStreamCreate *cuStreamCreate;
    tcuStreamQuery *cuStreamQuery;
    tcuStreamSynchronize *cuStreamSynchronize;
    tcuStreamDestroy_v2 *cuStreamDestroy;
    tcuStreamAddCallback *cuStreamAddCallback;
    tcuEventCreate *cuEventCreate;
    tcuEventDestroy_v2 *cuEventDestroy;
    tcuEventSynchronize *cuEventSynchronize;
    tcuEventQuery *cuEventQuery;
    tcuEventRecord *cuEventRecord;

    /* Kernel launch, linking, modules, texture objects */
    tcuLaunchKernel *cuLaunchKernel;
    tcuLinkCreate *cuLinkCreate;
    tcuLinkAddData *cuLinkAddData;
    tcuLinkComplete *cuLinkComplete;
    tcuLinkDestroy *cuLinkDestroy;
    tcuModuleLoadData *cuModuleLoadData;
    tcuModuleUnload *cuModuleUnload;
    tcuModuleGetFunction *cuModuleGetFunction;
    tcuModuleGetGlobal *cuModuleGetGlobal;
    tcuTexObjectCreate *cuTexObjectCreate;
    tcuTexObjectDestroy *cuTexObjectDestroy;

    /* OpenGL interop */
    tcuGLGetDevices_v2 *cuGLGetDevices;
    tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage;
    tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource;
    tcuGraphicsMapResources *cuGraphicsMapResources;
    tcuGraphicsUnmapResources *cuGraphicsUnmapResources;
    tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;

    /* External memory */
    tcuImportExternalMemory *cuImportExternalMemory;
    tcuDestroyExternalMemory *cuDestroyExternalMemory;
    tcuExternalMemoryGetMappedBuffer *cuExternalMemoryGetMappedBuffer;
    tcuExternalMemoryGetMappedMipmappedArray *cuExternalMemoryGetMappedMipmappedArray;
    tcuMipmappedArrayDestroy *cuMipmappedArrayDestroy;

    tcuMipmappedArrayGetLevel *cuMipmappedArrayGetLevel;

    /* External semaphores */
    tcuImportExternalSemaphore *cuImportExternalSemaphore;
    tcuDestroyExternalSemaphore *cuDestroyExternalSemaphore;
    tcuSignalExternalSemaphoresAsync *cuSignalExternalSemaphoresAsync;
    tcuWaitExternalSemaphoresAsync *cuWaitExternalSemaphoresAsync;

    /* Handle of the loaded library; released by cuda_free_functions() */
    FFNV_LIB_HANDLE lib;
} CudaFunctions;
#else
typedef struct CudaFunctions CudaFunctions;
#endif
|
||||||
|
|
||||||
|
typedef NVENCSTATUS NVENCAPI tNvEncodeAPICreateInstance(NV_ENCODE_API_FUNCTION_LIST *functionList);
|
||||||
|
typedef NVENCSTATUS NVENCAPI tNvEncodeAPIGetMaxSupportedVersion(uint32_t* version);
|
||||||
|
|
||||||
|
typedef struct NvencFunctions {
|
||||||
|
tNvEncodeAPICreateInstance *NvEncodeAPICreateInstance;
|
||||||
|
tNvEncodeAPIGetMaxSupportedVersion *NvEncodeAPIGetMaxSupportedVersion;
|
||||||
|
|
||||||
|
FFNV_LIB_HANDLE lib;
|
||||||
|
} NvencFunctions;
|
||||||
|
|
||||||
|
#ifdef FFNV_DYNLINK_CUDA_H
/**
 * Release a CUDA function table.
 *
 * Unloads the library if one was loaded, frees the table and sets
 * *functions to NULL.  A NULL `functions` and a NULL *functions are both
 * accepted, so the call is safe to repeat.
 *
 * @param functions address of the table pointer to release
 */
static inline void cuda_free_functions(CudaFunctions **functions)
{
    GENERIC_FREE_FUNC();
}
#endif
|
||||||
|
|
||||||
|
/**
 * Release an NVENC function table.
 *
 * Unloads the library if one was loaded, frees the table and sets
 * *functions to NULL.  A NULL `functions` and a NULL *functions are both
 * accepted, so the call is safe to repeat.
 *
 * @param functions address of the table pointer to release
 */
static inline void nvenc_free_functions(NvencFunctions **functions)
{
    GENERIC_FREE_FUNC();
}
|
||||||
|
|
||||||
|
#ifdef FFNV_DYNLINK_CUDA_H
|
||||||
|
/*
 * Load the CUDA driver library (CUDA_LIBNAME) and resolve the driver API
 * entry points into a CudaFunctions table stored in *functions.
 *
 * The preamble/finale and LOAD_SYMBOL* macros are defined earlier in this
 * header. NOTE(review): presumably LOAD_SYMBOL fails the whole load when a
 * symbol is missing while LOAD_SYMBOL_OPT leaves the pointer unset - confirm
 * against the macro definitions. Callers in this tree treat a negative
 * return value as failure.
 */
static inline int cuda_load_functions(CudaFunctions **functions)
{
    GENERIC_LOAD_FUNC_PREAMBLE(CudaFunctions, cuda, CUDA_LIBNAME);

    /* Device, context and memory management. */
    LOAD_SYMBOL(cuInit, tcuInit, "cuInit");
    LOAD_SYMBOL(cuDeviceGetCount, tcuDeviceGetCount, "cuDeviceGetCount");
    LOAD_SYMBOL(cuDeviceGet, tcuDeviceGet, "cuDeviceGet");
    LOAD_SYMBOL(cuDeviceGetAttribute, tcuDeviceGetAttribute, "cuDeviceGetAttribute");
    LOAD_SYMBOL(cuDeviceGetName, tcuDeviceGetName, "cuDeviceGetName");
    LOAD_SYMBOL(cuDeviceComputeCapability, tcuDeviceComputeCapability, "cuDeviceComputeCapability");
    LOAD_SYMBOL(cuCtxCreate, tcuCtxCreate_v2, "cuCtxCreate_v2");
    LOAD_SYMBOL(cuCtxSetLimit, tcuCtxSetLimit, "cuCtxSetLimit");
    LOAD_SYMBOL(cuCtxPushCurrent, tcuCtxPushCurrent_v2, "cuCtxPushCurrent_v2");
    LOAD_SYMBOL(cuCtxPopCurrent, tcuCtxPopCurrent_v2, "cuCtxPopCurrent_v2");
    LOAD_SYMBOL(cuCtxDestroy, tcuCtxDestroy_v2, "cuCtxDestroy_v2");
    LOAD_SYMBOL(cuMemAlloc, tcuMemAlloc_v2, "cuMemAlloc_v2");
    LOAD_SYMBOL(cuMemAllocPitch, tcuMemAllocPitch_v2, "cuMemAllocPitch_v2");
    LOAD_SYMBOL(cuMemsetD8Async, tcuMemsetD8Async, "cuMemsetD8Async");
    LOAD_SYMBOL(cuMemFree, tcuMemFree_v2, "cuMemFree_v2");
    LOAD_SYMBOL(cuMemcpy, tcuMemcpy, "cuMemcpy");
    LOAD_SYMBOL(cuMemcpyAsync, tcuMemcpyAsync, "cuMemcpyAsync");
    LOAD_SYMBOL(cuMemcpy2D, tcuMemcpy2D_v2, "cuMemcpy2D_v2");
    LOAD_SYMBOL(cuMemcpy2DAsync, tcuMemcpy2DAsync_v2, "cuMemcpy2DAsync_v2");
    LOAD_SYMBOL(cuGetErrorName, tcuGetErrorName, "cuGetErrorName");
    LOAD_SYMBOL(cuGetErrorString, tcuGetErrorString, "cuGetErrorString");
    LOAD_SYMBOL(cuCtxGetDevice, tcuCtxGetDevice, "cuCtxGetDevice");

    /* Primary context handling. */
    LOAD_SYMBOL(cuDevicePrimaryCtxRetain, tcuDevicePrimaryCtxRetain, "cuDevicePrimaryCtxRetain");
    LOAD_SYMBOL(cuDevicePrimaryCtxRelease, tcuDevicePrimaryCtxRelease, "cuDevicePrimaryCtxRelease");
    LOAD_SYMBOL(cuDevicePrimaryCtxSetFlags, tcuDevicePrimaryCtxSetFlags, "cuDevicePrimaryCtxSetFlags");
    LOAD_SYMBOL(cuDevicePrimaryCtxGetState, tcuDevicePrimaryCtxGetState, "cuDevicePrimaryCtxGetState");
    LOAD_SYMBOL(cuDevicePrimaryCtxReset, tcuDevicePrimaryCtxReset, "cuDevicePrimaryCtxReset");

    /* Streams and events. */
    LOAD_SYMBOL(cuStreamCreate, tcuStreamCreate, "cuStreamCreate");
    LOAD_SYMBOL(cuStreamQuery, tcuStreamQuery, "cuStreamQuery");
    LOAD_SYMBOL(cuStreamSynchronize, tcuStreamSynchronize, "cuStreamSynchronize");
    LOAD_SYMBOL(cuStreamDestroy, tcuStreamDestroy_v2, "cuStreamDestroy_v2");
    LOAD_SYMBOL(cuStreamAddCallback, tcuStreamAddCallback, "cuStreamAddCallback");
    LOAD_SYMBOL(cuEventCreate, tcuEventCreate, "cuEventCreate");
    LOAD_SYMBOL(cuEventDestroy, tcuEventDestroy_v2, "cuEventDestroy_v2");
    LOAD_SYMBOL(cuEventSynchronize, tcuEventSynchronize, "cuEventSynchronize");
    LOAD_SYMBOL(cuEventQuery, tcuEventQuery, "cuEventQuery");
    LOAD_SYMBOL(cuEventRecord, tcuEventRecord, "cuEventRecord");

    /* Kernel launch, linker, modules and texture objects. */
    LOAD_SYMBOL(cuLaunchKernel, tcuLaunchKernel, "cuLaunchKernel");
    LOAD_SYMBOL(cuLinkCreate, tcuLinkCreate, "cuLinkCreate");
    LOAD_SYMBOL(cuLinkAddData, tcuLinkAddData, "cuLinkAddData");
    LOAD_SYMBOL(cuLinkComplete, tcuLinkComplete, "cuLinkComplete");
    LOAD_SYMBOL(cuLinkDestroy, tcuLinkDestroy, "cuLinkDestroy");
    LOAD_SYMBOL(cuModuleLoadData, tcuModuleLoadData, "cuModuleLoadData");
    LOAD_SYMBOL(cuModuleUnload, tcuModuleUnload, "cuModuleUnload");
    LOAD_SYMBOL(cuModuleGetFunction, tcuModuleGetFunction, "cuModuleGetFunction");
    LOAD_SYMBOL(cuModuleGetGlobal, tcuModuleGetGlobal, "cuModuleGetGlobal");
    LOAD_SYMBOL(cuTexObjectCreate, tcuTexObjectCreate, "cuTexObjectCreate");
    LOAD_SYMBOL(cuTexObjectDestroy, tcuTexObjectDestroy, "cuTexObjectDestroy");

    /* OpenGL / graphics resource interop. */
    LOAD_SYMBOL(cuGLGetDevices, tcuGLGetDevices_v2, "cuGLGetDevices_v2");
    LOAD_SYMBOL(cuGraphicsGLRegisterImage, tcuGraphicsGLRegisterImage, "cuGraphicsGLRegisterImage");
    LOAD_SYMBOL(cuGraphicsUnregisterResource, tcuGraphicsUnregisterResource, "cuGraphicsUnregisterResource");
    LOAD_SYMBOL(cuGraphicsMapResources, tcuGraphicsMapResources, "cuGraphicsMapResources");
    LOAD_SYMBOL(cuGraphicsUnmapResources, tcuGraphicsUnmapResources, "cuGraphicsUnmapResources");
    LOAD_SYMBOL(cuGraphicsSubResourceGetMappedArray, tcuGraphicsSubResourceGetMappedArray, "cuGraphicsSubResourceGetMappedArray");

    /* Loaded through the _OPT variant: device UUID and external memory. */
    LOAD_SYMBOL_OPT(cuDeviceGetUuid, tcuDeviceGetUuid, "cuDeviceGetUuid");
    LOAD_SYMBOL_OPT(cuImportExternalMemory, tcuImportExternalMemory, "cuImportExternalMemory");
    LOAD_SYMBOL_OPT(cuDestroyExternalMemory, tcuDestroyExternalMemory, "cuDestroyExternalMemory");
    LOAD_SYMBOL_OPT(cuExternalMemoryGetMappedBuffer, tcuExternalMemoryGetMappedBuffer, "cuExternalMemoryGetMappedBuffer");
    LOAD_SYMBOL_OPT(cuExternalMemoryGetMappedMipmappedArray, tcuExternalMemoryGetMappedMipmappedArray, "cuExternalMemoryGetMappedMipmappedArray");
    LOAD_SYMBOL_OPT(cuMipmappedArrayGetLevel, tcuMipmappedArrayGetLevel, "cuMipmappedArrayGetLevel");
    LOAD_SYMBOL_OPT(cuMipmappedArrayDestroy, tcuMipmappedArrayDestroy, "cuMipmappedArrayDestroy");

    /* Loaded through the _OPT variant: external semaphores. */
    LOAD_SYMBOL_OPT(cuImportExternalSemaphore, tcuImportExternalSemaphore, "cuImportExternalSemaphore");
    LOAD_SYMBOL_OPT(cuDestroyExternalSemaphore, tcuDestroyExternalSemaphore, "cuDestroyExternalSemaphore");
    LOAD_SYMBOL_OPT(cuSignalExternalSemaphoresAsync, tcuSignalExternalSemaphoresAsync, "cuSignalExternalSemaphoresAsync");
    LOAD_SYMBOL_OPT(cuWaitExternalSemaphoresAsync, tcuWaitExternalSemaphoresAsync, "cuWaitExternalSemaphoresAsync");

    GENERIC_LOAD_FUNC_FINALE(cuda);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Load the NVENC library (NVENC_LIBNAME) and resolve its two exported entry
 * points into an NvencFunctions table stored in *functions.
 *
 * The preamble/finale and LOAD_SYMBOL macros are defined earlier in this
 * header. Callers in this tree treat a negative return value as failure.
 */
static inline int nvenc_load_functions(NvencFunctions **functions)
{
    GENERIC_LOAD_FUNC_PREAMBLE(NvencFunctions, nvenc, NVENC_LIBNAME);

    LOAD_SYMBOL(NvEncodeAPICreateInstance, tNvEncodeAPICreateInstance, "NvEncodeAPICreateInstance");
    LOAD_SYMBOL(NvEncodeAPIGetMaxSupportedVersion, tNvEncodeAPIGetMaxSupportedVersion, "NvEncodeAPIGetMaxSupportedVersion");

    GENERIC_LOAD_FUNC_FINALE(nvenc);
}
|
||||||
|
|
||||||
|
/*
 * Drop the helper macros so they do not leak into files including this
 * header. LOAD_SYMBOL_OPT is used by cuda_load_functions() and is therefore
 * removed together with LOAD_SYMBOL (#undef of an undefined name is a no-op).
 */
#undef GENERIC_LOAD_FUNC_PREAMBLE
#undef LOAD_LIBRARY
#undef LOAD_SYMBOL
#undef LOAD_SYMBOL_OPT
#undef GENERIC_LOAD_FUNC_FINALE
#undef GENERIC_FREE_FUNC
#undef CUDA_LIBNAME
#undef NVCUVID_LIBNAME
#undef NVENC_LIBNAME
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,453 @@
|
|||||||
|
/*
|
||||||
|
* H.264/HEVC hardware encoding using nvidia nvenc
|
||||||
|
* Copyright (c) 2016 Timo Rothenpieler <timo@rothenpieler.org>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <rfb/LogWriter.h>
|
||||||
|
|
||||||
|
#include <dlfcn.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "nvidia.h"
|
||||||
|
|
||||||
|
using namespace rfb;
|
||||||
|
|
||||||
|
static LogWriter vlog("nvidia");
|
||||||
|
|
||||||
|
#define FFNV_LOG_FUNC(logctx, msg, ...) vlog.info((msg), __VA_ARGS__)
|
||||||
|
#define FFNV_DEBUG_LOG_FUNC(logctx, msg, ...)
|
||||||
|
|
||||||
|
#include "dynlink_loader.h"
|
||||||
|
|
||||||
|
#define NUM_SURF 4
|
||||||
|
|
||||||
|
typedef struct NvencSurface
|
||||||
|
{
|
||||||
|
NV_ENC_INPUT_PTR input_surface;
|
||||||
|
int reg_idx;
|
||||||
|
int width;
|
||||||
|
int height;
|
||||||
|
int pitch;
|
||||||
|
|
||||||
|
NV_ENC_OUTPUT_PTR output_surface;
|
||||||
|
NV_ENC_BUFFER_FORMAT format;
|
||||||
|
} NvencSurface;
|
||||||
|
|
||||||
|
typedef struct NvencDynLoadFunctions
|
||||||
|
{
|
||||||
|
CudaFunctions *cuda_dl;
|
||||||
|
NvencFunctions *nvenc_dl;
|
||||||
|
|
||||||
|
void *nvenc_ctx;
|
||||||
|
NV_ENCODE_API_FUNCTION_LIST nvenc_funcs;
|
||||||
|
|
||||||
|
NV_ENC_INITIALIZE_PARAMS init_enc_parms;
|
||||||
|
NV_ENC_CONFIG enc_cfg;
|
||||||
|
CUdevice cu_dev;
|
||||||
|
CUcontext cu_ctx;
|
||||||
|
|
||||||
|
NvencSurface surf[NUM_SURF];
|
||||||
|
uint8_t cursurf;
|
||||||
|
} NvencDynLoadFunctions;
|
||||||
|
|
||||||
|
static NvencDynLoadFunctions nvenc;
|
||||||
|
|
||||||
|
/*
|
||||||
|
Recommended settings for streaming
|
||||||
|
Low-Latency High Quality preset
|
||||||
|
Rate control mode = Two-pass CBR
|
||||||
|
Very low VBV buffer size (Single frame)
|
||||||
|
No B Frames
|
||||||
|
Infinite GOP length
|
||||||
|
Adaptive Quantization enabled
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int loadfuncs() {
|
||||||
|
int ret;
|
||||||
|
NVENCSTATUS err;
|
||||||
|
uint32_t nvenc_max_ver;
|
||||||
|
|
||||||
|
ret = cuda_load_functions(&nvenc.cuda_dl);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
ret = nvenc_load_functions(&nvenc.nvenc_dl);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
err = nvenc.nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(&nvenc_max_ver);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
vlog.info("Loaded nvenc version %u.%u", nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
|
||||||
|
|
||||||
|
if ((NVENCAPI_MAJOR_VERSION << 4 | NVENCAPI_MINOR_VERSION) > nvenc_max_ver) {
|
||||||
|
vlog.error("Your Nvidia driver is too old. Nvenc %u.%u required",
|
||||||
|
NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
nvenc.nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
|
||||||
|
|
||||||
|
err = nvenc.nvenc_dl->NvEncodeAPICreateInstance(&nvenc.nvenc_funcs);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Query one encoder capability for the codec selected in
 * nvenc.init_enc_parms.encodeGUID.
 *
 * Returns the value reported by nvEncGetEncodeCaps, or 0 when the query
 * fails.
 */
static int nvenc_check_cap(NV_ENC_CAPS cap) {
	NV_ENC_CAPS_PARAM params;
	int val = 0;

	memset(&params, 0, sizeof(NV_ENC_CAPS_PARAM));
	params.version = NV_ENC_CAPS_PARAM_VER;
	params.capsToQuery = cap;

	const NVENCSTATUS err =
		nvenc.nvenc_funcs.nvEncGetEncodeCaps(nvenc.nvenc_ctx,
						     nvenc.init_enc_parms.encodeGUID,
						     &params, &val);

	return err == NV_ENC_SUCCESS ? val : 0;
}
|
||||||
|
|
||||||
|
static int setupdevice() {
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
nvenc.init_enc_parms.encodeGUID = NV_ENC_CODEC_H264_GUID;
|
||||||
|
nvenc.init_enc_parms.presetGUID = NV_ENC_PRESET_P7_GUID;
|
||||||
|
|
||||||
|
ret = nvenc.cuda_dl->cuInit(0);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
ret = nvenc.cuda_dl->cuDeviceGet(&nvenc.cu_dev, 0);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
ret = nvenc.cuda_dl->cuCtxCreate(&nvenc.cu_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
|
||||||
|
nvenc.cu_dev);
|
||||||
|
if (ret < 0)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
CUcontext dummy;
|
||||||
|
nvenc.cuda_dl->cuCtxPopCurrent(&dummy);
|
||||||
|
|
||||||
|
// cuda stream is NULL to use the default
|
||||||
|
|
||||||
|
// open session
|
||||||
|
NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS params;
|
||||||
|
memset(¶ms, 0, sizeof(NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS));
|
||||||
|
NVENCSTATUS err;
|
||||||
|
|
||||||
|
params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
|
||||||
|
params.apiVersion = NVENCAPI_VERSION;
|
||||||
|
params.device = nvenc.cu_ctx;
|
||||||
|
params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
|
||||||
|
|
||||||
|
err = nvenc.nvenc_funcs.nvEncOpenEncodeSessionEx(¶ms, &nvenc.nvenc_ctx);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// check caps
|
||||||
|
const int maxw = nvenc_check_cap(NV_ENC_CAPS_WIDTH_MAX);
|
||||||
|
const int maxh = nvenc_check_cap(NV_ENC_CAPS_HEIGHT_MAX);
|
||||||
|
const int minw = nvenc_check_cap(NV_ENC_CAPS_WIDTH_MIN);
|
||||||
|
const int minh = nvenc_check_cap(NV_ENC_CAPS_HEIGHT_MIN);
|
||||||
|
|
||||||
|
vlog.info("Max enc resolution %ux%u, min %ux%u\n", maxw, maxh, minw, minh);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int setupenc(const unsigned w, const unsigned h, const unsigned kbps,
|
||||||
|
const unsigned fps) {
|
||||||
|
NVENCSTATUS err;
|
||||||
|
|
||||||
|
nvenc.enc_cfg.version = NV_ENC_CONFIG_VER;
|
||||||
|
nvenc.init_enc_parms.version = NV_ENC_INITIALIZE_PARAMS_VER;
|
||||||
|
nvenc.init_enc_parms.darWidth =
|
||||||
|
nvenc.init_enc_parms.encodeWidth = w;
|
||||||
|
nvenc.init_enc_parms.darHeight =
|
||||||
|
nvenc.init_enc_parms.encodeHeight = h;
|
||||||
|
|
||||||
|
nvenc.init_enc_parms.frameRateNum = fps;
|
||||||
|
nvenc.init_enc_parms.frameRateDen = 1;
|
||||||
|
|
||||||
|
nvenc.init_enc_parms.encodeConfig = &nvenc.enc_cfg;
|
||||||
|
nvenc.init_enc_parms.tuningInfo = NV_ENC_TUNING_INFO_LOW_LATENCY;
|
||||||
|
|
||||||
|
NV_ENC_PRESET_CONFIG preset_cfg;
|
||||||
|
memset(&preset_cfg, 0, sizeof(NV_ENC_PRESET_CONFIG));
|
||||||
|
|
||||||
|
preset_cfg.version = NV_ENC_PRESET_CONFIG_VER;
|
||||||
|
preset_cfg.presetCfg.version = NV_ENC_CONFIG_VER;
|
||||||
|
|
||||||
|
err = nvenc.nvenc_funcs.nvEncGetEncodePresetConfigEx(nvenc.nvenc_ctx,
|
||||||
|
nvenc.init_enc_parms.encodeGUID,
|
||||||
|
nvenc.init_enc_parms.presetGUID,
|
||||||
|
nvenc.init_enc_parms.tuningInfo,
|
||||||
|
&preset_cfg);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
memcpy(&nvenc.enc_cfg, &preset_cfg.presetCfg, sizeof(nvenc.enc_cfg));
|
||||||
|
|
||||||
|
nvenc.enc_cfg.version = NV_ENC_CONFIG_VER;
|
||||||
|
|
||||||
|
nvenc.init_enc_parms.enableEncodeAsync = 0;
|
||||||
|
nvenc.init_enc_parms.enablePTD = 1;
|
||||||
|
|
||||||
|
nvenc.enc_cfg.frameIntervalP = 0;
|
||||||
|
nvenc.enc_cfg.gopLength = 1;
|
||||||
|
|
||||||
|
// use 4 surfaces
|
||||||
|
|
||||||
|
// setup rate control
|
||||||
|
nvenc.enc_cfg.rcParams.multiPass = NV_ENC_TWO_PASS_FULL_RESOLUTION;
|
||||||
|
nvenc.enc_cfg.rcParams.averageBitRate = kbps * 1024;
|
||||||
|
nvenc.enc_cfg.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
|
||||||
|
nvenc.enc_cfg.rcParams.lowDelayKeyFrameScale = 1;
|
||||||
|
|
||||||
|
nvenc.enc_cfg.rcParams.enableAQ = 1;
|
||||||
|
nvenc.enc_cfg.rcParams.aqStrength = 4; // 1 - 15, 0 would be auto
|
||||||
|
|
||||||
|
nvenc.enc_cfg.frameFieldMode = NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME;
|
||||||
|
|
||||||
|
// setup_codec_config
|
||||||
|
nvenc.enc_cfg.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag = 1;
|
||||||
|
nvenc.enc_cfg.encodeCodecConfig.h264Config.outputBufferingPeriodSEI = 1;
|
||||||
|
nvenc.enc_cfg.encodeCodecConfig.h264Config.adaptiveTransformMode = NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE;
|
||||||
|
nvenc.enc_cfg.encodeCodecConfig.h264Config.fmoMode = NV_ENC_H264_FMO_DISABLE;
|
||||||
|
nvenc.enc_cfg.profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
|
||||||
|
|
||||||
|
nvenc.cuda_dl->cuCtxPushCurrent(nvenc.cu_ctx);
|
||||||
|
|
||||||
|
err = nvenc.nvenc_funcs.nvEncInitializeEncoder(nvenc.nvenc_ctx,
|
||||||
|
&nvenc.init_enc_parms);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
// custream?
|
||||||
|
|
||||||
|
CUcontext dummy;
|
||||||
|
nvenc.cuda_dl->cuCtxPopCurrent(&dummy);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int setupsurf(const unsigned w, const unsigned h) {
|
||||||
|
|
||||||
|
nvenc.cuda_dl->cuCtxPushCurrent(nvenc.cu_ctx);
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < NUM_SURF; i++) {
|
||||||
|
NVENCSTATUS err;
|
||||||
|
NV_ENC_CREATE_BITSTREAM_BUFFER allocOut;
|
||||||
|
memset(&allocOut, 0, sizeof(NV_ENC_CREATE_BITSTREAM_BUFFER));
|
||||||
|
allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
|
||||||
|
|
||||||
|
NV_ENC_CREATE_INPUT_BUFFER allocSurf;
|
||||||
|
memset(&allocSurf, 0, sizeof(NV_ENC_CREATE_INPUT_BUFFER));
|
||||||
|
|
||||||
|
nvenc.surf[i].format = NV_ENC_BUFFER_FORMAT_ABGR; // doesn't have RGBA!
|
||||||
|
allocSurf.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
|
||||||
|
allocSurf.width = w;
|
||||||
|
allocSurf.height = h;
|
||||||
|
allocSurf.bufferFmt = nvenc.surf[i].format;
|
||||||
|
|
||||||
|
err = nvenc.nvenc_funcs.nvEncCreateInputBuffer(nvenc.nvenc_ctx, &allocSurf);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
nvenc.surf[i].input_surface = allocSurf.inputBuffer;
|
||||||
|
nvenc.surf[i].width = allocSurf.width;
|
||||||
|
nvenc.surf[i].height = allocSurf.height;
|
||||||
|
|
||||||
|
// output
|
||||||
|
err = nvenc.nvenc_funcs.nvEncCreateBitstreamBuffer(nvenc.nvenc_ctx, &allocOut);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
nvenc.surf[i].output_surface = allocOut.bitstreamBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
CUcontext dummy;
|
||||||
|
nvenc.cuda_dl->cuCtxPopCurrent(&dummy);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nvenc_frame(const uint8_t *data, unsigned pts, uint8_t *out, uint32_t &outlen) {
|
||||||
|
NVENCSTATUS err;
|
||||||
|
|
||||||
|
NV_ENC_PIC_PARAMS params;
|
||||||
|
memset(¶ms, 0, sizeof(NV_ENC_PIC_PARAMS));
|
||||||
|
params.version = NV_ENC_PIC_PARAMS_VER;
|
||||||
|
params.encodePicFlags = NV_ENC_PIC_FLAG_FORCEINTRA | NV_ENC_PIC_FLAG_OUTPUT_SPSPPS;
|
||||||
|
|
||||||
|
nvenc.cuda_dl->cuCtxPushCurrent(nvenc.cu_ctx);
|
||||||
|
|
||||||
|
NV_ENC_LOCK_INPUT_BUFFER lockBufferParams;
|
||||||
|
memset(&lockBufferParams, 0, sizeof(NV_ENC_LOCK_INPUT_BUFFER));
|
||||||
|
lockBufferParams.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
|
||||||
|
lockBufferParams.inputBuffer = nvenc.surf[nvenc.cursurf].input_surface;
|
||||||
|
|
||||||
|
err = nvenc.nvenc_funcs.nvEncLockInputBuffer(nvenc.nvenc_ctx, &lockBufferParams);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
nvenc.surf[nvenc.cursurf].pitch = lockBufferParams.pitch;
|
||||||
|
vlog.info("pitch %u\n", lockBufferParams.pitch);
|
||||||
|
|
||||||
|
// copy frame
|
||||||
|
unsigned y;
|
||||||
|
uint8_t *dst = (uint8_t *) lockBufferParams.bufferDataPtr;
|
||||||
|
const unsigned linelen = nvenc.surf[nvenc.cursurf].width * 4;
|
||||||
|
for (y = 0; y < (unsigned) nvenc.surf[nvenc.cursurf].height; y++) {
|
||||||
|
memcpy(dst, data, linelen);
|
||||||
|
data += linelen;
|
||||||
|
dst += lockBufferParams.pitch;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = nvenc.nvenc_funcs.nvEncUnlockInputBuffer(nvenc.nvenc_ctx,
|
||||||
|
nvenc.surf[nvenc.cursurf].input_surface);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
CUcontext dummy;
|
||||||
|
nvenc.cuda_dl->cuCtxPopCurrent(&dummy);
|
||||||
|
|
||||||
|
params.inputBuffer = nvenc.surf[nvenc.cursurf].input_surface;
|
||||||
|
params.bufferFmt = nvenc.surf[nvenc.cursurf].format;
|
||||||
|
params.inputWidth = nvenc.surf[nvenc.cursurf].width;
|
||||||
|
params.inputHeight = nvenc.surf[nvenc.cursurf].height;
|
||||||
|
params.inputPitch = nvenc.surf[nvenc.cursurf].pitch;
|
||||||
|
params.outputBitstream = nvenc.surf[nvenc.cursurf].output_surface;
|
||||||
|
params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
|
||||||
|
params.inputTimeStamp = pts;
|
||||||
|
|
||||||
|
nvenc.cuda_dl->cuCtxPushCurrent(nvenc.cu_ctx);
|
||||||
|
|
||||||
|
err = nvenc.nvenc_funcs.nvEncEncodePicture(nvenc.nvenc_ctx, ¶ms);
|
||||||
|
|
||||||
|
nvenc.cuda_dl->cuCtxPopCurrent(&dummy);
|
||||||
|
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
|
||||||
|
nvenc.cuda_dl->cuCtxPushCurrent(nvenc.cu_ctx);
|
||||||
|
|
||||||
|
// Get output
|
||||||
|
NV_ENC_LOCK_BITSTREAM lock_params;
|
||||||
|
memset(&lock_params, 0, sizeof(NV_ENC_LOCK_BITSTREAM));
|
||||||
|
|
||||||
|
lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
|
||||||
|
lock_params.doNotWait = 0;
|
||||||
|
lock_params.outputBitstream = nvenc.surf[nvenc.cursurf].output_surface;
|
||||||
|
// lock_params.sliceOffsets = slice_offsets; TODO?
|
||||||
|
|
||||||
|
err = nvenc.nvenc_funcs.nvEncLockBitstream(nvenc.nvenc_ctx, &lock_params);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
memcpy(out, lock_params.bitstreamBufferPtr, lock_params.bitstreamSizeInBytes);
|
||||||
|
outlen = lock_params.bitstreamSizeInBytes;
|
||||||
|
|
||||||
|
err = nvenc.nvenc_funcs.nvEncUnlockBitstream(nvenc.nvenc_ctx,
|
||||||
|
nvenc.surf[nvenc.cursurf].output_surface);
|
||||||
|
if (err != NV_ENC_SUCCESS)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
nvenc.cuda_dl->cuCtxPopCurrent(&dummy);
|
||||||
|
|
||||||
|
vlog.info("Pic type %x, idr %x i %x\n", lock_params.pictureType, NV_ENC_PIC_TYPE_IDR,
|
||||||
|
NV_ENC_PIC_TYPE_I);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void unload() {
|
||||||
|
NV_ENC_PIC_PARAMS params;
|
||||||
|
memset(¶ms, 0, sizeof(NV_ENC_PIC_PARAMS));
|
||||||
|
params.version = NV_ENC_PIC_PARAMS_VER;
|
||||||
|
params.encodePicFlags = NV_ENC_PIC_FLAG_EOS;
|
||||||
|
|
||||||
|
nvenc.cuda_dl->cuCtxPushCurrent(nvenc.cu_ctx);
|
||||||
|
|
||||||
|
nvenc.nvenc_funcs.nvEncEncodePicture(nvenc.nvenc_ctx, ¶ms);
|
||||||
|
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < NUM_SURF; i++) {
|
||||||
|
nvenc.nvenc_funcs.nvEncDestroyInputBuffer(nvenc.nvenc_ctx,
|
||||||
|
nvenc.surf[i].input_surface);
|
||||||
|
nvenc.nvenc_funcs.nvEncDestroyBitstreamBuffer(nvenc.nvenc_ctx,
|
||||||
|
nvenc.surf[i].output_surface);
|
||||||
|
}
|
||||||
|
|
||||||
|
nvenc.nvenc_funcs.nvEncDestroyEncoder(nvenc.nvenc_ctx);
|
||||||
|
|
||||||
|
CUcontext dummy;
|
||||||
|
nvenc.cuda_dl->cuCtxPopCurrent(&dummy);
|
||||||
|
|
||||||
|
nvenc.cuda_dl->cuCtxDestroy(nvenc.cu_ctx);
|
||||||
|
|
||||||
|
nvenc_free_functions(&nvenc.nvenc_dl);
|
||||||
|
cuda_free_functions(&nvenc.cuda_dl);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
int main() {
|
||||||
|
|
||||||
|
unsigned w = 256, h = 256, kbps = 400, fps = 15;
|
||||||
|
|
||||||
|
memset(&nvenc, 0, sizeof(NvencDynLoadFunctions));
|
||||||
|
if (loadfuncs() < 0)
|
||||||
|
return 1;
|
||||||
|
if (setupdevice() < 0)
|
||||||
|
return 1;
|
||||||
|
if (setupenc(w, h, kbps, fps) < 0)
|
||||||
|
return 1;
|
||||||
|
if (setupsurf(w, h) < 0)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
unload();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
int nvidia_init(const unsigned w, const unsigned h, const unsigned kbps,
|
||||||
|
const unsigned fps) {
|
||||||
|
|
||||||
|
memset(&nvenc, 0, sizeof(NvencDynLoadFunctions));
|
||||||
|
if (loadfuncs() < 0)
|
||||||
|
return 1;
|
||||||
|
if (setupdevice() < 0)
|
||||||
|
return 1;
|
||||||
|
if (setupenc(w, h, kbps, fps) < 0)
|
||||||
|
return 1;
|
||||||
|
if (setupsurf(w, h) < 0)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
@ -0,0 +1,10 @@
|
|||||||
|
#ifndef KASM_NVIDIA_H
|
||||||
|
#define KASM_NVIDIA_H
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
int nvidia_init(const unsigned w, const unsigned h, const unsigned kbps,
|
||||||
|
const unsigned fps);
|
||||||
|
int nvenc_frame(const uint8_t *data, unsigned pts, uint8_t *out, uint32_t &outlen);
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in New Issue