介绍

FFmpeg已经提供对 VideoToolBox 的编解码支持；主要涉及到的文件有videotoolbox.c、videotoolbox.h、videotoolboxenc.c、ffmpeg_videotoolbox.c。
在编译 FFmpeg 源码时，想要支持 VideoToolBox，需要在执行 configure 时加上 --enable-videotoolbox 选项。
通过命令行 ffmpeg -hwaccels 可以查看当前构建支持哪些硬件加速方式；可用的硬件编码器可以通过 ffmpeg -encoders 查看。
ffmpeg 支持 videotoolbox h264 和 h265 的编码，即 h264_videotoolbox、hevc_videotoolbox。

FFmpeg

FFmpeg 是一个可以处理音视频的软件，功能非常强大，主要包括，编解码转换，封装格式转换，滤镜特效。
FFmpeg支持各种网络协议，支持 RTMP ，RTSP，HLS 等高层协议的推拉流，也支持更底层的TCP/UDP 协议推拉流。
FFmpeg 可以在 Windows，Linux，Mac，iOS，Android等操作系统上运行。
FFmpeg 是 " Fast Forward mpeg " 的缩写；
FFmpeg 从功能上划分为几个模块，分别为核心工具（libavutil）、媒体格式（libavformat）、编解码（libavcodec）、设备（libavdevice）和后处理（libavfilter, libswscale, libpostproc），分别负责提供公用的功能函数、实现多媒体文件的读包和写包、完成音视频的编解码、管理音视频设备的操作以及进行音视频后处理。

VideoToolBox

VideoToolBox是一个优化的视频编解码器框架，由苹果公司开发并针对iOS和macOS平台进行优化，作为现代移动应用程序中不可或缺的组成部分之一，它被用于H.264解码和编码，HEVC解码和编码，以及MPEG-2解码和编码，同时还支持对Core Audio和Core Video的访问。
VideoToolBox的优点是高效性、易用性；在iOS和macOS设备上，它的编解码速度比其他框架要快得多；此外，它为开发人员提供了各种功能，包括修改视频帧速率，更改编码格式等等。

FFmpeg 硬编码 VideoToolBox 流程

可以看出，FFmpeg 与 VideoToolBox之间的交互，主要通过三个函数指针 init、encode2、close 来完成；
从整体流程分析，VideoToolBox 的工作流程是：
创建一个压缩会话；
添加会话属性；
编码视频帧、接受视频编码回调；
强制完成一些或者全部未处理的视频帧；
释放压缩会话、释放内存资源。
init模块核心函数是 vtenc_configure_encode()；
encode2模块核心函数是vtenc_send_frame()；
close 模块的核心函数是VTCompressionSessionCompleteFrames()；

h264_videotoolbox

VideoToolBox 的h264硬编码通过三个结构体h264_options、h264_videotoolbox_class、ff_h264_videotoolbox_encoder来完成与 FFmpeg 的交互。
h264_options主要涉及的是内部参数，例如 profile、level、熵编码选择等。
h264_videotoolbox_class来定义 h264的私有类，指定编码类型和编码参数。
ff_h264_videotoolbox_encoder是具体的对外与 FFmpeg 的交互结构体，完成h264硬编码。

static const AVOption h264_options[] = {{ "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = H264_PROF_AUTO }, H264_PROF_AUTO, H264_PROF_COUNT, VE, "profile" },{ "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_BASELINE }, INT_MIN, INT_MAX, VE, "profile" },{ "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_MAIN     }, INT_MIN, INT_MAX, VE, "profile" },{ "high",     "High Profile",     0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_HIGH     }, INT_MIN, INT_MAX, VE, "profile" },{ "extended", "Extend Profile",   0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_EXTENDED }, INT_MIN, INT_MAX, VE, "profile" },{ "level", "Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, VE, "level" },{ "1.3", "Level 1.3, only available with Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, INT_MIN, INT_MAX, VE, "level" },{ "3.0", "Level 3.0", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, INT_MIN, INT_MAX, VE, "level" },{ "3.1", "Level 3.1", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, INT_MIN, INT_MAX, VE, "level" },{ "3.2", "Level 3.2", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, INT_MIN, INT_MAX, VE, "level" },{ "4.0", "Level 4.0", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, INT_MIN, INT_MAX, VE, "level" },{ "4.1", "Level 4.1", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, INT_MIN, INT_MAX, VE, "level" },{ "4.2", "Level 4.2", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, INT_MIN, INT_MAX, VE, "level" },{ "5.0", "Level 5.0", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, INT_MIN, INT_MAX, VE, "level" },{ "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, "level" },{ "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, "level" },{ "coder", "Entropy coding", OFFSET(entropy), AV_OPT_TYPE_INT, { .i64 = VT_ENTROPY_NOT_SET }, VT_ENTROPY_NOT_SET, VT_CABAC, VE, "coder" },{ "cavlc", "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },{ "vlc",   "CAVLC entropy coding", 0, AV_OPT_TYPE_CONST, { 
.i64 = VT_CAVLC }, INT_MIN, INT_MAX, VE, "coder" },{ "cabac", "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },{ "ac",    "CABAC entropy coding", 0, AV_OPT_TYPE_CONST, { .i64 = VT_CABAC }, INT_MIN, INT_MAX, VE, "coder" },{ "a53cc", "Use A53 Closed Captions (if available)", OFFSET(a53_cc), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, VE },COMMON_OPTIONS{ NULL },
};static const AVClass h264_videotoolbox_class = {.class_name = "h264_videotoolbox",.item_name  = av_default_item_name,.option     = h264_options,.version    = LIBAVUTIL_VERSION_INT,
};AVCodec ff_h264_videotoolbox_encoder = {.name             = "h264_videotoolbox",.long_name        = NULL_IF_CONFIG_SMALL("VideoToolbox H.264 Encoder"),.type             = AVMEDIA_TYPE_VIDEO,.id               = AV_CODEC_ID_H264,.priv_data_size   = sizeof(VTEncContext),.pix_fmts         = avc_pix_fmts,.init             = vtenc_init,.encode2          = vtenc_frame,.close            = vtenc_close,.capabilities     = AV_CODEC_CAP_DELAY,.priv_class       = &h264_videotoolbox_class,.caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |FF_CODEC_CAP_INIT_CLEANUP,
};

hevc_videotoolbox

VideoToolBox 的HEVC硬编码通过三个结构体hevc_options、hevc_videotoolbox_class、ff_hevc_videotoolbox_encoder来完成与 FFmpeg 的交互。
hevc_options主要涉及的是内部参数，例如 profile的选择。
hevc_videotoolbox_class来定义 HEVC的私有类，指定编码类型和编码参数。
ff_hevc_videotoolbox_encoder是具体的对外与 FFmpeg 的交互结构体，完成HEVC硬编码。

static const AVOption hevc_options[] = {{ "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = HEVC_PROF_AUTO }, HEVC_PROF_AUTO, HEVC_PROF_COUNT, VE, "profile" },{ "main",     "Main Profile",     0, AV_OPT_TYPE_CONST, { .i64 = HEVC_PROF_MAIN   }, INT_MIN, INT_MAX, VE, "profile" },{ "main10",   "Main10 Profile",   0, AV_OPT_TYPE_CONST, { .i64 = HEVC_PROF_MAIN10 }, INT_MIN, INT_MAX, VE, "profile" },COMMON_OPTIONS{ NULL },
};static const AVClass hevc_videotoolbox_class = {.class_name = "hevc_videotoolbox",.item_name  = av_default_item_name,.option     = hevc_options,.version    = LIBAVUTIL_VERSION_INT,
};AVCodec ff_hevc_videotoolbox_encoder = {.name             = "hevc_videotoolbox",.long_name        = NULL_IF_CONFIG_SMALL("VideoToolbox H.265 Encoder"),.type             = AVMEDIA_TYPE_VIDEO,.id               = AV_CODEC_ID_HEVC,.priv_data_size   = sizeof(VTEncContext),.pix_fmts         = hevc_pix_fmts,.init             = vtenc_init,.encode2          = vtenc_frame,.close            = vtenc_close,.capabilities     = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,.priv_class       = &hevc_videotoolbox_class,.caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |FF_CODEC_CAP_INIT_CLEANUP,.wrapper_name     = "videotoolbox",
};

核心模块介绍

.init

.init模块完成初始化工作，对应的函数是vtenc_init()；函数内部主要完成了线程同步对象（互斥锁、条件变量）的初始化、配置编码器、检索属性以及 B 帧的相关处理。

/**
 * Encoder .init callback.
 *
 * Loads the VideoToolbox symbols once, initialises the mutex and condition
 * variable stored in the private context, configures the encoder via
 * vtenc_configure_encoder(), and then queries the session for
 * AllowFrameReordering to learn whether B-frames will be produced.
 *
 * @param avctx codec context; avctx->priv_data is the VTEncContext
 * @return 0 on success, otherwise the status of vtenc_configure_encoder()
 */
static av_cold int vtenc_init(AVCodecContext *avctx)
{
    VTEncContext    *vtctx = avctx->priv_data;
    /* Start from NULL so the value is defined even if the query below
     * does not write its output. */
    CFBooleanRef    has_b_frames_cfbool = NULL;
    int             status;

    pthread_once(&once_ctrl, loadVTEncSymbols);

    pthread_mutex_init(&vtctx->lock, NULL);
    pthread_cond_init(&vtctx->cv_sample_sent, NULL);

    vtctx->session = NULL;
    status = vtenc_configure_encoder(avctx);
    if (status) return status;

    status = VTSessionCopyProperty(vtctx->session,
                                   kVTCompressionPropertyKey_AllowFrameReordering,
                                   kCFAllocatorDefault,
                                   &has_b_frames_cfbool);

    /* A failed query is not fatal: the value chosen during configuration
     * is kept in that case. */
    if (!status && has_b_frames_cfbool) {
        //Some devices don't output B-frames for main profile, even if requested.
        vtctx->has_b_frames = CFBooleanGetValue(has_b_frames_cfbool);
        CFRelease(has_b_frames_cfbool);
    }
    avctx->has_b_frames = vtctx->has_b_frames;

    return 0;
}

vtenc_configure_encoder()函数是 init 模块的核心函数，主要完成编码器的配置工作；根据编码器类型（h264/HEVC）来配置 profile、level、熵编码等信息；此外还会选择裁剪信息、传递函数、YCbCr 矩阵、颜色原色以及额外信息；最后调用vtenc_create_encoder()完成编码器的创建；

/**
 * Translate the codec context settings into a VideoToolbox encoder
 * configuration and create the compression session.
 *
 * Steps, in order:
 *  - map the AVCodecID to a CMVideoCodecType;
 *  - pick the parameter-set getter and the profile/level string for
 *    H.264 or HEVC (downgrading B-frame / CABAC requests that conflict
 *    with the baseline profile);
 *  - build the encoder specification dictionary (hardware/software policy,
 *    non-iPhone targets only);
 *  - build the pixel buffer attributes unless frames already arrive as
 *    AV_PIX_FMT_VIDEOTOOLBOX;
 *  - fetch transfer function, YCbCr matrix and colour primaries;
 *  - optionally populate extradata, then create the session.
 *
 * @param avctx codec context; avctx->priv_data is the VTEncContext
 * @return 0 on success, AVERROR(EINVAL)/AVERROR(ENOMEM) on local failures,
 *         or the status returned by the helper that failed
 */
static int vtenc_configure_encoder(AVCodecContext *avctx)
{
    CFMutableDictionaryRef enc_info;
    /* Must start as NULL: init_cleanup tests it, and it is reached when
     * create_cv_pixel_buffer_info() fails, in which case nothing visible
     * here guarantees the helper wrote its output. */
    CFMutableDictionaryRef pixel_buffer_info = NULL;
    CMVideoCodecType       codec_type;
    VTEncContext           *vtctx = avctx->priv_data;
    CFStringRef            profile_level;
    CFNumberRef            gamma_level = NULL;
    int                    status;

    codec_type = get_cm_codec_type(avctx->codec_id);
    if (!codec_type) {
        av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
        return AVERROR(EINVAL);
    }

    vtctx->codec_id = avctx->codec_id;

    if (vtctx->codec_id == AV_CODEC_ID_H264) {
        vtctx->get_param_set_func = CMVideoFormatDescriptionGetH264ParameterSetAtIndex;

        vtctx->has_b_frames = avctx->max_b_frames > 0;
        if (vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE) {
            av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
            vtctx->has_b_frames = false;
        }

        if (vtctx->entropy == VT_CABAC && vtctx->profile == H264_PROF_BASELINE) {
            av_log(avctx, AV_LOG_WARNING, "CABAC entropy requires 'main' or 'high' profile, but baseline was requested. Encode will not use CABAC entropy.\n");
            vtctx->entropy = VT_ENTROPY_NOT_SET;
        }

        if (!get_vt_h264_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
    } else {
        /* The HEVC getter is looked up at runtime and may be unavailable. */
        vtctx->get_param_set_func = compat_keys.CMVideoFormatDescriptionGetHEVCParameterSetAtIndex;
        if (!vtctx->get_param_set_func) return AVERROR(EINVAL);
        if (!get_vt_hevc_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);
    }

    enc_info = CFDictionaryCreateMutable(kCFAllocatorDefault,
                                         20,
                                         &kCFCopyStringDictionaryKeyCallBacks,
                                         &kCFTypeDictionaryValueCallBacks);
    if (!enc_info) return AVERROR(ENOMEM);

#if !TARGET_OS_IPHONE
    /* Hardware/software policy: forced software, hardware required, or
     * hardware preferred with software fallback. */
    if (vtctx->require_sw) {
        CFDictionarySetValue(enc_info,
                             compat_keys.kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
                             kCFBooleanFalse);
    } else if (!vtctx->allow_sw) {
        CFDictionarySetValue(enc_info,
                             compat_keys.kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
                             kCFBooleanTrue);
    } else {
        CFDictionarySetValue(enc_info,
                             compat_keys.kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder,
                             kCFBooleanTrue);
    }
#endif

    /* Frames that already are VideoToolbox surfaces need no pixel buffer
     * attributes; pixel_buffer_info then stays NULL. */
    if (avctx->pix_fmt != AV_PIX_FMT_VIDEOTOOLBOX) {
        status = create_cv_pixel_buffer_info(avctx, &pixel_buffer_info);
        if (status)
            goto init_cleanup;
    }

    /* -1 marks the DTS offset as not yet known; vtenc_frame() fills it in
     * once the second frame has been submitted. */
    vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;

    get_cv_transfer_function(avctx, &vtctx->transfer_function, &gamma_level);
    get_cv_ycbcr_matrix(avctx, &vtctx->ycbcr_matrix);
    get_cv_color_primaries(avctx, &vtctx->color_primaries);

    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
        status = vtenc_populate_extradata(avctx,
                                          codec_type,
                                          profile_level,
                                          gamma_level,
                                          enc_info,
                                          pixel_buffer_info);
        if (status)
            goto init_cleanup;
    }

    status = vtenc_create_encoder(avctx,
                                  codec_type,
                                  profile_level,
                                  gamma_level,
                                  enc_info,
                                  pixel_buffer_info,
                                  &vtctx->session);

init_cleanup:
    /* CFRelease() must not be passed NULL, hence the guards. */
    if (gamma_level)
        CFRelease(gamma_level);

    if (pixel_buffer_info)
        CFRelease(pixel_buffer_info);

    CFRelease(enc_info);

    return status;
}

vtenc_create_encoder()完成编码器创建工作；调用VTCompressionSessionCreate()创建压缩会话实例，接着会创建码率/码控等各类对象，并配置相应属性；最后，（可选）调用VTCompressionSessionPrepareToEncodeFrames()完成编码前的合理资源分配。

.encode2

.encode2模块完成具体的编码工作，对应的函数是 vtenc_frame()；先判断传入的 AVFrame 是否为空：不为空就调用vtenc_send_frame()把该帧送入编码器，为空则调用 VTCompressionSessionCompleteFrames() 执行一次 flush；然后调用 vtenc_q_pop()取出已编码的 CMSampleBuffer 及其对应的 SEI；最后利用vtenc_cm_to_avpacket()把它转换成 AVPacket，得到数据包信息，如 SEI、pts、dts 等。

/**
 * Encoder .encode2 callback: submit one frame (or flush) and, when one is
 * available, return one encoded packet.
 *
 * This runs once per frame, so it is deliberately not marked av_cold.
 *
 * @param avctx      codec context; avctx->priv_data is the VTEncContext
 * @param pkt        packet that receives the encoded data
 * @param frame      frame to encode, or NULL to flush the encoder
 * @param got_packet set to 1 when pkt was filled, 0 otherwise
 * @return 0 on success (with or without a packet), a negative AVERROR
 *         code on failure; pkt is unreferenced whenever no packet is
 *         returned
 */
static int vtenc_frame(AVCodecContext *avctx,
                       AVPacket       *pkt,
                       const AVFrame  *frame,
                       int            *got_packet)
{
    VTEncContext *vtctx = avctx->priv_data;
    bool get_frame;
    int status;
    CMSampleBufferRef buf = NULL;
    ExtraSEI *sei = NULL;

    /* Written first so every early exit reports "no packet". */
    *got_packet = 0;

    if (frame) {
        status = vtenc_send_frame(avctx, vtctx, frame);

        if (status) {
            status = AVERROR_EXTERNAL;
            goto end_nopkt;
        }

        /* With B-frames, the PTS distance between the first two input
         * frames becomes the DTS offset. */
        if (vtctx->frame_ct_in == 0) {
            vtctx->first_pts = frame->pts;
        } else if (vtctx->frame_ct_in == 1 && vtctx->has_b_frames) {
            vtctx->dts_delta = frame->pts - vtctx->first_pts;
        }

        vtctx->frame_ct_in++;
    } else if (!vtctx->flushing) {
        /* First NULL frame: ask the session to emit everything pending. */
        vtctx->flushing = true;

        status = VTCompressionSessionCompleteFrames(vtctx->session,
                                                    kCMTimeIndefinite);

        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error flushing frames: %d\n", status);
            status = AVERROR_EXTERNAL;
            goto end_nopkt;
        }
    }

    /* While dts_delta is still unknown (< 0) no packet is output, unless
     * this call is a flush. */
    get_frame = vtctx->dts_delta >= 0 || !frame;
    if (!get_frame) {
        status = 0;
        goto end_nopkt;
    }

    status = vtenc_q_pop(vtctx, !frame, &buf, &sei);
    if (status) goto end_nopkt;
    if (!buf)   goto end_nopkt;

    status = vtenc_cm_to_avpacket(avctx, buf, pkt, sei);
    if (sei) {
        /* av_free() accepts NULL, so sei->data needs no separate check. */
        av_free(sei->data);
        av_free(sei);
    }
    CFRelease(buf);
    if (status) goto end_nopkt;

    *got_packet = 1;
    return 0;

end_nopkt:
    av_packet_unref(pkt);
    return status;
}

vtenc_send_frame()完成编码核心工作；内部主要调用 VideoToolBox 的核心 API函数VTCompressionSessionEncodeFrame()完成具体的编码工作。

/**
 * Hand one frame to the VideoToolbox compression session.
 *
 * Wraps the frame in a CVPixelBuffer, builds the per-frame property
 * dictionary, optionally attaches A53 closed captions as an ExtraSEI, and
 * calls VTCompressionSessionEncodeFrame().
 *
 * @param avctx codec context (time base, logging)
 * @param vtctx encoder private context (session, a53_cc switch)
 * @param frame frame to encode
 * @return 0 on success; the helper's status if the pixel buffer or the
 *         dictionary could not be created; AVERROR_EXTERNAL if the
 *         session rejected the frame
 */
static int vtenc_send_frame(AVCodecContext *avctx,
                            VTEncContext   *vtctx,
                            const AVFrame  *frame)
{
    CMTime pts_time;
    CFDictionaryRef frame_props;
    CVPixelBufferRef pixbuf = NULL;
    AVFrameSideData *cc_data = NULL;
    ExtraSEI *cc_sei = NULL;
    int err;

    err = create_cv_pixel_buffer(avctx, frame, &pixbuf);
    if (err != 0)
        return err;

    err = create_encoder_dict_h264(frame, &frame_props);
    if (err != 0) {
        CFRelease(pixbuf);
        return err;
    }

    /* Closed captions are best effort: on any failure the frame is still
     * encoded, just without the SEI. */
    cc_data = av_frame_get_side_data(frame, AV_FRAME_DATA_A53_CC);
    if (vtctx->a53_cc && cc_data && cc_data->size) {
        cc_sei = av_mallocz(sizeof(*cc_sei));
        if (cc_sei) {
            if (ff_alloc_a53_sei(frame, 0, &cc_sei->data, &cc_sei->size) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
                av_free(cc_sei);
                cc_sei = NULL;
            }
        } else {
            av_log(avctx, AV_LOG_ERROR, "Not enough memory for closed captions, skipping\n");
        }
    }

    pts_time = CMTimeMake(frame->pts * avctx->time_base.num, avctx->time_base.den);

    /* cc_sei is passed as the per-frame reference value of the call. */
    err = VTCompressionSessionEncodeFrame(vtctx->session,
                                          pixbuf,
                                          pts_time,
                                          kCMTimeInvalid,
                                          frame_props,
                                          cc_sei,
                                          NULL);

    /* The local references are dropped whether or not encoding worked. */
    if (frame_props)
        CFRelease(frame_props);
    CFRelease(pixbuf);

    if (err != 0) {
        av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", err);
        return AVERROR_EXTERNAL;
    }

    return 0;
}

.close

.close 模块完成关闭回收工作，对应的函数是 vtenc_close()；内部主要进行线程同步对象（互斥锁、条件变量）的销毁、强制完成一些或全部未处理的视频帧、清除帧队列、释放资源的工作。

/**
 * Encoder .close callback: drain the session, destroy the synchronisation
 * objects and release the CoreFoundation objects held by the context.
 *
 * The mutex and condition variable are destroyed only AFTER the session
 * has been drained and the frame queue cleared. Destroying them first
 * would leave those two steps operating on destroyed objects if they take
 * the lock or signal the condition.
 * NOTE(review): that they do so is presumed from the init/pop code, since
 * neither the output callback nor clear_frame_queue() is visible here.
 *
 * @param avctx codec context; avctx->priv_data is the VTEncContext
 * @return always 0
 */
static av_cold int vtenc_close(AVCodecContext *avctx)
{
    VTEncContext *vtctx = avctx->priv_data;

    if (vtctx->session) {
        /* Force out every pending frame, then drop what is still queued. */
        VTCompressionSessionCompleteFrames(vtctx->session,
                                           kCMTimeIndefinite);
        clear_frame_queue(vtctx);
    }

    pthread_cond_destroy(&vtctx->cv_sample_sent);
    pthread_mutex_destroy(&vtctx->lock);

    /* No session: as before, nothing else is released on this path. */
    if (!vtctx->session) return 0;

    CFRelease(vtctx->session);
    vtctx->session = NULL;

    if (vtctx->color_primaries) {
        CFRelease(vtctx->color_primaries);
        vtctx->color_primaries = NULL;
    }

    if (vtctx->transfer_function) {
        CFRelease(vtctx->transfer_function);
        vtctx->transfer_function = NULL;
    }

    if (vtctx->ycbcr_matrix) {
        CFRelease(vtctx->ycbcr_matrix);
        vtctx->ycbcr_matrix = NULL;
    }

    return 0;
}