iOS音视频开发-音频硬编码-AudioToolbox-PCMToAAC

之前几篇文章记录了视频的软、硬编码过程,接下来将记录下音频的软、硬编码过程,学习、工作之余,以免忘记。
视频编码地址:
iOS音视频开发-视频会话捕捉
iOS音视频开发-视频硬编码(H264)
iOS音视频开发-视频软编码(x264编码H.264文件)
iOS音视频开发-视频软编码(FFmpeg+x264编码H.264文件)

PCM数据
PCM(Pulse Code Modulation)也被称为脉冲编码调制。PCM音频数据是未经压缩的音频采样数据裸流,它是由模拟信号经过采样、量化、编码转换成的标准的数字音频数据。
移动端对音频的实时采集编码传输,一般为将采集的音频数据设置为PCM格式数据,然后将PCM编码为AAC格式数据,以便后续传输。
PCM的数据格式,这里有篇文章介绍的很好,后续代码中的采样率、声道等均参考此文章设置。
PCM数据格式文章地址:点这里

ADTS
ADTS全称是Audio Data Transport Stream(音频数据传输流),是AAC的一种十分常见的传输格式。
将PCM数据编码为AAC的时候需要将每帧的AAC数据添加ADTS header,否则将无法解码播放。
ADTS数据格式分为两部分:
固定头部:adts_fixed_header
可变头部:adts_variable_header
详见Wiki,地址在文章末尾。

主要代码
1、音频捕获代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#import "BBAudioCapture.h"
#import <AVFoundation/AVFoundation.h>
#import "BBAudioConfig.h"
#import "BBAudioHardEncoder.h"
#import "BBAudioHardEncoder.h"

// Class extension: private state for the audio capture pipeline.
@interface BBAudioCapture ()
{
// RemoteIO audio unit instance that delivers microphone PCM buffers.
AudioComponentInstance _outInstance;
}
// Audio component matched by AudioComponentFindNext (RemoteIO description).
@property (nonatomic, assign) AudioComponent component;
// Shared AVAudioSession used to configure category/mode/sample rate.
@property (nonatomic, strong) AVAudioSession *session;
// Hardware AAC encoder fed from the input render callback.
@property (nonatomic, strong) BBAudioHardEncoder *encoder;
// File handle for appending encoded ADTS/AAC data (Documents/test.aac).
@property (nonatomic, strong) NSFileHandle *handle;
@end

@implementation BBAudioCapture

#pragma mark -- Lifecycle

/// Tears down the RemoteIO unit. The unit is stopped and uninitialized
/// before disposal; disposing a unit that may still be running or
/// initialized is unsafe (the original code disposed it directly).
- (void)dealloc {
    AudioOutputUnitStop(_outInstance);
    AudioUnitUninitialize(_outInstance);
    AudioComponentInstanceDispose(_outInstance);
}

#pragma mark -- Public API (start / stop capturing)

/// Starts the audio unit; the input callback begins delivering PCM.
- (void)startRunning {
    AudioOutputUnitStart(_outInstance);
}

/// Stops the audio unit; no further input callbacks are delivered.
- (void)stopRunning {
    AudioOutputUnitStop(_outInstance);
}

#pragma mark -- Public API (capture configuration)

/// Assigning a config eagerly builds the encoder, the audio session and
/// the RemoteIO capture pipeline.
- (void)setConfig:(BBAudioConfig *)config {
    _config = config;
    [self private_setupAudioSession];
}

#pragma mark -- Private (session + audio-unit setup)

/// Configures AVAudioSession, creates and wires the RemoteIO unit, and
/// prepares the output file (Documents/test.aac). Bails out with a log
/// message on the first failing step.
- (void)private_setupAudioSession {

    // 0. Create the AAC encoder that will consume captured PCM buffers.
    self.encoder = [[BBAudioHardEncoder alloc] init];
    self.encoder.config = self.config;

    // 1. Obtain the shared audio session.
    self.session = [AVAudioSession sharedInstance];

    NSError *error = nil;
    [self.session setCategory:AVAudioSessionCategoryPlayAndRecord withOptions:AVAudioSessionCategoryOptionMixWithOthers | AVAudioSessionCategoryOptionDefaultToSpeaker error:&error];

    if (error) {
        NSLog(@"AVAudioSession setupError");
        error = nil;
        return;
    }

    // 2. Activate the session.
    [self.session setActive:YES error:&error];

    if (error) {
        NSLog(@"AVAudioSession setActiveError");
        error = nil;
        return;
    }

    // 3. Select the video-recording mode.
    [self.session setMode:AVAudioSessionModeVideoRecording error:&error];

    if (error) {
        NSLog(@"AVAudioSession setModeError");
        error = nil;
        return;
    }

    // 4. Describe the RemoteIO audio unit.
    AudioComponentDescription acd = {
        .componentType = kAudioUnitType_Output,
        .componentSubType = kAudioUnitSubType_RemoteIO,
        .componentManufacturer = kAudioUnitManufacturer_Apple,
        .componentFlags = 0,
        .componentFlagsMask = 0,
    };

    // 5. Find a matching audio component.
    self.component = AudioComponentFindNext(NULL, &acd);

    // 6. Instantiate the audio unit.
    OSStatus status = AudioComponentInstanceNew(self.component, &_outInstance);

    if (status != noErr) {
        NSLog(@"AudioSource new AudioComponent error");
        status = noErr;
        return;
    }

    // 7. Enable IO on the input scope of bus 1 (the microphone bus).
    //    1 = enabled, 0 = disabled.
    UInt32 flagOne = 1;
    AudioUnitSetProperty(_outInstance, kAudioOutputUnitProperty_EnableIO, kAudioUnitScope_Input, 1, &flagOne, sizeof(flagOne));

    // 8. Describe the PCM stream the unit should produce.
    AudioStreamBasicDescription asbd = {0};
    asbd.mSampleRate = self.config.sampleRate;             // sample rate from config
    asbd.mFormatID = kAudioFormatLinearPCM;                // raw PCM
    asbd.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked;
    asbd.mChannelsPerFrame = (UInt32)self.config.channels; // channels per frame
    asbd.mFramesPerPacket = 1;                             // uncompressed audio: 1 frame per packet
    asbd.mBitsPerChannel = 16;                             // 16-bit samples
    asbd.mBytesPerFrame = asbd.mChannelsPerFrame * asbd.mBitsPerChannel / 8; // bytes per frame = channels * bits / 8
    asbd.mBytesPerPacket = asbd.mFramesPerPacket * asbd.mBytesPerFrame;      // bytes per packet

    status = AudioUnitSetProperty(_outInstance, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Output, 1, &asbd, sizeof(asbd));

    if (status != noErr) {
        NSLog(@"AudioUnitSetProperty StreamFormat error");
        status = noErr;
        return;
    }

    // 9. Install the input callback that receives captured buffers.
    AURenderCallbackStruct cb;
    cb.inputProcRefCon = (__bridge void *)self;
    cb.inputProc = audioBufferCallBack;

    status = AudioUnitSetProperty(_outInstance, kAudioOutputUnitProperty_SetInputCallback, kAudioUnitScope_Global, 1, &cb, sizeof(cb));

    if (status != noErr) {
        NSLog(@"AudioUnitSetProperty InputCallback error");
        status = noErr;
        return;
    }

    // 10. Initialize the unit (validates the configuration above).
    status = AudioUnitInitialize(_outInstance);

    if (status != noErr) {
        NSLog(@"AudioUnitInitialize error");
        status = noErr;
        return;
    }

    // 11. Ask the session for the preferred hardware sample rate.
    [self.session setPreferredSampleRate:self.config.sampleRate error:&error];

    if (error) {
        NSLog(@"AudioSource setPreferredSampleRate error");
        error = nil;
        return;
    }

    // 12. Prepare Documents/test.aac for writing (recreated on each setup).
    NSString *audioPath = [[NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES) lastObject] stringByAppendingPathComponent:@"test.aac"];
    [[NSFileManager defaultManager] removeItemAtPath:audioPath error:nil];
    [[NSFileManager defaultManager] createFileAtPath:audioPath contents:nil attributes:nil];
    self.handle = [NSFileHandle fileHandleForWritingAtPath:audioPath];
}

#pragma mark -- Input render callback

/// Called by the RemoteIO unit whenever microphone data is available.
/// Renders the new frames into a local buffer list, hands them to the AAC
/// encoder, and appends each encoded packet to the output file.
static OSStatus audioBufferCallBack(void *inRefCon,
                                    AudioUnitRenderActionFlags *ioActionFlags,
                                    const AudioTimeStamp *inTimeStamp,
                                    UInt32 inBusNumber,
                                    UInt32 inNumberFrames,
                                    AudioBufferList *ioData) {
    @autoreleasepool {
        BBAudioCapture *capture = (__bridge BBAudioCapture *)inRefCon;
        if (!capture) return -1;

        // mData == NULL / mDataByteSize == 0 lets AudioUnitRender manage
        // the buffer allocation itself.
        // NOTE(review): mNumberChannels is hard-coded to 1 here although the
        // stream format above uses config.channels — confirm for stereo capture.
        AudioBuffer buffer;
        buffer.mData = NULL;
        buffer.mDataByteSize = 0;
        buffer.mNumberChannels = 1;

        AudioBufferList buffers;
        buffers.mNumberBuffers = 1;
        buffers.mBuffers[0] = buffer;

        OSStatus status = AudioUnitRender(capture->_outInstance,
                                          ioActionFlags,
                                          inTimeStamp,
                                          inBusNumber,
                                          inNumberFrames,
                                          &buffers);

        if (status == noErr) {
            [capture.encoder encodeWithBufferList:buffers completianBlock:^(NSData *encodedData, NSError *error) {
                if (error) {
                    NSLog(@"error:%@",error);
                    return;
                }

                NSLog(@"write to file!");
                [capture.handle writeData:encodedData];
            }];
        }
        return status;
    }
}

@end

2、编码代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#import "BBAudioHardEncoder.h"
#import "BBAudioConfig.h"

// Class extension: private converter state.
@interface BBAudioHardEncoder ()
// PCM -> AAC converter, created lazily on first use (see -converterRef).
@property (nonatomic, assign) AudioConverterRef converterRef;
@end
@implementation BBAudioHardEncoder

#pragma mark -- Lazy converter

/// Returns the AudioConverter, creating it on first access.
- (AudioConverterRef)converterRef {
    if (_converterRef == nil) {
        [self private_setupAudioConvert];
    }
    return _converterRef;
}

/// Disposes the converter only if it was actually created.
- (void)dealloc {
    if (_converterRef) {
        AudioConverterDispose(_converterRef);
    }
}

/// Builds the PCM -> AAC-LC AudioConverter from the current config.
/// On failure the converter stays nil and encoding becomes a no-op.
- (void)private_setupAudioConvert {

    // 1. Input stream description: interleaved signed 16-bit PCM.
    AudioStreamBasicDescription inputFormat = {0};
    inputFormat.mSampleRate = self.config.sampleRate;        // sample rate
    inputFormat.mFormatID = kAudioFormatLinearPCM;           // raw PCM input
    inputFormat.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked;
    inputFormat.mChannelsPerFrame = (UInt32)self.config.channels; // channels per frame
    inputFormat.mFramesPerPacket = 1;                        // uncompressed: 1 frame per packet
    inputFormat.mBitsPerChannel = 16;                        // bits per channel
    inputFormat.mBytesPerFrame = inputFormat.mBitsPerChannel / 8 * inputFormat.mChannelsPerFrame; // bytes per frame
    inputFormat.mBytesPerPacket = inputFormat.mBytesPerFrame * inputFormat.mFramesPerPacket;      // bytes per packet

    // 2. Output stream description: AAC-LC. For compressed formats the
    //    per-packet/per-frame byte fields are 0 (variable size).
    AudioStreamBasicDescription outputFormat;
    memset(&outputFormat, 0, sizeof(outputFormat));
    // Frame rate of the decoded stream; must not be 0.
    outputFormat.mSampleRate = inputFormat.mSampleRate;
    // AAC codec (alternatives: kAudioFormatMPEG4AAC_HE_V2, ...).
    outputFormat.mFormatID = kAudioFormatMPEG4AAC;
    // AAC-LC object type.
    outputFormat.mFormatFlags = kMPEG4Object_AAC_LC;
    // 0 = variable packet size (described by AudioStreamPacketDescription).
    outputFormat.mBytesPerPacket = 0;
    // Channels per frame, matching the input.
    outputFormat.mChannelsPerFrame = (UInt32)self.config.channels;
    // AAC always encodes 1024 frames per packet.
    outputFormat.mFramesPerPacket = 1024;
    // 0 for compressed formats.
    outputFormat.mBytesPerFrame = 0;
    // 0 for compressed formats.
    outputFormat.mBitsPerChannel = 0;
    // Pads the struct; always 0.
    outputFormat.mReserved = 0;

    // 3. Codec preference: try the software encoder first, then hardware.
    const OSType subtype = kAudioFormatMPEG4AAC;
    AudioClassDescription requestedCodecs[2] = {
        {
            kAudioEncoderComponentType,
            subtype,
            kAppleSoftwareAudioCodecManufacturer
        },
        {
            kAudioEncoderComponentType,
            subtype,
            kAppleHardwareAudioCodecManufacturer
        }
    };

    // 4. Create the converter.
    OSStatus result = AudioConverterNewSpecific(&inputFormat, &outputFormat, 2, requestedCodecs, &_converterRef);

    if (result == noErr) {
        NSLog(@"creat convert success!");
    } else {
        NSLog(@"creat convert error!");
        _converterRef = nil;
    }
}

#pragma mark -- Encoding

/// Encodes one PCM buffer list to a single ADTS-framed AAC packet and
/// invokes the completion block (on a global queue) with either the
/// encoded data or an NSError carrying the OSStatus.
/// Note: the (misspelled) selector is kept for source compatibility.
- (void)encodeWithBufferList:(AudioBufferList)bufferList completianBlock:(void (^)(NSData *encodedData, NSError *error))completionBlock {
    if (!self.converterRef) {
        return;
    }
    int size = bufferList.mBuffers[0].mDataByteSize;

    if (size <= 0) {
        return;
    }

    char *aacBuf = malloc(size);

    // 1. Output buffer list backed by aacBuf.
    AudioBufferList outBufferList;
    outBufferList.mNumberBuffers = 1;
    outBufferList.mBuffers[0].mNumberChannels = bufferList.mBuffers[0].mNumberChannels;
    outBufferList.mBuffers[0].mDataByteSize = bufferList.mBuffers[0].mDataByteSize; // capacity of aacBuf
    outBufferList.mBuffers[0].mData = aacBuf;
    UInt32 outputDataPacketSize = 1; // request exactly one AAC packet

    NSData *data = nil;
    NSError *error = nil;
    OSStatus status = AudioConverterFillComplexBuffer(_converterRef, inputDataProc, &bufferList, &outputDataPacketSize, &outBufferList, NULL);
    if (status == noErr) {
        // Prefix the raw AAC packet with its 7-byte ADTS header.
        NSData *rawAAC = [NSData dataWithBytes:outBufferList.mBuffers[0].mData length:outBufferList.mBuffers[0].mDataByteSize];
        NSData *adtsHeader = [self getADTSDataWithPacketLength:rawAAC.length];
        NSMutableData *fullData = [NSMutableData dataWithData:adtsHeader];
        [fullData appendData:rawAAC];
        data = fullData;
    } else {
        // Report the failure through the completion block instead of
        // returning silently (the original leaked aacBuf here and never
        // surfaced the error).
        error = [NSError errorWithDomain:NSOSStatusErrorDomain code:status userInfo:nil];
        NSLog(@"音频编码失败");
    }

    // The NSData objects above copied the bytes, so the scratch buffer can
    // be released unconditionally — no leak on the error path.
    free(aacBuf);

    if (completionBlock) {
        dispatch_async(dispatch_get_global_queue(0, 0), ^{
            completionBlock(data, error);
        });
    }
}

#pragma mark -- AudioCallBack

/// Feeds the caller-supplied PCM buffer to the converter.
OSStatus inputDataProc(AudioConverterRef inConverter, UInt32 *ioNumberDataPackets, AudioBufferList *ioData,AudioStreamPacketDescription **outDataPacketDescription, void *inUserData) {
    // Hand the PCM bytes straight through to the converter.
    // NOTE(review): mNumberChannels is hard-coded to 1 and
    // *ioNumberDataPackets is left untouched — confirm against the
    // configured channel count and packet math.
    AudioBufferList bufferList = *(AudioBufferList*)inUserData;
    ioData->mBuffers[0].mNumberChannels = 1;
    ioData->mBuffers[0].mData = bufferList.mBuffers[0].mData;
    ioData->mBuffers[0].mDataByteSize = bufferList.mBuffers[0].mDataByteSize;
    ioData->mNumberBuffers = 1;
    return noErr;
}

#pragma mark -- ADTS header

/**
 * Add ADTS header at the beginning of each and every AAC packet.
 * This is needed because the converter emits raw AAC packets without
 * framing.
 *
 * Note the packet length must count in the ADTS header itself.
 * See: http://wiki.multimedia.cx/index.php?title=ADTS
 * Also: http://wiki.multimedia.cx/index.php?title=MPEG-4_Audio#Channel_Configurations
 **/
- (NSData *)getADTSDataWithPacketLength:(NSInteger)packetLength {

    int adtsLength = 7;
    char *packet = malloc(sizeof(char) * adtsLength);
    int profile = 2; // AAC LC (MPEG-4 Audio Object Type); header stores profile-1
    // Derive the sampling-frequency index and channel configuration from
    // the actual config instead of hard-coding mono/44.1 kHz (the original
    // wrote chanCfg = 1 even for stereo capture).
    int freqIdx = [self private_frequencyIndexForSampleRate:(NSInteger)self.config.sampleRate];
    int chanCfg = (int)self.config.channels; // MPEG-4 channel configuration (1..7 map directly)
    NSUInteger fullLength = adtsLength + packetLength;
    // fill in ADTS data
    packet[0] = (char)0xFF; // 11111111 = syncword (high bits)
    packet[1] = (char)0xF9; // syncword low bits, MPEG-2 ID, layer 00, protection_absent = 1 (no CRC)
    packet[2] = (char)(((profile-1)<<6) + (freqIdx<<2) +(chanCfg>>2));
    packet[3] = (char)(((chanCfg&3)<<6) + (fullLength>>11));
    packet[4] = (char)((fullLength&0x7FF) >> 3);
    packet[5] = (char)(((fullLength&7)<<5) + 0x1F);
    packet[6] = (char)0xFC;
    NSData *data = [NSData dataWithBytesNoCopy:packet length:adtsLength freeWhenDone:YES];
    return data;
}

/// Maps a sample rate to its MPEG-4 sampling-frequency index
/// (ADTS table); falls back to 4 (44.1 kHz) for unknown rates.
- (int)private_frequencyIndexForSampleRate:(NSInteger)sampleRate {
    switch (sampleRate) {
        case 96000: return 0;
        case 88200: return 1;
        case 64000: return 2;
        case 48000: return 3;
        case 44100: return 4;
        case 32000: return 5;
        case 24000: return 6;
        case 22050: return 7;
        case 16000: return 8;
        case 12000: return 9;
        case 11025: return 10;
        case 8000:  return 11;
        case 7350:  return 12;
        default:    return 4; // unknown rate: assume 44.1 kHz
    }
}
@end

以上代码为主要代码,配置代码无非就是采样率:44.1kHz,声道:双声道。注意:ADTS 头中的声道配置(chanCfg)与采样率索引(freqIdx)必须与实际编码配置一致,否则解码端可能无法正确播放。
完整代码地址:https://github.com/ibabyblue/PCMHardEncodeToAAC
将编码的AAC数据写入本地文件,利用VLC播放器可以直接播放.aac格式文件,测试很方便。

写在最后,一路学习,坎坷良多,非常感谢无私分享的开发者们,感谢Jovins、iossigner分享的干货,感恩!
参考地址:
http://blog.csdn.net/ownwell/article/details/8114121/
https://wiki.multimedia.cx/index.php?title=ADTS
https://developer.apple.com/documentation/audiotoolbox/audio_converter_services?language=objc