你好,歡迎來到IOS教程網

 Ios教程網 >> IOS編程開發 >> IOS開發綜合 >> iOS語音書寫功能

iOS語音書寫功能

編輯:IOS開發綜合

最近在項目開發中,需要將語音識別轉換成文本的功能。研究了下科大訊飛,附上Demo分享給大家。

研發前先得做一些准備。

1、注冊科大訊飛開發者帳號(http://www.xfyun.cn)

2、下載開發平台(iOS、或android,或其他)所需要的SDK(SDK包含:說明文檔、SDK即iflyMSC.framework、Demo)

3、項目中添加SDK(添加時,先將SDK復制粘貼到項目文件夾,再按下面的添加方法將framework加入項目引用),以及相關聯的framework

添加方法:TARGETS-Build Phases-Link Binary With Libraries-"+"-Choose frameworks and libraries to add-add other,或選擇對應的framework-add

4、使用時要添加對應的頭文件

特別說明:

1、使用SDK關聯的APPID存在於下載的Demo中,如果SDK有替換的話APPID應該跟著一起替換。

2、在使用前,務必在AppDelegate的方法"- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions {}"中進行初始化操作。

3、需要有網絡的情況下才能使用。

 

如圖

下載的科大訊飛SDK文件

\

Demo中的APPID

\

添加SDK

\\

添加關聯framework

\

 

 

語音轉文本實現代碼

 

.h文件

#import <Foundation/Foundation.h>

// 導入頭文件
#import "iflyMSC.framework/Headers/IFlyMSC.h"
#import "iflyMSC.framework/Headers/IFlySpeechUtility.h"
#import "iflyMSC/IFlySpeechConstant.h"


#pragma mark - 初始化參數類

/**************************************************************************/

/**
 Holder for the dictation parameters that are applied to the speech
 recognizer before a session starts. Use +sharedInstance to read or change
 the values.
 */
@interface IATConfig : NSObject

/// Returns the shared configuration object.
+ (IATConfig *)sharedInstance;

/// Accent identifier for Mandarin.
+ (NSString *)mandarin;
/// Accent identifier for Cantonese.
+ (NSString *)cantonese;
/// Accent identifier for the Henan dialect.
+ (NSString *)henanese;
/// Language identifier for Chinese.
+ (NSString *)chinese;
/// Language identifier for English.
+ (NSString *)english;
/// The lower of the two offered sample-rate values.
+ (NSString *)lowSampleRate;
/// The higher of the two offered sample-rate values.
+ (NSString *)highSampleRate;
/// Value that turns punctuation in the result on.
+ (NSString *)isDot;
/// Value that turns punctuation in the result off.
+ (NSString *)noDot;


/**
 The following parameters have to be applied through the speech recognizer.
 String-valued properties are declared `copy` so that a caller passing a
 mutable string cannot change the configuration behind its back.
 */
/// Maximum recording time. Presumably milliseconds - TODO confirm against the SDK.
@property (nonatomic, copy) NSString *speechTimeout;
/// Trailing end-point (VAD_EOS) value.
@property (nonatomic, copy) NSString *vadEos;
/// Leading end-point (VAD_BOS) value.
@property (nonatomic, copy) NSString *vadBos;

/// Recognition language; compare with +chinese / +english.
@property (nonatomic, copy) NSString *language;
/// Accent (dialect); only applied when the language is Chinese.
@property (nonatomic, copy) NSString *accent;

/// Whether punctuation is returned; see +isDot / +noDot.
@property (nonatomic, copy) NSString *dot;
/// Audio sample rate; see +lowSampleRate / +highSampleRate.
@property (nonatomic, copy) NSString *sampleRate;


/**
 The following properties do not need to be set by callers.
 */
/// Whether a recognition UI is used.
@property (nonatomic, assign) BOOL haveView;
/// Accent identifiers. NOTE(review): never assigned in the visible code - confirm.
@property (nonatomic, copy) NSArray *accentIdentifer;
/// Display names of the supported accents / languages.
@property (nonatomic, copy) NSArray *accentNickName;

@end

/**************************************************************************/


#pragma mark - 語音聽寫類

/// Speech dictation helper: starts, stops and cancels a recognition session
/// and reports its progress through the blocks passed to the start method.
@interface VoiceConversion : NSObject

/// Initializes the speech SDK (logging, working directory, app ID).
/// Intended to be called once when the app launches.
+ (void)VoiceInitialize;


/// Starts recording and recognition.
/// @param startListening Called with YES when the recognizer started listening, NO when starting failed.
/// @param begin Called when the recognizer reports the beginning of speech.
/// @param end Called when the recognizer reports the end of speech.
/// @param error Called when the session finishes; isSuccess is YES when the reported error code is 0.
/// @param result Called with the text recognized so far each time a result arrives.
/// @param volume Called with the current input volume.
- (void)voiceStart:(void (^)(BOOL isStart))startListening speechBegin:(void (^)(void))begin speechEnd:(void (^)(void))end speechError:(void (^)(BOOL isSuccess))error speechResult:(void (^)(NSString *text))result speechVolume:(void (^)(int volume))volume;

/// Cancels the current recognition session.
- (void)voiceCancel;

/// Stops recording.
- (void)voiceStop;

@end

.m文件

#import "VoiceConversion.h"

#pragma mark - 初始化參數類

/**************************************************************************/

// Accent identifiers (Mandarin, Cantonese, Henan dialect) followed by the
// language identifiers (English, Chinese) handed out by IATConfig.
static NSString *const PUTONGHUA = @"mandarin";
static NSString *const YUEYU     = @"cantonese";
static NSString *const HENANHUA  = @"henanese";
static NSString *const ENGLISH   = @"en_us";
static NSString *const CHINESE   = @"zh_cn";

@implementation IATConfig

#pragma mark Shared instance

/// Returns the shared configuration object; it is created exactly once.
+ (IATConfig *)sharedInstance
{
    static dispatch_once_t onceToken;
    static IATConfig *sharedConfig = nil;
    dispatch_once(&onceToken, ^{
        sharedConfig = [[IATConfig alloc] init];
    });
    return sharedConfig;
}

#pragma mark Lifecycle

/// Creates a configuration populated with the default settings.
- (id)init
{
    self = [super init];
    if (!self)
    {
        return nil;
    }

    [self defaultSetting];
    return self;
}

/// Resets every setting to its default value.
- (void)defaultSetting
{
    // Language: Chinese, Mandarin accent.
    _language = CHINESE;
    _accent = PUTONGHUA;

    // Timing values.
    _speechTimeout = @"30000";
    _vadEos = @"3000";
    _vadBos = @"3000";

    // Punctuation on, higher sample rate.
    _dot = @"1";
    _sampleRate = @"16000";

    // No recognition UI by default.
    _haveView = NO;
    _accentNickName = @[@"粵語", @"普通話", @"河南話", @"英文"];
}

#pragma mark Accent identifiers

+ (NSString *)mandarin  { return PUTONGHUA; }

+ (NSString *)cantonese { return YUEYU; }

+ (NSString *)henanese  { return HENANHUA; }

#pragma mark Language identifiers

+ (NSString *)chinese   { return CHINESE; }

+ (NSString *)english   { return ENGLISH; }

#pragma mark Sample rates

+ (NSString *)lowSampleRate  { return @"8000"; }

+ (NSString *)highSampleRate { return @"16000"; }

#pragma mark Punctuation switch

+ (NSString *)isDot { return @"1"; }

+ (NSString *)noDot { return @"0"; }

@end

/**************************************************************************/

#pragma mark - 語音聽寫類

// App ID handed to the speech SDK when it is initialized ("appid=<id>").
static NSString *const VoiceAPPID   = @"572016e4";
// Timeout value. Presumably the network wait time in milliseconds - TODO confirm.
static NSString *const VoiceTimeOut = @"20000";

// Private state. The class adopts IFlySpeechRecognizerDelegate because it
// installs itself as the recognizer's delegate and implements the delegate
// callbacks (onResults:isLast:, onError:, ...).
@interface VoiceConversion () <IFlySpeechRecognizerDelegate>

/// Text recognized so far in the current session (created on first access).
@property (nonatomic, strong) NSMutableString *resultText;
/// The recognizer used for dictation (obtained on first access).
@property (nonatomic, strong) IFlySpeechRecognizer *iFlySpeechRecognizer;

/// Invoked when speech begins.
@property (nonatomic, copy) void (^beginSpeech)(void);
/// Invoked when speech ends.
@property (nonatomic, copy) void (^endSpeech)(void);
/// Invoked when the session finishes; YES means error code 0.
@property (nonatomic, copy) void (^errorSpeech)(BOOL isSuccess);
/// Invoked with the accumulated text whenever a result arrives.
@property (nonatomic, copy) void (^resultSpeech)(NSString *text);
/// Invoked with the current input volume.
@property (nonatomic, copy) void (^volumeSpeech)(int volume);

@end

@implementation VoiceConversion

#pragma mark 初始化------------

/// Initializes the speech SDK: logging, working directory and app ID.
+ (void)VoiceInitialize
{
    // Log at the most verbose level and mirror the log to the console.
    [IFlySetting setLogFile:LVL_ALL];
    [IFlySetting showLogcat:YES];

    // The SDK working directory (where the log is stored) is the user's Caches directory.
    NSString *cachesDirectory = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES)[0];
    [IFlySetting setLogFilePath:cachesDirectory];

    // The app ID identifies the application and must be supplied at initialization.
    // Per the original note, using the service without initializing usually yields error 10111.
    [IFlySpeechUtility createUtility:[NSString stringWithFormat:@"appid=%@", VoiceAPPID]];
}

#pragma mark 實例化------------

/// Cancels any running session and detaches from the recognizer.
- (void)dealloc
{
    // Use the ivar rather than the lazy getter: if no recognizer was ever
    // obtained there is nothing to cancel, and dealloc should not configure one.
    IFlySpeechRecognizer *recognizer = _iFlySpeechRecognizer;
    if (recognizer == nil)
    {
        return;
    }

    [recognizer cancel];

    // The recognizer comes from +sharedInstance and therefore outlives this
    // object. Stop it from calling back into a deallocated delegate.
    // NOTE(review): assumes the SDK holds its delegate without retaining it - confirm.
    if (recognizer.delegate == self)
    {
        recognizer.delegate = nil;
    }
}

/// Lazily created buffer holding the text recognized so far.
- (NSMutableString *)resultText
{
    if (_resultText != nil)
    {
        return _resultText;
    }

    _resultText = [NSMutableString string];
    return _resultText;
}

/// Lazily obtains the shared recognizer and puts it into dictation mode.
- (IFlySpeechRecognizer *)iFlySpeechRecognizer
{
    if (_iFlySpeechRecognizer)
    {
        return _iFlySpeechRecognizer;
    }

    IFlySpeechRecognizer *recognizer = [IFlySpeechRecognizer sharedInstance];
    // Presumably clears previously set parameters - TODO confirm against the SDK.
    [recognizer setParameter:@"" forKey:[IFlySpeechConstant PARAMS]];
    // Dictation ("iat") mode.
    [recognizer setParameter:@"iat" forKey:[IFlySpeechConstant IFLY_DOMAIN]];

    _iFlySpeechRecognizer = recognizer;
    return _iFlySpeechRecognizer;
}

/// Installs this object as the recognizer's delegate and pushes the current
/// IATConfig values into the recognizer.
- (void)initializeVoice
{
    IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
    recognizer.delegate = self;

    IATConfig *config = [IATConfig sharedInstance];

    // Maximum recording time.
    [recognizer setParameter:config.speechTimeout forKey:[IFlySpeechConstant SPEECH_TIMEOUT]];
    // Trailing end-point.
    [recognizer setParameter:config.vadEos forKey:[IFlySpeechConstant VAD_EOS]];
    // Leading end-point.
    [recognizer setParameter:config.vadBos forKey:[IFlySpeechConstant VAD_BOS]];
    // Network wait time: use the file-level constant instead of repeating the literal.
    [recognizer setParameter:VoiceTimeOut forKey:[IFlySpeechConstant NET_TIMEOUT]];

    // Sample rate (the original note recommends 16K).
    [recognizer setParameter:config.sampleRate forKey:[IFlySpeechConstant SAMPLE_RATE]];

    // The language is only applied for the two supported values; the accent
    // (dialect) is applied for Chinese only.
    BOOL isChinese = [config.language isEqualToString:[IATConfig chinese]];
    BOOL isEnglish = [config.language isEqualToString:[IATConfig english]];
    if (isChinese || isEnglish)
    {
        [recognizer setParameter:config.language forKey:[IFlySpeechConstant LANGUAGE]];
    }
    if (isChinese)
    {
        [recognizer setParameter:config.accent forKey:[IFlySpeechConstant ACCENT]];
    }

    // Whether punctuation is returned.
    [recognizer setParameter:config.dot forKey:[IFlySpeechConstant ASR_PTT]];
}

#pragma mark 語音聽寫方法------------

/// Starts a recording / recognition session and remembers the callbacks for
/// the delegate methods. `startListening` is invoked before this method
/// returns, with the recognizer's start result.
- (void)voiceStart:(void (^)(BOOL isStart))startListening speechBegin:(void (^)(void))begin speechEnd:(void (^)(void))end speechError:(void (^)(BOOL isSuccess))error speechResult:(void (^)(NSString *text))result speechVolume:(void (^)(int volume))volume
{
    // Start from an empty transcript.
    [self.resultText setString:@""];

    // Remember the callbacks (the properties are declared `copy`).
    self.beginSpeech = begin;
    self.endSpeech = end;
    self.errorSpeech = error;
    self.resultSpeech = result;
    self.volumeSpeech = volume;

    // Apply the configuration, then drop any session still in flight.
    [self initializeVoice];
    IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
    [recognizer cancel];

    // Audio comes from the microphone.
    [recognizer setParameter:IFLY_AUDIO_SOURCE_MIC forKey:@"audio_source"];
    // Results are delivered as JSON.
    [recognizer setParameter:@"json" forKey:[IFlySpeechConstant RESULT_TYPE]];
    // Recording file name; per the original note it is stored in the SDK
    // working directory, or under library/cache when none was set.
    [recognizer setParameter:@"asr.pcm" forKey:[IFlySpeechConstant ASR_AUDIO_PATH]];

    BOOL didStart = [recognizer startListening];
    if (startListening == nil)
    {
        return;
    }

    // Per the original note, a failed start may mean the previous request has
    // not finished yet; concurrent sessions are not supported.
    startListening(didStart);
}

/// Cancels the current dictation session.
- (void)voiceCancel
{
    IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
    [recognizer cancel];
}

/// Stops recording.
- (void)voiceStop
{
    IFlySpeechRecognizer *recognizer = self.iFlySpeechRecognizer;
    [recognizer stopListening];
}

#pragma mark IFlySpeechRecognizerDelegate------------

/**
 Recognition result delegate callback.
 Appends the newly recognized text to the transcript and reports the whole
 transcript through the result block.
 @param results Recognition results; the keys of the first element carry the JSON payload.
 @param isLast Whether this is the final result of the session (not used here).
 */
- (void)onResults:(NSArray *)results isLast:(BOOL)isLast
{
    // -firstObject instead of results[0]: an empty array must not raise.
    id firstResult = results.firstObject;

    NSMutableString *json = [NSMutableString string];
    if ([firstResult isKindOfClass:[NSDictionary class]])
    {
        for (NSString *key in (NSDictionary *)firstResult)
        {
            [json appendFormat:@"%@", key];
        }
    }

    NSString *recognized = [[self class] stringFromJson:json];
    if (recognized.length > 0)
    {
        [self.resultText appendString:recognized];
    }

    if (self.resultSpeech)
    {
        // Hand out an immutable snapshot so the receiver never observes later
        // mutations of the internal buffer.
        self.resultSpeech([self.resultText copy]);
    }
}

/**
 Session-finished delegate callback.
 @param error Error object; an errorCode of 0 means the session ended normally,
        any other value means an error occurred.
 */
- (void)onError:(IFlySpeechError *)error
{
    void (^handler)(BOOL) = self.errorSpeech;
    if (handler == nil)
    {
        return;
    }

    handler(error.errorCode == 0);
}

/**
 Recording-stopped delegate callback; forwards to the end-of-speech block.
 */
- (void)onEndOfSpeech
{
    void (^handler)(void) = self.endSpeech;
    if (handler == nil)
    {
        return;
    }

    handler();
}

/**
 Recognition-started delegate callback; forwards to the begin-of-speech block.
 */
- (void)onBeginOfSpeech
{
    void (^handler)(void) = self.beginSpeech;
    if (handler == nil)
    {
        return;
    }

    handler();
}

/**
 Volume delegate callback; forwards the value to the volume block.
 Per the original note the volume ranges from 0 to 30.
 */
- (void)onVolumeChanged:(int)volume
{
    void (^handler)(int) = self.volumeSpeech;
    if (handler == nil)
    {
        return;
    }

    handler(volume);
}


#pragma mark 解析方法------------

/**************************************************************************/

/**
 Parses the result returned for command-word recognition.

 The input is processed line by line. A line is used only when it contains
 "confidence=", " grammar=" and "input=". For each such line the text after
 "input=" and the value between "confidence=" and " grammar=" are appended
 as "<input> 置信度<confidence>\n".

 @param params The raw result text; may be nil.
 @return The formatted lines, or an empty string when a line's id is
         "nomatch" or nothing could be parsed.
 */
+ (NSString *)stringFromAsr:(NSString *)params
{
    NSMutableString *resultString = [NSMutableString string];
    NSCharacterSet *whitespace = [NSCharacterSet whitespaceAndNewlineCharacterSet];

    for (NSString *line in [params componentsSeparatedByString:@"\n"])
    {
        NSRange idRange = [line rangeOfString:@"id="];
        NSRange nameRange = [line rangeOfString:@"name="];
        NSRange confidenceRange = [line rangeOfString:@"confidence="];
        NSRange grammarRange = [line rangeOfString:@" grammar="];
        NSRange inputRange = [line rangeOfString:@"input="];

        if (confidenceRange.length == 0 || grammarRange.length == 0 || inputRange.length == 0)
        {
            continue;
        }

        // Check for "nomatch".
        if (idRange.length != 0)
        {
            NSUInteger idStart = NSMaxRange(idRange);
            NSUInteger idEnd;
            if (nameRange.location != NSNotFound && nameRange.location >= idStart)
            {
                idEnd = nameRange.location;
            }
            else
            {
                // No "name=" after "id=": computing the length from
                // NSNotFound (or from an earlier position) would wrap around
                // and raise NSRangeException. Let the value run up to the
                // next whitespace or the end of the line instead.
                NSRange rest = NSMakeRange(idStart, line.length - idStart);
                NSRange gap = [line rangeOfCharacterFromSet:whitespace options:0 range:rest];
                idEnd = (gap.location == NSNotFound) ? line.length : gap.location;
            }

            NSString *rawID = [line substringWithRange:NSMakeRange(idStart, idEnd - idStart)];
            NSString *idValue = [rawID stringByTrimmingCharactersInSet:whitespace];
            if ([idValue isEqualToString:@"nomatch"])
            {
                return @"";
            }
        }

        // Confidence value: the text between "confidence=" and " grammar=".
        NSUInteger scoreStart = NSMaxRange(confidenceRange);
        if (grammarRange.location < scoreStart)
        {
            // " grammar=" precedes the confidence value; the line is not in
            // the expected layout, so skip it rather than underflow.
            continue;
        }
        NSString *score = [line substringWithRange:NSMakeRange(scoreStart, grammarRange.location - scoreStart)];

        // Recognized input: everything after "input=".
        NSString *inputString = [line substringFromIndex:NSMaxRange(inputRange)];

        [resultString appendFormat:@"%@ 置信度%@\n", inputString, score];
    }

    return resultString;
}

/**
 Parses a dictation result in JSON format and returns the recognized text.

 Example input:
 {"sn":1,"ls":true,"bg":0,"ed":0,"ws":[{"bg":0,"cw":[{"w":"白日","sc":0}]},{"bg":0,"cw":[{"w":"依山","sc":0}]},{"bg":0,"cw":[{"w":"盡","sc":0}]},{"bg":0,"cw":[{"w":"黃河入海流","sc":0}]},{"bg":0,"cw":[{"w":"。","sc":0}]}]}

 Every "w" string found under "ws" -> "cw" is concatenated in order.
 Elements that do not have the expected shape are skipped instead of raising.

 @param params The JSON text; may be nil.
 @return nil when params is nil; otherwise the concatenated words (an empty
         string when the JSON is invalid or has an unexpected layout).
 */
+ (NSString *)stringFromJson:(NSString *)params
{
    if (params == nil)
    {
        return nil;
    }

    NSMutableString *text = [NSMutableString string];

    // The payload is decoded as UTF-8; a nil data object would make
    // NSJSONSerialization raise, so bail out first.
    NSData *jsonData = [params dataUsingEncoding:NSUTF8StringEncoding];
    if (jsonData == nil)
    {
        return text;
    }

    id root = [NSJSONSerialization JSONObjectWithData:jsonData options:kNilOptions error:NULL];
    if (![root isKindOfClass:[NSDictionary class]])
    {
        return text;
    }

    id words = [(NSDictionary *)root objectForKey:@"ws"];
    if (![words isKindOfClass:[NSArray class]])
    {
        return text;
    }

    for (id word in (NSArray *)words)
    {
        if (![word isKindOfClass:[NSDictionary class]])
        {
            continue;
        }

        id candidates = [(NSDictionary *)word objectForKey:@"cw"];
        if (![candidates isKindOfClass:[NSArray class]])
        {
            continue;
        }

        for (id candidate in (NSArray *)candidates)
        {
            if (![candidate isKindOfClass:[NSDictionary class]])
            {
                continue;
            }

            // Appending nil (missing "w") or a non-string would raise.
            id w = [(NSDictionary *)candidate objectForKey:@"w"];
            if ([w isKindOfClass:[NSString class]])
            {
                [text appendString:(NSString *)w];
            }
        }
    }

    return text;
}


/**
 Parses a grammar-recognition result in JSON format.

 For every entry under "ws" -> "cw" one line of the form
 "<w> 置信度:<sc>\n" is produced. Entries that do not have the expected
 shape (or lack a string "w") are skipped instead of raising.

 @param params The JSON text; may be nil.
 @return nil when params is nil; otherwise the formatted lines (an empty
         string when the JSON is invalid or has an unexpected layout).
 */
+ (NSString *)stringFromABNFJson:(NSString *)params
{
    if (params == nil)
    {
        return nil;
    }

    NSMutableString *text = [NSMutableString string];

    // A nil data object would make NSJSONSerialization raise.
    NSData *jsonData = [params dataUsingEncoding:NSUTF8StringEncoding];
    if (jsonData == nil)
    {
        return text;
    }

    id root = [NSJSONSerialization JSONObjectWithData:jsonData options:kNilOptions error:NULL];
    if (![root isKindOfClass:[NSDictionary class]])
    {
        return text;
    }

    id words = [(NSDictionary *)root objectForKey:@"ws"];
    if (![words isKindOfClass:[NSArray class]])
    {
        return text;
    }

    for (id word in (NSArray *)words)
    {
        if (![word isKindOfClass:[NSDictionary class]])
        {
            continue;
        }

        id candidates = [(NSDictionary *)word objectForKey:@"cw"];
        if (![candidates isKindOfClass:[NSArray class]])
        {
            continue;
        }

        for (id candidate in (NSArray *)candidates)
        {
            if (![candidate isKindOfClass:[NSDictionary class]])
            {
                continue;
            }

            id w = [(NSDictionary *)candidate objectForKey:@"w"];
            if (![w isKindOfClass:[NSString class]])
            {
                continue;
            }

            // The score arrives as whatever JSON type the service sent
            // (a number in the dictation example), so it is formatted with %@
            // rather than being treated as a string.
            id score = [(NSDictionary *)candidate objectForKey:@"sc"];
            [text appendString:(NSString *)w];
            [text appendFormat:@" 置信度:%@", score];
            [text appendString:@"\n"];
        }
    }

    return text;
}

/**************************************************************************/

@end


 

使用

 

初始化方法
/// Initializes the speech SDK: logging, working directory and app ID.
+ (void)VoiceInitialize
{
    // Log at the most verbose level and mirror the log to the console.
    [IFlySetting setLogFile:LVL_ALL];
    [IFlySetting showLogcat:YES];

    // The SDK working directory (where the log is stored) is the user's Caches directory.
    NSString *cachesDirectory = NSSearchPathForDirectoriesInDomains(NSCachesDirectory, NSUserDomainMask, YES)[0];
    [IFlySetting setLogFilePath:cachesDirectory];

    // The app ID identifies the application and must be supplied at initialization.
    // Per the original note, using the service without initializing usually yields error 10111.
    [IFlySpeechUtility createUtility:[NSString stringWithFormat:@"appid=%@", VoiceAPPID]];
}

初始化調用
/// App launch hook: sets up the speech SDK before any recognition is requested.
- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions
{
    // One-time speech SDK initialization.
    [VoiceConversion VoiceInitialize];
    return YES;
}


 

 

#import "VoiceConversion.h"

// Private state of the demo view controller.
@interface ViewController ()

// Speech dictation helper; created on first access by its getter.
@property (nonatomic, strong) VoiceConversion *voiceConversion;
// Label that shows status messages, the recognized text and the volume.
@property (nonatomic, strong) UILabel *messageLabel;

@end

@implementation ViewController

/// Installs the start / stop / cancel bar buttons, sets the title and builds the UI.
- (void)viewDidLoad {
    [super viewDidLoad];

    // Three right-hand bar buttons, each wired to its own action.
    UIBarButtonItem *startButton = [[UIBarButtonItem alloc] initWithTitle:@"start"
                                                                    style:UIBarButtonItemStyleDone
                                                                   target:self
                                                                   action:@selector(startItemClick:)];
    UIBarButtonItem *stopButton = [[UIBarButtonItem alloc] initWithTitle:@"stop"
                                                                   style:UIBarButtonItemStyleDone
                                                                  target:self
                                                                  action:@selector(stopItemClick:)];
    UIBarButtonItem *cancelButton = [[UIBarButtonItem alloc] initWithTitle:@"cancel"
                                                                     style:UIBarButtonItemStyleDone
                                                                    target:self
                                                                    action:@selector(cancelItemClick:)];
    NSArray *barButtons = @[startButton, stopButton, cancelButton];
    self.navigationItem.rightBarButtonItems = barButtons;

    self.title = @"科大訊飛語音";

    [self setUI];
}

// Forwards the memory warning to the superclass; nothing else is released here.
- (void)didReceiveMemoryWarning {
    [super didReceiveMemoryWarning];
    // Dispose of any resources that can be recreated.
}

#pragma mark - 視圖

/// Builds the message label and adds it to the view.
- (void)setUI
{
    // Only touch the extended-layout edges where the API is available.
    if ([self respondsToSelector:@selector(setEdgesForExtendedLayout:)])
    {
        self.edgesForExtendedLayout = UIRectEdgeNone;
    }

    // Label inset by 10pt on the left, top and right; 40pt tall.
    CGFloat labelWidth = CGRectGetWidth(self.view.bounds) - 10.0 * 2;
    CGRect labelFrame = CGRectMake(10.0, 10.0, labelWidth, 40.0);

    UILabel *label = [[UILabel alloc] initWithFrame:labelFrame];
    self.messageLabel = label;
    [self.view addSubview:label];
    label.backgroundColor = [UIColor colorWithWhite:0.5 alpha:0.3];
    label.textAlignment = NSTextAlignmentCenter;
}

#pragma mark - 響應

/// "start" button action: begins a dictation session and mirrors its progress
/// in the message label.
- (void)startItemClick:(UIBarButtonItem *)item
{
    // The helper stores these blocks and this controller owns the helper, so
    // the blocks must not hold the controller strongly.
    __weak ViewController *weakSelf = self;

    void (^onStart)(BOOL) = ^(BOOL isStart) {
        NSLog(@"1 start");
        weakSelf.messageLabel.text = isStart ? @"正在錄音" : @"啟動識別服務失敗,請稍後重試";
    };

    void (^onBegin)(void) = ^{
        NSLog(@"2 begin");
    };

    void (^onEnd)(void) = ^{
        NSLog(@"3 end");
    };

    void (^onError)(BOOL) = ^(BOOL isSuccess) {
        NSLog(@"4 error");
    };

    void (^onResult)(NSString *) = ^(NSString *text) {
        NSLog(@"5 result");
        weakSelf.messageLabel.text = text;
    };

    void (^onVolume)(int) = ^(int volume) {
        NSLog(@"6 volume");
        weakSelf.messageLabel.text = [NSString stringWithFormat:@"音量:%d", volume];
    };

    [self.voiceConversion voiceStart:onStart
                         speechBegin:onBegin
                           speechEnd:onEnd
                         speechError:onError
                        speechResult:onResult
                        speechVolume:onVolume];
}

/// "stop" button action: stops recording and shows the status in the label.
- (void)stopItemClick:(UIBarButtonItem *)item
{
    VoiceConversion *voice = self.voiceConversion;
    [voice voiceStop];

    [self.messageLabel setText:@"停止錄音"];
}

/// "cancel" button action: cancels recognition and shows the status in the label.
- (void)cancelItemClick:(UIBarButtonItem *)item
{
    VoiceConversion *voice = self.voiceConversion;
    [voice voiceCancel];

    [self.messageLabel setText:@"取消識別"];
}

#pragma mark - getter

/// Lazily creates the speech dictation helper on first access.
- (VoiceConversion *)voiceConversion
{
    if (_voiceConversion != nil)
    {
        return _voiceConversion;
    }

    _voiceConversion = [[VoiceConversion alloc] init];
    return _voiceConversion;
}

@end

 

  1. 上一頁:
  2. 下一頁:
蘋果刷機越獄教程| IOS教程問題解答| IOS技巧綜合| IOS7技巧| IOS8教程
Copyright © Ios教程網 All Rights Reserved