动态

详情 返回 返回

鴻蒙應用開發---語音轉文本 - 动态 详情

🧑‍💻 寫在開頭

點贊 + 收藏 === 學會🤣🤣🤣

一、工具

企業微信截圖_20250825173432

二、開發步驟

將一段中文音頻轉換為文本

1.在使用語音識別時,將實現語音識別相關的類添加至工程。

import { speechRecognizer } from '@kit.CoreSpeechKit';
import { BusinessError } from '@kit.BasicServicesKit';

2.調用createEngine方法,對引擎進行初始化,並創建SpeechRecognitionEngine實例。

// Engine instance shared by the following steps; it is assigned inside the createEngine callback below.
let asrEngine: speechRecognizer.SpeechRecognitionEngine;
// Session identifier passed to the engine calls in the later steps.
let sessionId: string = '123456';
// Create the engine; the result is delivered through a callback.
// Parameters used to create the engine.
let extraParam: Record<string, Object> = {"locate": "CN", "recognizerMode": "short"};
let initParamsInfo: speechRecognizer.CreateEngineParams = {
  language: 'zh-CN',
  online: 1,
  extraParams: extraParam
};
// Call createEngine.
speechRecognizer.createEngine(initParamsInfo, (err: BusinessError, speechRecognitionEngine: speechRecognizer.SpeechRecognitionEngine) => {
  if (!err) {
    console.info('Succeeded in creating engine.');
    // Keep the created engine instance for the later steps.
    asrEngine = speechRecognitionEngine;
  } else {
    console.error(`Failed to create engine. Code: ${err.code}, message: ${err.message}.`);
  }
});

3.得到SpeechRecognitionEngine實例對象後,實例化RecognitionListener對象,調用setListener方法設置回調,用來接收語音識別相關的回調信息。

// 創建回調對象
let setListener: speechRecognizer.RecognitionListener = {
  // Called when recognition has started successfully.
  onStart(sessionId: string, eventMessage: string) {
    console.info(`onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
  },
  // Called for engine events.
  onEvent(sessionId: string, eventCode: number, eventMessage: string) {
    console.info(`onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`);
  },
  // Called with recognition results, both intermediate and final.
  onResult(sessionId: string, result: speechRecognizer.SpeechRecognitionResult) {
    // The second label used to read "sessionId" as well, which mislabelled the result payload in the log.
    console.info(`onResult, sessionId: ${sessionId} result: ${JSON.stringify(result)}`);
  },
  // Called when recognition has completed.
  onComplete(sessionId: string, eventMessage: string) {
    console.info(`onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
  },
  // Error callback; error codes are reported through this method.
  // Error code 1002200002: starting recognition failed, raised when startListening is called repeatedly.
  // See the error code reference for more codes.
  onError(sessionId: string, errorCode: number, errorMessage: string) {
    console.error(`onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`);
  },
}
// Register the listener on the engine.
asrEngine.setListener(setListener);

4.分別為音頻文件轉文字和麥克風轉文字功能設置開始識別的相關參數,調用startListening方法,開始識別。

// 開始識別
// Starts a recognition session configured for 16 kHz, mono, 16-bit PCM input.
private startListeningForWriteAudio() {
  // Format description of the audio; see the AudioInfo documentation for the field meanings.
  const pcmInfo: speechRecognizer.AudioInfo = { audioType: 'pcm', sampleRate: 16000, soundChannel: 1, sampleBit: 16 };
  const startParams: speechRecognizer.StartParams = {
    sessionId: this.sessionId,
    audioInfo: pcmInfo
  };
  // Ask the engine to begin recognising for this session.
  asrEngine.startListening(startParams);
};

// Starts a recognition session for the microphone flow, passing extra recognition options to the engine.
private startListeningForRecording() {
  let audioParam: speechRecognizer.AudioInfo = { audioType: 'pcm', sampleRate: 16000, soundChannel: 1, sampleBit: 16 }
  // Extra options forwarded to the engine as-is; see the StartParams documentation for the meaning of each key.
  let extraParam: Record<string, Object> = {
    "recognitionMode": 0,
    "vadBegin": 2000,
    "vadEnd": 3000,
    "maxAudioDuration": 20000
  }
  let recognizerParams: speechRecognizer.StartParams = {
    sessionId: this.sessionId,
    audioInfo: audioParam,
    extraParams: extraParam
  }
  console.info('startListening start');
  asrEngine.startListening(recognizerParams);
};

5.傳入音頻流,調用writeAudio方法,開始寫入音頻流。讀取音頻文件時,開發者需預先準備一個pcm格式音頻文件。

// Placeholder for one chunk of audio; it is empty here and must be filled with real PCM data.
const uint8Array: Uint8Array = new Uint8Array(0);
// The audio stream can be obtained in two ways: 1. from a recording; 2. by reading an audio file.
// Both ways are implemented in the complete example below.
// Write the audio stream; only chunk lengths of 640 or 1280 are supported.
asrEngine.writeAudio(sessionId, uint8Array);

三、完整代碼案例

import { speechRecognizer } from '@kit.CoreSpeechKit';
import { BusinessError } from '@kit.BasicServicesKit';
import { fileIo } from '@kit.CoreFileKit';
import { hilog } from '@kit.PerformanceAnalysisKit';
import AudioCapturer from './AudioCapturer';

// Log tag passed to every hilog call in this file.
const TAG = 'CoreSpeechKitDemo';

// Shared speech-recognition engine. It is assigned inside the createEngine callback,
// so it stays undefined until that callback has run successfully.
let asrEngine: speechRecognizer.SpeechRecognitionEngine;

/**
 * Demo page for Core Speech Kit speech-to-text.
 *
 * Creates a recognition engine when the page appears and offers buttons to
 * start microphone recognition, feed a PCM file to the engine, and to
 * finish / cancel the session or shut the engine down. Recognised text is
 * shown through the `voiceInfo` state.
 */
@Entry
@Component
struct Index {
  @State createCount: number = 0;
  @State result: boolean = false;
  @State voiceInfo: string = "";
  @State sessionId: string = "123456";
  @State sessionId2: string = "1234567";
  private mAudioCapturer = new AudioCapturer();

  aboutToAppear(): void {
    this.createCount++;
    // createEngine completes asynchronously. The listener is registered from its
    // success callback (see createByCallback) because the engine does not exist yet here.
    this.createByCallback();
  }

  build() {
    Column() {
      Scroll() {
        Column() {

          Text(`${this.voiceInfo}`)
            .margin(20)

          Button() {
            Text("語音轉文本")
              .fontColor(Color.White)
              .fontSize(20)
          }
          .type(ButtonType.Capsule)
          .backgroundColor("#317AE7")
          .width("80%")
          .height(50)
          .margin(10)
          .onClick(() => {
            this.startRecording();
          })

          Button() {
            Text("寫入音頻流")
              .fontColor(Color.White)
              .fontSize(20)
          }
          .type(ButtonType.Capsule)
          .backgroundColor("#317AE7")
          .width("80%")
          .height(50)
          .margin(10)
          .onClick(() => {
            this.writeAudio();
          })

          Button() {
            Text("完成")
              .fontColor(Color.White)
              .fontSize(20)
          }
          .type(ButtonType.Capsule)
          .backgroundColor("#317AE7")
          .width("80%")
          .height(50)
          .margin(10)
          .onClick(() => {
            // Finish recognition.
            hilog.info(0x0000, TAG, "finish click:-->");
            if (this.isEngineReady()) {
              asrEngine.finish(this.sessionId);
            }
          })

          Button() {
            Text("取消")
              .fontColor(Color.White)
              .fontSize(20)
          }
          .type(ButtonType.Capsule)
          .backgroundColor("#317AE7")
          .width("80%")
          .height(50)
          .margin(10)
          .onClick(() => {
            // Cancel recognition.
            hilog.info(0x0000, TAG, "cancel click:-->");
            if (this.isEngineReady()) {
              asrEngine.cancel(this.sessionId);
            }
          })

          Button() {
            Text("關閉")
              .fontColor(Color.White)
              .fontSize(20)
          }
          .type(ButtonType.Capsule)
          .backgroundColor("#317AA7")
          .width("80%")
          .height(50)
          .margin(10)
          .onClick(() => {
            // Release the engine.
            if (this.isEngineReady()) {
              asrEngine.shutdown();
            }
          })
        }
        .layoutWeight(1)
      }
      .width('100%')
      .height('100%')

    }
  }

  /**
   * Reports whether the shared engine has been created.
   * Logs an error and returns false while `asrEngine` is still unassigned,
   * which is the case until the createEngine callback succeeds.
   */
  private isEngineReady(): boolean {
    if (!asrEngine) {
      hilog.error(0x0000, TAG, 'Speech recognition engine is not ready yet.');
      return false;
    }
    return true;
  }

  // Create the engine; the result is delivered through a callback.
  private createByCallback() {
    // Parameters used to create the engine.
    let extraParam: Record<string, Object> = {"locate": "CN", "recognizerMode": "short"};
    let initParamsInfo: speechRecognizer.CreateEngineParams = {
      language: 'zh-CN',
      online: 1,
      extraParams: extraParam
    };

    // Call createEngine.
    speechRecognizer.createEngine(initParamsInfo, (err: BusinessError, speechRecognitionEngine:
      speechRecognizer.SpeechRecognitionEngine) => {
      if (!err) {
        hilog.info(0x0000, TAG, 'Succeeded in creating engine.');
        // Keep the created engine instance.
        asrEngine = speechRecognitionEngine;
        // The engine exists only from this point on, so the listener is attached here.
        this.setListener();
      } else {
        // Error code 1002200001: engine creation failed because the language or mode is unsupported, initialisation timed out, a resource is missing, etc.
        // Error code 1002200006: the engine is busy, typically when several applications use the recognition engine at the same time.
        // Error code 1002200008: the engine has already been destroyed.
        hilog.error(0x0000, TAG, `Failed to create engine. Code: ${err.code}, message: ${err.message}.`);
      }
    });
  }

  // Query the supported languages; the result is delivered through a callback.
  private queryLanguagesCallback() {
    if (!this.isEngineReady()) {
      return;
    }
    // Query parameters.
    let languageQuery: speechRecognizer.LanguageQuery = {
      sessionId: this.sessionId
    };
    // Call listLanguages.
    asrEngine.listLanguages(languageQuery, (err: BusinessError, languages: Array<string>) => {
      if (!err) {
        // Languages currently supported.
        hilog.info(0x0000, TAG, `Succeeded in listing languages, result: ${JSON.stringify(languages)}`);
      } else {
        hilog.error(0x0000, TAG, `Failed to list languages. Code: ${err.code}, message: ${err.message}.`);
      }
    });
  };

  // Start recognition for audio that is fed in through writeAudio.
  private startListeningForWriteAudio() {
    // Start parameters.
    let recognizerParams: speechRecognizer.StartParams = {
      sessionId: this.sessionId,
      audioInfo: { audioType: 'pcm', sampleRate: 16000, soundChannel: 1, sampleBit: 16 } // see AudioInfo for the field meanings
    }
    // Start recognising.
    asrEngine.startListening(recognizerParams);
  };

  // Start recognition for the microphone flow.
  private startListeningForRecording() {
    let audioParam: speechRecognizer.AudioInfo = { audioType: 'pcm', sampleRate: 16000, soundChannel: 1, sampleBit: 16 }
    // Extra options forwarded to the engine as-is; see the StartParams documentation for the meaning of each key.
    let extraParam: Record<string, Object> = {
      "recognitionMode": 0,
      "vadBegin": 2000,
      "vadEnd": 3000,
      "maxAudioDuration": 20000
    }
    let recognizerParams: speechRecognizer.StartParams = {
      sessionId: this.sessionId,
      audioInfo: audioParam,
      extraParams: extraParam
    }
    hilog.info(0x0000, TAG, 'startListening start');
    asrEngine.startListening(recognizerParams);
  };

  /**
   * Feeds the first file found in the application's files directory to the
   * engine in 1280-byte chunks, pausing 40 ms after each chunk.
   * A trailing chunk shorter than 1280 bytes is not written.
   */
  private async writeAudio() {
    if (!this.isEngineReady()) {
      return;
    }
    let file: fileIo.File | undefined = undefined;
    try {
      let ctx = getContext(this);
      let filenames: string[] = fileIo.listFileSync(ctx.filesDir);
      if (filenames.length <= 0) {
        hilog.error(0x0000, TAG, `No audio file found in ${ctx.filesDir}.`);
        return;
      }
      let filePath: string = `${ctx.filesDir}/${filenames[0]}`;
      hilog.info(0x0000, TAG, `Writing audio from file: ${filePath}`);
      // Start the session only once there is a file to feed, so no session is left dangling.
      this.startListeningForWriteAudio();
      // The file is only read, so read-only access is sufficient.
      file = fileIo.openSync(filePath, fileIo.OpenMode.READ_ONLY);
      let buf: ArrayBuffer = new ArrayBuffer(1280);
      let offset: number = 0;
      while (1280 == fileIo.readSync(file.fd, buf, {
        offset: offset
      })) {
        let uint8Array: Uint8Array = new Uint8Array(buf);
        asrEngine.writeAudio(this.sessionId, uint8Array);
        await this.countDownLatch(1);
        offset = offset + 1280;
      }
    } catch (err) {
      let error = err as BusinessError;
      hilog.error(0x0000, TAG, `Failed to read from file. Code: ${error.code}, message: ${error.message}.`);
    } finally {
      if (file !== undefined) {
        fileIo.closeSync(file);
      }
    }
  }

  // Microphone speech-to-text.
  private async startRecording() {
    if (!this.isEngineReady()) {
      return;
    }
    this.startListeningForRecording();
    hilog.info(0x0000, TAG, 'create capture success');
    // NOTE(review): only init() is called here; AudioCapturer.start(), which is the method that
    // invokes this callback, is never called from this file - confirm whether that is intended.
    // NOTE(review): the callback writes to sessionId2 while the session above is started with
    // sessionId - confirm which id is meant.
    await this.mAudioCapturer.init((dataBuffer: ArrayBuffer) => {
      hilog.info(0x0000, TAG, 'start write');
      hilog.info(0x0000, TAG, 'ArrayBuffer ' + JSON.stringify(dataBuffer));
      let uint8Array: Uint8Array = new Uint8Array(dataBuffer);
      hilog.info(0x0000, TAG, 'ArrayBuffer uint8Array ' + JSON.stringify(uint8Array));
      // Write the audio stream.
      asrEngine.writeAudio(this.sessionId2, uint8Array);
    });
  };

  // Waits `count` times 40 ms.
  public async countDownLatch(count: number) {
    while (count > 0) {
      await this.sleep(40);
      count--;
    }
  }

  // Resolves after `ms` milliseconds.
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Registers the recognition listener on the engine.
   * The callbacks are arrow functions so that `this` refers to this component
   * and assigning `voiceInfo` updates the UI state.
   */
  private setListener() {
    if (!this.isEngineReady()) {
      return;
    }
    // Build the listener object.
    let setListener: speechRecognizer.RecognitionListener = {
      // Called when recognition has started successfully.
      onStart: (sessionId: string, eventMessage: string) => {
        hilog.info(0x0000, TAG, `onStart, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
      },
      // Called for engine events.
      onEvent: (sessionId: string, eventCode: number, eventMessage: string) => {
        hilog.info(0x0000, TAG, `onEvent, sessionId: ${sessionId} eventCode: ${eventCode} eventMessage: ${eventMessage}`);
      },
      // Called with recognition results, both intermediate and final.
      onResult: (sessionId: string, result: speechRecognizer.SpeechRecognitionResult) => {
        hilog.info(0x0000, TAG, `onResult, sessionId: ${sessionId} result: ${JSON.stringify(result)}`);
        // Show the recognised text (the string field of the result object).
        this.voiceInfo = result.result;
      },
      // Called when recognition has completed.
      onComplete: (sessionId: string, eventMessage: string) => {
        hilog.info(0x0000, TAG, `onComplete, sessionId: ${sessionId} eventMessage: ${eventMessage}`);
        // voiceInfo is deliberately left untouched so the recognised text stays on screen.
      },
      // Error callback; error codes are reported through this method.
      // Error code 1002200002: starting recognition failed, raised when startListening is called repeatedly.
      // See the error code reference for more codes.
      onError: (sessionId: string, errorCode: number, errorMessage: string) => {
        hilog.error(0x0000, TAG, `onError, sessionId: ${sessionId} errorCode: ${errorCode} errorMessage: ${errorMessage}`);
      },
    }
    // Register the listener.
    asrEngine.setListener(setListener);
  };
}
AudioCapturer.ts文件
'use strict';
/*
 * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved.
 */

import {audio} from '@kit.AudioKit';
import { hilog } from '@kit.PerformanceAnalysisKit';

// Log tag passed to every hilog call in this file.
const TAG = 'AudioCapturer';

/**
 * Audio collector tool
 */
/**
 * Thin wrapper around an `audio.AudioCapturer` that reads microphone data in a
 * loop and hands every buffer to a callback supplied to {@link init}.
 *
 * `start`, `stop` and `release` are safe to call before `init` or after a
 * failed `init`: they log an error and return instead of throwing.
 */
export default class AudioCapturer {
  /**
   * Underlying capturer; null until init() succeeds and again after release().
   */
  private mAudioCapturer: audio.AudioCapturer | null = null;

  /**
   * Callback that receives every buffer read by start().
   */
  private mDataCallBack: ((data: ArrayBuffer) => void) | null = null;

  /**
   * Loop flag for start(); stop() clears it to end the read loop.
   */
  private mCanWrite: boolean = true;

  /**
   * Audio stream information: 16 kHz, one channel, signed 16-bit little-endian, raw encoding.
   */
  private audioStreamInfo = {
    samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000,
    channels: audio.AudioChannel.CHANNEL_1,
    sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
    encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW
  }

  /**
   * Audio collector information: microphone source.
   */
  private audioCapturerInfo = {
    source: audio.SourceType.SOURCE_TYPE_MIC,
    capturerFlags: 0
  }

  /**
   * Options passed to audio.createAudioCapturer.
   */
  private audioCapturerOptions = {
    streamInfo: this.audioStreamInfo,
    capturerInfo: this.audioCapturerInfo
  }

  /**
   * Creates the underlying capturer and remembers the data callback.
   * Does nothing (besides logging) when a capturer already exists. A creation
   * failure is logged and leaves the instance uninitialised; it is not rethrown.
   *
   * @param dataCallBack receives every buffer read by start()
   */
  public async init(dataCallBack: (data: ArrayBuffer) => void): Promise<void> {
    if (null != this.mAudioCapturer) {
      hilog.error(0x0000, TAG, 'AudioCapturerUtil already init');
      return;
    }
    this.mDataCallBack = dataCallBack;
    try {
      this.mAudioCapturer = await audio.createAudioCapturer(this.audioCapturerOptions);
    } catch (error) {
      const failure = error as { code?: number; message?: string };
      hilog.error(0x0000, TAG, `AudioCapturerUtil init createAudioCapturer failed, code is ${failure.code}, message is ${failure.message}`);
    }
  }

  /**
   * Starts capturing and keeps reading buffers until stop() clears the loop flag.
   * The returned promise therefore resolves only after the loop has ended.
   * Returns early when the capturer is missing or not in the prepared, paused or stopped state.
   */
  public async start(): Promise<void> {
    hilog.info(0x0000, TAG, `AudioCapturerUtil start`);
    const capturer = this.mAudioCapturer;
    if (capturer == null) {
      hilog.error(0x0000, TAG, `AudioCapturerUtil start failed: capturer is not initialized`);
      return;
    }
    const startableStates = [audio.AudioState.STATE_PREPARED, audio.AudioState.STATE_PAUSED, audio.AudioState.STATE_STOPPED];
    if (!startableStates.includes(capturer.state)) {
      hilog.error(0x0000, TAG, `AudioCapturerUtil start failed`);
      return;
    }
    this.mCanWrite = true;
    await capturer.start();
    while (this.mCanWrite) {
      const bufferSize = await capturer.getBufferSize();
      const buffer = await capturer.read(bufferSize, true);
      this.mDataCallBack?.(buffer);
    }
  }

  /**
   * Ends the read loop and stops the capturer.
   * Returns early when the capturer is missing or neither running nor paused.
   */
  public async stop(): Promise<void> {
    const capturer = this.mAudioCapturer;
    if (capturer == null) {
      hilog.error(0x0000, TAG, `AudioCapturerUtil stop failed: capturer is not initialized`);
      return;
    }
    if (capturer.state !== audio.AudioState.STATE_RUNNING && capturer.state !== audio.AudioState.STATE_PAUSED) {
      hilog.error(0x0000, TAG, `AudioCapturerUtil stop Capturer is not running or paused`);
      return;
    }
    this.mCanWrite = false;
    await capturer.stop();
    if (capturer.state === audio.AudioState.STATE_STOPPED) {
      hilog.info(0x0000, TAG, `AudioCapturerUtil Capturer stopped`);
    } else {
      hilog.error(0x0000, TAG, `Capturer stop failed`);
    }
  }

  /**
   * Releases the underlying capturer and clears the reference so init() can be called again.
   * Returns early when there is no capturer or it is already released / still new.
   */
  public async release(): Promise<void> {
    const capturer = this.mAudioCapturer;
    if (capturer == null ||
      capturer.state === audio.AudioState.STATE_RELEASED ||
      capturer.state === audio.AudioState.STATE_NEW) {
      hilog.error(0x0000, TAG, `Capturer already released`);
      return;
    }
    await capturer.release();
    // Drop the field first, then inspect the state through the local reference:
    // reading the state through the cleared field would throw.
    this.mAudioCapturer = null;
    if (capturer.state == audio.AudioState.STATE_RELEASED) {
      hilog.info(0x0000, TAG, `Capturer released`);
    } else {
      hilog.error(0x0000, TAG, `Capturer release failed`);
    }
  }
}

如果對您有所幫助,歡迎您點個關注,我會定時更新技術文檔,大家一起討論學習,一起進步。

user avatar meirenlidexiaomaju 头像 user_ze46ouik 头像 yanyue404 头像 cynthia_59675eba1a2ee 头像
点赞 4 用户, 点赞了这篇动态!
点赞

Add a new 评论

Some HTML is okay.