Skip to content

Commit

Permalink
feat: doubao tts (labring#5285)
Browse files Browse the repository at this point in the history
  • Loading branch information
zijiren233 authored Dec 12, 2024
1 parent 35fff27 commit 4a6fcd0
Show file tree
Hide file tree
Showing 5 changed files with 493 additions and 0 deletions.
1 change: 1 addition & 0 deletions service/aiproxy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ func main() {

<-ctx.Done()
log.Info("shutting down server...")
log.Info("max wait time: 120s")

shutdownCtx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()
Expand Down
105 changes: 105 additions & 0 deletions service/aiproxy/relay/adaptor/doubaoaudio/constants.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package doubaoaudio

import (
"github.com/labring/sealos/service/aiproxy/model"
"github.com/labring/sealos/service/aiproxy/relay/relaymode"
)

// https://www.volcengine.com/docs/6561/1257543

// ModelList enumerates the model configurations offered by the doubaoaudio
// adaptor; it is the value returned by (*Adaptor).GetModelList.
//
// The only active entry is the "Doubao-tts" speech model, whose Config lists
// the audio formats and voice identifiers it declares as supported.
var ModelList = []*model.ModelConfig{
{
Model: "Doubao-tts",
Type: relaymode.AudioSpeech,
Owner: model.ModelOwnerDoubao,
InputPrice: 0.5,
Config: map[model.ModelConfigKey]any{
// Output audio formats declared for this model.
model.ModelConfigSupportFormatsKey: []string{
"pcm",
"mp3",
"wav",
"ogg_opus",
},
// Voice identifiers declared for this model.
model.ModelConfigSupportVoicesKey: []string{
"zh_female_cancan_mars_bigtts",
"zh_female_qingxinnvsheng_mars_bigtts",
"zh_female_shuangkuaisisi_moon_bigtts",
"zh_male_wennuanahu_moon_bigtts",
"zh_male_shaonianzixin_moon_bigtts",
"zh_female_zhixingnvsheng_mars_bigtts",
"zh_male_qingshuangnanda_mars_bigtts",
"zh_female_linjianvhai_moon_bigtts",
"zh_male_yuanboxiaoshu_moon_bigtts",
"zh_male_yangguangqingnian_moon_bigtts",
"zh_female_tianmeixiaoyuan_moon_bigtts",
"zh_female_qingchezizi_moon_bigtts",
"zh_male_jieshuoxiaoming_moon_bigtts",
"zh_female_kailangjiejie_moon_bigtts",
"zh_male_linjiananhai_moon_bigtts",
"zh_female_tianmeiyueyue_moon_bigtts",
"zh_female_xinlingjitang_moon_bigtts",
"en_male_smith_mars_bigtts",
"en_female_anna_mars_bigtts",
"en_male_adam_mars_bigtts",
"en_female_sarah_mars_bigtts",
"en_male_dryw_mars_bigtts",
"multi_male_jingqiangkanye_moon_bigtts",
"multi_female_shuangkuaisisi_moon_bigtts",
"multi_male_wanqudashu_moon_bigtts",
"multi_female_gaolengyujie_moon_bigtts",
"zh_male_jingqiangkanye_moon_bigtts",
"zh_female_wanwanxiaohe_moon_bigtts",
"zh_female_wanqudashu_moon_bigtts",
"zh_female_daimengchuanmei_moon_bigtts",
"zh_male_guozhoudege_moon_bigtts",
"zh_male_beijingxiaoye_moon_bigtts",
"zh_male_haoyuxiaoge_moon_bigtts",
"zh_male_guangxiyuanzhou_moon_bigtts",
"zh_female_meituojieer_moon_bigtts",
"zh_male_yuzhouzixuan_moon_bigtts",
"zh_male_naiqimengwa_mars_bigtts",
"zh_female_popo_mars_bigtts",
"zh_female_gaolengyujie_moon_bigtts",
"zh_male_aojiaobazong_moon_bigtts",
"zh_female_meilinvyou_moon_bigtts",
"zh_male_shenyeboke_moon_bigtts",
"zh_female_sajiaonvyou_moon_bigtts",
"zh_female_yuanqinvyou_moon_bigtts",
"ICL_zh_female_bingruoshaonv_tob",
"ICL_zh_female_huoponvhai_tob",
"zh_male_dongfanghaoran_moon_bigtts",
"ICL_zh_female_heainainai_tob",
"ICL_zh_female_linjuayi_tob",
"zh_female_wenrouxiaoya_moon_bigtts",
"zh_male_tiancaitongsheng_mars_bigtts",
"zh_male_sunwukong_mars_bigtts",
"zh_male_xionger_mars_bigtts",
"zh_female_peiqi_mars_bigtts",
"zh_female_wuzetian_mars_bigtts",
"zh_female_gujie_mars_bigtts",
"zh_female_yingtaowanzi_mars_bigtts",
"zh_male_chunhui_mars_bigtts",
"zh_female_shaoergushi_mars_bigtts",
"zh_male_silang_mars_bigtts",
"zh_male_jieshuonansheng_mars_bigtts",
"zh_female_jitangmeimei_mars_bigtts",
"zh_female_tiexinnvsheng_mars_bigtts",
"zh_female_qiaopinvsheng_mars_bigtts",
"zh_female_mengyatou_mars_bigtts",
"zh_male_changtianyi_mars_bigtts",
"zh_male_ruyaqingnian_mars_bigtts",
"zh_male_baqiqingshu_mars_bigtts",
"zh_male_qingcang_mars_bigtts",
"zh_female_gufengshaoyu_mars_bigtts",
"zh_female_wenroushunv_mars_bigtts",
},
},
},

// The transcription ("Doubao-stt") entry below is commented out and is
// therefore not part of the list.
// {
// Model: "Doubao-stt",
// Type: relaymode.AudioTranscription,
// Owner: model.ModelOwnerDoubao,
// InputPrice: 2.3,
// },
}
99 changes: 99 additions & 0 deletions service/aiproxy/relay/adaptor/doubaoaudio/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package doubaoaudio

import (
"errors"
"fmt"
"io"
"net/http"
"strings"

"github.com/gin-gonic/gin"
"github.com/labring/sealos/service/aiproxy/model"
"github.com/labring/sealos/service/aiproxy/relay/adaptor/openai"
"github.com/labring/sealos/service/aiproxy/relay/meta"
relaymodel "github.com/labring/sealos/service/aiproxy/relay/model"
"github.com/labring/sealos/service/aiproxy/relay/relaymode"
)

// GetRequestURL builds the upstream URL for the given request metadata.
//
// The channel's BaseURL is used when set; otherwise the package default
// baseURL is used. Only relaymode.AudioSpeech is supported, for which the
// "/api/v1/tts/ws_binary" path is appended to the base. Any other mode
// yields an error naming the unsupported mode.
func GetRequestURL(meta *meta.Meta) (string, error) {
	// Resolve the base first so the evaluation order matches the mode check below.
	base := meta.Channel.BaseURL
	if base == "" {
		base = baseURL
	}

	if meta.Mode != relaymode.AudioSpeech {
		return "", fmt.Errorf("unsupported relay mode %d for doubao", meta.Mode)
	}
	return base + "/api/v1/tts/ws_binary", nil
}

// Adaptor is the doubaoaudio relay adaptor. It holds no state; its methods
// dispatch on the relay mode carried in the request metadata.
type Adaptor struct{}

// baseURL is the default upstream base, used by GetRequestURL when the
// channel does not provide its own BaseURL.
const baseURL = "https://openspeech.bytedance.com"

// GetModelList returns the package-level ModelList.
func (a *Adaptor) GetModelList() []*model.ModelConfig {
return ModelList
}

// GetRequestURL delegates to the package-level GetRequestURL function and
// returns its result unchanged.
func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
return GetRequestURL(meta)
}

// ConvertRequest converts the incoming request for the upstream service.
//
// For relaymode.AudioSpeech it forwards to ConvertTTSRequest and returns its
// results as-is; every other mode is rejected with an error naming the mode.
func (a *Adaptor) ConvertRequest(meta *meta.Meta, req *http.Request) (http.Header, io.Reader, error) {
	if meta.Mode == relaymode.AudioSpeech {
		return ConvertTTSRequest(meta, req)
	}
	return nil, nil, fmt.Errorf("unsupported relay mode %d for doubao", meta.Mode)
}

// getAppIDAndToken splits a channel key of the form "app_id|app_token" into
// its two halves.
//
// The key must contain exactly one "|" separator; otherwise the error
// "invalid key format" is returned together with two empty strings. Either
// half may be empty.
func getAppIDAndToken(key string) (string, string, error) {
	appID, token, found := strings.Cut(key, "|")
	// A second separator in the remainder means the key had more than two parts.
	if !found || strings.Contains(token, "|") {
		return "", "", errors.New("invalid key format")
	}
	return appID, token, nil
}

// SetupRequestHeader sets the Authorization header on the outgoing request.
//
// Only relaymode.AudioSpeech is supported. The token half of the channel key
// (see getAppIDAndToken) is written as "Bearer;" followed by the token.
// It returns the key-parsing error if the key is malformed, or an error
// naming the mode when the mode is unsupported.
func (a *Adaptor) SetupRequestHeader(meta *meta.Meta, _ *gin.Context, req *http.Request) error {
	if meta.Mode != relaymode.AudioSpeech {
		return fmt.Errorf("unsupported relay mode %d for doubao", meta.Mode)
	}

	// The app ID half of the key is not needed for the header.
	_, token, err := getAppIDAndToken(meta.Channel.Key)
	if err != nil {
		return err
	}

	req.Header.Set("Authorization", "Bearer;"+token)
	return nil
}

// DoRequest performs the upstream request.
//
// For relaymode.AudioSpeech it forwards to TTSDoRequest and returns its
// results as-is; every other mode is rejected with an error naming the mode.
func (a *Adaptor) DoRequest(meta *meta.Meta, _ *gin.Context, req *http.Request) (*http.Response, error) {
	if meta.Mode == relaymode.AudioSpeech {
		return TTSDoRequest(meta, req)
	}
	return nil, fmt.Errorf("unsupported relay mode %d for doubao", meta.Mode)
}

// DoResponse handles the upstream response.
//
// For relaymode.AudioSpeech it forwards to TTSDoResponse and returns its
// results as-is. For any other mode it returns a nil usage and an error built
// by openai.ErrorWrapperWithMessage with http.StatusBadRequest and a message
// naming the unsupported mode.
func (a *Adaptor) DoResponse(meta *meta.Meta, c *gin.Context, resp *http.Response) (*relaymodel.Usage, *relaymodel.ErrorWithStatusCode) {
	if meta.Mode == relaymode.AudioSpeech {
		return TTSDoResponse(meta, c, resp)
	}

	msg := fmt.Sprintf("unsupported relay mode %d for doubao", meta.Mode)
	return nil, openai.ErrorWrapperWithMessage(msg, nil, http.StatusBadRequest)
}

// GetChannelName returns the fixed channel name "doubao audio".
func (a *Adaptor) GetChannelName() string {
return "doubao audio"
}
Loading

0 comments on commit 4a6fcd0

Please sign in to comment.