Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Lint workflow: checks gofmt formatting and runs `go vet` on the Go module in runner/.
name: Lint

on:
# Triggered only by pull requests targeting main ...
pull_request:
branches: [main]
# ... and only when the runner sources or this workflow file change.
paths:
- 'runner/**'
- '.github/workflows/lint.yml'

jobs:
lint:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version: '1.25'
# The Go module lives in runner/, so the dependency cache is keyed on its go.sum.
cache-dependency-path: runner/go.sum
- name: go fmt
working-directory: runner
# `gofmt -s -l .` prints the files whose formatting (including -s
# simplifications) differs from gofmt's output; any output fails the step.
run: |
unformatted=$(gofmt -s -l .)
[ -z "$unformatted" ] || (echo "Unformatted: $unformatted"; exit 1)
- name: Download runtime
working-directory: runner
# NOTE(review): presumably fetches native artifacts that `go vet` needs in
# order to type-check the packages — confirm against the Makefile.
run: make download
- name: go vet
working-directory: runner
run: go vet ./...
228 changes: 114 additions & 114 deletions runner/internal/record/stream_record.go
Original file line number Diff line number Diff line change
@@ -1,114 +1,114 @@
// Copyright 2024-2026 Nexa AI, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package record
import (
"encoding/binary"
"fmt"
"io"
"log/slog"
"math"
"os/exec"
"runtime"
"strings"
)
// StreamRecorder wraps an external sox command and the pipe connected to its
// standard output, from which ReadFloat32 decodes audio samples.
type StreamRecorder struct {
// cmd is the sox command built by NewStreamRecorder.
cmd *exec.Cmd
// stdout is the read end of cmd's stdout pipe.
stdout io.ReadCloser
}
// NewStreamRecorder builds (but does not start) a sox command that captures
// from the default audio device and writes raw samples to its stdout, and
// returns a recorder holding that command together with its stdout pipe.
//
// The sox arguments request raw 32-bit float samples at 16000 Hz, one channel.
// On Windows the input is additionally declared as "-t waveaudio". Any GOOS
// other than windows, darwin or linux yields an "unsupported OS" error.
func NewStreamRecorder() (*StreamRecorder, error) {
	// The input side of the command line is the only platform-specific part.
	var input []string
	switch runtime.GOOS {
	case "windows":
		input = []string{"-t", "waveaudio", "-d"}
	case "darwin", "linux":
		input = []string{"-d"}
	default:
		return nil, fmt.Errorf("unsupported OS: %s", runtime.GOOS)
	}

	// Output format options and effects, shared by every supported platform.
	output := []string{
		"-t", "raw",
		"-e", "float",
		"-b", "32",
		"-r", "16000",
		"-c", "1",
		"-", // OUTFILE = stdout
		"rate", "16000",
		"channels", "1",
	}

	args := make([]string, 0, len(input)+len(output))
	args = append(args, input...)
	args = append(args, output...)

	slog.Debug("sox cmd", "cmd", "sox "+strings.Join(args, " "))

	// Stderr is deliberately left unset, as in the original (the assignment
	// to os.Stderr was commented out there).
	cmd := exec.Command("sox", args...)
	pipe, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}

	return &StreamRecorder{cmd: cmd, stdout: pipe}, nil
}
// Start launches the sox command prepared by NewStreamRecorder.
//
// It returns an error, rather than panicking, when the recorder has no
// command (for example a zero-value StreamRecorder). Otherwise it returns
// whatever exec.Cmd.Start reports, e.g. when the sox executable is missing.
func (sr *StreamRecorder) Start() error {
	if sr.cmd == nil {
		return fmt.Errorf("recorder not initialized")
	}
	return sr.cmd.Start()
}
// ReadFloat32 reads audio samples from the recorder's stdout pipe into buffer
// and returns the number of samples stored in buffer[:n].
//
// Every sample is decoded as 4 little-endian bytes holding an IEEE-754
// float32. One call issues a single Read on the pipe, so fewer than
// len(buffer) samples may be returned. If that Read ends in the middle of a
// sample, the missing bytes are read before decoding, so consecutive calls
// always stay aligned on sample boundaries.
//
// An error is returned when the recorder has no stdout pipe or when the pipe
// read fails (io.EOF once the stream is exhausted). If the stream ends inside
// the very first sample of a call, the result is 0 together with io.EOF or
// io.ErrUnexpectedEOF. An empty buffer yields (0, nil) without reading.
func (sr *StreamRecorder) ReadFloat32(buffer []float32) (int, error) {
	if sr.stdout == nil {
		return 0, fmt.Errorf("recorder not started")
	}
	if len(buffer) == 0 {
		return 0, nil
	}

	const sampleSize = 4 // bytes per float32 sample

	rawBytes := make([]byte, len(buffer)*sampleSize)
	n, err := sr.stdout.Read(rawBytes)
	if err != nil {
		return 0, err
	}

	// A pipe read can stop at any byte. Dropping the tail of a partial sample
	// would shift every later sample, so finish the sample that was begun.
	// len(rawBytes) is a multiple of sampleSize, so the slice is always in range.
	if rem := n % sampleSize; rem != 0 {
		filled, fillErr := io.ReadFull(sr.stdout, rawBytes[n:n+sampleSize-rem])
		n += filled
		if fillErr != nil && n < sampleSize {
			// Not even one whole sample is available.
			return 0, fillErr
		}
		// Otherwise hand back the whole samples read so far. The incomplete
		// tail is discarded and the failure is left for the next call to report.
	}

	sampleCount := n / sampleSize
	for i := range buffer[:sampleCount] {
		bits := binary.LittleEndian.Uint32(rawBytes[i*sampleSize : (i+1)*sampleSize])
		buffer[i] = math.Float32frombits(bits)
	}

	return sampleCount, nil
}
// Stop kills the sox process if one is running and returns the error from
// Process.Kill, if any.
//
// It is a no-op returning nil when the recorder has no command or when the
// command was never started: exec.Cmd.Process stays nil until Start succeeds,
// so it must be checked before calling Kill.
//
// NOTE(review): Stop does not call cmd.Wait, so the terminated child is not
// reaped here and the stdout pipe is not closed — confirm whether a caller is
// expected to do that.
func (sr *StreamRecorder) Stop() error {
	if sr.cmd == nil || sr.cmd.Process == nil {
		return nil
	}
	return sr.cmd.Process.Kill()
}
// Copyright 2024-2026 Nexa AI, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package record

import (
"encoding/binary"
"fmt"
"io"
"log/slog"
"math"
"os/exec"
"runtime"
"strings"
)

// StreamRecorder wraps an external sox command and the pipe connected to its
// standard output, from which ReadFloat32 decodes audio samples.
type StreamRecorder struct {
// cmd is the sox command built by NewStreamRecorder.
cmd *exec.Cmd
// stdout is the read end of cmd's stdout pipe.
stdout io.ReadCloser
}

// NewStreamRecorder builds (but does not start) a sox command that captures
// from the default audio device and writes raw samples to its stdout, and
// returns a recorder holding that command together with its stdout pipe.
//
// The sox arguments request raw 32-bit float samples at 16000 Hz, one channel.
// On Windows the input is additionally declared as "-t waveaudio". Any GOOS
// other than windows, darwin or linux yields an "unsupported OS" error.
func NewStreamRecorder() (*StreamRecorder, error) {
	// The input side of the command line is the only platform-specific part.
	var input []string
	switch runtime.GOOS {
	case "windows":
		input = []string{"-t", "waveaudio", "-d"}
	case "darwin", "linux":
		input = []string{"-d"}
	default:
		return nil, fmt.Errorf("unsupported OS: %s", runtime.GOOS)
	}

	// Output format options and effects, shared by every supported platform.
	output := []string{
		"-t", "raw",
		"-e", "float",
		"-b", "32",
		"-r", "16000",
		"-c", "1",
		"-", // OUTFILE = stdout
		"rate", "16000",
		"channels", "1",
	}

	args := make([]string, 0, len(input)+len(output))
	args = append(args, input...)
	args = append(args, output...)

	slog.Debug("sox cmd", "cmd", "sox "+strings.Join(args, " "))

	// Stderr is deliberately left unset, as in the original (the assignment
	// to os.Stderr was commented out there).
	cmd := exec.Command("sox", args...)
	pipe, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}

	return &StreamRecorder{cmd: cmd, stdout: pipe}, nil
}

// Start launches the sox command prepared by NewStreamRecorder.
//
// It returns an error, rather than panicking, when the recorder has no
// command (for example a zero-value StreamRecorder). Otherwise it returns
// whatever exec.Cmd.Start reports, e.g. when the sox executable is missing.
func (sr *StreamRecorder) Start() error {
	if sr.cmd == nil {
		return fmt.Errorf("recorder not initialized")
	}
	return sr.cmd.Start()
}

// ReadFloat32 reads audio samples from the recorder's stdout pipe into buffer
// and returns the number of samples stored in buffer[:n].
//
// Every sample is decoded as 4 little-endian bytes holding an IEEE-754
// float32. One call issues a single Read on the pipe, so fewer than
// len(buffer) samples may be returned. If that Read ends in the middle of a
// sample, the missing bytes are read before decoding, so consecutive calls
// always stay aligned on sample boundaries.
//
// An error is returned when the recorder has no stdout pipe or when the pipe
// read fails (io.EOF once the stream is exhausted). If the stream ends inside
// the very first sample of a call, the result is 0 together with io.EOF or
// io.ErrUnexpectedEOF. An empty buffer yields (0, nil) without reading.
func (sr *StreamRecorder) ReadFloat32(buffer []float32) (int, error) {
	if sr.stdout == nil {
		return 0, fmt.Errorf("recorder not started")
	}
	if len(buffer) == 0 {
		return 0, nil
	}

	const sampleSize = 4 // bytes per float32 sample

	rawBytes := make([]byte, len(buffer)*sampleSize)
	n, err := sr.stdout.Read(rawBytes)
	if err != nil {
		return 0, err
	}

	// A pipe read can stop at any byte. Dropping the tail of a partial sample
	// would shift every later sample, so finish the sample that was begun.
	// len(rawBytes) is a multiple of sampleSize, so the slice is always in range.
	if rem := n % sampleSize; rem != 0 {
		filled, fillErr := io.ReadFull(sr.stdout, rawBytes[n:n+sampleSize-rem])
		n += filled
		if fillErr != nil && n < sampleSize {
			// Not even one whole sample is available.
			return 0, fillErr
		}
		// Otherwise hand back the whole samples read so far. The incomplete
		// tail is discarded and the failure is left for the next call to report.
	}

	sampleCount := n / sampleSize
	for i := range buffer[:sampleCount] {
		bits := binary.LittleEndian.Uint32(rawBytes[i*sampleSize : (i+1)*sampleSize])
		buffer[i] = math.Float32frombits(bits)
	}

	return sampleCount, nil
}

// Stop kills the sox process if one is running and returns the error from
// Process.Kill, if any.
//
// It is a no-op returning nil when the recorder has no command or when the
// command was never started: exec.Cmd.Process stays nil until Start succeeds,
// so it must be checked before calling Kill.
//
// NOTE(review): Stop does not call cmd.Wait, so the terminated child is not
// reaped here and the stdout pipe is not closed — confirm whether a caller is
// expected to do that.
func (sr *StreamRecorder) Stop() error {
	if sr.cmd == nil || sr.cmd.Process == nil {
		return nil
	}
	return sr.cmd.Process.Kill()
}
38 changes: 19 additions & 19 deletions runner/nexa-sdk/diarize.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,22 +81,22 @@ func freeDiarizeSpeechSegment(ptr *C.ml_DiarizeSpeechSegment) {

// DiarizeModelConfig represents diarization model configuration
type DiarizeModelConfig struct {
NCtx int32
NThreads int32
NThreadsBatch int32
NBatch int32
NUbatch int32
NSeqMax int32
NGpuLayers int32
ChatTemplatePath string
NCtx int32
NThreads int32
NThreadsBatch int32
NBatch int32
NUbatch int32
NSeqMax int32
NGpuLayers int32
ChatTemplatePath string
ChatTemplateContent string
EnableSampling bool
GrammarStr string
MaxTokens int32
EnableThinking bool
Verbose bool
QnnModelFolderPath string
QnnLibFolderPath string
EnableSampling bool
GrammarStr string
MaxTokens int32
EnableThinking bool
Verbose bool
QnnModelFolderPath string
QnnLibFolderPath string
}

func (mc DiarizeModelConfig) toCPtr() *C.ml_ModelConfig {
Expand Down Expand Up @@ -232,10 +232,10 @@ func freeDiarizeInferInput(cPtr *C.ml_DiarizeInferInput) {

// DiarizeInferOutput represents output from diarization inference
type DiarizeInferOutput struct {
Segments []DiarizeSpeechSegment
NumSpeakers int32
Duration float32
ProfileData ProfileData
Segments []DiarizeSpeechSegment
NumSpeakers int32
Duration float32
ProfileData ProfileData
}

func newDiarizeInferOutputFromCPtr(c *C.ml_DiarizeInferOutput) DiarizeInferOutput {
Expand Down
4 changes: 2 additions & 2 deletions runner/server/utils/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func SaveURIToTempFile(uri string) (string, error) {

// Detect content type
contentType := http.DetectContentType(data)

// Convert WebP to PNG for compatibility with native SDK
if strings.HasPrefix(contentType, "image/webp") || strings.HasSuffix(strings.ToLower(u.Path), ".webp") {
img, _, err := image.Decode(bytes.NewReader(data))
Expand All @@ -109,7 +109,7 @@ func SaveURIToTempFile(uri string) (string, error) {
if exts, err := mime.ExtensionsByType(contentType); err == nil && len(exts) > 0 {
fileExt = exts[0]
}

tmpFile, err := os.CreateTemp("", "uri-*"+fileExt)
if err != nil {
return "", err
Expand Down