Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pkg/cmd/kitimport/hfimport.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ import (
)

func importUsingHF(ctx context.Context, opts *importOptions) error {
// Handle full HF URLs by extracting repository name from URL
repo, err := extractRepoFromURL(opts.repo)
// Parse HuggingFace repository URL to extract repo name and type
repo, repoType, err := hf.ParseHuggingFaceRepo(opts.repo)
if err != nil {
Comment on lines +39 to 41

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Use dataset resolve URLs when repoType is dataset

Issue Statement: kit import now accepts dataset repos via ParseHuggingFaceRepo, but downloads still use the model resolve URL so dataset imports fail. Location: pkg/cmd/kitimport/hfimport.go:39-41. Investigation Summary: I traced the repoType parsed here through importUsingHF and confirmed hf.DownloadFiles in pkg/lib/hf/download.go still hardcodes https://huggingface.co/%s/resolve/... with no /datasets/ variant and no other dataset-specific download path exists. Trigger Conditions: using a HuggingFace dataset repo such as datasets/org/repo or https://huggingface.co/datasets/... so repoType is RepoTypeDataset. Impact Statement: file listing succeeds but download requests return 404, causing the import to abort and making dataset imports unusable.

Useful? React with 👍 / 👎.

return fmt.Errorf("could not process URL %s: %w", opts.repo, err)
return fmt.Errorf("could not process repository %s: %w", opts.repo, err)
}

tmpDir, cleanupTmp, err := cache.MkCacheDir("import", "")
Expand All @@ -53,7 +53,7 @@ func importUsingHF(ctx context.Context, opts *importOptions) error {
}
}()

dirListing, err := hf.ListFiles(ctx, repo, opts.repoRef, opts.token)
dirListing, err := hf.ListFiles(ctx, repo, opts.repoRef, opts.token, repoType)
if err != nil {
return fmt.Errorf("failed to list files from HuggingFace API: %w", err)
}
Expand Down
260 changes: 226 additions & 34 deletions pkg/cmd/kitinit/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,31 +23,52 @@ import (
"io/fs"
"os"
"path/filepath"
"strings"
"syscall"

"github.com/kitops-ml/kitops/pkg/artifact"
"github.com/kitops-ml/kitops/pkg/lib/constants"
"github.com/kitops-ml/kitops/pkg/lib/hf"
kfgen "github.com/kitops-ml/kitops/pkg/lib/kitfile/generate"
"github.com/kitops-ml/kitops/pkg/lib/util"
"github.com/kitops-ml/kitops/pkg/output"

"github.com/spf13/cobra"
"golang.org/x/term"
)

const (
shortDesc = `Generate a Kitfile for the contents of a directory`
longDesc = `Examine the contents of a directory and attempt to generate a basic Kitfile
based on common file formats. Any files whose type (i.e. model, dataset, etc.)
cannot be determined will be included in a code layer.
shortDesc = `Generate a Kitfile for the contents of a directory or remote repository`
longDesc = `Examine the contents of a directory or remote repository and attempt to generate
a basic Kitfile based on common file formats. Any files whose type (i.e. model,
dataset, etc.) cannot be determined will be included in a code layer.

For local directories, the generated Kitfile is saved in the target directory.
For remote repositories (e.g. HuggingFace), the Kitfile is printed to stdout
or saved to a path specified with --output.

By default the command will prompt for input for a name and description for the Kitfile.`

By default the command will prompt for input for a name and description for the Kitfile`
example = `# Generate a Kitfile for the current directory:
kit init .

# Generate a Kitfile for files in ./my-model, with name "mymodel" and a description:
kit init ./my-model --name "mymodel" --desc "This is my model's description"

# Generate a Kitfile, overwriting any existing Kitfile:
kit init ./my-model --force`
kit init ./my-model --force

# Generate a Kitfile for a remote HuggingFace model:
kit init https://huggingface.co/myorg/mymodel

# Generate a Kitfile for a HuggingFace dataset:
kit init huggingface.co/datasets/myorg/mydataset

# Generate a Kitfile for a remote repository with a specific ref:
kit init myorg/mymodel --ref v1.0

# Save the generated Kitfile to a specific path:
kit init myorg/mymodel --output ./Kitfile`
)

type initOptions struct {
Expand All @@ -57,6 +78,14 @@ type initOptions struct {
modelkitDescription string
modelkitAuthor string
overwrite bool
// Remote repository options
repoRef string
token string
outputPath string
// Computed fields
isRemote bool
repo string
repoType hf.RepositoryType
}

func InitCommand() *cobra.Command {
Expand All @@ -75,6 +104,9 @@ func InitCommand() *cobra.Command {
cmd.Flags().StringVar(&opts.modelkitDescription, "desc", "", "Description for the ModelKit")
cmd.Flags().StringVar(&opts.modelkitAuthor, "author", "", "Author for the ModelKit")
cmd.Flags().BoolVarP(&opts.overwrite, "force", "f", false, "Overwrite existing Kitfile if present")
cmd.Flags().StringVar(&opts.repoRef, "ref", "main", "Branch or tag to use for remote repositories")
cmd.Flags().StringVar(&opts.token, "token", "", "Token for authentication with remote repositories")
cmd.Flags().StringVarP(&opts.outputPath, "output", "o", "", "Output path for generated Kitfile (default: stdout for remotes, Kitfile in directory for local)")
cmd.Flags().SortFlags = false
return cmd
}
Expand All @@ -85,46 +117,126 @@ func runCommand(opts *initOptions) func(*cobra.Command, []string) error {
return output.Fatalf("Invalid arguments: %s", err)
}

var modelPackage *artifact.Package
if opts.modelkitName != "" || opts.modelkitDescription != "" {
modelPackage = &artifact.Package{
Name: opts.modelkitName,
Description: opts.modelkitDescription,
}
if opts.isRemote {
return runRemoteInit(cmd.Context(), opts)
}
if opts.modelkitAuthor != "" {
modelPackage.Authors = append(modelPackage.Authors, opts.modelkitAuthor)
return runLocalInit(opts)
}
}

func runLocalInit(opts *initOptions) error {
var modelPackage *artifact.Package
if opts.modelkitName != "" || opts.modelkitDescription != "" {
modelPackage = &artifact.Package{
Name: opts.modelkitName,
Description: opts.modelkitDescription,
}
}
if opts.modelkitAuthor != "" {
if modelPackage == nil {
modelPackage = &artifact.Package{}
}
modelPackage.Authors = append(modelPackage.Authors, opts.modelkitAuthor)
}

kitfilePath := opts.outputPath
if kitfilePath == "" {
kitfilePath = filepath.Join(opts.path, constants.DefaultKitfileName)
}

if _, err := os.Stat(kitfilePath); err == nil {
if !opts.overwrite {
return output.Fatalf("Kitfile already exists at %s. Use '--force' to overwrite", kitfilePath)
}
} else if !errors.Is(err, fs.ErrNotExist) {
return output.Fatalf("Error checking for existing Kitfile: %s", err)
}

dirContents, err := kfgen.DirectoryListingFromFS(opts.path)
if err != nil {
return output.Fatalf("Error processing directory: %s", err)
}
kitfile, err := kfgen.GenerateKitfile(dirContents, modelPackage)
if err != nil {
return output.Fatalf("Error generating Kitfile: %s", err)
}
bytes, err := kitfile.MarshalToYAML()
if err != nil {
return output.Fatalf("Error formatting Kitfile: %s", err)
}
if err := os.WriteFile(kitfilePath, bytes, 0644); err != nil {
return output.Fatalf("Failed to write Kitfile: %s", err)
}
output.Infof("Generated Kitfile:\n\n%s", string(bytes))
output.Infof("Saved to path '%s'", kitfilePath)
return nil
}

func runRemoteInit(ctx context.Context, opts *initOptions) error {
if opts.outputPath == "" {
output.SystemInfof("Fetching file listing from remote repository %s (ref: %s)", opts.repo, opts.repoRef)
} else {
output.Infof("Fetching file listing from remote repository %s (ref: %s)", opts.repo, opts.repoRef)
}

// Check for existing Kitfile
kitfilePath := filepath.Join(opts.path, constants.DefaultKitfileName)
if _, err := os.Stat(kitfilePath); err == nil {
dirContents, err := hf.ListFiles(ctx, opts.repo, opts.repoRef, opts.token, opts.repoType)
if err != nil {
return output.Fatalf("Error fetching remote repository: %s", err)
}

modelPackage := buildPackageFromRepo(opts.repo, opts.modelkitName, opts.modelkitDescription, opts.modelkitAuthor)

kitfile, err := kfgen.GenerateKitfile(dirContents, modelPackage)
if err != nil {
return output.Fatalf("Error generating Kitfile: %s", err)
}
bytes, err := kitfile.MarshalToYAML()
if err != nil {
return output.Fatalf("Error formatting Kitfile: %s", err)
}

if opts.outputPath != "" {
if _, err := os.Stat(opts.outputPath); err == nil {
if !opts.overwrite {
return output.Fatalf("Kitfile already exists at %s. Use '--force' to overwrite", kitfilePath)
return output.Fatalf("File already exists at %s. Use '--force' to overwrite", opts.outputPath)
}
} else if !errors.Is(err, fs.ErrNotExist) {
return output.Fatalf("Error checking for existing Kitfile: %s", err)
return output.Fatalf("Error checking for existing file: %s", err)
}

dirContents, err := kfgen.DirectoryListingFromFS(opts.path)
if err != nil {
return output.Fatalf("Error processing directory: %s", err)
}
kitfile, err := kfgen.GenerateKitfile(dirContents, modelPackage)
if err != nil {
return output.Fatalf("Error generating Kitfile: %s", err)
}
bytes, err := kitfile.MarshalToYAML()
if err != nil {
return output.Fatalf("Error formatting Kitfile: %s", err)
}
if err := os.WriteFile(kitfilePath, bytes, 0644); err != nil {
if err := os.WriteFile(opts.outputPath, bytes, 0644); err != nil {
return output.Fatalf("Failed to write Kitfile: %s", err)
}
output.Infof("Generated Kitfile:\n\n%s", string(bytes))
output.Infof("Saved to path '%s'", kitfilePath)
return nil
output.Infof("Saved to path '%s'", opts.outputPath)
} else {
fmt.Print(string(bytes))
}

return nil
}

func buildPackageFromRepo(repo, name, description, author string) *artifact.Package {
sections := strings.Split(repo, "/")
modelPackage := &artifact.Package{}

if name != "" {
modelPackage.Name = name
} else if len(sections) >= 2 {
modelPackage.Name = sections[len(sections)-1]
}

if description != "" {
modelPackage.Description = description
}

if author != "" {
modelPackage.Authors = append(modelPackage.Authors, author)
} else if len(sections) >= 2 {
modelPackage.Authors = append(modelPackage.Authors, sections[len(sections)-2])
}

return modelPackage
}

func (opts *initOptions) complete(ctx context.Context, args []string) error {
Expand All @@ -134,7 +246,27 @@ func (opts *initOptions) complete(ctx context.Context, args []string) error {
}
opts.configHome = configHome
opts.path = args[0]
if opts.path == "~" || strings.HasPrefix(opts.path, "~/") || strings.HasPrefix(opts.path, "~\\") {
home, err := os.UserHomeDir()
if err != nil {
return err
}
if opts.path == "~" {
opts.path = home
} else {
opts.path = filepath.Join(home, opts.path[2:])
}
}

opts.isRemote, opts.repo, opts.repoType = detectRemoteRepo(opts.path)

if opts.isRemote {
return opts.completeRemote()
}
return opts.completeLocal()
}

func (opts *initOptions) completeLocal() error {
if util.IsInteractiveSession() {
if opts.modelkitName == "" {
name, err := util.PromptForInput("Enter a name for the ModelKit: ", false)
Expand All @@ -158,6 +290,66 @@ func (opts *initOptions) complete(ctx context.Context, args []string) error {
opts.modelkitAuthor = author
}
}
return nil
}

func (opts *initOptions) completeRemote() error {
// For remote repos, only prompt if:
// 1. stdin is a terminal (interactive session)
// 2. stdout is a terminal (not redirected with >)
// 3. no output path specified (output goes to stdout)
stdoutIsTerminal := term.IsTerminal(int(syscall.Stdout))
if util.IsInteractiveSession() && stdoutIsTerminal && opts.outputPath == "" {
if opts.modelkitDescription == "" {
desc, err := util.PromptForInput("Enter a short description for the ModelKit: ", false)
if err != nil {
return err
}
opts.modelkitDescription = desc
}
}
return nil
}

func detectRemoteRepo(path string) (isRemote bool, repo string, repoType hf.RepositoryType) {
// First, check if this looks like an explicit local filesystem path
// Local paths include: ".", "..", paths starting with "./", "../", "/", or containing backslashes
if isLocalPath(path) {
return false, "", hf.RepoTypeUnknown
}

// Check if the path exists on the filesystem
// This handles cases like "models/my-model" which could be either local or remote
if _, err := os.Stat(path); err == nil {
// Path exists locally - treat as local
Comment on lines +321 to +324

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Handle os.Stat errors as local in remote detection

Issue Statement: detectRemoteRepo ignores os.Stat errors other than nil, so local paths that exist but are unreadable are misclassified as remote. Location: pkg/cmd/kitinit/cmd.go:321-324. Investigation Summary: I inspected detectRemoteRepo, complete, and runLocalInit and found no other permission/error handling before the HF parse; the code only treats the path as local when os.Stat succeeds. Trigger Conditions: a relative path like private/model (no ./ prefix) that exists but returns a permission/IO error on stat. Impact Statement: kit init will call the HuggingFace API instead of surfacing the local filesystem error, leading to confusing failures or unintended remote output.

Useful? React with 👍 / 👎.

return false, "", hf.RepoTypeUnknown
}

// Path doesn't exist locally - try to parse as a HuggingFace URL/repo
if repo, repoType, err := hf.ParseHuggingFaceRepo(path); err == nil {
return true, repo, repoType
}

return false, "", hf.RepoTypeUnknown
}

func isLocalPath(path string) bool {
// Check for common local path patterns
if path == "." || path == ".." {
return true
}
if strings.HasPrefix(path, "./") || strings.HasPrefix(path, "../") {
return true
}
if path == "~" || strings.HasPrefix(path, "~/") || strings.HasPrefix(path, "~\\") {
return true
}
if strings.HasPrefix(path, "/") {
return true
}
// Windows-style paths
if strings.HasPrefix(path, "\\") || (len(path) >= 2 && path[1] == ':') {
return true
}
return false
}
Loading