Skip to content

Commit 0cbbeef

Browse files
authored
feat(migrate/python): transform preserve regexes into keep entries (#3888)
Finds all files that would currently be preserved, and adds those specific files to the keep list, with a path relative to the library's output directory. While we may eventually want to make librarian's keep list more flexible (using globs or regexes) for now it needs to be specific files (which must exist).
1 parent 529001f commit 0cbbeef

File tree

9 files changed

+277
-4
lines changed

9 files changed

+277
-4
lines changed

tool/cmd/migrate/legacylibrarian.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,10 @@ func buildConfigFromLibrarian(ctx context.Context, input *MigrationInput) (*conf
114114
}
115115

116116
if input.lang == "python" {
117-
cfg.Libraries = buildPythonLibraries(input)
117+
cfg.Libraries, err = buildPythonLibraries(input)
118+
if err != nil {
119+
return nil, err
120+
}
118121
cfg.Default.Output = "packages"
119122
cfg.Default.ReleaseLevel = "stable"
120123
cfg.Default.Transport = "grpc+rest"

tool/cmd/migrate/legacylibrarian_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ func TestBuildConfigFromLibrarian(t *testing.T) {
9292
for _, test := range []struct {
9393
name string
9494
lang string
95+
repoPath string
9596
state *legacyconfig.LibrarianState
9697
cfg *legacyconfig.LibrarianConfig
9798
fetchSource func(ctx context.Context) (*config.Source, error)
@@ -252,13 +253,33 @@ func TestBuildConfigFromLibrarian(t *testing.T) {
252253
},
253254
wantErr: errFetchSource,
254255
},
256+
{
257+
// This is just one example of how python migration can fail,
258+
// used to check that when it does, the error is propagated.
259+
name: "python migration fails - source root doesn't exist",
260+
lang: "python",
261+
state: &legacyconfig.LibrarianState{
262+
Libraries: []*legacyconfig.LibraryState{
263+
{
264+
ID: "example-library",
265+
Version: "1.0.0",
266+
SourceRoots: []string{"packages/non-existent"},
267+
PreserveRegex: []string{"docs/CHANGELOG.md"},
268+
},
269+
},
270+
},
271+
cfg: &legacyconfig.LibrarianConfig{},
272+
fetchSource: defaultFetchSource,
273+
wantErr: os.ErrNotExist,
274+
},
255275
} {
256276
t.Run(test.name, func(t *testing.T) {
257277
fetchSource = test.fetchSource
258278
input := &MigrationInput{
259279
librarianState: test.state,
260280
librarianConfig: test.cfg,
261281
lang: test.lang,
282+
repoPath: test.repoPath,
262283
}
263284
got, err := buildConfigFromLibrarian(t.Context(), input)
264285
if test.wantErr != nil {

tool/cmd/migrate/python.go

Lines changed: 130 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,21 @@
1515
package main
1616

1717
import (
18+
"fmt"
19+
"io/fs"
20+
"path/filepath"
21+
"regexp"
22+
"slices"
23+
"strings"
24+
1825
"github.com/googleapis/librarian/internal/config"
26+
"github.com/googleapis/librarian/internal/legacylibrarian/legacyconfig"
1927
)
2028

2129
// buildPythonLibraries builds a set of librarian libraries from legacylibrarian
2230
// libraries and the googleapis directory used to find settings in service
2331
// config files, BUILD.bazel files etc.
24-
func buildPythonLibraries(input *MigrationInput) []*config.Library {
32+
func buildPythonLibraries(input *MigrationInput) ([]*config.Library, error) {
2533
var libraries []*config.Library
2634
// No need to use legacyconfig.LibraryConfig - the only thing in
2735
// the python config is a single global file entry.
@@ -34,7 +42,127 @@ func buildPythonLibraries(input *MigrationInput) []*config.Library {
3442
if libState.APIs != nil {
3543
library.APIs = toAPIs(libState.APIs)
3644
}
45+
// Convert "preserve" regexes into "keep" paths, sorted for ease
46+
// of testing.
47+
keep, err := transformPreserveToKeep(input.repoPath, libState)
48+
if err != nil {
49+
return nil, err
50+
}
51+
slices.Sort(keep)
52+
library.Keep = keep
53+
3754
libraries = append(libraries, library)
3855
}
39-
return libraries
56+
return libraries, nil
57+
}
58+
59+
// transformPreserveToKeep converts the "preserve" entries in a legacylibrarian
60+
// state file into "keep" entries in a new library config. Differences:
61+
// - Preserve entries are repo-root-relative; keep entries are
62+
// library-output-relative.
63+
// - Preserve entries are regular expressions; keep entries are just filenames.
64+
// - Preserve entries don't have to match anything; keep entries must exist.
65+
//
66+
// transformPreserveToKeep finds all files which would *currently* be kept, and
67+
// creates a keep entry for each of them.
68+
func transformPreserveToKeep(rootDir string, libState *legacyconfig.LibraryState) ([]string, error) {
69+
if len(libState.PreserveRegex) == 0 {
70+
return nil, nil
71+
}
72+
if len(libState.SourceRoots) != 1 {
73+
return nil, fmt.Errorf("cannot migrate %s with %d source roots and %d preserve regexes", libState.ID, len(libState.SourceRoots), len(libState.PreserveRegex))
74+
}
75+
sourceRoot := libState.SourceRoots[0]
76+
77+
// relPaths contains a list of files in the source root, but relative to
78+
// rootDir.
79+
relPaths, err := findSubDirRelPaths(rootDir, filepath.Join(rootDir, sourceRoot))
80+
if err != nil {
81+
return nil, err
82+
}
83+
84+
// Apply all the preserve regular expressions to all the files we've found,
85+
// adding each match to the keep list.
86+
preserveRegexps, err := compileRegexps(libState.PreserveRegex)
87+
if err != nil {
88+
return nil, err
89+
}
90+
var keepPaths []string
91+
for _, path := range filterPathsByRegex(relPaths, preserveRegexps) {
92+
relative, err := filepath.Rel(sourceRoot, path)
93+
if err != nil {
94+
// This should not happen given the logic in findSubDirRelPaths
95+
return nil, fmt.Errorf("could not make path %q relative to %q: %w", path, sourceRoot, err)
96+
}
97+
keepPaths = append(keepPaths, relative)
98+
}
99+
return keepPaths, nil
100+
}
101+
102+
// compileRegexps takes a slice of string patterns and compiles each one into a
103+
// regular expression. It returns a slice of compiled regexps or an error if any
104+
// pattern is invalid.
105+
// This function is copied from legacylibrarian. Neither this code nor
106+
// legacylibrarian is expected to be long-lived, so it's not worth refactoring
107+
// for reuse without duplication.
108+
func compileRegexps(patterns []string) ([]*regexp.Regexp, error) {
109+
var regexps []*regexp.Regexp
110+
for _, pattern := range patterns {
111+
re, err := regexp.Compile(pattern)
112+
if err != nil {
113+
return nil, fmt.Errorf("invalid regex %q: %w", pattern, err)
114+
}
115+
regexps = append(regexps, re)
116+
}
117+
return regexps, nil
118+
}
119+
120+
// findSubDirRelPaths walks the subDir tree returns a slice of all file and
121+
// directory paths relative to the dir. This is repeated for all nested
122+
// directories. subDir must be under or the same as dir.
123+
// This function is copied from legacylibrarian. Neither this code nor
124+
// legacylibrarian is expected to be long-lived, so it's not worth refactoring
125+
// for reuse without duplication.
126+
func findSubDirRelPaths(dir, subDir string) ([]string, error) {
127+
dirRelPath, err := filepath.Rel(dir, subDir)
128+
if err != nil {
129+
return nil, fmt.Errorf("cannot establish the relationship between %s and %s: %w", dir, subDir, err)
130+
}
131+
// '..' signifies that the subDir exists outside of dir
132+
if strings.HasPrefix(dirRelPath, "..") {
133+
return nil, fmt.Errorf("subDir is not nested within the dir: %s, %s", subDir, dir)
134+
}
135+
136+
var paths []string
137+
err = filepath.WalkDir(subDir, func(path string, d fs.DirEntry, err error) error {
138+
if err != nil {
139+
return err
140+
}
141+
// error is ignored as we have confirmed that subDir is child or equal to rootDir
142+
relPath, _ := filepath.Rel(dir, path)
143+
// Special case when subDir is equal to dir. Drop the "." as it references itself
144+
if relPath != "." {
145+
paths = append(paths, relPath)
146+
}
147+
return nil
148+
})
149+
return paths, err
150+
}
151+
152+
// filterPathsByRegex returns a new slice containing only the paths from the
153+
// input slice that match at least one of the provided regular expressions.
154+
// This function is copied from legacylibrarian. Neither this code nor
155+
// legacylibrarian is expected to be long-lived, so it's not worth refactoring
156+
// for reuse without duplication.
157+
func filterPathsByRegex(paths []string, regexps []*regexp.Regexp) []string {
158+
var filtered []string
159+
for _, path := range paths {
160+
for _, re := range regexps {
161+
if re.MatchString(path) {
162+
filtered = append(filtered, path)
163+
break
164+
}
165+
}
166+
}
167+
return filtered
40168
}

tool/cmd/migrate/python_test.go

Lines changed: 118 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,129 @@ func TestBuildPythonLibraries(t *testing.T) {
5959
},
6060
},
6161
},
62+
{
63+
name: "keep paths",
64+
input: &MigrationInput{
65+
repoPath: "testdata/google-cloud-python",
66+
librarianState: &legacyconfig.LibrarianState{
67+
Libraries: []*legacyconfig.LibraryState{
68+
{
69+
ID: "google-cloud-secret-manager",
70+
SourceRoots: []string{"packages/google-cloud-secret-manager"},
71+
PreserveRegex: []string{
72+
"packages/google-cloud-secret-manager/CHANGELOG.md",
73+
"docs/CHANGELOG.md",
74+
},
75+
},
76+
},
77+
},
78+
librarianConfig: &legacyconfig.LibrarianConfig{},
79+
},
80+
want: []*config.Library{
81+
{
82+
Name: "google-cloud-secret-manager",
83+
Keep: []string{
84+
"CHANGELOG.md",
85+
"docs/CHANGELOG.md",
86+
},
87+
},
88+
},
89+
},
6290
} {
6391
t.Run(test.name, func(t *testing.T) {
64-
got := buildPythonLibraries(test.input)
92+
got, err := buildPythonLibraries(test.input)
93+
if err != nil {
94+
t.Fatal(err)
95+
}
6596
if diff := cmp.Diff(test.want, got); diff != "" {
6697
t.Errorf("mismatch (-want +got):\n%s", diff)
6798
}
6899
})
69100
}
70101
}
102+
103+
func TestBuildPythonLibraries_Error(t *testing.T) {
104+
for _, test := range []struct {
105+
name string
106+
input *MigrationInput
107+
}{
108+
{
109+
name: "preserve regex but no source roots",
110+
input: &MigrationInput{
111+
librarianState: &legacyconfig.LibrarianState{
112+
Libraries: []*legacyconfig.LibraryState{
113+
{
114+
ID: "google-cloud-secret-manager",
115+
PreserveRegex: []string{
116+
"packages/google-cloud-secret-manager/CHANGELOG.md",
117+
"docs/CHANGELOG.md",
118+
},
119+
},
120+
},
121+
},
122+
librarianConfig: &legacyconfig.LibrarianConfig{},
123+
},
124+
},
125+
{
126+
name: "invalid preserve regex",
127+
input: &MigrationInput{
128+
repoPath: "testdata/google-cloud-python",
129+
librarianState: &legacyconfig.LibrarianState{
130+
Libraries: []*legacyconfig.LibraryState{
131+
{
132+
ID: "google-cloud-secret-manager",
133+
SourceRoots: []string{"packages/google-cloud-secret-manager"},
134+
PreserveRegex: []string{
135+
"*",
136+
},
137+
},
138+
},
139+
},
140+
librarianConfig: &legacyconfig.LibrarianConfig{},
141+
},
142+
},
143+
{
144+
name: "source root doesn't exist",
145+
input: &MigrationInput{
146+
repoPath: "testdata/google-cloud-python",
147+
librarianState: &legacyconfig.LibrarianState{
148+
Libraries: []*legacyconfig.LibraryState{
149+
{
150+
ID: "google-cloud-secret-manager",
151+
SourceRoots: []string{"packages/missing"},
152+
PreserveRegex: []string{
153+
"*",
154+
},
155+
},
156+
},
157+
},
158+
librarianConfig: &legacyconfig.LibrarianConfig{},
159+
},
160+
},
161+
{
162+
name: "source root isn't in root",
163+
input: &MigrationInput{
164+
repoPath: "testdata/google-cloud-python",
165+
librarianState: &legacyconfig.LibrarianState{
166+
Libraries: []*legacyconfig.LibraryState{
167+
{
168+
ID: "google-cloud-secret-manager",
169+
SourceRoots: []string{"../get-git-commit"},
170+
PreserveRegex: []string{
171+
"*",
172+
},
173+
},
174+
},
175+
},
176+
librarianConfig: &legacyconfig.LibrarianConfig{},
177+
},
178+
},
179+
} {
180+
t.Run(test.name, func(t *testing.T) {
181+
_, err := buildPythonLibraries(test.input)
182+
if err == nil {
183+
t.Errorf("expected error; got none")
184+
}
185+
})
186+
}
187+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Placeholder file.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Placeholder file.

tool/cmd/migrate/testdata/google-cloud-python/packages/google-cloud-secret-manager/docs/generated.rst

Whitespace-only changes.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Placeholder file.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Placeholder file.

0 commit comments

Comments
 (0)