@@ -10,6 +10,14 @@ import (
1010 "sync"
1111 "time"
1212
13+ gogit "github.com/go-git/go-git/v5"
14+ "github.com/gobwas/glob"
15+ gitlab "gitlab.com/gitlab-org/api/client-go"
16+ "golang.org/x/oauth2"
17+ "golang.org/x/sync/errgroup"
18+ "google.golang.org/protobuf/proto"
19+ "google.golang.org/protobuf/types/known/anypb"
20+
1321 "github.com/trufflesecurity/trufflehog/v3/pkg/common"
1422 "github.com/trufflesecurity/trufflehog/v3/pkg/context"
1523 "github.com/trufflesecurity/trufflehog/v3/pkg/feature"
@@ -20,20 +28,12 @@ import (
2028 "github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer"
2129 "github.com/trufflesecurity/trufflehog/v3/pkg/sources"
2230 "github.com/trufflesecurity/trufflehog/v3/pkg/sources/git"
23-
24- gogit "github.com/go-git/go-git/v5"
25- "github.com/gobwas/glob"
26- gitlab "gitlab.com/gitlab-org/api/client-go"
27- "golang.org/x/oauth2"
28- "golang.org/x/sync/errgroup"
29- "google.golang.org/protobuf/proto"
30- "google.golang.org/protobuf/types/known/anypb"
3131)
3232
3333const SourceType = sourcespb .SourceType_SOURCE_TYPE_GITLAB
3434
35- // This is the URL for gitlab hosted at gitlab.com
36- const gitlabBaseURL = "https://gitlab.com/"
35+ // Base URL for GitLab Cloud ( hosted at gitlab.com)
36+ const gitlabCloudBaseUrl = "https://gitlab.com/"
3737
3838type Source struct {
3939 name string
@@ -72,16 +72,15 @@ type Source struct {
7272 sources.CommonSourceUnitUnmarshaller
7373
7474 useAuthInUrl bool
75-
76- clonePath string
77- noCleanup bool
75+ clonePath string
76+ noCleanup bool
7877
7978 printLegacyJSON bool
8079
8180 projectsPerPage int64
8281
8382 // cache of repo URL to project info, used when generating metadata for chunks
84- repoToProjCache repoToProjectCache
83+ * projectMetadataCache
8584}
8685
8786// WithCustomContentWriter sets the useCustomContentWriter flag on the source.
@@ -242,11 +241,12 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou
242241 Timestamp : sanitizer .UTF8 (timestamp ),
243242 Line : line ,
244243 }
245- proj , ok := s .repoToProjCache .get (repository )
244+ // check for project metadata in the cache
245+ project , ok := s .projectMetadataCache .get (repository )
246246 if ok {
247- gitlabMetadata .ProjectId = int64 ( proj .id )
248- gitlabMetadata .ProjectName = proj .name
249- gitlabMetadata .ProjectOwner = proj .owner
247+ gitlabMetadata .ProjectId = project .id
248+ gitlabMetadata .ProjectName = project .name
249+ gitlabMetadata .ProjectOwner = project .owner
250250 }
251251
252252 return & source_metadatapb.MetaData {
@@ -260,9 +260,7 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou
260260 }
261261 s .git = git .NewGit (cfg )
262262
263- s .repoToProjCache = repoToProjectCache {
264- cache : make (map [string ]* project ),
265- }
263+ s .projectMetadataCache = NewProjectMetadataCache ()
266264
267265 return nil
268266}
@@ -314,8 +312,10 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar
314312
315313 } else {
316314 gitlabReposEnumerated .WithLabelValues (s .name ).Set (float64 (len (repos )))
317- // ensure project details for specified repos are cached
318- // this is required to populate metadata during chunking
315+ // Ensure project details for the specified repositories are cached.
316+ // This is required to populate metadata during chunking.
317+ // Note: Repository URLs are already normalized, so the cache check
318+ // uses the normalized repo URL directly.
319319 for _ , repo := range repos {
320320 s .ensureProjectInCache (ctx , repo )
321321 }
@@ -586,7 +586,10 @@ func (s *Source) getAllProjectRepos(
586586 }
587587 // Report the unit.
588588 ctx .Logger ().V (3 ).Info ("accepting project" )
589- s .cacheGitlabProject (proj )
589+
590+ // Cache the GitLab project metadata.
591+ s .cacheGitlabProjectMetadata (ctx , proj )
592+
590593 unit := git.SourceUnit {Kind : git .UnitRepo , ID : proj .HTTPURLToRepo }
591594 gitlabReposEnumerated .WithLabelValues (s .name ).Inc ()
592595 projectsWithNamespace = append (projectsWithNamespace , proj .NameWithNamespace )
@@ -630,7 +633,7 @@ func (s *Source) getAllProjectRepos(
630633 Owned : gitlab .Ptr (false ),
631634 }
632635
633- if s .url != gitlabBaseURL {
636+ if s .url != gitlabCloudBaseUrl {
634637 listGroupsOptions .AllAvailable = gitlab .Ptr (true )
635638 }
636639
@@ -722,7 +725,7 @@ func (s *Source) getAllProjectReposV2(
722725 }
723726
724727 // for gitlab.com instance, include only projects where the user is a member.
725- if s .url == gitlabBaseURL {
728+ if s .url == gitlabCloudBaseUrl {
726729 projectQueryOptions .Membership = gitlab .Ptr (true )
727730 }
728731
@@ -783,7 +786,9 @@ func (s *Source) getAllProjectReposV2(
783786 // report the unit.
784787 projCtx .Logger ().V (3 ).Info ("accepting project" )
785788
786- s .cacheGitlabProject (project )
789+ // Cache the GitLab project metadata.
790+ s .cacheGitlabProjectMetadata (projCtx , project )
791+
787792 unit := git.SourceUnit {Kind : git .UnitRepo , ID : project .HTTPURLToRepo }
788793 gitlabReposEnumerated .WithLabelValues (s .name ).Inc ()
789794
@@ -837,7 +842,7 @@ func (s *Source) getAllProjectReposInGroups(
837842 }
838843
839844 // For non gitlab.com instances, you might want to adjust access levels
840- if s .url != gitlabBaseURL {
845+ if s .url != gitlabCloudBaseUrl {
841846 projectOpts .MinAccessLevel = gitlab .Ptr (gitlab .GuestPermissions )
842847 }
843848
@@ -899,7 +904,9 @@ func (s *Source) getAllProjectReposInGroups(
899904 // report the unit.
900905 projCtx .Logger ().V (3 ).Info ("accepting project" )
901906
902- s .cacheGitlabProject (proj )
907+ // Cache the GitLab project metadata.
908+ s .cacheGitlabProjectMetadata (projCtx , proj )
909+
903910 unit := git.SourceUnit {Kind : git .UnitRepo , ID : proj .HTTPURLToRepo }
904911 gitlabReposEnumerated .WithLabelValues (s .name ).Inc ()
905912 projectsWithNamespace = append (projectsWithNamespace , proj .NameWithNamespace )
@@ -1022,77 +1029,6 @@ func (s *Source) WithScanOptions(scanOptions *git.ScanOptions) {
10221029 s .scanOptions = scanOptions
10231030}
10241031
1025- func buildIgnorer (include , exclude []string , onCompile func (err error , pattern string )) func (repo string ) bool {
1026-
1027- // compile and load globRepoFilter
1028- globRepoFilter := newGlobRepoFilter (include , exclude , onCompile )
1029-
1030- f := func (repo string ) bool {
1031- if ! globRepoFilter .includeRepo (repo ) || globRepoFilter .ignoreRepo (repo ) {
1032- return true
1033- }
1034- return false
1035- }
1036-
1037- return f
1038- }
1039-
1040- func normalizeRepos (repos []string ) ([]string , []error ) {
1041- // Optimistically allocate space for all valid repositories.
1042- validRepos := make ([]string , 0 , len (repos ))
1043- var errs []error
1044- for _ , prj := range repos {
1045- repo , err := giturl .NormalizeGitlabRepo (prj )
1046- if err != nil {
1047- errs = append (errs , fmt .Errorf ("unable to normalize gitlab repo url %q: %w" , prj , err ))
1048- continue
1049- }
1050-
1051- validRepos = append (validRepos , repo )
1052- }
1053- return validRepos , errs
1054- }
1055-
1056- // normalizeGitlabEndpoint ensures that if an endpoint is going to gitlab.com, we use https://gitlab.com/ as the endpoint.
1057- // If we see the protocol is http, we error, because this shouldn't be used.
1058- // Otherwise, it ensures we are using https as our protocol, if none was provided.
1059- func normalizeGitlabEndpoint (gitlabEndpoint string ) (string , error ) {
1060- if gitlabEndpoint == "" {
1061- return gitlabBaseURL , nil
1062- }
1063-
1064- gitlabURL , err := url .Parse (gitlabEndpoint )
1065- if err != nil {
1066- return "" , err
1067- }
1068-
1069- // We probably didn't receive a URL with a scheme, which messed up the parsing.
1070- if gitlabURL .Host == "" {
1071- gitlabURL , err = url .Parse ("https://" + gitlabEndpoint )
1072- if err != nil {
1073- return "" , err
1074- }
1075- }
1076-
1077- // If the host is gitlab.com, this is the cloud version, which has only one valid endpoint.
1078- if gitlabURL .Host == "gitlab.com" {
1079- return gitlabBaseURL , nil
1080- }
1081-
1082- // Beyond here, on-prem gitlab is being used, so we have to mostly leave things as-is.
1083-
1084- if gitlabURL .Scheme != "https" {
1085- return "" , fmt .Errorf ("https was not used as URL scheme, but is required. Please use https" )
1086- }
1087-
1088- // The gitlab library wants trailing slashes.
1089- if ! strings .HasSuffix (gitlabURL .Path , "/" ) {
1090- gitlabURL .Path = gitlabURL .Path + "/"
1091- }
1092-
1093- return gitlabURL .String (), nil
1094- }
1095-
10961032// Enumerate reports all GitLab repositories to be scanned to the reporter. If
10971033// none are configured, it will find all repositories within all projects that
10981034// the configured user has access to, while respecting the configured ignore
@@ -1191,23 +1127,25 @@ func (s *Source) ChunkUnit(ctx context.Context, unit sources.SourceUnit, reporte
11911127 return s .git .ScanRepo (ctx , repo , path , s .scanOptions , reporter )
11921128}
11931129
1194- // ensureProjectInCache checks if the project for the given repo URL is in the cache,
1195- // and if not, queries the GitLab API to fetch the project and adds it to the cache.
1196- func (s * Source ) ensureProjectInCache (ctx context.Context , repoUrl string ) {
1197- // check if project is already in cache
1198- if _ , ok := s .repoToProjCache .get (repoUrl ); ok {
1130+ // ensureProjectInCache ensures that the project for the given repository URL
1131+ // exists in the cache. If not, it fetches the project from the GitLab API
1132+ // and stores it in the cache.
1133+ func (s * Source ) ensureProjectInCache (ctx context.Context , repoURL string ) {
1134+ // Check if the project is already cached.
1135+ if _ , ok := s .projectMetadataCache .get (repoURL ); ok {
1136+ ctx .Logger ().V (5 ).Info ("cache hit: found project metadata in the cache" , "cache_key" , repoURL )
11991137 return
12001138 }
12011139
1202- // query project
1203- proj , err := s .getGitlabProject (ctx , repoUrl )
1140+ // Fetch the project from GitLab.
1141+ project , err := s .getGitlabProject (ctx , repoURL )
12041142 if err != nil {
1205- ctx .Logger ().Error (err , "could not fetch project for repo " , "repo" , repoUrl )
1143+ ctx .Logger ().Error (err , "failed to fetch GitLab project " , "repo" , repoURL )
12061144 return
12071145 }
12081146
1209- // add to cache
1210- s .cacheGitlabProject ( proj )
1147+ // Cache the project metadata.
1148+ s .cacheGitlabProjectMetadata ( ctx , project )
12111149}
12121150
12131151func (s * Source ) getGitlabProject (ctx context.Context , repoUrl string ) (* gitlab.Project , error ) {
@@ -1230,16 +1168,100 @@ func (s *Source) getGitlabProject(ctx context.Context, repoUrl string) (*gitlab.
12301168 return proj , nil
12311169}
12321170
1233- func (s * Source ) cacheGitlabProject (gitlabProj * gitlab.Project ) {
1171+ // cacheGitlabProjectMetadata caches GitLab project metadata keyed by the
1172+ // normalized GitLab repository URL.
1173+ func (s * Source ) cacheGitlabProjectMetadata (ctx context.Context , glProject * gitlab.Project ) {
12341174 proj := & project {
1235- id : gitlabProj .ID ,
1236- name : gitlabProj .NameWithNamespace ,
1175+ id : int64 (glProject .ID ),
1176+ name : glProject .NameWithNamespace ,
1177+ }
1178+
1179+ if glProject .Owner != nil {
1180+ if email := glProject .Owner .Email ; email != "" {
1181+ proj .owner = email
1182+ } else {
1183+ proj .owner = glProject .Owner .Username
1184+ }
12371185 }
1238- if gitlabProj .Owner != nil {
1239- proj .owner = gitlabProj .Owner .Email
1240- if proj .owner == "" {
1241- proj .owner = gitlabProj .Owner .Username
1186+
1187+ repoURL , err := giturl .NormalizeGitlabRepo (glProject .HTTPURLToRepo )
1188+ if err != nil {
1189+ ctx .Logger ().Error (err , "failed to normalize GitLab Repo" , "repo" , glProject .HTTPURLToRepo )
1190+ return
1191+ }
1192+
1193+ ctx .Logger ().V (5 ).Info ("cache set: added project metadata in the cache" , "cache_key" , repoURL )
1194+ s .projectMetadataCache .set (repoURL , proj )
1195+ }
1196+
1197+ func buildIgnorer (include , exclude []string , onCompile func (err error , pattern string )) func (repo string ) bool {
1198+
1199+ // compile and load globRepoFilter
1200+ globRepoFilter := newGlobRepoFilter (include , exclude , onCompile )
1201+
1202+ f := func (repo string ) bool {
1203+ if ! globRepoFilter .includeRepo (repo ) || globRepoFilter .ignoreRepo (repo ) {
1204+ return true
12421205 }
1206+ return false
12431207 }
1244- s .repoToProjCache .set (gitlabProj .HTTPURLToRepo , proj )
1208+
1209+ return f
1210+ }
1211+
1212+ // normalizeRepos convert the repo urls from https://gitlab.com/org/repo -> https://gitlab.com/org/repo.git
1213+ func normalizeRepos (repos []string ) ([]string , []error ) {
1214+ // Optimistically allocate space for all valid repositories.
1215+ validRepos := make ([]string , 0 , len (repos ))
1216+ var errs []error
1217+ for _ , prj := range repos {
1218+ repo , err := giturl .NormalizeGitlabRepo (prj )
1219+ if err != nil {
1220+ errs = append (errs , fmt .Errorf ("unable to normalize gitlab repo url %q: %w" , prj , err ))
1221+ continue
1222+ }
1223+
1224+ validRepos = append (validRepos , repo )
1225+ }
1226+ return validRepos , errs
1227+ }
1228+
1229+ // normalizeGitlabEndpoint ensures that if an endpoint is going to gitlab.com, we use https://gitlab.com/ as the endpoint.
1230+ // If we see the protocol is http, we error, because this shouldn't be used.
1231+ // Otherwise, it ensures we are using https as our protocol, if none was provided.
1232+ func normalizeGitlabEndpoint (gitlabEndpoint string ) (string , error ) {
1233+ if gitlabEndpoint == "" {
1234+ return gitlabCloudBaseUrl , nil
1235+ }
1236+
1237+ gitlabURL , err := url .Parse (gitlabEndpoint )
1238+ if err != nil {
1239+ return "" , err
1240+ }
1241+
1242+ // We probably didn't receive a URL with a scheme, which messed up the parsing.
1243+ if gitlabURL .Host == "" {
1244+ gitlabURL , err = url .Parse ("https://" + gitlabEndpoint )
1245+ if err != nil {
1246+ return "" , err
1247+ }
1248+ }
1249+
1250+ // If the host is gitlab.com, this is the cloud version, which has only one valid endpoint.
1251+ if gitlabURL .Host == "gitlab.com" {
1252+ return gitlabCloudBaseUrl , nil
1253+ }
1254+
1255+ // Beyond here, on-prem gitlab is being used, so we have to mostly leave things as-is.
1256+
1257+ if gitlabURL .Scheme != "https" {
1258+ return "" , fmt .Errorf ("https was not used as URL scheme, but is required. Please use https" )
1259+ }
1260+
1261+ // The gitlab library wants trailing slashes.
1262+ if ! strings .HasSuffix (gitlabURL .Path , "/" ) {
1263+ gitlabURL .Path = gitlabURL .Path + "/"
1264+ }
1265+
1266+ return gitlabURL .String (), nil
12451267}
0 commit comments