From 720de5d6cfa848e56f57ebdb40ffe4e9b00e2b25 Mon Sep 17 00:00:00 2001 From: sergeyb Date: Wed, 17 Jun 2026 22:43:13 +0000 Subject: [PATCH] refactor(storage)!: sharpen Get and List contracts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two contract clarifications on the Storage interface, both addressing abstraction leaks where the documented behavior diverged from what implementations actually did. Get returns DownloadResponse by value, not by pointer. The pointer carried no extra information — absence is signaled by NotFoundError, not by a nil response — yet every caller defensively checked `resp == nil` and `resp.ReadCloser == nil`, both dead code under any conforming implementation. The interface now documents that on a nil error the ReadCloser is non-nil and caller-owned, and the redundant checks are removed. List has literal-string-prefix semantics instead of "directory prefix". The original wording was ambiguous and the two bundled implementations diverged: memstorage did strings.HasPrefix while disk did filepath.Walk on a real subdirectory, so the contracts only agreed when the caller passed a full slash-segmented path ending in "/". The interface now mandates strings.HasPrefix semantics — the natural primitive for S3/GCS/in-memory backends — and disk.List walks the longest slash-bounded prefix and filters by HasPrefix, preserving cheap walks for the common case while honoring partial-segment prefixes. Storage gains an interface-level comment stating keys are opaque and the interface has no concept of paths or directories; any structure is a caller convention. Breaking change: out-of-tree implementations of Storage must update Get to return a value and List to do literal-prefix matching. All in-tree implementations (memstorage, disk), callers, mocks, and tests are updated. Co-Authored-By: Claude Opus 4.7 --- controller/getchangedtargets.go | 3 -- controller/getchangedtargets_test.go | 65 ++++++++++-------------- controller/gettargetgraph.go | 4 -- controller/gettargetgraph_test.go | 30 +++++------ core/storage/changedtargetsreader.go | 3 -- core/storage/disk/disk.go | 30 +++++++---- core/storage/disk/disk_test.go | 34 ++++++++++++- core/storage/graphreader.go | 3 -- core/storage/memstorage.go | 10 ++-- core/storage/storage.go | 22 ++++++-- core/storage/storagemock/storagemock.go | 36 ++++++------- orchestrator/native_orchestrator_test.go | 8 +-- 12 files changed, 141 insertions(+), 107 deletions(-) diff --git a/controller/getchangedtargets.go b/controller/getchangedtargets.go index 42cdcbc..690569f 100644 --- a/controller/getchangedtargets.go +++ b/controller/getchangedtargets.go @@ -1086,9 +1086,6 @@ func readTreehash(ctx context.Context, st storage.Storage, buildDescription *pb. } return "", fmt.Errorf("treehash read failed for key %q: %w", key, err) } - if resp == nil || resp.ReadCloser == nil { - return "", fmt.Errorf("treehash read returned nil body for key %q", key) - } defer resp.ReadCloser.Close() b, err := io.ReadAll(resp.ReadCloser) if err != nil { diff --git a/controller/getchangedtargets_test.go b/controller/getchangedtargets_test.go index 3bdab2b..1a4b35c 100644 --- a/controller/getchangedtargets_test.go +++ b/controller/getchangedtargets_test.go @@ -187,11 +187,11 @@ func TestGetChangedTargets_CacheHit(t *testing.T) { // First two Gets resolve the treehashes, third gets the cached comparison result. gomock.InOrder( storagemock.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(&storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash1")))}, nil), + Return(storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash1")))}, nil), storagemock.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(&storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash2")))}, nil), + Return(storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash2")))}, nil), storagemock.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(&storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(cachedBytes))}, nil), + Return(storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(cachedBytes))}, nil), ) stream.EXPECT().Send(gomock.Any()).Return(nil).Times(2) @@ -224,7 +224,7 @@ func TestGetChangedTargets_TreehashReadError(t *testing.T) { // read or graph fetch happens, so this is the only Get expected. injected := errors.New("storage exploded") storagemock.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(nil, injected).Times(1) + Return(storage.DownloadResponse{}, injected).Times(1) c := NewController(context.Background(), Params{ Logger: zap.NewNop(), @@ -254,7 +254,7 @@ func TestReadTreehash(t *testing.T) { ctrl := gomock.NewController(t) st := storagemock.NewMockStorage(ctrl) st.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(nil, &storage.NotFoundError{Path: "missing"}) + Return(storage.DownloadResponse{}, &storage.NotFoundError{Path: "missing"}) val, err := readTreehash(context.Background(), st, bd) require.NoError(t, err) @@ -266,7 +266,7 @@ func TestReadTreehash(t *testing.T) { st := storagemock.NewMockStorage(ctrl) injected := errors.New("infra down") st.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(nil, injected) + Return(storage.DownloadResponse{}, injected) val, err := readTreehash(context.Background(), st, bd) require.Error(t, err) @@ -274,22 +274,11 @@ func TestReadTreehash(t *testing.T) { assert.Empty(t, val) }) - t.Run("nil response surfaces as error", func(t *testing.T) { - ctrl := gomock.NewController(t) - st := storagemock.NewMockStorage(ctrl) - st.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(nil, nil) - - val, err := readTreehash(context.Background(), st, bd) - require.Error(t, err) - assert.Empty(t, val) - }) - t.Run("success returns treehash", func(t *testing.T) { ctrl := gomock.NewController(t) st := storagemock.NewMockStorage(ctrl) st.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(&storage.DownloadResponse{ReadCloser: io.NopCloser(strings.NewReader("deadbeef"))}, nil) + Return(storage.DownloadResponse{ReadCloser: io.NopCloser(strings.NewReader("deadbeef"))}, nil) val, err := readTreehash(context.Background(), st, bd) require.NoError(t, err) @@ -309,14 +298,14 @@ func TestGetChangedTargets_StreamSendError(t *testing.T) { gogio.NewDelimitedWriter(&buf).WriteMsg(&pb.GetTargetGraphResponse{ Item: &pb.GetTargetGraphResponse_Targets{Targets: &pb.OptimizedTargets{}}, }) - storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).DoAndReturn(func(_ context.Context, req storage.DownloadRequest) (*storage.DownloadResponse, error) { + storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).DoAndReturn(func(_ context.Context, req storage.DownloadRequest) (storage.DownloadResponse, error) { if strings.Contains(req.Key, "compared-targets") { - return nil, &storage.NotFoundError{Path: req.Key} + return storage.DownloadResponse{}, &storage.NotFoundError{Path: req.Key} } if strings.Contains(req.Key, "th") { - return &storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(buf.Bytes()))}, nil + return storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(buf.Bytes()))}, nil } - return &storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("th")))}, nil + return storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("th")))}, nil }).AnyTimes() // Put is launched in a goroutine — use a channel to wait for it before the test ends. @@ -409,20 +398,20 @@ func TestGetChangedTargets_streamChunks(t *testing.T) { // Each revision needs: treehash lookup + graph lookup. Plus one initial cache miss. storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).DoAndReturn( - func(_ context.Context, req storage.DownloadRequest) (*storage.DownloadResponse, error) { + func(_ context.Context, req storage.DownloadRequest) (storage.DownloadResponse, error) { switch { case strings.Contains(req.Key, "compared-targets"): - return nil, &storage.NotFoundError{Path: req.Key} + return storage.DownloadResponse{}, &storage.NotFoundError{Path: req.Key} case strings.Contains(req.Key, "sha1"): - return &storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash1")))}, nil + return storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash1")))}, nil case strings.Contains(req.Key, "sha2"): - return &storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash2")))}, nil + return storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash2")))}, nil case strings.Contains(req.Key, "treehash1"): - return &storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(graph1Bytes))}, nil + return storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(graph1Bytes))}, nil case strings.Contains(req.Key, "treehash2"): - return &storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(graph2Bytes))}, nil + return storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(graph2Bytes))}, nil default: - return nil, fmt.Errorf("unexpected key: %s", req.Key) + return storage.DownloadResponse{}, fmt.Errorf("unexpected key: %s", req.Key) } // readTreehash (×2 pre) + comparison cache miss (×1) + graph computation (×4) + readTreehash (×2 post) = 9 }).Times(9) @@ -506,18 +495,18 @@ func TestGetChangedTargets_CacheWriteUsesAppCtx(t *testing.T) { graphBytes := graphBuf.Bytes() storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).DoAndReturn( - func(_ context.Context, req storage.DownloadRequest) (*storage.DownloadResponse, error) { + func(_ context.Context, req storage.DownloadRequest) (storage.DownloadResponse, error) { switch { case strings.Contains(req.Key, "compared-targets"): - return nil, &storage.NotFoundError{Path: req.Key} + return storage.DownloadResponse{}, &storage.NotFoundError{Path: req.Key} case strings.Contains(req.Key, "sha1"): - return &storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash1")))}, nil + return storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash1")))}, nil case strings.Contains(req.Key, "sha2"): - return &storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash2")))}, nil + return storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash2")))}, nil case strings.Contains(req.Key, "treehash"): - return &storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(graphBytes))}, nil + return storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(graphBytes))}, nil default: - return nil, fmt.Errorf("unexpected key: %s", req.Key) + return storage.DownloadResponse{}, fmt.Errorf("unexpected key: %s", req.Key) } }).AnyTimes() @@ -1156,11 +1145,11 @@ func TestGetChangedTargets_CacheHitWithDistanceFilter(t *testing.T) { storagemock := storagemock.NewMockStorage(ctrl) gomock.InOrder( storagemock.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(&storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash1")))}, nil), + Return(storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash1")))}, nil), storagemock.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(&storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash2")))}, nil), + Return(storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader([]byte("treehash2")))}, nil), storagemock.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(&storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(cachedBytes))}, nil), + Return(storage.DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(cachedBytes))}, nil), ) var sent []*pb.GetChangedTargetsResponse diff --git a/controller/gettargetgraph.go b/controller/gettargetgraph.go index ff853c8..50045f0 100644 --- a/controller/gettargetgraph.go +++ b/controller/gettargetgraph.go @@ -110,10 +110,6 @@ func (c *controller) getGraph(ctx context.Context, buildDescription *pb.BuildDes logger.Error("getGraph: Storage error", zap.Error(err)) return nil, err } - } else if treehashResponse == nil || treehashResponse.ReadCloser == nil { - // This shouldn't happen with valid Storage implementation, but handle gracefully - logger.Info("getGraph: Empty response from Storage") - return nil, nil // Return nil to indicate no send should happen } else { defer treehashResponse.ReadCloser.Close() treehashBytes, err := io.ReadAll(treehashResponse.ReadCloser) diff --git a/controller/gettargetgraph_test.go b/controller/gettargetgraph_test.go index c614f87..fe2d535 100644 --- a/controller/gettargetgraph_test.go +++ b/controller/gettargetgraph_test.go @@ -41,9 +41,9 @@ func TestGetTargetGraph_CacheMiss_NoSend(t *testing.T) { // Return a valid treehash, then an empty graph blob (no messages). gomock.InOrder( store.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(&storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-empty"))}, nil), + Return(storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-empty"))}, nil), store.EXPECT().Get(gomock.Any(), gomock.Any()). - Return(&storage.DownloadResponse{ReadCloser: newMockReadCloser(nil)}, nil), + Return(storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte{})}, nil), ) c := NewController(context.Background(), Params{ Logger: zaptest.NewLogger(t), @@ -69,7 +69,7 @@ func TestGetTargetGraph_StorageError_Propagates(t *testing.T) { stream := tangomock.NewMockTangoServiceGetTargetGraphYARPCServer(ctrl) stream.EXPECT().Context().Return(context.Background()) storagemock := storagemock.NewMockStorage(ctrl) - storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, expected) + storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{}, expected) c := NewController(context.Background(), Params{ Logger: zaptest.NewLogger(t), Storage: storagemock, @@ -93,8 +93,8 @@ func TestGetTargetGraph_DecodeError_ReturnsError(t *testing.T) { stream.EXPECT().Context().Return(context.Background()) storagemock := storagemock.NewMockStorage(ctrl) gomock.InOrder( - storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-abc"))}, nil), - storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("bad-bytes"))}, nil), + storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-abc"))}, nil), + storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("bad-bytes"))}, nil), ) c := NewController(context.Background(), Params{ Logger: zaptest.NewLogger(t), @@ -126,8 +126,8 @@ func TestGetTargetGraph_SendsWhenItemPresent(t *testing.T) { require.NoError(t, err) gomock.InOrder( - store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-xyz"))}, nil), - store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ReadCloser: newMockReadCloser(buf.Bytes())}, nil), + store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-xyz"))}, nil), + store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ReadCloser: newMockReadCloser(buf.Bytes())}, nil), ) c := NewController(context.Background(), Params{ Logger: zaptest.NewLogger(t), @@ -188,7 +188,7 @@ func TestGetTargetGraph_TreehashNotFound_NoError(t *testing.T) { stream.EXPECT().Send(gomock.Any()).Return(nil) store := storagemock.NewMockStorage(ctrl) - store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, &storage.NotFoundError{Path: "x"}) + store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{}, &storage.NotFoundError{Path: "x"}) orchestrator := orchestratormock.NewMockOrchestrator(ctrl) // Provide a fake GraphReader that yields one message then EOF graphReader := storagemock.NewMockGraphReader(ctrl) @@ -218,7 +218,7 @@ func TestGetTargetGraph_TreehashReadError(t *testing.T) { stream := tangomock.NewMockTangoServiceGetTargetGraphYARPCServer(ctrl) stream.EXPECT().Context().Return(context.Background()) store := storagemock.NewMockStorage(ctrl) - store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ReadCloser: &errReadCloser{err: errors.New("readfail")}}, nil) + store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ReadCloser: &errReadCloser{err: errors.New("readfail")}}, nil) c := NewController(context.Background(), Params{ Logger: zaptest.NewLogger(t), Storage: store, @@ -236,8 +236,8 @@ func TestGetTargetGraph_GraphFetchError(t *testing.T) { stream.EXPECT().Context().Return(context.Background()) store := storagemock.NewMockStorage(ctrl) gomock.InOrder( - store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-abc"))}, nil), - store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, errors.New("graph error")), + store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-abc"))}, nil), + store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{}, errors.New("graph error")), ) c := NewController(context.Background(), Params{ Logger: zaptest.NewLogger(t), @@ -256,8 +256,8 @@ func TestGetTargetGraph_GraphReadError(t *testing.T) { stream.EXPECT().Context().Return(context.Background()) store := storagemock.NewMockStorage(ctrl) gomock.InOrder( - store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-abc"))}, nil), - store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ReadCloser: &errReadCloser{err: errors.New("readfail")}}, nil), + store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-abc"))}, nil), + store.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ReadCloser: &errReadCloser{err: errors.New("readfail")}}, nil), ) c := NewController(context.Background(), Params{ Logger: zaptest.NewLogger(t), @@ -284,8 +284,8 @@ func TestGetTargetGraph_StreamSendError(t *testing.T) { stream.EXPECT().Send(gomock.Any()).Return(errors.New("send fail")) gomock.InOrder( - storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-abc"))}, nil), - storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ReadCloser: newMockReadCloser(buf.Bytes())}, nil), + storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ReadCloser: newMockReadCloser([]byte("treehash-abc"))}, nil), + storagemock.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ReadCloser: newMockReadCloser(buf.Bytes())}, nil), ) c := NewController(context.Background(), Params{ Logger: zaptest.NewLogger(t), diff --git a/core/storage/changedtargetsreader.go b/core/storage/changedtargetsreader.go index 69df8b6..1f3806b 100644 --- a/core/storage/changedtargetsreader.go +++ b/core/storage/changedtargetsreader.go @@ -58,9 +58,6 @@ func NewChangedTargetsReader(ctx context.Context, st Storage, key string) (Chang if err != nil { return nil, err } - if resp == nil || resp.ReadCloser == nil { - return nil, nil - } return &changedTargetsReaderCloser{ reader: gogio.NewDelimitedReader(resp.ReadCloser, 32<<20), }, nil diff --git a/core/storage/disk/disk.go b/core/storage/disk/disk.go index aee8622..734a82f 100644 --- a/core/storage/disk/disk.go +++ b/core/storage/disk/disk.go @@ -21,6 +21,7 @@ import ( "io" "os" "path/filepath" + "strings" "github.com/uber/tango/core/storage" ) @@ -42,19 +43,19 @@ func New(rootDir string) (*diskStorage, error) { } // Get retrieves a blob by key. Returns storage.NotFoundError if not found. -func (d *diskStorage) Get(ctx context.Context, req storage.DownloadRequest) (*storage.DownloadResponse, error) { +func (d *diskStorage) Get(ctx context.Context, req storage.DownloadRequest) (storage.DownloadResponse, error) { if ctx.Err() != nil { - return nil, ctx.Err() + return storage.DownloadResponse{}, ctx.Err() } path := filepath.Join(d.rootDir, req.Key) file, err := os.Open(path) if err != nil { if os.IsNotExist(err) { - return nil, &storage.NotFoundError{Path: req.Key} + return storage.DownloadResponse{}, &storage.NotFoundError{Path: req.Key} } - return nil, err + return storage.DownloadResponse{}, err } - return &storage.DownloadResponse{ReadCloser: file}, nil + return storage.DownloadResponse{ReadCloser: file}, nil } // Put stores a blob with the given key. @@ -104,14 +105,22 @@ func (d *diskStorage) Exists(ctx context.Context, key string) (bool, error) { return false, err } -// List returns the relative paths of all regular files under the given directory prefix. -func (d *diskStorage) List(ctx context.Context, dir string) ([]string, error) { +// List returns all keys whose name starts with the given prefix. +// +// To honor the literal-prefix contract without walking the entire rootDir, this +// walks the longest path-prefix of `prefix` ending in "/" (or rootDir if none) +// and filters entries by strings.HasPrefix on the full key. +func (d *diskStorage) List(ctx context.Context, prefix string) ([]string, error) { if ctx.Err() != nil { return nil, ctx.Err() } - root := filepath.Join(d.rootDir, dir) + walkSubdir := "" + if idx := strings.LastIndex(prefix, "/"); idx >= 0 { + walkSubdir = prefix[:idx+1] + } + walkRoot := filepath.Join(d.rootDir, walkSubdir) var keys []string - err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { + err := filepath.Walk(walkRoot, func(path string, info os.FileInfo, err error) error { if err != nil { if os.IsNotExist(err) { return nil @@ -125,6 +134,9 @@ func (d *diskStorage) List(ctx context.Context, dir string) ([]string, error) { if err != nil { return err } + if !strings.HasPrefix(rel, prefix) { + return nil + } keys = append(keys, rel) return nil }) diff --git a/core/storage/disk/disk_test.go b/core/storage/disk/disk_test.go index 2f4e677..ecae9a7 100644 --- a/core/storage/disk/disk_test.go +++ b/core/storage/disk/disk_test.go @@ -169,6 +169,38 @@ func TestStorage_List(t *testing.T) { _, err := s.List(cancelledCtx, "itg") assert.Error(t, err) }) + + t.Run("partial-segment prefix matches sibling keys (literal prefix)", func(t *testing.T) { + // Both "itg/repoA..." and "itg/repoB..." start with "itg/repo" — the + // literal-prefix contract returns both, even though they are different + // "directories" in a filesystem sense. + keys, err := s.List(ctx, "itg/repo") + require.NoError(t, err) + assert.ElementsMatch(t, []string{ + "itg/repoA/2024-01-01/100_abc", + "itg/repoA/2024-01-02/200_def", + "itg/repoB/2024-01-01/300_ghi", + }, keys) + }) + + t.Run("trailing slash delimits segment", func(t *testing.T) { + // Same data, but the trailing "/" enforces a segment boundary so only + // repoA's keys match. + keys, err := s.List(ctx, "itg/repoA/") + require.NoError(t, err) + assert.ElementsMatch(t, []string{ + "itg/repoA/2024-01-01/100_abc", + "itg/repoA/2024-01-02/200_def", + }, keys) + }) + + t.Run("top-level partial prefix without slash", func(t *testing.T) { + // "g" matches "graph/treehash123" only — proves the walk doesn't require + // the prefix to name a real directory. + keys, err := s.List(ctx, "g") + require.NoError(t, err) + assert.ElementsMatch(t, []string{"graph/treehash123"}, keys) + }) } func TestStorage_Get_NotFound(t *testing.T) { @@ -178,7 +210,7 @@ func TestStorage_Get_NotFound(t *testing.T) { require.NoError(t, err) resp, err := s.Get(ctx, storage.DownloadRequest{Key: "nonexistent.txt"}) - assert.Nil(t, resp) + assert.Nil(t, resp.ReadCloser) assert.Error(t, err) assert.True(t, storage.IsNotFound(err)) } diff --git a/core/storage/graphreader.go b/core/storage/graphreader.go index 9be4bc5..544960a 100644 --- a/core/storage/graphreader.go +++ b/core/storage/graphreader.go @@ -63,9 +63,6 @@ func NewGraphReader(ctx context.Context, st Storage, key string) (GraphReader, e if err != nil { return nil, err } - if resp == nil || resp.ReadCloser == nil { - return nil, nil - } return &graphReaderCloser{ reader: gogio.NewDelimitedReader(resp.ReadCloser, 512<<20), // 512MB/message limit }, nil diff --git a/core/storage/memstorage.go b/core/storage/memstorage.go index 2fc1ce3..0868dae 100644 --- a/core/storage/memstorage.go +++ b/core/storage/memstorage.go @@ -36,14 +36,14 @@ func NewMemoryStorage() Storage { } // Get downloads a blob from the storage. Return NotFoundError when the blob is not found. -func (m *memoryStorage) Get(ctx context.Context, req DownloadRequest) (*DownloadResponse, error) { +func (m *memoryStorage) Get(ctx context.Context, req DownloadRequest) (DownloadResponse, error) { m.mu.RLock() defer m.mu.RUnlock() b, ok := m.data[req.Key] if !ok { - return nil, &NotFoundError{Path: req.Key} + return DownloadResponse{}, &NotFoundError{Path: req.Key} } - return &DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(b))}, nil + return DownloadResponse{ReadCloser: io.NopCloser(bytes.NewReader(b))}, nil } func (m *memoryStorage) Put(ctx context.Context, req UploadRequest) error { @@ -67,12 +67,12 @@ func (m *memoryStorage) Exists(ctx context.Context, key string) (bool, error) { return ok, nil } -func (m *memoryStorage) List(ctx context.Context, dir string) ([]string, error) { +func (m *memoryStorage) List(ctx context.Context, prefix string) ([]string, error) { m.mu.RLock() defer m.mu.RUnlock() var keys []string for k := range m.data { - if strings.HasPrefix(k, dir) { + if strings.HasPrefix(k, prefix) { keys = append(keys, k) } } diff --git a/core/storage/storage.go b/core/storage/storage.go index 33ab304..085fb0e 100644 --- a/core/storage/storage.go +++ b/core/storage/storage.go @@ -51,13 +51,27 @@ type UploadRequest struct { } // Storage is an abstract interface for remote data storage. +// +// Keys are opaque strings; the interface has no concept of paths, directories, +// or segments. Any structure (e.g. "/"-delimited paths) is a convention of the +// caller, and implementations MUST NOT impose path semantics of their own. type Storage interface { - // Get downloads a blob from the storage. Return NotFoundError when the blob is not found. - Get(ctx context.Context, req DownloadRequest) (*DownloadResponse, error) + // Get downloads a blob from the storage. On success the returned DownloadResponse.ReadCloser + // is non-nil and the caller owns closing it. Returns NotFoundError when the blob is not found. + Get(ctx context.Context, req DownloadRequest) (DownloadResponse, error) // Put uploads a blob to the storage Put(ctx context.Context, req UploadRequest) error // Exists checks whether a blob exists in the storage. Exists(ctx context.Context, key string) (bool, error) - // List returns the keys of all blobs under the given directory prefix. - List(ctx context.Context, dir string) ([]string, error) + // List returns all keys whose name starts with the given prefix, semantically + // equivalent to filtering the full key namespace by strings.HasPrefix(key, prefix). + // + // Implementations MUST treat prefix as a literal string prefix and MUST NOT + // interpret it as a directory path. Callers control segment boundaries by + // including a trailing "/" in their prefix: List(ctx, "foo") matches both + // "foo/bar" and "foo-bar", while List(ctx, "foo/") matches only the former. + // + // An empty prefix lists every key. The returned slice is unordered and may be + // nil when no key matches. + List(ctx context.Context, prefix string) ([]string, error) } diff --git a/core/storage/storagemock/storagemock.go b/core/storage/storagemock/storagemock.go index 6ae56a9..b8a1caa 100644 --- a/core/storage/storagemock/storagemock.go +++ b/core/storage/storagemock/storagemock.go @@ -3,7 +3,7 @@ // // Generated by this command: // -// mockgen -destination=storagemock/storagemock.go . Storage +// mockgen -destination=storagemock/storagemock.go -package=storagemock . Storage // // Package storagemock is a generated GoMock package. @@ -57,10 +57,10 @@ func (mr *MockStorageMockRecorder) Exists(ctx, key any) *gomock.Call { } // Get mocks base method. -func (m *MockStorage) Get(ctx context.Context, req storage.DownloadRequest) (*storage.DownloadResponse, error) { +func (m *MockStorage) Get(ctx context.Context, req storage.DownloadRequest) (storage.DownloadResponse, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Get", ctx, req) - ret0, _ := ret[0].(*storage.DownloadResponse) + ret0, _ := ret[0].(storage.DownloadResponse) ret1, _ := ret[1].(error) return ret0, ret1 } @@ -71,6 +71,21 @@ func (mr *MockStorageMockRecorder) Get(ctx, req any) *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Get", reflect.TypeOf((*MockStorage)(nil).Get), ctx, req) } +// List mocks base method. +func (m *MockStorage) List(ctx context.Context, prefix string) ([]string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "List", ctx, prefix) + ret0, _ := ret[0].([]string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// List indicates an expected call of List. +func (mr *MockStorageMockRecorder) List(ctx, prefix any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "List", reflect.TypeOf((*MockStorage)(nil).List), ctx, prefix) +} + // Put mocks base method. func (m *MockStorage) Put(ctx context.Context, req storage.UploadRequest) error { m.ctrl.T.Helper() @@ -84,18 +99,3 @@ func (mr *MockStorageMockRecorder) Put(ctx, req any) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Put", reflect.TypeOf((*MockStorage)(nil).Put), ctx, req) } - -// List mocks base method. -func (m *MockStorage) List(ctx context.Context, dir string) ([]string, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "List", ctx, dir) - ret0, _ := ret[0].([]string) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// List indicates an expected call of List. -func (mr *MockStorageMockRecorder) List(ctx, dir any) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "List", reflect.TypeOf((*MockStorage)(nil).List), ctx, dir) -} diff --git a/orchestrator/native_orchestrator_test.go b/orchestrator/native_orchestrator_test.go index 257b6e8..9739ad4 100644 --- a/orchestrator/native_orchestrator_test.go +++ b/orchestrator/native_orchestrator_test.go @@ -48,7 +48,7 @@ func TestNative_GetTargetGraph_Success(t *testing.T) { }) require.NoError(t, err) // Single fetch by remote/treehash for the graph - st.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ + st.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ ReadCloser: io.NopCloser(bytes.NewReader(buf.Bytes())), }, nil) @@ -92,7 +92,7 @@ func TestNative_GetTargetGraph_TreehashNotFound_NoError(t *testing.T) { defer ctrl.Finish() st := storagemock.NewMockStorage(ctrl) // First attempt returns NotFound to trigger compute path. - st.EXPECT().Get(gomock.Any(), gomock.Any()).Return(nil, &storage.NotFoundError{Path: "missing"}) + st.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{}, &storage.NotFoundError{Path: "missing"}) // Expect writes (graph list and treehash cache mapping) st.EXPECT().Put(gomock.Any(), gomock.Any()).Return(nil).MinTimes(2) // After compute, second read returns a valid delimited stream with one message @@ -100,7 +100,7 @@ func TestNative_GetTargetGraph_TreehashNotFound_NoError(t *testing.T) { _ = gogio.NewDelimitedWriter(&buf).WriteMsg(&pb.GetTargetGraphResponse{ Item: &pb.GetTargetGraphResponse_Targets{Targets: &pb.OptimizedTargets{}}, }) - st.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ + st.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ ReadCloser: io.NopCloser(bytes.NewReader(buf.Bytes())), }, nil) g := gitmock.NewMockInterface(ctrl) @@ -180,7 +180,7 @@ func TestNative_GetTargetGraph_AppliesGitHubPR(t *testing.T) { // Compute treehash g.EXPECT().RevParse(gomock.Any(), "HEAD^{tree}").Return("treehash", nil) // Single storage fetch for graph by remote/treehash - st.EXPECT().Get(gomock.Any(), gomock.Any()).Return(&storage.DownloadResponse{ + st.EXPECT().Get(gomock.Any(), gomock.Any()).Return(storage.DownloadResponse{ ReadCloser: io.NopCloser(bytes.NewReader(buf.Bytes())), }, nil) ws := workspacemock.NewMockWorkspace(ctrl)