From f9ba4a943f62cc74c29b43fa1d97c78ed6211eb7 Mon Sep 17 00:00:00 2001 From: Dan Allen Date: Thu, 11 Jul 2024 02:21:23 -0600 Subject: [PATCH] resolves #15 add option on extension to control whether temporary/managed worktrees are kept and for how long --- CHANGELOG.adoc | 4 + .../ROOT/pages/configuration-keys.adoc | 7 +- docs/modules/ROOT/pages/register.adoc | 21 +++++- packages/collector-extension/lib/index.js | 56 ++++++++++++-- .../test/collector-extension-test.js | 73 +++++++++++++++++-- 5 files changed, 146 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 84c85e5..c7a5298 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -6,6 +6,10 @@ For a detailed view of what's changed, refer to the {url-repo}/commits[commit hi == Unreleased +=== Added + +* add `keep_worktrees` option on extension to control whether temporary worktrees are kept and for how long (#15) + === Changed * upgrade to glob-stream 8.0.x to remove 16 file limit when scanning for files in single directory and restore multiple glob patterns (#17) diff --git a/docs/modules/ROOT/pages/configuration-keys.adoc b/docs/modules/ROOT/pages/configuration-keys.adoc index 33b47e3..8fce94a 100644 --- a/docs/modules/ROOT/pages/configuration-keys.adoc +++ b/docs/modules/ROOT/pages/configuration-keys.adoc @@ -82,7 +82,12 @@ If there's only a single entry, the array can be replaced by a map for a single If the directory to clean is the same as the directory to scan, the clean entry can be created implicitly by setting the `clean` key on the scan entry to `true` (e.g., `clean: true`). See <>. -Note that when running collector on a content source without a worktree (such as a remote repository), the worktree will always start out in a clean state. +Note that when running collector on a content source without a worktree (such as a remote repository), it will create a temporary one. +By default, the worktree will be removed once Collector is finished using it. 
+In this scenario, the worktree will always start out in a clean state. +If the `keep_worktrees` option on the extension is set to `true`, the worktree from a previous run will be reused and thus not guaranteed to be in a clean state. +If the `keep_worktrees` option is either `true` or a specified event (e.g., `until:contextClosed`), the location of the worktree will be assigned to the `collectorWorktree` property on the origin and thus available to other extensions. + When the content source is local and has a worktree, the worktree may already contain files that are untracked or ignored by git. In this case, the first clean entry can be important to ensuring a predictable result. diff --git a/docs/modules/ROOT/pages/register.adoc b/docs/modules/ROOT/pages/register.adoc index 324d2d8..208e40e 100644 --- a/docs/modules/ROOT/pages/register.adoc +++ b/docs/modules/ROOT/pages/register.adoc @@ -21,9 +21,28 @@ antora: NOTE: The quotes are required around the package name because `@` is a special character in YAML. +By default, any temporary worktrees that Collector creates are removed as soon as Collector finishes running (after the `contentAggregated` event). +If you want to keep the temporary worktrees indefinitely or until the specified event, you must set the `keep_worktrees` option on the extension. + +.antora-playbook.yml +[,yaml] +---- +antora: + extensions: + - register: '@antora/collector-extension' + keep_worktrees: until:contextClosed +# ... +---- + +The accepted values for this key are `true` (keep worktrees indefinitely), `false` (don't keep worktrees), or `until:<event>` (keep worktrees until the specified event, such as `contextClosed`). +If `keep_worktrees` is enabled, the worktrees will be unique by git repository and refname. +Unless the value is `true`, Collector will ensure the worktree from a previous run is removed before creating it. + +This key only affects temporary worktrees, not worktrees used as a content source. 
+ [#cli] == Register from the CLI Alternately, you can register the PDF extension at your discretion using the `--extension` CLI option of the `antora` command: - $ antora --extension @antora/collector-extension antora-playbook.yml \ No newline at end of file + $ antora --extension @antora/collector-extension antora-playbook.yml diff --git a/packages/collector-extension/lib/index.js b/packages/collector-extension/lib/index.js index 95ec169..48877ae 100644 --- a/packages/collector-extension/lib/index.js +++ b/packages/collector-extension/lib/index.js @@ -19,21 +19,31 @@ const yaml = require('js-yaml') const GLOB_OPTS = { ignore: ['.git'], objectMode: true, onlyFiles: false, unique: false } const PACKAGE_NAME = require('../package.json').name -module.exports.register = function () { +module.exports.register = function ({ config: { keepWorktrees = false } }) { this.once('contentAggregated', async ({ playbook, contentAggregate }) => { let logger const quiet = playbook.runtime?.quiet const cacheDir = ospath.join(getBaseCacheDir(playbook), 'collector') - //await fsp.rm(cacheDir, { force: true, recursive: true, force: true }) // Q: should we try to reuse existing cache? 
await fsp.mkdir(cacheDir, { recursive: true }) const gitCache = {} + const managedWorktrees = new Map() for (const componentVersionBucket of contentAggregate) { const { files: filesInBucket, origins = [] } = componentVersionBucket for (const origin of origins) { const { url, gitdir, refname, reftype, remote, worktree, startPath, descriptor } = origin const collectorConfig = descriptor?.ext?.collector || [] if (Array.isArray(collectorConfig) && !collectorConfig.length) continue - const worktreeDir = worktree || ospath.join(cacheDir, generateWorktreeFolderName({ url, gitdir, worktree })) + let worktreeDir = worktree + if (!worktreeDir) { + worktreeDir = ospath.join(cacheDir, generateWorktreeFolderName(origin, keepWorktrees)) + if (managedWorktrees.has(worktreeDir)) { + managedWorktrees.get(worktreeDir).add(origin) + } else { + managedWorktrees.set(worktreeDir, new Set([origin])) + if (keepWorktrees !== true) await fsp.rm(worktreeDir, { force: true, recursive: true }) + } + origin.collectorWorktree = worktreeDir + } const expandPathContext = { base: worktreeDir, cwd: worktreeDir, dot: ospath.join(worktreeDir, startPath) } const collectors = (Array.isArray(collectorConfig) ? collectorConfig : [collectorConfig]).map((collector) => { const { clean: cleanConfig = [], run: runConfig = {}, scan: scanConfig = [] } = collector @@ -119,8 +129,29 @@ module.exports.register = function () { } } } - await fsp.rm(cacheDir, { recursive: true }) + if (!keepWorktrees) { + for (const [collectorWorktree, origins] of managedWorktrees) { + for (const origin of origins) delete origin.collectorWorktree + await fsp.rm(collectorWorktree, { recursive: true }) + } + } }) + + if (typeof keepWorktrees === 'string' && keepWorktrees.startsWith('until:')) { + const eventName = keepWorktrees.slice(6) + this.once(eventName === 'exit' ? 
'contextClosed' : eventName, async ({ contentCatalog }) => { + // TODO get origins from component version once implemented in Antora core + const collectorWorktrees = contentCatalog.getFiles().reduce((accum, { src }) => { + const collectorWorktree = src.origin?.collectorWorktree + if (collectorWorktree) { + accum.add(collectorWorktree) + delete src.origin.collectorWorktree + } + return accum + }, new Set()) + for (const collectorWorktree of collectorWorktrees) await fsp.rm(collectorWorktree, { recursive: true }) + }) + } } /** @@ -167,9 +198,20 @@ async function prepareWorktree (repo) { } } -function generateWorktreeFolderName ({ url, gitdir, worktree }) { - if (worktree === undefined) return ospath.basename(gitdir, '.git') - return `${url.substr(url.lastIndexOf('/') + 1)}-${createHash('sha1').update(url).digest('hex')}` +function generateWorktreeFolderName ({ url, gitdir, refname, worktree }, keepWorktrees) { + const refnameQualifier = keepWorktrees ? '@' + refname.replace(/[/]/g, '-') : undefined + if (worktree === undefined) { + const folderName = ospath.basename(gitdir, '.git') + if (!refnameQualifier) return folderName + const lastHyphenIdx = folderName.lastIndexOf('-') + return `${folderName.slice(0, lastHyphenIdx)}${refnameQualifier}${folderName.slice(lastHyphenIdx)}` + } + let normalizedUrl = (url || gitdir).toLowerCase() + if (posixify) normalizedUrl = posixify(normalizedUrl) + normalizedUrl = normalizedUrl.replace(/(?:[/]?\.git|[/])$/, '') + const slug = ospath.basename(normalizedUrl) + (refnameQualifier || '') + const hash = createHash('sha1').update(normalizedUrl).digest('hex') + return `${slug}-${hash}` } function getBaseCacheDir ({ dir: dot, runtime: { cacheDir: preferredDir } }) { diff --git a/packages/collector-extension/test/collector-extension-test.js b/packages/collector-extension/test/collector-extension-test.js index 8cb51c7..7ec7b3f 100644 --- a/packages/collector-extension/test/collector-extension-test.js +++ 
b/packages/collector-extension/test/collector-extension-test.js @@ -45,8 +45,8 @@ describe('collector extension', () => { if (origin.worktree === undefined) { folderName = ospath.basename(origin.gitdir, '.git') } else { - const url = origin.url - folderName = `${url.substr(url.lastIndexOf('/') + 1)}-${createHash('sha1').update(url).digest('hex')}` + const url = origin.url.toLowerCase() + folderName = `${url.slice(url.lastIndexOf('/') + 1)}-${createHash('sha1').update(url).digest('hex')}` } return ospath.join(getCollectorCacheDir(), folderName) } @@ -162,9 +162,21 @@ describe('collector extension', () => { const contentAggregate = await aggregateContent(playbook) if (before) isAsync(before) ? await before(contentAggregate, playbook) : before(contentAggregate, playbook) const generatorContext = createGeneratorContext() - ext.register.call(generatorContext) + const registerVars = { config: collectorConfig?.globalConfig || {} } + ext.register.call(generatorContext, registerVars) await generatorContext.contentAggregated({ playbook, contentAggregate }) if (after) isAsync(after) ? 
await after(contentAggregate) : after(contentAggregate) + if (typeof generatorContext.contextClosed === 'function') { + const contentCatalog = { + getFiles () { + return contentAggregate.reduce((accum, { origins = [] }) => { + origins.forEach((origin) => accum.push({ src: { origin } })) + return accum + }, []) + }, + } + await generatorContext.contextClosed({ contentCatalog }) + } } it('should not allocate worktree for reference in git tree if collector is not configured', async () => { @@ -203,6 +215,55 @@ describe('collector extension', () => { }) }) + it('should keep temporary worktree if specified', async () => { + const collectorConfig = { + globalConfig: { keepWorktrees: true }, + run: { command: 'node .gen-start-page.js' }, + scan: { dir: 'build' }, + } + let collectorWorktree + await runScenario({ + repoName: 'test-at-root', + collectorConfig, + before: (contentAggregate) => { + expect(contentAggregate).to.have.lengthOf(1) + expect(contentAggregate[0].files).to.be.empty() + }, + after: (contentAggregate) => { + expect(contentAggregate[0].files).to.have.lengthOf(1) + collectorWorktree = contentAggregate[0].files[0].src.origin.collectorWorktree + expect(collectorWorktree).to.be.a.directory() + expect(ospath.basename(collectorWorktree)).to.include('@main') + }, + }) + expect(collectorWorktree).to.be.a.directory() + await fsp.rm(collectorWorktree, { recursive: true }) + }) + + it('should keep temporary worktree until specified event', async () => { + const collectorConfig = { + globalConfig: { keepWorktrees: 'until:contextClosed' }, + run: { command: 'node .gen-start-page.js' }, + scan: { dir: 'build' }, + } + let collectorWorktree + await runScenario({ + repoName: 'test-at-root', + collectorConfig, + before: (contentAggregate) => { + expect(contentAggregate).to.have.lengthOf(1) + expect(contentAggregate[0].files).to.be.empty() + }, + after: (contentAggregate) => { + expect(contentAggregate[0].files).to.have.lengthOf(1) + collectorWorktree = 
contentAggregate[0].files[0].src.origin.collectorWorktree + expect(collectorWorktree).to.be.a.directory() + expect(ospath.basename(collectorWorktree)).to.include('@main') + }, + }) + expect(collectorWorktree).to.not.be.a.path() + }) + it('should populate properties of file collected from temporary worktree', async () => { const collectorConfig = { run: { command: 'node .gen-start-page.js' }, @@ -831,9 +892,9 @@ describe('collector extension', () => { }) }) - it('should remove dedicated cache dir for collector after run', async () => { + it('should empty dedicated cache dir for collector after run', async () => { await runScenario({ repoName: 'test-at-root' }) - expect(getCollectorCacheDir()).to.not.be.a.path() + expect(getCollectorCacheDir()).to.be.a.directory().and.empty() }) it('should reuse worktree without cleaning if no clean step is specified', async () => { @@ -1321,7 +1382,7 @@ describe('collector extension', () => { } const contentAggregate = await aggregateContent(playbook) const generatorContext = createGeneratorContext() - ext.register.call(generatorContext) + ext.register.call(generatorContext, { config: {} }) await generatorContext.contentAggregated({ playbook, contentAggregate }) expect(contentAggregate).to.have.lengthOf(1) expect(contentAggregate[0].files.map((it) => it.path)).to.have.members([ -- GitLab