Skip to content

Commit

Permalink
Create DB while fetching
Browse files Browse the repository at this point in the history
Previously we were missing resources and Prop
  • Loading branch information
tmclaugh committed Nov 4, 2024
1 parent 2e58c62 commit bfc9d5d
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 110 deletions.
78 changes: 0 additions & 78 deletions src/createDb.ts

This file was deleted.

52 changes: 32 additions & 20 deletions src/createWorkspace.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import fs from 'fs-extra'
import path from 'path'

import { open, Database } from 'sqlite'
import sqlite3 from 'sqlite3'

const DOCSET_DIR = 'aws-cloudformation.docset'
const DOCSET_CONTENTS_DIR = 'Contents'
const DOCSET_RESOURCES_DIR = path.join(DOCSET_CONTENTS_DIR, 'Resources')
const DOCSET_DOCS_DIR = path.join(DOCSET_RESOURCES_DIR, 'Documents')
const PLIST_FILE_NAME = 'Info.plist'
const PLIST_FILE_PATH = path.join(DOCSET_CONTENTS_DIR, PLIST_FILE_NAME)
const ICON_FILE_NAME = 'icon.png'
import {
DOCSET_DIR,
DOCSET_DOCS_DIR,
DB_FILE_PATH
} from './paths.js'


export interface WorkspaceDirs {
Expand All @@ -21,32 +21,44 @@ export interface WorkspaceDirs {
}


/**
 * Initialize the docset search-index database.
 *
 * Opens the SQLite database at `dbFile`, then creates the `searchIndex` table with
 * columns id, name, type, and path, plus a unique index named `anchor` over
 * (name, type, path).
 *
 * The returned handle is still open; the caller is responsible for closing it.
 *
 * @param dbFile Database file path
 * @returns The open database handle
 * @throws If the database cannot be opened or a schema statement fails (for example
 * because the table already exists). On a schema failure the handle is closed before
 * the error is rethrown.
 */
export async function initializeDb(dbFile: string): Promise<Database> {
  const db = await open({
    filename: dbFile,
    driver: sqlite3.Database,
  })

  try {
    await db.run(
      'CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, name TEXT, type TEXT, path TEXT);'
    )
    await db.run('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
  } catch (err: unknown) {
    // Do not leak the handle when schema creation fails. A failure to close must not
    // mask the original error, so it is deliberately ignored here.
    try {
      await db.close()
    } catch {
      // ignored: the schema error below is the one the caller needs to see
    }
    throw err
  }

  return db
}


/**
* Create workspace for building docset
*
* Empties buildRoot, creates the docset's documents directory (and with it every
* parent directory), copies the app's `static` directory into the docset directory,
* and initializes the search-index database inside the docset.
*
* @param appRoot Root directory for app
* @param buildRoot Root directory for building docset
*/
export async function createWorkspace(appRoot: string, buildRoot: string): Promise<WorkspaceDirs> {
export async function createWorkspace(appRoot: string, buildRoot: string): Promise<void> {

const docsetDir = path.join(buildRoot, DOCSET_DIR)
const docsetContentsDir = path.join(docsetDir, DOCSET_CONTENTS_DIR)
const docsetResourcesDir = path.join(docsetDir, DOCSET_RESOURCES_DIR)
const docsetDocsDir = path.join(docsetDir, DOCSET_DOCS_DIR)
const plistFilePath = path.join(docsetDir, PLIST_FILE_PATH)
const iconFilePath = path.join(docsetDir, ICON_FILE_NAME)

// Start from a clean build directory: any previous build output is removed.
await fs.emptyDir(buildRoot);
await fs.ensureDir(docsetDocsDir) // deepest directory path
await fs.copy(path.join(appRoot, 'static'), docsetDir)

// NOTE(review): initializeDb returns an open Database handle that is discarded here
// without being closed — confirm whether it should be closed before returning.
await initializeDb(path.join(docsetDir, DB_FILE_PATH))

return {
docsetDir,
docsetContentsDir,
docsetResourcesDir,
docsetDocsDir,
plistFilePath,
iconFilePath
}
return
}
45 changes: 41 additions & 4 deletions src/fetchDocs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,16 @@ import fs from 'fs-extra'
import got from 'got'
import path from 'path'
import * as cheerio from 'cheerio'
import { open } from 'sqlite'
import sqlite3 from 'sqlite3'

import { addTocAnchors } from './addTocAnchors.js'
import { highlightCode } from './highlightCode.js'
import {
DB_FILE_PATH,
DOCSET_DIR,
DOCSET_DOCS_DIR
} from './paths.js'
import { TocItem } from './types.js'
import { template } from './template.html.js'

Expand All @@ -25,19 +32,46 @@ export async function createPage(pageBody: string): Promise<cheerio.CheerioAPI>
return $
}


/**
 * Populate database with a single table-of-contents entry.
 *
 * Opens the database at `dbFile`, inserts one row into `searchIndex` for `tocItem`
 * (name = title, type = docType, path = href), and closes the database again. The
 * insert uses `INSERT OR IGNORE`, so a row that conflicts with an existing one is
 * silently skipped.
 *
 * For items whose docType is 'Service', a leading "AWS" or "Amazon" is stripped from
 * the title before it is stored.
 *
 * @param dbFile Database file path
 * @param tocItem TocItem to populate database with
 */
export async function populateDb(dbFile: string, tocItem: TocItem): Promise<void> {
  console.info('Populating db with:', tocItem.title)

  const db = await open({
    filename: dbFile,
    driver: sqlite3.Database,
  })

  try {
    let title = tocItem.title
    if (tocItem.docType === 'Service') {
      title = tocItem.title.replace(/^(AWS|Amazon)/, '').trim()
    }

    // Bind the values as parameters instead of interpolating them into the SQL text:
    // titles and hrefs come from fetched documentation and may contain single quotes,
    // which would otherwise break the statement or allow SQL injection.
    await db.run(
      'INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?, ?, ?);',
      title,
      tocItem.docType,
      tocItem.href
    )
  } finally {
    // Always release the handle, even when the insert fails.
    await db.close()
  }
}


/**
* Given a tocItem and urlRoot, fetch the page from the href property and save it as a file in
* the docset's documents directory. Then iterate through the contents property, recursively
* calling itself on each TocItem in the array.
*
* @param tocItem TocItem to fetch
* @param urlRoot Root URL for the docs
* @param docsDir Directory to save the docs
* @param buildRoot Root directory for building the docset
*/
export async function fetchDocs(
tocItem: TocItem,
urlRoot: string,
docsDir: string
buildRoot: string
): Promise<void> {
// Fetch the page
console.info('Fetching doc page:', [urlRoot, tocItem.href].join('/'))
Expand All @@ -52,19 +86,22 @@ export async function fetchDocs(
await highlightCode($page)

// Save the page to a file
const filePath = path.join(docsDir, tocItem.href)
const filePath = path.join(buildRoot, DOCSET_DIR, DOCSET_DOCS_DIR, tocItem.href)

if ( !fs.existsSync(filePath) ) {
await fs.promises.writeFile(filePath, $page.html())
} else {
throw new Error(`File already exists: ${filePath}`)
}

// Insert into Db
await populateDb(path.join(buildRoot, DOCSET_DIR, DB_FILE_PATH), tocItem)

// Recursively fetch the contents tocItems
if (tocItem.contents) {
await Promise.all(
tocItem.contents.map( (content) => {
return fetchDocs(content, urlRoot, docsDir)
return fetchDocs(content, urlRoot, buildRoot)
})
)
}
Expand Down
10 changes: 2 additions & 8 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import path from 'path'
import { createWorkspace } from './createWorkspace.js'
import { fetchDocsToc } from './fetchDocsToc.js'
import { fetchDocs } from './fetchDocs.js'
import { createDb } from './createDb.js'

const AWS_CFN_DOC_ROOT = process.env.DOC_ROOT || 'https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide'
const AWS_CFN_TOC_FILE = 'toc-contents.json'
Expand All @@ -18,19 +17,14 @@ export const APP_ROOT = path.resolve(
export const DOC_BUILD_ROOT = path.join(APP_ROOT, 'docbuild')

/**
* Build the docset: create the workspace, fetch the documentation table of contents,
* then fetch the docs for every TOC section concurrently.
*
* @param appRoot Root directory for app, passed through to createWorkspace
* @param docBuildRoot Root directory for building the docset
*/
export async function main(appRoot: string, docBuildRoot: string): Promise<void> {
const {
docsetDocsDir,
docsetResourcesDir
} = await createWorkspace(appRoot, docBuildRoot)
await createWorkspace(appRoot, docBuildRoot)
const tocSections = await fetchDocsToc(AWS_CFN_TOC_URL)

// All sections are fetched in parallel; the first rejection rejects the whole build.
await Promise.all(
Object.entries(tocSections).map( ([_, items]) => {
return fetchDocs(items, AWS_CFN_DOC_ROOT, docsetDocsDir)
return fetchDocs(items, AWS_CFN_DOC_ROOT, docBuildRoot)
})
)

await createDb(docsetResourcesDir, tocSections)
}

// Check if the module is executed as the main module
Expand Down
11 changes: 11 additions & 0 deletions src/paths.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import path from 'path'

// Name of the docset directory; callers join it onto the build root.
export const DOCSET_DIR = 'aws-cloudformation.docset'
// The remaining directory and file paths are relative to the docset directory.
export const DOCSET_CONTENTS_DIR = 'Contents'
export const DOCSET_RESOURCES_DIR = path.join(DOCSET_CONTENTS_DIR, 'Resources')
// Directory the fetched documentation pages are written to.
export const DOCSET_DOCS_DIR = path.join(DOCSET_RESOURCES_DIR, 'Documents')
export const PLIST_FILE_NAME = 'Info.plist'
export const PLIST_FILE_PATH = path.join(DOCSET_CONTENTS_DIR, PLIST_FILE_NAME)
export const ICON_FILE_NAME = 'icon.png'
// SQLite database file holding the searchIndex table.
export const DB_FILE_NAME = 'docSet.dsidx'
export const DB_FILE_PATH = path.join(DOCSET_RESOURCES_DIR, DB_FILE_NAME)

0 comments on commit bfc9d5d

Please sign in to comment.