diff --git a/lib/fileworker.js b/lib/fileworker.js
index ab51522..d66b30f 100644
--- a/lib/fileworker.js
+++ b/lib/fileworker.js
@@ -3,7 +3,7 @@ import HTMLParse from "node-html-parser";
 
 export async function getTableRows(data) {
   let retryLeft = 5;
-  const delayMs = 500
+  const delayMs = 500
   while (retryLeft) {
     try{
       return await fetch(data.url)
@@ -40,6 +40,7 @@ export async function parseOutFile(data) {
     let path = file.querySelector(".link").firstChild.getAttribute("href");
     if (path == "../") return;
     let name = innertext(file.querySelector(".link").innerHTML).trim();
+    if (name == "Parent directory/") return;
     let fullName = data.base + name;
     let size = innertext(file.querySelector(".size").innerHTML).trim();
     let cats = findCategory(fullName, data.catList)
diff --git a/lib/services/elasticsearch.js b/lib/services/elasticsearch.js
index d349959..9373e13 100644
--- a/lib/services/elasticsearch.js
+++ b/lib/services/elasticsearch.js
@@ -1,5 +1,6 @@
 import { Client } from '@elastic/elasticsearch';
 import debugPrint from '../debugprint.js';
+import { File } from '../models/index.js';
 
 const client = new Client({
   node: process.env.ELASTICSEARCH_URL || 'http://localhost:9200'
@@ -48,11 +49,7 @@ export async function initElasticsearch() {
         region: {
           type: 'text',
           analyzer: 'standard'
-        },
-        path: { type: 'keyword' },
-        size: { type: 'keyword' },
-        date: { type: 'keyword' },
-        group: { type: 'keyword' }
+        }
       }
     }
   }
@@ -81,7 +78,12 @@ export async function indexFile(file) {
 export async function bulkIndexFiles(files) {
   const operations = files.flatMap(file => [
     { index: { _index: INDEX_NAME, _id: file.id.toString() } },
-    file
+    {
+      filename: file.filename,
+      category: file.category,
+      type: file.type,
+      region: file.region
+    }
   ]);
 
   try {
@@ -132,9 +134,22 @@ export async function search(query, options) {
   const response = await client.search(searchQuery);
   const elapsed = parseHrtimeToSeconds(process.hrtime(startTime));
 
+  // Fetch full records from PostgreSQL for the search results
+  const ids = response.hits.hits.map(hit => hit._id);
+  const fullRecords = await File.findAll({
+    where: { id: ids }
+  });
+
+  // Create a map of full records by id
+  const recordMap = fullRecords.reduce((map, record) => {
+    map[record.id] = record;
+    return map;
+  }, {});
+
+  // Combine Elasticsearch results with full PostgreSQL records
   return {
     items: response.hits.hits.map(hit => ({
-      ...hit._source,
+      ...recordMap[hit._id].dataValues,
       score: hit._score,
       highlights: hit.highlight
     })),