Simplify Elasticsearch index mapping

This commit is contained in:
2025-02-22 20:12:05 -03:00
parent 0b58c47623
commit 80c7273501
2 changed files with 24 additions and 8 deletions

View File

@@ -3,7 +3,7 @@ import HTMLParse from "node-html-parser";
export async function getTableRows(data) {
let retryLeft = 5;
const delayMs = 500
const delayMs = 500
while (retryLeft) {
try{
return await fetch(data.url)
@@ -40,6 +40,7 @@ export async function parseOutFile(data) {
let path = file.querySelector(".link").firstChild.getAttribute("href");
if (path == "../") return;
let name = innertext(file.querySelector(".link").innerHTML).trim();
if (name == "Parent directory/") return;
let fullName = data.base + name;
let size = innertext(file.querySelector(".size").innerHTML).trim();
let cats = findCategory(fullName, data.catList)

View File

@@ -1,5 +1,6 @@
import { Client } from '@elastic/elasticsearch';
import debugPrint from '../debugprint.js';
import { File } from '../models/index.js';
const client = new Client({
node: process.env.ELASTICSEARCH_URL || 'http://localhost:9200'
@@ -48,11 +49,7 @@ export async function initElasticsearch() {
region: {
type: 'text',
analyzer: 'standard'
},
path: { type: 'keyword' },
size: { type: 'keyword' },
date: { type: 'keyword' },
group: { type: 'keyword' }
}
}
}
}
@@ -81,7 +78,12 @@ export async function indexFile(file) {
export async function bulkIndexFiles(files) {
const operations = files.flatMap(file => [
{ index: { _index: INDEX_NAME, _id: file.id.toString() } },
file
{
filename: file.filename,
category: file.category,
type: file.type,
region: file.region
}
]);
try {
@@ -132,9 +134,22 @@ export async function search(query, options) {
const response = await client.search(searchQuery);
const elapsed = parseHrtimeToSeconds(process.hrtime(startTime));
// Fetch full records from PostgreSQL for the search results
const ids = response.hits.hits.map(hit => hit._id);
const fullRecords = await File.findAll({
where: { id: ids }
});
// Create a map of full records by id
const recordMap = fullRecords.reduce((map, record) => {
map[record.id] = record;
return map;
}, {});
// Combine Elasticsearch results with full PostgreSQL records
return {
items: response.hits.hits.map(hit => ({
...hit._source,
...recordMap[hit._id].dataValues,
score: hit._score,
highlights: hit.highlight
})),