redo metadata, opting to pull everything in at once.

This commit is contained in:
Alexandra
2025-05-29 10:10:34 -06:00
parent cefa1f6a35
commit c231f7ffc9
5 changed files with 353 additions and 225 deletions

View File

@@ -8,4 +8,14 @@ export function debugPrintDir(string){
if(process.env.DEBUG == "1"){
console.dir(string)
}
}
/**
 * Prints a status message, redrawing the current terminal line when possible.
 *
 * When stdout is a TTY and the DEBUG environment variable is not "1", the
 * current line is cleared and the message is written in place without a
 * trailing newline. Otherwise the message goes through console.log.
 *
 * @param {string} string - The status text to display.
 */
export function singleLineStatus(string) {
  const out = process.stdout;
  const canRedrawLine = out.isTTY && process.env.DEBUG != "1";
  if (!canRedrawLine) {
    console.log(string);
    return;
  }
  out.clearLine(0);
  out.cursorTo(0);
  out.write(string);
}

View File

@@ -2,8 +2,8 @@ import { getTableRows, parseOutFile } from "./fileworker.js";
import { Piscina, FixedQueue } from "piscina";
import { resolve } from "path";
import debugPrint from "./debugprint.js";
import { File } from './models/index.js';
import { bulkIndexFiles } from './services/elasticsearch.js';
import { File } from "./models/index.js";
import { bulkIndexFiles } from "./services/elasticsearch.js";
import { optimizeDatabaseKws } from "./dboptimize.js";
import { Timer } from "./time.js";
@@ -15,7 +15,7 @@ let piscina = new Piscina({
const BATCH_SIZE = 1000; // Process files in batches for better performance
export default async function getAllFiles(catList) {
var proctime = new Timer()
var proctime = new Timer();
const url = "https://myrient.erista.me/files/";
let parentRows = await getTableRows({ url: url, base: "" });
let parents = [];
@@ -94,7 +94,9 @@ export default async function getAllFiles(catList) {
}
fetchTasks = [];
dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${fileCount}`;
dirStatus = `Directories Remaining: ${
dirs.length
}, Files Found: ${fileCount} (${proctime.elapsed()}`;
}
if (dirs.length == 0 && parseTasks.length > 0) {
@@ -122,7 +124,9 @@ export default async function getAllFiles(catList) {
}
parseTasks = [];
dirStatus = `Directories Remaining: ${dirs.length}, Files Found: ${fileCount}`;
dirStatus = `Directories Remaining: ${
dirs.length
}, Files Found: ${fileCount} (${proctime.elapsed()}`;
}
if (dirStatus) {
@@ -153,7 +157,7 @@ async function processBatch(files) {
for (let i = 0; i < files.length; i += chunkSize) {
const chunk = files.slice(i, i + chunkSize);
const dbFiles = await File.bulkCreate(
chunk.map(file => ({
chunk.map((file) => ({
filename: file.filename,
path: file.path,
size: file.size,
@@ -162,20 +166,24 @@ async function processBatch(files) {
date: file.date,
region: file.region,
group: file.group,
nongame: file.nongame
nongame: file.nongame,
})),
{
returning: true,
updateOnDuplicate: ['path']
updateOnDuplicate: ["path"],
}
);
// Index chunk in Elasticsearch
await bulkIndexFiles(dbFiles);
debugPrint(`Processed ${i + chunk.length} of ${files.length} files in current batch`);
debugPrint(
`Processed ${i + chunk.length} of ${
files.length
} files in current batch`
);
}
} catch (error) {
console.error('Error processing batch:', error);
console.error("Error processing batch:", error);
}
}
@@ -205,4 +213,4 @@ function singleLineStatus(str) {
} else {
console.log(str);
}
}
}

View File

@@ -0,0 +1,94 @@
{
"Linux": "IBM PC Compatible",
"PC (Microsoft Windows)": "IBM PC Compatible",
"PlayStation": "Sony PlayStation 1",
"PlayStation 2": "Sony PlayStation 2",
"PlayStation 3": "Sony PlayStation 3",
"Xbox": "Microsoft Xbox",
"Xbox 360": "Microsoft Xbox 360",
"Commodore C64/128/MAX": "Commodore 64",
"Amiga": "Commodore Amiga",
"Nintendo Entertainment System": "Nintendo Entertainment System",
"Nintendo DS": "Nintendo DS",
"Nintendo GameCube": "Nintendo GameCube",
"Game Boy Color": "Nintendo Game Boy Color",
"Dreamcast": "Sega Dreamcast",
"Game Boy Advance": "Nintendo Game Boy Advance",
"Amstrad CPC": "Amstrad CPC",
"ZX Spectrum": "Sinclair ZX Spectrum",
"MSX": "Microsoft MSX",
"Sega Mega Drive/Genesis": "Sega Mega Drive",
"Sega 32X": "Sega 32X",
"Sega Saturn": "Sega Saturn",
"Game Boy": "Nintendo Game Boy",
"iOS": "Mobile",
"Sega Game Gear": "Sega Game Gear",
"Nintendo 3DS": "Nintendo 3DS",
"PlayStation Portable": "Sony PlayStation Portable",
"Wii U": "Nintendo Wii U",
"N-Gage": "Nokia N-Gage",
"PlayStation Vita": "Sony PlayStation Vita",
"3DO Interactive Multiplayer": "Panasonic 3DO",
"Family Computer Disk System": "Nintendo Family Computer Disk System",
"MSX2": "Microsoft MSX2",
"Atari 7800": "Atari 7800",
"Atari Lynx": "Atari Lynx",
"Atari Jaguar": "Atari Jaguar",
"Sega Master System/Mark III": "Sega Master System",
"Atari 8-bit": "Atari 8-bit",
"Atari 5200": "Atari 5200",
"Intellivision": "Mattel Intellivision",
"Vectrex": "GCE Vectrex",
"Commodore VIC-20": "Commodore VIC-20",
"Sharp X1": "Sharp X1",
"Sega CD": "Sega CD",
"Neo Geo MVS": "SNK NeoGeo Pocket",
"SG-1000": "Sega SG-1000",
"TurboGrafx-16/PC Engine": "NEC PC Engine",
"Virtual Boy": "Nintendo Virtual Boy",
"Microvision": "Microvision",
"Bally Astrocade": "Bally Astrocade",
"Commodore Plus/4": "Commodore Plus-4",
"Apple IIGS": "Apple IIGS",
"Philips CD-i": "Philips CD-i",
"Neo Geo Pocket": "SNK NeoGeo Pocket",
"Neo Geo Pocket Color": "SNK NeoGeo Pocket Color",
"Sharp X68000": "Sharp X68000",
"Fairchild Channel F": "Fairchild Channel F",
"PC Engine SuperGrafx": "NEC PC Engine",
"Texas Instruments TI-99": "Texas Instruments TI-99-4A",
"Odyssey 2 / Videopac G7000": "Magnavox Odyssey 2",
"Neo Geo CD": "SNK NeoGeo Pocket",
"New Nintendo 3DS": "New Nintendo 3DS",
"PC-9800 Series": "NEC PC-98",
"FM-7": "Fujitsu FM-7",
"Pokémon mini": "Nintendo Pokemon Mini",
"PlayStation 5": "Sony PlayStation 5",
"Xbox Series X|S": "Microsoft Xbox Series X|S",
"Google Stadia": "Google",
"DVD Player": "DVD-Video",
"Blu-ray Player": "BD-Video",
"Zeebo": "Zeebo",
"PC-FX": "NEC PC-FX",
"Game & Watch": "Nintendo Game & Watch",
"Sega Pico": "Sega PICO",
"Sinclair ZX81": "Sinclair ZX Spectrum",
"Sharp MZ-2200": "Sharp MZ-2200",
"Epoch Cassette Vision": "Epoch Game Pocket Computer",
"Epoch Super Cassette Vision": "Epoch Super Cassette Vision",
"Game.com": "Tiger Game.com",
"Casio Loopy": "Casio Loopy",
"Mega Duck/Cougar Boy": "Welback Mega Duck",
"Leapster": "LeapFrog Leapster",
"Leapster Explorer/LeadPad Explorer": "LeapFrog LeapPad",
"Watara/QuickShot Supervision": "Watara SuperVision",
"64DD": "Nintendo 64DD",
"Arduboy": "Arduboy Inc Arduboy",
"V.Smile": "VTech V.Smile",
"Arcadia 2001": "Emerson Arcadia 2001",
"Gizmondo": "Tiger Gizmondo",
"Apple Pippin": "Apple-Bandai Pippin",
"Panasonic M2": "Panasonic M2",
"Super A'Can": "Funtech Super Acan",
"Sega CD 32X": "Sega CD"
}

View File

@@ -17,8 +17,11 @@ import {
offset,
} from "@phalcode/ts-igdb-client";
import { File, Metadata } from "./database.js";
import debugPrint from "./debugprint.js";
import TaskQueue from "./taskqueue.js";
import { singleLineStatus } from "./debugprint.js";
import { Timer } from "./time.js";
import { readFileSync } from "fs";
import { dirname, resolve } from "path";
export default class MetadataSearch {
constructor() {
@@ -49,16 +52,23 @@ export default class MetadataSearch {
"platforms.name",
"game_type.type",
"screenshots.image_id",
"videos.video_id"
"videos.video_id",
];
// Empty method: calling it does nothing and returns undefined.
// NOTE(review): setupClient() assigns this.platformMap directly from the JSON
// map file, so this looks like an unused placeholder — confirm before relying on it.
getPlatformMapping() {
}
async setupClient() {
try {
if (this.twitchSecrets.client_id && this.twitchSecrets.client_secret) {
this.accessToken = await twitchAccessToken(this.twitchSecrets);
this.client = igdb(this.twitchSecrets.client_id, this.accessToken);
const mapFilePath = "./lib/json/igdb_platform_map.json";
this.platformMap = JSON.parse(readFileSync(mapFilePath, "utf8"));
if (this.accessToken) {
this.authorized = true;
this.syncAllMetadata();
return;
}
}
@@ -68,88 +78,6 @@ export default class MetadataSearch {
}
}
async getMetadata(query, retrying = false) {
try {
if (!this.authorized) return;
const { data } = await this.client
.multi(...this.buildGameMultiQuery(query))
.execute();
return data;
} catch (error) {
if (error === "ERR_BAD_REQUEST" && !retrying) {
this.setupClient();
return this.getMetadata(query, true);
}
console.error("Failed to retrieve metadata:", error);
}
}
buildGameMultiQuery(query) {
let multiQuery = [];
for (let x in query) {
multiQuery.push(
request("games")
.alias(x)
.pipe(
fields(this.gameFields),
or(
and(
...this.buildAndClauses("name", "~", query[x].name),
where("game_type.type", "!=", "Mod"),
where("game_type.type", "!=", "DLC"),
...this.buildPlatformClause("~", query[x].platform)
),
and(
...this.buildAndClauses(
"alternative_names.name",
"~",
query[x].name
),
where("game_type.type", "!=", "Mod"),
where("game_type.type", "!=", "DLC"),
...this.buildPlatformClause("~", query[x].platform)
),
and(
...this.buildAndClauses(
"game_localizations.name",
"~",
query[x].name
),
where("game_type.type", "!=", "Mod"),
where("game_type.type", "!=", "DLC"),
...this.buildPlatformClause("~", query[x].platform)
)
),
sort("name", "asc"),
limit(1)
)
);
}
return multiQuery;
}
buildAndClauses(field, op, string) {
let andClauses = [];
let name = [...new Set(string.split(" "))].filter((n) => n); //dedupe;
for (let x in name) {
andClauses.push(where(field, op, name[x], WhereFlags.CONTAINS));
}
return andClauses;
}
buildPlatformClause(op, string) {
if (string == "Others") return [];
//special garbage because SOMEONE doesn't value consistency
string = string.replace("Nintendo Wii", "Wii");
string = string.replace("Nintendo Game Boy", "Game Boy");
string = string.replace("Nintendo Satellaview", "Satellaview");
string = string.replace("Sony PlayStation", "PlayStation");
string = string.replace("Microsoft Xbox", "Xbox");
string = string.replace("Commodore 64", "Commodore C64");
string = string.replace("Commodore Amiga", "Amiga");
return [where("platforms.name", op, string, WhereFlags.CONTAINS)];
}
normalizeName(filename) {
if (!filename) return;
return filename
@@ -161,58 +89,102 @@ export default class MetadataSearch {
.trim();
}
async getGamesMetadata(games) {
async getIGDBGamesCount(retrying = false) {
try {
if (!this.authorized || !games.length) return;
let gameQuery = [];
for (let x in games) {
if (!(await games[x].getDetails()))
if (!games[x].nongame) {
if (!games[x].blockmetadata) {
gameQuery.push({
name: this.normalizeName(games[x].filename),
platform: games[x].category,
id: x,
});
}
}
}
if (!gameQuery.length) return;
let gameMetas = await this.queue.enqueue(this.getMetadata, this, gameQuery)
debugPrint(JSON.stringify(gameMetas, null, 2));
if (!gameMetas.length) return;
for (let x in gameMetas) {
if (gameMetas[x].result.length) {
await this.addMetadataToDb(
gameMetas[x].result[0],
games[gameQuery[x].id]
);
} else {
games[x].blockmetadata = true;
games[x].save();
}
}
if (!this.authorized) return 0;
const { data } = await this.client
.request("games/count")
.pipe(
and(
where("game_type.type", "!=", "Mod"),
where("game_type.type", "!=", "DLC")
)
)
.execute();
return data.count;
} catch (error) {
console.error("Error getting metadata:", error);
if (error.code === "ERR_BAD_REQUEST" && !retrying) {
this.setupClient();
return this.getIGDBGamesCount(true);
}
console.error("Error getting IGDB games count:", error);
return 0;
}
}
async queueGetGamesMetadata(games) {
try {
await this.getGamesMetadata(games); //we don't actually care as long as it finishes
let details = await Promise.all(games.map((game) => game.getDetails()));
let combined = [];
//make sure the metadata gets included with the gamedata
for (let x in games) {
combined.push({
file: games[x].dataValues,
metadata: details[x]?.dataValues,
});
async matchAllMetadata() {
let games = await File.findAndCountAll({
where: {
nongame: false,
},
limit: 1000,
});
for (let x in games) {
let game = games[x];
let metadata = await Metadata.searchByText(game.filename, game.category);
if (metadata) {
await game.setDetails(metadata);
await metadata.addFile(game);
}
}
}
async syncAllMetadata(retrying = false) {
try {
const timer = new Timer();
if (!this.authorized) {
console.log(
"Twitch credentials are unavailable or invalid; metadata sync is unavailable."
);
return;
}
console.log("Syncing all metadata...");
let count = await this.getIGDBGamesCount();
let pageSize = 500;
let pages = Math.ceil(count / pageSize);
let retryCount = 0;
for (let x = 0; x < pages; x++) {
if (retryCount == 5) continue;
singleLineStatus(
`Syncing metadata: ${x * 500} / ${count} ${(
((x * 500) / count) *
100
).toFixed(2)}% (${timer.elapsed()})`
);
try {
let { data } = await this.client
.request("games")
.pipe(
limit(pageSize),
offset(x * pageSize),
fields(this.gameFields)
)
.execute();
for (let y in data) {
await this.addMetadataToDb(data[y]);
}
} catch (error) {
if (error.code === "ERR_BAD_RESPONSE") {
x--;
await this.sleep(1000);
retryCount++;
console.log(
`Retrieving metadata at offset ${
x * 500
} failed. Retry count: ${retryCount}`
);
continue;
}
throw error; //hoist it up
}
retryCount = 0;
}
return combined;
} catch (error) {
console.error("Error getting metadata:", error);
return []
if (error.code === "ERR_BAD_REQUEST" && !retrying) {
this.setupClient();
return this.syncAllMetadata(true);
}
console.error("Error syncing all metadata:", error);
}
}
@@ -223,23 +195,6 @@ export default class MetadataSearch {
md = await Metadata.build(
{
id: metadata.id,
title: metadata.name,
description: metadata.summary,
rating: metadata.total_rating,
coverartid: metadata.cover?.image_id,
releasedate: metadata.first_release_date
? new Date(metadata.first_release_date * 1000)
: null,
genre: JSON.stringify(metadata.genres?.map((genre) => genre.name)),
gamemodes: JSON.stringify(
metadata.game_modes?.map((gm) => gm.name)
),
platforms: JSON.stringify(
metadata.platforms?.map((platform) => platform.name)
),
screenshots: JSON.stringify(metadata.screenshots?.map((ss) => ss.image_id)),
videos: JSON.stringify(metadata.videos?.map((v) => v.video_id))
},
{
returning: true,
@@ -248,17 +203,25 @@ export default class MetadataSearch {
}
);
}
//these don't work right unless I do them after the fact.
md.developers = JSON.stringify(
metadata.involved_companies
?.filter((ic) => ic.developer)
?.map((ic) => ic.company.name)
);
md.publishers = JSON.stringify(
metadata.involved_companies
?.filter((ic) => ic.publisher)
?.map((ic) => ic.company.name)
);
md.title = metadata.name;
md.description = metadata.summary;
md.rating = metadata.total_rating;
md.coverartid = metadata.cover?.image_id;
md.releasedate = metadata.first_release_date
? new Date(metadata.first_release_date * 1000)
: null;
md.genre = metadata.genres?.map((genre) => genre.name);
md.gamemodes = metadata.game_modes?.map((gm) => gm.name);
md.platforms = metadata.platforms?.map((platform) => this.platformMap[platform.name] || platform.name);
md.screenshots = metadata.screenshots?.map((ss) => ss.image_id);
md.videos = metadata.videos?.map((v) => v.video_id);
md.developers = metadata.involved_companies
?.filter((ic) => ic.developer)
?.map((ic) => ic.company.name);
md.publishers = metadata.involved_companies
?.filter((ic) => ic.publisher)
?.map((ic) => ic.company.name);
let alternates = [];
if (metadata.alternative_names) {
alternates.push(
@@ -276,12 +239,18 @@ export default class MetadataSearch {
}))
);
}
//this needs to remain json as we want the keys to be retained
md.alternatetiles = JSON.stringify(alternates);
await md.save();
await game.setDetails(md);
await md.addFile(game);
if (game) {
await game.setDetails(md);
await md.addFile(game);
}
} catch (error) {
console.error("Error adding metadata:", error);
}
}
async sleep(delay) {
return new Promise((resolve) => setTimeout(resolve, delay));
}
}

View File

@@ -1,58 +1,105 @@
import { DataTypes } from "sequelize"
import { DataTypes, DATE } from "sequelize";
export default function (sequelize) {
const Metadata = sequelize.define('Metadata', {
id: {//these will match the igdbid to make things a little easier
type: DataTypes.INTEGER,
primaryKey: true,
const Metadata = sequelize.define(
"Metadata",
{
id: {
//these will match the igdbid to make things a little easier
type: DataTypes.INTEGER,
primaryKey: true,
},
title: {
type: DataTypes.STRING,
allowNull: false,
},
alternatetitles: {
type: DataTypes.STRING(1024),
},
description: {
type: DataTypes.STRING(16384),
},
rating: {
type: DataTypes.STRING,
},
coverartid: {
type: DataTypes.STRING,
},
releasedate: {
type: DataTypes.DATEONLY,
},
genre: {
type: DataTypes.ARRAY(DataTypes.STRING),
},
developers: {
type: DataTypes.ARRAY(DataTypes.STRING),
},
publishers: {
type: DataTypes.ARRAY(DataTypes.STRING),
},
gamemodes: {
type: DataTypes.ARRAY(DataTypes.STRING),
},
platforms: {
type: DataTypes.ARRAY(DataTypes.STRING),
},
screenshots: {
type: DataTypes.ARRAY(DataTypes.STRING),
},
videos: {
type: DataTypes.ARRAY(DataTypes.STRING),
},
searchVector: {
type: DataTypes.TSVECTOR,
allowNull: true,
},
},
{
indexes: [
{ fields: ["title"] },
{
name: "metadata_search_idx",
using: "gin",
fields: ["searchVector"],
},
title: {
type: DataTypes.STRING,
allowNull: false
},
alternatetitles: {
type: DataTypes.STRING
},
description: {
type: DataTypes.STRING(2048)
},
rating: {
type: DataTypes.STRING
},
coverartid: {
type: DataTypes.STRING
},
releasedate: {
type: DataTypes.DATEONLY
},
//anything that stores as json make the limit much higher
genre: {
type: DataTypes.STRING(2048)
},
developers: {
type: DataTypes.STRING(2048)
},
publishers: {
type: DataTypes.STRING(2048)
},
gamemodes:{
type: DataTypes.STRING(2048)
},
platforms: {
type: DataTypes.STRING(2048)
},
screenshots: {
type: DataTypes.STRING(2048)
},
videos:{
type: DataTypes.STRING(2048)
}
}, {
indexes: [
{ fields: ['title'] },
{ fields: ['description'] },//If this slows down the db may want to not index this.
]
})
return Metadata
}
],
}
);
// "addVector" hook: before each save, have the database compute the English
// text-search vector of the title (empty string when the title is falsy) and
// store it on the instance's searchVector.
Metadata.beforeSave("addVector", async (instance) => {
  const vectorSql = "\nSELECT to_tsvector('english', $1)\n";
  const bindValues = [instance.title || ""];
  // Without a query type the call resolves to [rows, metadata].
  const [rows] = await sequelize.query(vectorSql, {
    bind: bindValues,
    raw: true,
  });
  instance.searchVector = rows[0].to_tsvector;
});
/**
 * Class method: full-text search over the stored search vectors.
 *
 * Rows whose searchVector matches the query are returned best-ranked first.
 *
 * SQL keywords are written into the statement itself, and only values go
 * through `replacements`. Sequelize escapes every replacement as a literal,
 * so passing a whole clause ("AND ...", "limit 1") that way produces invalid
 * SQL, and interpolating `platform` into the string allows SQL injection.
 *
 * @param {string} searchQuery - Free text to search for.
 * @param {string} [platform] - When truthy, only rows whose `platforms` array
 *   contains this exact value are returned.
 * @param {number} [limit=1] - Maximum number of rows; anything that is not a
 *   positive integer falls back to 1.
 * @returns {Promise<Array>} Matching Metadata instances (possibly empty).
 */
Metadata.searchByText = async function (searchQuery, platform, limit = 1) {
  const requestedLimit = Number.parseInt(limit, 10);
  const replacements = {
    search: searchQuery,
    limit:
      Number.isInteger(requestedLimit) && requestedLimit > 0
        ? requestedLimit
        : 1,
  };
  let platformClause = "";
  if (platform) {
    platformClause = "AND :platform = ANY(platforms)";
    replacements.platform = platform;
  }
  const query = `
    SELECT * FROM "Metadata"
    WHERE "searchVector" @@ plainto_tsquery('english', :search) ${platformClause}
    ORDER BY ts_rank("searchVector", plainto_tsquery('english', :search)) DESC
    LIMIT :limit
  `;
  return await sequelize.query(query, {
    model: Metadata,
    replacements,
    type: sequelize.QueryTypes.SELECT,
  });
};
return Metadata;
}