Separate the title and alternate-title search metadata, as combining them was lowering the quality of matches

This commit is contained in:
Alexandra
2025-05-30 06:52:24 -06:00
parent 5c5aa236e4
commit e7bfcdc87a
3 changed files with 63 additions and 20 deletions

View File

@@ -90,8 +90,8 @@ export default class MetadataSearch {
async getIGDBGamesCount(retrying = false) {
try {
// hack to ensure the client is ready before we do anything
while(!this.ready){
await this.sleep(500)
while (!this.ready) {
await this.sleep(500);
}
if (this.authorized === false) return 0;
const { data } = await this.client
@@ -118,7 +118,7 @@ export default class MetadataSearch {
let games = await File.findAndCountAll({
where: {
nongame: false,
detailsId: null,
//detailsId: null,
},
limit: 1000,
order: ["id", "filename"],
@@ -132,10 +132,10 @@ export default class MetadataSearch {
games = await File.findAndCountAll({
where: {
nongame: false,
detailsId: null,
//detailsId: null,
},
limit: 1000,
offset: x * 1000,
offset: x * 1000 - found,
order: ["id", "filename"],
include: { model: Metadata, as: "details" },
});
@@ -148,9 +148,18 @@ export default class MetadataSearch {
);
let game = games.rows[y];
let metadata = await Metadata.searchByText(
"title",
this.normalizeName(game.filename),
game.category
);
if (!metadata.length) {
// repeat the search under one of the alternate titles
metadata = await Metadata.searchByText(
"alternatetitles",
this.normalizeName(game.filename),
game.category
);
}
if (metadata.length) {
let md = await Metadata.findByPk(metadata[0].id);
await game.setDetails(md);
@@ -161,10 +170,19 @@ export default class MetadataSearch {
} else if (fuzzy) {
//this is much slower and should only be used if the faster full text search can't find it.
let metadata = await Metadata.fuzzySearchByText(
"title",
this.normalizeName(game.filename),
0.8,
game.category
);
if (!metadata.length) {
metadata = await Metadata.fuzzySearchByText(
"alternatetitles",
this.normalizeName(game.filename),
0.8,
game.category
);
}
if (metadata.length) {
let md = await Metadata.findByPk(metadata[0].id);
await game.setDetails(md);
@@ -182,6 +200,10 @@ export default class MetadataSearch {
async syncAllMetadata(retrying = false) {
try {
const timer = new Timer();
// hack to ensure the client is ready before we do anything
while (!this.ready) {
await this.sleep(500);
}
if (!this.authorized) {
console.log(
"Twitch credentials are unavailable or invalid; metadata sync is unavailable."

View File

@@ -49,7 +49,11 @@ export default function (sequelize) {
videos: {
type: DataTypes.ARRAY(DataTypes.STRING),
},
searchVector: {
titleVector: {
type: DataTypes.TSVECTOR,
allowNull: true,
},
alternatetitlesVector: {
type: DataTypes.TSVECTOR,
allowNull: true,
},
@@ -58,9 +62,14 @@ export default function (sequelize) {
indexes: [
{ fields: ["title"] },
{
name: "metadata_search_idx",
name: "metadata_search_t_idx",
using: "gin",
fields: ["searchVector"],
fields: ["titleVector"],
},
{
name: "metadata_search_at_idx",
using: "gin",
fields: ["alternatetitlesVector"],
},
],
}
@@ -71,36 +80,45 @@ export default function (sequelize) {
const alternateTitles =
JSON.parse(instance.alternatetitles || "[]")
const titles = Object.values(alternateTitles).join(', ')
const query = `
SELECT to_tsvector('english', $1 || ', ' || $2)
let query = `
SELECT to_tsvector('english', $1)
`;
const [results] = await sequelize.query(query, {
bind: [title, titles],
let [results] = await sequelize.query(query, {
bind: [title],
raw: true,
});
instance.searchVector = results[0].to_tsvector;
instance.titleVector = results[0].to_tsvector;
query = `
SELECT to_tsvector('english', $1)
`;
[results] = await sequelize.query(query, {
bind: [titles],
raw: true,
});
instance.alternatetitlesVector = results[0].to_tsvector;
});
/**
 * Class method for full-text search over one tsvector column of "Metadata".
 *
 * @param {string} field - "title" or "alternatetitles"; selects the
 *   "titleVector" or "alternatetitlesVector" column respectively.
 * @param {string} searchQuery - Plain text handed to
 *   plainto_tsquery('english', ...).
 * @param {string} [platform] - When set and not "Others", only rows whose
 *   `platforms` array contains this value are returned.
 * @param {number} [limit=1] - Maximum number of rows to return.
 * @returns {Promise<Array>} Rows selected with `SELECT id`, mapped through
 *   the Metadata model, ordered by ascending title length.
 * @throws {Error} If `field` is not one of the searchable fields.
 * @throws {RangeError} If `limit` is not a non-negative integer.
 */
Metadata.searchByText = async function (field, searchQuery, platform, limit = 1) {
  // Bind parameters are always transmitted as values, never as identifiers,
  // so a column name passed through `bind` is compared as a constant string
  // instead of the row's tsvector. Resolve the column against a fixed
  // allow-list and splice the quoted identifier into the SQL text instead.
  const vectorColumns = new Map([
    ["title", "titleVector"],
    ["alternatetitles", "alternatetitlesVector"],
  ]);
  const vectorColumn = vectorColumns.get(field);
  if (vectorColumn === undefined) {
    throw new Error(`Unsupported full-text search field: ${field}`);
  }

  // LIMIT is interpolated, so make sure it can only ever be an integer.
  const rowLimit = Number.parseInt(limit, 10);
  if (!Number.isInteger(rowLimit) || rowLimit < 0) {
    throw new RangeError(`Invalid search limit: ${limit}`);
  }

  // Only bind values that the statement actually references; the platform
  // placeholder is numbered from the current length of the bind list.
  const bind = [searchQuery];
  let platformClause = "";
  if (platform && platform !== "Others") {
    bind.push(platform);
    platformClause = `AND $${bind.length} = ANY(platforms)`;
  }

  const query = `
    SELECT id FROM "Metadata"
    WHERE "${vectorColumn}" @@ plainto_tsquery('english', $1) ${platformClause}
    ORDER BY length(title) LIMIT ${rowLimit}
  `;
  return await sequelize.query(query, {
    model: Metadata,
    bind,
    type: sequelize.QueryTypes.SELECT,
  });
};
Metadata.fuzzySearchByText = async function (
field,
searchQuery,
fuzziness,
platform,
@@ -109,19 +127,19 @@ export default function (sequelize) {
fuzziness = fuzziness || 0.6;
let platformClause = "";
let limitClause = `LIMIT ${limit}`;
if (platform) {
if (platform && platform != "Others") {
platformClause = `AND '${platform}' = ANY(platforms)`;
}
const query = `
SELECT id FROM "Metadata"
WHERE SIMILARITY(title, $1) > $2 OR SIMILARITY(alternatetitles, $1) > $2
WHERE SIMILARITY($1, $2) > $3
${platformClause}
ORDER BY length(title) ${limitClause}
`;
return await sequelize.query(query, {
model: Metadata,
bind: [searchQuery, fuzziness],
bind: [field, searchQuery, fuzziness],
type: sequelize.QueryTypes.SELECT,
});
};

View File

@@ -578,3 +578,6 @@ if (
}
cron.schedule("0 30 2 * * *", getFilesJob);
await metadataSearch.syncAllMetadata()
await metadataSearch.matchAllMetadata()