search_awesome/cli.ts
2022-12-01 23:13:07 +09:00

214 lines
4.9 KiB
TypeScript

import { expandGlob } from 'std/fs/mod.ts';
import { config } from "std/dotenv/mod.ts";
import { chunk } from "https://deno.land/std/collections/chunk.ts"
import { Client as ElasticsearchClient, DocumentsBulkRequest } from "https://deno.land/x/elasticsearch@v8.3.3/mod.ts";
import { Doc, DocParser, readDoc } from './doc_load/load.ts';
import ProgressBar from 'https://deno.land/x/progress@v1.3.0/mod.ts';
import { Command } from "cliffy";
// Environment variables loaded through std/dotenv's `config` (called with `export: true`).
const env = await config({ export: true });

// Credentials for the Elasticsearch node, taken from ELASTIC_USERNAME / ELASTIC_PASSWORD.
const elasticAuth = {
  username: env['ELASTIC_USERNAME'],
  password: env['ELASTIC_PASSWORD'],
};

// Single client instance shared by every command in this file.
const client = new ElasticsearchClient({
  node: 'https://localhost:9200',
  auth: elasticAuth,
});
/**
 * Creates the `github-awesome` index with its field mappings.
 *
 * `name`, `desc` and `readme` are mapped as `text`; `url`, `author` and `tags`
 * as `keyword` (`tags` with `ignore_above: 256`); `star` and `fork` as `integer`.
 * Logs the raw response followed by a one-line status based on `acknowledged`.
 */
async function createIndex() {
  const res = await client.indices.create({
    index: 'github-awesome',
    body: {
      mappings: {
        properties: {
          "name": { type: "text" },
          "desc": { type: "text" },
          "url": { type: "keyword" },
          "star": { type: "integer" },
          "fork": { type: "integer" },
          "author": { type: "keyword" },
          "tags": { type: "keyword", "ignore_above": 256 },
          "readme": { type: "text" },
        },
      },
    },
  });
  console.log(res);

  const status = res.acknowledged ? 'Index created' : 'Index creation failed';
  console.log(status);
}
/**
 * Deletes the `github-awesome` index.
 *
 * Logs the raw response followed by a one-line status based on `acknowledged`.
 */
async function deleteIndex() {
  const res = await client.indices.delete({ index: 'github-awesome' });
  console.log(res);

  const status = res.acknowledged ? 'Index deleted' : 'Index deletion failed';
  console.log(status);
}
/**
 * Reads the documents at the given paths and bulk-indexes them into `github-awesome`.
 *
 * Paths are split into chunks of `chunkSize`; the files of a chunk are read
 * concurrently and sent as one bulk request. Every document is indexed under the
 * id `<author>/<name>`, and its `from_url` field (when set) is removed first.
 *
 * A bulk response that reports errors does not abort the run: the failure is
 * reported (through the progress bar when enabled, otherwise on stdout) and
 * indexing continues with the next chunk.
 *
 * @param path files to index
 * @param chunkSize number of documents per bulk request (default 1000)
 * @param progressBar render a progress bar, advanced once per chunk (default false)
 */
async function bulkIndex(path: string[], {
  chunkSize = 1000,
  progressBar = false,
}: { chunkSize?: number; progressBar?: boolean } = {}) {
  const ch = chunk(path, chunkSize);
  // Only build the bar when it will actually be drawn.
  const bar = progressBar
    ? new ProgressBar({
      total: ch.length,
      title: 'Indexing',
      width: 50,
    })
    : undefined;
  let i = 0;
  for (const pathes of ch) {
    const docs = await Promise.all(pathes.map(async (docPath) => {
      const doc = await readDoc(docPath);
      if (doc.from_url) {
        delete doc.from_url;
      }
      return [
        {
          index: {
            _id: doc.author + "/" + doc.name,
          }
        },
        doc
      ] as [{ index: { _id: string } }, Doc];
    }));
    // The bulk body alternates action line and document, hence the flat().
    const res = await client.documents.bulk({
      target: 'github-awesome',
      body: docs.flat(),
    } as DocumentsBulkRequest<Doc>) as {
      errors: boolean,
      took: number,
      items: unknown[]
    };
    if (res.errors) {
      // Each response item is keyed by its action name and wraps the per-document
      // result; results carrying an `error` member are the ones that failed.
      const failedIds: string[] = [];
      for (const item of res.items ?? []) {
        if (typeof item !== 'object' || item === null) {
          continue;
        }
        for (const result of Object.values(item)) {
          if (typeof result === 'object' && result !== null && 'error' in result) {
            failedIds.push(String((result as { _id?: unknown })._id));
          }
        }
      }
      let message = `error occurs! (chunk ${i + 1}/${ch.length})`;
      if (failedIds.length > 0) {
        message += ` failed ids: ${failedIds.join(', ')}`;
      }
      if (bar) {
        bar.console(message);
      } else {
        console.log(message);
      }
    }
    i += 1;
    if (bar) {
      bar.render(i);
    }
  }
  if (bar) {
    bar.end();
  }
}
/**
 * Runs a `multi_match` query against the `github-awesome` index.
 *
 * @param query text matched against `name`, `desc`, `tags`, `author` and `readme`
 * @param size maximum number of hits requested (default 10)
 * @param from offset of the first hit requested (default 0)
 * @returns the `hits.hits` array of the search response
 */
async function test_search(query: string, {
  size = 10,
  from = 0,
}) {
  const fields = ['name', 'desc', 'tags', 'author', 'readme'];
  const { hits } = await client.search<Doc>({
    target: 'github-awesome',
    body: {
      query: {
        multi_match: { query, fields },
      },
      from,
      size,
    },
  });
  return hits.hits;
}
/**
 * Builds the `github-awesome` command-line interface and runs it on `Deno.args`.
 *
 * Sub-commands:
 * - `index [path...]`: expand the given glob patterns and bulk-index the matches
 *   (`--chunk-size`, `--progress-bar`)
 * - `search <query>`: query the index and print id/score per hit, or the raw hits
 *   as JSON with `--json` (`--size`, `--from`)
 * - `create-index`: create the index
 * - `delete-index`: delete the index
 */
async function main() {
  const cmd = new Command();
  cmd
    .name('github-awesome')
    .version('0.1.0')
    .description('github-awesome search engine cli');

  cmd
    .command('index [path...]')
    .description('index github-awesome. glob pattern is supported.')
    .option('-c, --chunk-size <chunkSize:number>', 'chunk size', {
      default: 200,
    })
    .option('-p, --progress-bar', 'show progress bar')
    .action(async ({chunkSize, progressBar}, ...path: string[]) => {
      const pathes: string[] = [];
      for (const pattern of path) {
        // A glob can also match directories; those cannot be read as documents
        // and would abort the whole run, so they are excluded from the expansion.
        for await (const entry of expandGlob(pattern, { includeDirs: false })) {
          pathes.push(entry.path);
        }
      }
      if (pathes.length === 0) {
        console.log('no path found');
        return;
      }
      await bulkIndex(pathes, {
        chunkSize,
        progressBar
      });
    });

  cmd
    .command('search <query>')
    .description('search github-awesome')
    .option('-s, --size <size:number>', 'size', {
      default: 10,
    })
    .option('-f, --from <from:number>', 'from', {
      default: 0,
    })
    .option('-j, --json', 'output json')
    .action(async ({size, from, json}, query: string) => {
      const s = await test_search(query, {
        size,
        from,
      });
      if (s.length === 0) {
        console.log('no result found');
        return;
      }
      if (json) {
        console.log(JSON.stringify(s, null, 2));
      }
      else {
        for (const doc of s) {
          console.log("id :",doc._id);
          console.log("score :",doc._score);
          console.log();
        }
      }
    });

  cmd
    .command('create-index')
    .description('create index')
    .action(async () => {
      await createIndex();
    });

  cmd
    .command('delete-index')
    .description('delete index')
    .action(async () => {
      await deleteIndex();
    });

  await cmd.parse(Deno.args);
}
// Run the CLI only when this module is the program's entry point.
if (import.meta.main) await main();