2022-12-01 19:40:03 +09:00
|
|
|
import { expandGlob } from 'std/fs/mod.ts';
|
|
|
|
import { config } from "std/dotenv/mod.ts";
|
|
|
|
import { chunk } from "https://deno.land/std/collections/chunk.ts"
|
|
|
|
import { Client as ElasticsearchClient } from "https://deno.land/x/elasticsearch@v8.3.3/mod.ts";
|
|
|
|
import { Doc, DocParser, readDoc } from './doc_load/load.ts';
|
|
|
|
import ProgressBar from 'https://deno.land/x/progress@v1.3.0/mod.ts';
|
|
|
|
import { Command } from "cliffy";
|
2022-11-29 17:55:06 +09:00
|
|
|
|
2022-12-01 19:40:03 +09:00
|
|
|
|
|
|
|
// Load variables from the dotenv file; `export: true` asks the loader to also
// export them to the process environment.
const env = await config({ export: true });

// Elasticsearch client used by every command in this script. Credentials come
// from the ELASTIC_USERNAME / ELASTIC_PASSWORD entries of the loaded config.
const client = new ElasticsearchClient({
  node: 'https://localhost:9200',
  auth: {
    username: env.ELASTIC_USERNAME,
    password: env.ELASTIC_PASSWORD,
  },
});
|
|
|
|
|
|
|
|
/**
 * Creates the `github-awesome` index with an explicit field mapping.
 *
 * The raw response is printed first, followed by a one-line status derived
 * from the response's `acknowledged` flag.
 */
async function createIndex() {
  const response = await client.indices.create({
    index: 'github-awesome',
    body: {
      mappings: {
        properties: {
          name: { type: "text" },
          desc: { type: "text" },
          url: { type: "keyword" },
          star: { type: "integer" },
          fork: { type: "integer" },
          author: { type: "keyword" },
          tags: { type: "keyword", ignore_above: 256 },
          readme: { type: "text" },
        },
      },
    },
  });

  console.log(response);

  const status = response.acknowledged ? 'Index created' : 'Index creation failed';
  console.log(status);
}
|
|
|
|
|
|
|
|
/**
 * Deletes the `github-awesome` index.
 *
 * The raw response is printed first, followed by a one-line status derived
 * from the response's `acknowledged` flag.
 */
async function deleteIndex() {
  const response = await client.indices.delete({ index: 'github-awesome' });

  console.log(response);

  const status = response.acknowledged ? 'Index deleted' : 'Index deletion failed';
  console.log(status);
}
|
|
|
|
|
|
|
|
/**
 * Reads the documents at the given file paths and sends them to the
 * `github-awesome` index through the bulk endpoint, one request per chunk.
 *
 * @param path        File paths of the documents to index.
 * @param chunkSize   Number of documents per bulk request (default 1000).
 * @param progressBar When true, a progress bar is rendered after each chunk
 *                    and closed once all chunks have been sent.
 */
async function bulkIndex(path: string[], {
  chunkSize = 1000,
  progressBar = false,
}) {
  const batches = chunk(path, chunkSize);
  const bar = new ProgressBar({
    total: batches.length,
    title: 'Indexing',
    width: 50,
  });

  let completed = 0;
  for (const batch of batches) {
    // Each document becomes an [action, source] pair; the files of one batch
    // are read concurrently.
    const pairs = await Promise.all(
      batch.map(async (file) => {
        const doc = await readDoc(file);
        // `from_url` is removed before the document is sent.
        if (doc.from_url) {
          delete doc.from_url;
        }
        const action = {
          create: {
            _id: doc.author + "/" + doc.name,
          },
        };
        return [action, doc] as [{ create: { _id: string } }, Doc];
      }),
    );

    // The bulk body is the flattened sequence action, source, action, source, ...
    await client.documents.bulk({
      target: 'github-awesome',
      body: pairs.flat(),
    });

    if (progressBar) {
      completed += 1;
      bar.render(completed);
    }
  }

  if (progressBar) {
    bar.end();
  }
}
|
|
|
|
|
|
|
|
/**
 * Runs a `multi_match` query against the `github-awesome` index.
 *
 * @param query Text to search for.
 * @param size  Maximum number of hits to request (default 10).
 * @param from  Offset of the first hit to request (default 0).
 * @returns The `hits.hits` array of the search response.
 */
async function test_search(query: string, {
  size = 10,
  from = 0,
}) {
  // Fields the query text is matched against.
  const searchedFields = ['name', 'desc', 'tags', 'author', 'readme'];

  const { hits } = await client.search<Doc>({
    target: 'github-awesome',
    body: {
      query: {
        multi_match: {
          query,
          fields: searchedFields,
        },
      },
      from,
      size,
    },
  });

  return hits.hits;
}
|
|
|
|
|
|
|
|
/**
 * Builds the `github-awesome` command-line interface and runs it against
 * `Deno.args`.
 *
 * Sub-commands:
 * - `index [path...]`  expands each glob pattern and bulk-indexes the matches
 * - `search <query>`   runs a search and prints the hits (ids/scores or JSON)
 * - `create-index`     creates the index
 * - `delete-index`     deletes the index
 */
async function main() {
  const cli = new Command();

  cli
    .name('github-awesome')
    .version('0.1.0')
    .description('github-awesome search engine cli');

  // index: resolve glob patterns to file paths, then bulk-index them.
  cli
    .command('index [path...]')
    .description('index github-awesome. glob pattern is supported.')
    .option('-c, --chunk-size <chunkSize:number>', 'chunk size', { default: 200 })
    .option('-p, --progress-bar', 'show progress bar')
    .action(async ({ chunkSize, progressBar }, ...path: string[]) => {
      const matchedFiles: string[] = [];
      for (const pattern of path) {
        for await (const entry of expandGlob(pattern)) {
          matchedFiles.push(entry.path);
        }
      }

      if (matchedFiles.length === 0) {
        console.log('no path found');
        return;
      }

      await bulkIndex(matchedFiles, { chunkSize, progressBar });
    });

  // search: print the hits either as JSON or as id/score pairs.
  cli
    .command('search <query>')
    .description('search github-awesome')
    .option('-s, --size <size:number>', 'size', { default: 10 })
    .option('-f, --from <from:number>', 'from', { default: 0 })
    .option('-j, --json', 'output json')
    .action(async ({ size, from, json }, query: string) => {
      const hits = await test_search(query, { size, from });

      if (hits.length === 0) {
        console.log('no result found');
        return;
      }

      if (json) {
        console.log(JSON.stringify(hits, null, 2));
        return;
      }

      for (const hit of hits) {
        console.log("id :", hit._id);
        console.log("score :", hit._score);
        console.log();
      }
    });

  cli
    .command('create-index')
    .description('create index')
    .action(() => createIndex());

  cli
    .command('delete-index')
    .description('delete index')
    .action(() => deleteIndex());

  await cli.parse(Deno.args);
}
|
|
|
|
|
|
|
|
// Run the CLI only when this module is the program entry point,
// not when it is imported by another module.
if (import.meta.main) await main();
|