perf: ️ init

This commit is contained in:
bubkoo 2020-09-27 21:29:47 +08:00
parent b8ebe961f2
commit 05eddff2c7
17 changed files with 3300 additions and 2 deletions

16
.editorconfig Executable file
View File

@ -0,0 +1,16 @@
# http://editorconfig.org
root = true
[*]
indent_style = space
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.md]
trim_trailing_whitespace = false
[Makefile]
indent_style = tab

View File

@ -0,0 +1,18 @@
name: Potential Duplicates
on:
issues:
types: [opened, edited]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: 🚧 Install
run: |
yarn
- name: 📦 Build
run: |
yarn build
- uses: ./
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

13
.github/workflows/release.yml vendored Normal file
View File

@ -0,0 +1,13 @@
name: Release
on:
push:
branches:
- master
jobs:
release:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: bubkoo/release-github-action@v1
with:
github_token: ${{ secrets.GITHUB_TOKEN }}

16
.gitignore vendored Normal file
View File

@ -0,0 +1,16 @@
node_modules
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
coverage
*.lcov
.nyc_output
.npm
.env
.env.test
.cache
.DS_Store
lib
dist

3
.prettierignore Normal file
View File

@ -0,0 +1,3 @@
dist/
lib/
node_modules/

8
.prettierrc Normal file
View File

@ -0,0 +1,8 @@
{
"semi": false,
"singleQuote": true,
"printWidth": 80,
"trailingComma": "all",
"proseWrap": "never",
"overrides": [{ "files": ".prettierrc", "options": { "parser": "json" } }]
}

View File

@ -1,2 +1,38 @@
# potential-duplicates # Potential Duplicates
A Github Action to search for potential issue duplicates using DamerauLevenshtein algorithm.
> A GitHub Action to search for potential issue duplicates using the [Damerau–Levenshtein](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) algorithm.
## Usage
Create `.github/workflows/potential-duplicates.yml` in the default branch:
```yaml
name: Potential Duplicates
on:
issues:
types: [opened, edited]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: bubkoo/potential-duplicates@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Label to set, when potential duplicates are detected.
label: potential-duplicate
# Get issues with state to compare. Supported state: 'all', 'closed', 'open'.
state: all
# If similarity is higher than this threshold, issue will be marked as duplicate.
threshold: 0.6
# Comment to post when potential duplicates are detected.
comment: >
Potential duplicates:
{{#issues}}
- [#{{ number }}] {{ title }} ({{ accuracy }}%)
{{/issues}}
```
## License
The scripts and documentation in this project are released under the [MIT License](LICENSE)

33
action.yml Normal file
View File

@ -0,0 +1,33 @@
name: Potential Duplicates
description: Search for potential issue duplicates using the Damerau–Levenshtein algorithm.
author: bubkoo <bubkoo.wy@gmail.com>
inputs:
GITHUB_TOKEN:
description: Your GitHub token for authentication.
required: true
label:
description: Label to set, when potential duplicates are detected.
default: potential-duplicate
required: false
state:
description: "Get issues with state to compare. Supported state: 'all', 'closed', 'open'."
default: all
required: false
threshold:
description: If similarity is higher than this threshold, the issue will be marked as a potential duplicate.
default: 0.6
required: false
comment:
description: Comment to post when potential duplicates are detected.
default: >
Potential duplicates:
{{#issues}}
- [#{{ number }}] {{ title }} ({{ accuracy }}%)
{{/issues}}
required: false
runs:
using: node12
main: dist/index.js
branding:
icon: type
color: yellow # gray-dark purple red orange green blue yellow black white

67
package.json Normal file
View File

@ -0,0 +1,67 @@
{
"name": "potential-duplicates",
"description": "A Github Action to search for potential issue duplicates using DamerauLevenshtein algorithm.",
"version": "1.0.0",
"main": "dist/index.js",
"repository": "https://github.com/bubkoo/potential-duplicates",
"author": "bubkoo <bubkoo.wy@gmail.com>",
"license": "MIT",
"files": [
"dist",
"action.yml"
],
"scripts": {
"clean": "rimraf dist",
"lint": "tslint -c tslint.json -p tsconfig.json --fix",
"build": "ncc build src/index.ts --minify --v8-cache",
"prebuild": "run-s lint clean",
"precommit": "lint-staged"
},
"husky": {
"hooks": {
"commit-msg": "commitlint -E HUSKY_GIT_PARAMS",
"pre-commit": "lint-staged && yarn precommit"
}
},
"lint-staged": {
"**/*.{js,jsx,tsx,ts,less,md,json}": [
"pretty-quick --staged"
],
"src/**/*.ts": [
"tslint -c tslint.json -p ./tsconfig.json --fix"
]
},
"commitlint": {
"extends": [
"@commitlint/config-conventional"
]
},
"dependencies": {
"@actions/core": "^1.2.6",
"@actions/github": "^4.0.0",
"js-yaml": "^3.14.0",
"mustache": "^4.0.1"
},
"devDependencies": {
"@commitlint/cli": "^11.0.0",
"@commitlint/config-conventional": "^11.0.0",
"@types/mustache": "^4.0.1",
"@types/js-yaml": "^3.12.5",
"@types/node": "^14.0.27",
"@typescript-eslint/eslint-plugin": "^4.1.1",
"@typescript-eslint/parser": "^4.1.1",
"@vercel/ncc": "^0.24.1",
"eslint": "^7.9.0",
"husky": "^4.3.0",
"lint-staged": "^10.3.0",
"npm-run-all": "^4.1.5",
"prettier": "^2.1.2",
"pretty-quick": "^3.0.2",
"rimraf": "^3.0.2",
"tslint": "^6.1.3",
"tslint-config-airbnb": "^5.11.2",
"tslint-config-prettier": "^1.18.0",
"tslint-eslint-rules": "^5.4.0",
"typescript": "^4.0.3"
}
}

62
src/action.ts Normal file
View File

@ -0,0 +1,62 @@
import * as core from '@actions/core'
import * as github from '@actions/github'
import mustache from 'mustache'
import { Algo } from './algo'
import { Util } from './util'
export namespace Action {
  /**
   * Entry point of the action.
   *
   * When triggered by an `issues` event with action `opened` or `edited`, the
   * title of the triggering issue is compared (via `Algo.compare`) with the
   * titles of the other issues in the repository. Every issue whose score is
   * at least the `threshold` input is treated as a potential duplicate. If
   * any are found, the `label` input (when non-empty) is added to the
   * triggering issue and the `comment` input (when non-empty) is rendered as
   * a mustache template with `{ issues: [{ number, title, accuracy }] }` and
   * posted as a comment. `accuracy` is the score as a rounded percentage.
   *
   * Any error is reported through `core.setFailed` instead of being thrown,
   * so the returned promise does not reject.
   */
  export async function run() {
    try {
      const context = github.context
      const payload = context.payload.issue
      if (!payload || !Util.isValidEvent('issues', ['opened', 'edited'])) {
        return
      }

      // `accuracy >= NaN` is always false, so a malformed threshold would
      // silently disable detection. Fail loudly instead.
      const rawThreshold = core.getInput('threshold')
      const threshold = parseFloat(rawThreshold)
      if (Number.isNaN(threshold)) {
        throw new Error(
          `Input "threshold" must be a number, got "${rawThreshold}"`,
        )
      }

      const octokit = Util.getOctokit()
      const issueNumber: number = payload.number
      const title: string = payload.title

      // A single `listForRepo` call only returns the first page of results;
      // paginate so that every issue in the repository is considered.
      const listed = await octokit.paginate(octokit.issues.listForRepo, {
        ...context.repo,
        state: core.getInput('state') as 'all' | 'open' | 'closed',
        per_page: 100,
      })

      // The issues endpoint also returns pull requests (marked by a
      // `pull_request` key); they are not issue duplicates. The triggering
      // issue itself is skipped as well.
      const candidates = listed.filter(
        (i) => i.number !== issueNumber && !i.pull_request,
      )

      const duplicates: { number: number; title: string; accuracy: number }[] = []
      for (const issue of candidates) {
        const accuracy = Algo.compare(issue.title, title)
        core.debug(`${issue.title} ~ ${title} = ${accuracy}`)
        if (accuracy >= threshold) {
          duplicates.push({
            number: issue.number,
            title: issue.title,
            accuracy: Math.round(accuracy * 100),
          })
        }
      }

      if (!duplicates.length) {
        return
      }

      const label = core.getInput('label')
      if (label) {
        await octokit.issues.addLabels({
          ...context.repo,
          issue_number: issueNumber,
          labels: [label],
        })
      }

      const comment = core.getInput('comment')
      if (comment) {
        const body = mustache.render(comment, {
          issues: duplicates,
        })
        await octokit.issues.createComment({
          ...context.repo,
          body,
          issue_number: issueNumber,
        })
      }
    } catch (e) {
      // Mark the workflow step as failed with a readable message.
      core.setFailed(e instanceof Error ? e.message : String(e))
    }
  }
}

140
src/algo.ts Normal file
View File

@ -0,0 +1,140 @@
import { Dic } from './dic'
export namespace Algo {
  /**
   * Escapes every regular-expression metacharacter in `text` so that the
   * resulting pattern matches `text` literally.
   */
  function escapeRegExp(text: string) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  }

  /**
   * Normalizes a phrase for comparison:
   *
   * 1. lower-cases it,
   * 2. replaces every `Dic.punctuation` entry with a single space,
   * 3. replaces every `Dic.synonyms` variant with its canonical word,
   * 4. collapses whitespace runs and trims the result,
   * 5. removes `Dic.excludes` words that are followed by another word.
   *
   * The result contains words separated by exactly one space, or is empty.
   */
  function prepare(phrase: string) {
    let ret = phrase.toLowerCase()

    for (const punct of Dic.punctuation) {
      // The whole entry must be escaped: escaping only its first character
      // would turn '... ' into `\... `, where the remaining dots match ANY
      // character and eat parts of words such as "v1.ab ".
      ret = ret.replace(new RegExp(escapeRegExp(punct), 'g'), ' ')
    }

    const synonyms: { [word: string]: string[] } = Dic.synonyms
    for (const word of Object.keys(synonyms)) {
      // NOTE: matching is substring based (no word boundaries), so e.g.
      // "packages" becomes "modules".
      ret = ret.replace(new RegExp(synonyms[word].join('|'), 'gi'), word)
    }

    // Punctuation replacement and user input may leave several consecutive
    // spaces; normalize them so no empty "words" are produced later and so
    // the exclusion patterns below (which expect a single whitespace) match.
    ret = ret.replace(/\s+/g, ' ').trim()

    for (const exclude of Dic.excludes) {
      ret = ret.replace(new RegExp(`\\b${exclude}\\s\\b`, 'g'), '')
    }

    return ret
  }

  /**
   * Splits a phrase into its normalized words. Returns an empty list when
   * nothing is left after normalization.
   */
  function tokenize(phrase: string) {
    const prepared = prepare(phrase)
    return prepared ? prepared.split(' ') : []
  }

  /**
   * The Damerau–Levenshtein distance between two words is the minimum number
   * of operations (consisting of insertions, deletions or substitutions of a
   * single character, or transposition of two adjacent characters) required
   * to change one word into the other.
   *
   * @see https://en.wikipedia.org/wiki/Levenshtein_distance
   * @see https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
   * @see https://rosettacode.org/wiki/Levenshtein_distance#JavaScript
   */
  function distance(a: string, b: string) {
    if (a === b) return 0

    const [al, bl] = [a.length, b.length]
    if (!al) return bl
    if (!bl) return al

    // matrix[i][j] = distance between the first i chars of `a` and the
    // first j chars of `b`.
    const matrix: number[][] = []
    for (let i = 0; i <= al; i += 1) {
      matrix[i] = [i]
    }
    for (let j = 0; j <= bl; j += 1) {
      matrix[0][j] = j
    }

    for (let i = 1; i <= al; i += 1) {
      for (let j = 1; j <= bl; j += 1) {
        const cost = a[i - 1] === b[j - 1] ? 0 : 1

        matrix[i][j] = Math.min(
          matrix[i - 1][j] + 1, // deletion
          matrix[i][j - 1] + 1, // insertion
          matrix[i - 1][j - 1] + cost, // substitution
        )

        if (i > 1 && j > 1 && a[i - 1] === b[j - 2] && a[i - 2] === b[j - 1]) {
          matrix[i][j] = Math.min(
            matrix[i][j],
            matrix[i - 2][j - 2] + cost, // transposition
          )
        }
      }
    }

    return matrix[al][bl]
  }

  /**
   * Compares two strings and returns how similar they are. The result is a
   * float in interval [0.0; 1.0]; two empty strings are fully similar.
   */
  function similarity(i: string, j: string) {
    const length = Math.max(i.length, j.length)
    return length === 0 ? 1.0 : (length - distance(i, j)) / length
  }

  // How many points to remove per missing word (see `compare()`):
  const ERROR_ADJ = 0.15

  /**
   * Compares two phrases and returns how similar they are. The result is a
   * float in interval [0.0; 1.0]. The algorithm works as follows:
   *
   * 1. Preparation:
   *    Punctuation symbols are replaced by spaces, synonyms are replaced by
   *    their canonical word and common words are removed. Sentences are
   *    then split into separate words for further analysis. We always
   *    iterate over the list which contains fewer words.
   * 2. Calculations:
   *    For each word in the shorter phrase, we look for its best analogue in
   *    the longer one using the Damerau–Levenshtein distance. The best
   *    similarity of every word is summed up and averaged.
   * 3. Error adjustment:
   *    For each word the longer phrase has in excess, `ERROR_ADJ` is
   *    subtracted from the score. This step is necessary in situations where
   *    the shorter sentence contains only a few words which all have direct
   *    analogues in the longer one. Without error adjustment, this would
   *    give us a result of 1.0. For example:
   *      A: "Testing module foo"
   *      B: "Testing if there's not memory leak in module bar"
   *    The adjusted score is clamped so it never drops below 0.
   *
   * Two phrases without any words score 1.0; a phrase without words compared
   * to a phrase with words scores 0.
   *
   * @todo include phrase-length difference in the observational error
   */
  export function compare(phraseA: string, phraseB: string) {
    const tokensA = tokenize(phraseA)
    const tokensB = tokenize(phraseB)
    const [shorter, longer] =
      tokensA.length <= tokensB.length ? [tokensA, tokensB] : [tokensB, tokensA]

    // Avoid dividing by zero when a phrase has no words at all.
    if (shorter.length === 0) {
      return longer.length === 0 ? 1 : 0
    }

    let total = 0
    for (const word of shorter) {
      let best = 0
      for (const candidate of longer) {
        best = Math.max(best, similarity(word, candidate))
      }
      total += best
    }

    // Direct score:
    total /= shorter.length

    // Error adjustment:
    total -= (longer.length - shorter.length) * ERROR_ADJ

    // A large difference in word count must not push the score below the
    // documented lower bound.
    return Math.max(0, total)
  }
}

33
src/dic.ts Normal file
View File

@ -0,0 +1,33 @@
/**
 * Word lists used to normalize issue titles before they are compared.
 */
export namespace Dic {
  /**
   * Common words that carry no meaning for the comparison. Each entry is
   * interpolated into a regular expression as-is, so entries must not
   * contain regular-expression metacharacters.
   */
  export const excludes = [
    'the',
    'and',
    'a',
    'an',
    'as',
    'at',
    'are',
    'by',
    'when',
    'well',
    'is',
    'it',
    'in',
    'to',
    'till',
    'until',
    'or',
    'on',
    'into',
    'onto',
  ]

  /**
   * Punctuation sequences that are replaced by a single space. They are
   * applied in list order.
   */
  export const punctuation = ['! ', ', ', ' - ', ' ', '... ', '.. ', '. ']

  /**
   * Canonical word => variants that are rewritten to it. Variants are joined
   * with `|` into a regular expression, so they must not contain
   * regular-expression metacharacters.
   */
  export const synonyms = {
    app: ['aplication', 'application', 'client'],
    cli: ['console', 'terminal', 'shell', 'command line interface'],
    null: ['blank', 'empty', 'unfilled', 'nil'],
    module: ['starter', 'package'],
  }
}

3
src/index.ts Normal file
View File

@ -0,0 +1,3 @@
import { Action } from './action'
// Start the action. `run()` returns a promise; a rejection that nobody
// handles would not reliably fail the workflow step, so log it and exit with
// a non-zero status code instead.
Action.run().catch((error) => {
  console.error(error)
  process.exitCode = 1
})

25
src/util.ts Normal file
View File

@ -0,0 +1,25 @@
import * as core from '@actions/core'
import * as github from '@actions/github'
export namespace Util {
  /**
   * Creates an Octokit client authenticated with the required
   * `GITHUB_TOKEN` input.
   */
  export function getOctokit() {
    return github.getOctokit(core.getInput('GITHUB_TOKEN', { required: true }))
  }

  /**
   * Tells whether the workflow was triggered by the given event and,
   * optionally, by one of the given actions of that event.
   *
   * @param event Event name to compare with the context's event name.
   * @param actions A single action or a list of accepted actions. When
   * omitted (or `null`), any action of the event is accepted.
   */
  export function isValidEvent(event: string, actions?: string | string[]) {
    const { eventName, payload } = github.context

    if (event !== eventName) {
      return false
    }
    if (actions == null) {
      return true
    }

    const accepted = Array.isArray(actions) ? actions : [actions]
    return accepted.some((candidate) => candidate === payload.action)
  }
}

18
tsconfig.json Normal file
View File

@ -0,0 +1,18 @@
{
"compilerOptions": {
"module": "commonjs",
"moduleResolution": "node",
"skipLibCheck": true,
"esModuleInterop": true,
"noImplicitAny": true,
"noEmitOnError": true,
"noUnusedLocals": true,
"strictNullChecks": true,
"resolveJsonModule": true,
"experimentalDecorators": true,
"outDir": "lib",
"target": "es5",
"lib": ["dom", "es2015"]
},
"include": ["src/**/*.ts"]
}

17
tslint.json Normal file
View File

@ -0,0 +1,17 @@
{
"extends": [
"tslint-config-airbnb",
"tslint-eslint-rules",
"tslint-config-prettier"
],
"linterOptions": {
"exclude": ["./lib/**/*.d.ts"]
},
"rules": {
"no-construct": true,
"no-debugger": true,
"no-reference": true,
"import-name": false,
"semicolon": [true, "never"]
}
}

2790
yarn.lock Normal file

File diff suppressed because it is too large Load Diff