mirror of
https://github.com/iv-org/close-potential-duplicates.git
synced 2024-11-22 21:39:40 +00:00
perf: ⚡️ init
This commit is contained in:
parent
b8ebe961f2
commit
05eddff2c7
16
.editorconfig
Executable file
16
.editorconfig
Executable file
@ -0,0 +1,16 @@
|
||||
# http://editorconfig.org
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
end_of_line = lf
|
||||
charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
|
||||
[*.md]
|
||||
trim_trailing_whitespace = false
|
||||
|
||||
[Makefile]
|
||||
indent_style = tab
|
18
.github/workflows/potential-duplicates.yml
vendored
Normal file
18
.github/workflows/potential-duplicates.yml
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
name: Potential Duplicates
|
||||
on:
|
||||
issues:
|
||||
types: [opened, edited]
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: 🚧 Install
|
||||
run: |
|
||||
yarn
|
||||
- name: 📦 Build
|
||||
run: |
|
||||
yarn build
|
||||
- uses: ./
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
13
.github/workflows/release.yml
vendored
Normal file
13
.github/workflows/release.yml
vendored
Normal file
@ -0,0 +1,13 @@
|
||||
name: Release
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
jobs:
|
||||
release:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: bubkoo/release-github-action@v1
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
16
.gitignore
vendored
Normal file
16
.gitignore
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
node_modules
|
||||
*.log
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
lerna-debug.log*
|
||||
coverage
|
||||
*.lcov
|
||||
.nyc_output
|
||||
.npm
|
||||
.env
|
||||
.env.test
|
||||
.cache
|
||||
.DS_Store
|
||||
lib
|
||||
dist
|
3
.prettierignore
Normal file
3
.prettierignore
Normal file
@ -0,0 +1,3 @@
|
||||
dist/
|
||||
lib/
|
||||
node_modules/
|
8
.prettierrc
Normal file
8
.prettierrc
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"semi": false,
|
||||
"singleQuote": true,
|
||||
"printWidth": 80,
|
||||
"trailingComma": "all",
|
||||
"proseWrap": "never",
|
||||
"overrides": [{ "files": ".prettierrc", "options": { "parser": "json" } }]
|
||||
}
|
40
README.md
40
README.md
@ -1,2 +1,38 @@
|
||||
# potential-duplicates
|
||||
A Github Action to search for potential issue duplicates using Damerau–Levenshtein algorithm.
|
||||
# Potential Duplicates
|
||||
|
||||
> A GitHub Action to search for potential issue duplicates using the [Damerau–Levenshtein](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) algorithm.
|
||||
|
||||
## Usage
|
||||
|
||||
|
||||
Create `.github/workflows/potential-duplicates.yml` in the default branch:
|
||||
|
||||
```yaml
|
||||
name: Potential Duplicates
|
||||
on:
|
||||
issues:
|
||||
types: [opened, edited]
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: bubkoo/potential-duplicates@v1
|
||||
with:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
# Label to set, when potential duplicates are detected.
|
||||
label: potential-duplicate
|
||||
# Get issues with state to compare. Supported state: 'all', 'closed', 'open'.
|
||||
state: all
|
||||
# If similarity is higher than this threshold, issue will be marked as duplicate.
|
||||
threshold: 0.6
|
||||
# Comment to post when potential duplicates are detected.
|
||||
comment: >
|
||||
Potential duplicates:
|
||||
{{#issues}}
|
||||
- [#{{ number }}] {{ title }} ({{ accuracy }}%)
|
||||
{{/issues}}
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
The scripts and documentation in this project are released under the [MIT License](LICENSE).
|
||||
|
33
action.yml
Normal file
33
action.yml
Normal file
@ -0,0 +1,33 @@
|
||||
name: Potential Duplicates
|
||||
description: Search for potential issue duplicates using the Damerau–Levenshtein algorithm.
|
||||
author: bubkoo <bubkoo.wy@gmail.com>
|
||||
inputs:
|
||||
GITHUB_TOKEN:
|
||||
description: Your GitHub token for authentication.
|
||||
required: true
|
||||
label:
|
||||
description: Label to set, when potential duplicates are detected.
|
||||
default: potential-duplicate
|
||||
required: false
|
||||
state:
|
||||
description: "Get issues with state to compare. Supported state: 'all', 'closed', 'open'."
|
||||
default: all
|
||||
required: false
|
||||
threshold:
|
||||
description: Similarity threshold between 0 and 1. Issues scoring at or above it are reported as potential duplicates.
|
||||
default: 0.6
|
||||
required: false
|
||||
comment:
|
||||
description: Comment to post when potential duplicates are detected.
|
||||
default: >
|
||||
Potential duplicates:
|
||||
{{#issues}}
|
||||
- [#{{ number }}] {{ title }} ({{ accuracy }}%)
|
||||
{{/issues}}
|
||||
required: false
|
||||
runs:
|
||||
using: node12
|
||||
main: dist/index.js
|
||||
branding:
|
||||
icon: type
|
||||
color: yellow # gray-dark purple red orange green blue yellow black white
|
67
package.json
Normal file
67
package.json
Normal file
@ -0,0 +1,67 @@
|
||||
{
|
||||
"name": "potential-duplicates",
|
||||
"description": "A Github Action to search for potential issue duplicates using Damerau–Levenshtein algorithm.",
|
||||
"version": "1.0.0",
|
||||
"main": "dist/index.js",
|
||||
"repository": "https://github.com/bubkoo/potential-duplicates",
|
||||
"author": "bubkoo <bubkoo.wy@gmail.com>",
|
||||
"license": "MIT",
|
||||
"files": [
|
||||
"dist",
|
||||
"action.yml"
|
||||
],
|
||||
"scripts": {
|
||||
"clean": "rimraf dist",
|
||||
"lint": "tslint -c tslint.json -p tsconfig.json --fix",
|
||||
"build": "ncc build src/index.ts --minify --v8-cache",
|
||||
"prebuild": "run-s lint clean",
|
||||
"precommit": "lint-staged"
|
||||
},
|
||||
"husky": {
|
||||
"hooks": {
|
||||
"commit-msg": "commitlint -E HUSKY_GIT_PARAMS",
|
||||
"pre-commit": "lint-staged && yarn precommit"
|
||||
}
|
||||
},
|
||||
"lint-staged": {
|
||||
"**/*.{js,jsx,tsx,ts,less,md,json}": [
|
||||
"pretty-quick --staged"
|
||||
],
|
||||
"src/**/*.ts": [
|
||||
"tslint -c tslint.json -p ./tsconfig.json --fix"
|
||||
]
|
||||
},
|
||||
"commitlint": {
|
||||
"extends": [
|
||||
"@commitlint/config-conventional"
|
||||
]
|
||||
},
|
||||
"dependencies": {
|
||||
"@actions/core": "^1.2.6",
|
||||
"@actions/github": "^4.0.0",
|
||||
"js-yaml": "^3.14.0",
|
||||
"mustache": "^4.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@commitlint/cli": "^11.0.0",
|
||||
"@commitlint/config-conventional": "^11.0.0",
|
||||
"@types/mustache": "^4.0.1",
|
||||
"@types/js-yaml": "^3.12.5",
|
||||
"@types/node": "^14.0.27",
|
||||
"@typescript-eslint/eslint-plugin": "^4.1.1",
|
||||
"@typescript-eslint/parser": "^4.1.1",
|
||||
"@vercel/ncc": "^0.24.1",
|
||||
"eslint": "^7.9.0",
|
||||
"husky": "^4.3.0",
|
||||
"lint-staged": "^10.3.0",
|
||||
"npm-run-all": "^4.1.5",
|
||||
"prettier": "^2.1.2",
|
||||
"pretty-quick": "^3.0.2",
|
||||
"rimraf": "^3.0.2",
|
||||
"tslint": "^6.1.3",
|
||||
"tslint-config-airbnb": "^5.11.2",
|
||||
"tslint-config-prettier": "^1.18.0",
|
||||
"tslint-eslint-rules": "^5.4.0",
|
||||
"typescript": "^4.0.3"
|
||||
}
|
||||
}
|
62
src/action.ts
Normal file
62
src/action.ts
Normal file
@ -0,0 +1,62 @@
|
||||
import * as core from '@actions/core'
|
||||
import * as github from '@actions/github'
|
||||
import mustache from 'mustache'
|
||||
import { Algo } from './algo'
|
||||
import { Util } from './util'
|
||||
|
||||
export namespace Action {
  /**
   * Entry point of the action.
   *
   * When triggered by an `issues` event with action `opened` or `edited`,
   * compares the title of the triggering issue against the titles of the
   * other issues in the repository (filtered by the `state` input). Every
   * issue whose similarity is greater than or equal to the `threshold` input
   * is collected as a potential duplicate. If at least one is found, the
   * `label` input (when non-empty) is added to the triggering issue and the
   * `comment` input (when non-empty) is rendered with mustache and posted.
   *
   * Any error is reported through `core.setFailed` so the workflow run is
   * marked as failed instead of ending with an unhandled promise rejection.
   */
  export async function run() {
    try {
      const context = github.context
      const payload = context.payload.issue
      if (!payload || !Util.isValidEvent('issues', ['opened', 'edited'])) {
        return
      }

      // A non-numeric threshold would make every `>=` comparison false and
      // silently disable the action, so reject it explicitly.
      const rawThreshold = core.getInput('threshold')
      const threshold = parseFloat(rawThreshold)
      if (isNaN(threshold)) {
        throw new Error(`Invalid "threshold" input: "${rawThreshold}"`)
      }

      const octokit = Util.getOctokit()

      // The endpoint is paginated; walk every page so that all issues are
      // compared, not only the first page of results.
      const candidates = await octokit.paginate(octokit.issues.listForRepo, {
        ...context.repo,
        state: core.getInput('state') as 'all' | 'open' | 'closed',
        per_page: 100,
      })

      const title = payload.title
      const duplicates: { number: number; title: string; accuracy: number }[] = []

      for (const issue of candidates) {
        // Skip the triggering issue itself, and skip pull requests, which the
        // issues endpoint returns alongside regular issues.
        if (issue.number === payload.number || issue.pull_request) {
          continue
        }

        const accuracy = Algo.compare(issue.title, title)

        core.debug(`${issue.title} ~ ${title} = ${accuracy}`)

        if (accuracy >= threshold) {
          duplicates.push({
            number: issue.number,
            title: issue.title,
            // Exposed to the comment template as a whole percentage.
            accuracy: Math.round(accuracy * 100),
          })
        }
      }

      if (!duplicates.length) {
        return
      }

      const label = core.getInput('label')
      if (label) {
        await octokit.issues.addLabels({
          ...context.repo,
          issue_number: payload.number,
          labels: [label],
        })
      }

      const comment = core.getInput('comment')
      if (comment) {
        const body = mustache.render(comment, {
          issues: duplicates,
        })

        await octokit.issues.createComment({
          ...context.repo,
          body,
          issue_number: payload.number,
        })
      }
    } catch (error) {
      core.setFailed(error instanceof Error ? error.message : String(error))
    }
  }
}
|
140
src/algo.ts
Normal file
140
src/algo.ts
Normal file
@ -0,0 +1,140 @@
|
||||
import { Dic } from './dic'
|
||||
|
||||
export namespace Algo {
  /**
   * Escapes every regular-expression metacharacter in `text` so that it can
   * be embedded in a `RegExp` and matched literally.
   */
  function escapeRegExp(text: string) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  }

  /**
   * Removes punctuation and common words from a given phrase.
   * Additionally, finds and replaces predefined synonyms for even faster
   * and more accurate results.
   *
   * @param phrase Raw phrase (e.g. an issue title).
   * @returns The lower-cased, normalized phrase.
   */
  function prepare(phrase: string) {
    let ret = phrase.toLowerCase()

    // Punctuation entries are matched literally. Escaping the whole entry
    // matters for entries such as '... ', where an unescaped dot would match
    // any character and swallow parts of neighbouring words.
    for (const punct of Dic.punctuation) {
      ret = ret.replace(new RegExp(escapeRegExp(punct), 'g'), ' ')
    }

    // Each synonym list is turned into an alternation and replaced with its
    // canonical word (the dictionary key).
    const synonyms: Record<string, string[]> = Dic.synonyms
    for (const word of Object.keys(synonyms)) {
      ret = ret.replace(new RegExp(synonyms[word].join('|'), 'gi'), word)
    }

    // Drop common words, but only when followed by whitespace and another word.
    for (const exclude of Dic.excludes) {
      ret = ret.replace(new RegExp(`\\b${exclude}\\s\\b`, 'g'), '')
    }

    return ret
  }

  /**
   * Normalizes a phrase with `prepare()` and splits it into its non-empty
   * words. Runs of whitespace never produce empty "words".
   */
  function tokenize(phrase: string) {
    return prepare(phrase)
      .split(/\s+/)
      .filter((word) => word.length > 0)
  }

  /**
   * The Damerau–Levenshtein distance between two words is the minimum number
   * of operations (consisting of insertions, deletions or substitutions of a
   * single character, or transposition of two adjacent characters) required
   * to change one word into the other.
   *
   * @see https://en.wikipedia.org/wiki/Levenshtein_distance
   * @see https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance
   * @see https://rosettacode.org/wiki/Levenshtein_distance#JavaScript
   *
   * @returns The number of edit operations between `a` and `b`.
   */
  function distance(a: string, b: string) {
    const [al, bl] = [a.length, b.length]
    const matrix: number[][] = []

    // Trivial cases need no matrix.
    if (a === b) return 0
    if (!al) return bl
    if (!bl) return al

    // First column: turning a prefix of `a` into the empty string.
    for (let i = 0; i <= al; i += 1) {
      matrix[i] = []
      matrix[i][0] = i
    }

    // First row: turning the empty string into a prefix of `b`.
    for (let j = 0; j <= bl; j += 1) {
      matrix[0][j] = j
    }

    for (let i = 1; i <= al; i += 1) {
      for (let j = 1; j <= bl; j += 1) {
        const cost = a[i - 1] === b[j - 1] ? 0 : 1

        matrix[i][j] = Math.min(
          matrix[i - 1][j + 0] + 1, // deletion
          matrix[i + 0][j - 1] + 1, // insertion
          matrix[i - 1][j - 1] + cost, // substitution
        )

        if (i > 1 && j > 1 && a[i - 1] === b[j - 2] && a[i - 2] === b[j - 1]) {
          matrix[i][j] = Math.min(
            matrix[i + 0][j + 0],
            matrix[i - 2][j - 2] + cost, // transposition
          )
        }
      }
    }

    return matrix[al][bl]
  }

  /**
   * Compares two strings and returns how similar they are. The result is a
   * float in interval [0.0; 1.0]. Two empty strings are fully similar.
   */
  function similarity(i: string, j: string) {
    const length = Math.max(i.length, j.length)
    return length === 0 ? 1.0 : (length - distance(i, j)) / length
  }

  // How many points to remove per missing word (see `compare()`):
  const ERROR_ADJ = 0.15

  /**
   * Compares two phrases and returns how similar they are. The result is a
   * float in interval [0.0; 1.0]. The algorithm works as follows:
   *
   * 1. Preparation:
   *    Common words, punctuation symbols and synonyms are normalized.
   *    Sentences are then split into separate non-empty words for further
   *    analysis. We always operate on the list which contains fewer words.
   * 2. Calculations:
   *    For each word in the first phrase, we try to find an analogue in the
   *    second one. This is done using the Damerau–Levenshtein distance
   *    algorithm. The best similarity found for each word is accumulated.
   * 3. Error adjustment:
   *    We calculate the difference between words amount in each phrase. For
   *    each extra word, we remove a certain probability from the final
   *    score. This step is necessary in situations where the first sentence
   *    contains only a few words with direct analogues in the second one.
   *    Without error adjustment, this would give us a result of 1.0. For
   *    example:
   *      A: "Testing module foo"
   *      B: "Testing if there's not memory leak in module bar"
   *
   * Two phrases without any words are considered identical (1.0); a phrase
   * without words compared to a non-empty one scores 0.0. The adjusted score
   * is clamped so that it never drops below 0.0.
   *
   * @todo include phrase-length difference in the observational error
   */
  export function compare(phraseA: string, phraseB: string) {
    let wordsA = tokenize(phraseA)
    let wordsB = tokenize(phraseB)

    if (wordsA.length > wordsB.length) {
      ;[wordsA, wordsB] = [wordsB, wordsA]
    }

    // `wordsA` is the shorter list; guard against dividing by zero below.
    if (wordsA.length === 0) {
      return wordsB.length === 0 ? 1 : 0
    }

    let total = 0
    for (const wordA of wordsA) {
      let best = 0
      for (const wordB of wordsB) {
        best = Math.max(best, similarity(wordA, wordB))
      }

      total += best
    }

    // Direct score:
    total /= wordsA.length
    // Error adjustment:
    total -= (wordsB.length - wordsA.length) * ERROR_ADJ

    // Keep the documented [0.0; 1.0] range.
    return Math.max(0, total)
  }
}
|
33
src/dic.ts
Normal file
33
src/dic.ts
Normal file
@ -0,0 +1,33 @@
|
||||
/**
 * Static word lists used to normalize phrases before they are compared.
 */
export namespace Dic {
  /**
   * Common words that carry little meaning and are removed from phrases.
   * Each entry is interpolated into a regular expression by the consumer.
   */
  export const excludes = [
    'the',
    'and',
    'a',
    'an',
    'as',
    'at',
    'are',
    'by',
    'when',
    'well',
    'is',
    'it',
    'in',
    'to',
    'till',
    'until',
    'or',
    'on',
    'into',
    'onto',
  ]

  /**
   * Punctuation sequences (each including its surrounding/trailing space)
   * that are replaced with a single space during phrase preparation.
   * Longer dot sequences come before shorter ones.
   */
  export const punctuation = ['! ', ', ', ' - ', ' – ', '... ', '.. ', '. ']

  /**
   * Synonym table: every word in a list is replaced with the list's key.
   * 'aplication' is a misspelling listed alongside 'application'.
   */
  export const synonyms = {
    app: ['aplication', 'application', 'client'],
    cli: ['console', 'terminal', 'shell', 'command line interface'],
    null: ['blank', 'empty', 'unfilled', 'nil'],
    module: ['starter', 'package'],
  }
}
|
3
src/index.ts
Normal file
3
src/index.ts
Normal file
@ -0,0 +1,3 @@
|
||||
import { Action } from './action'
|
||||
|
||||
// Start the action. A rejected promise is logged and turned into a non-zero
// exit code so the workflow step fails instead of leaving the rejection
// unhandled.
Action.run().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
|
25
src/util.ts
Normal file
25
src/util.ts
Normal file
@ -0,0 +1,25 @@
|
||||
import * as core from '@actions/core'
|
||||
import * as github from '@actions/github'
|
||||
|
||||
export namespace Util {
  /**
   * Builds an Octokit client authenticated with the required `GITHUB_TOKEN`
   * action input.
   */
  export function getOctokit() {
    return github.getOctokit(core.getInput('GITHUB_TOKEN', { required: true }))
  }

  /**
   * Tells whether the current workflow run was triggered by the given event
   * and, optionally, by one of the given actions of that event.
   *
   * @param event Event name to compare with `github.context.eventName`.
   * @param actions A single action name or a list of them, compared with the
   * payload's `action`. When omitted (or `null`), only the event name is
   * checked.
   * @returns `true` when the event (and the action, if requested) matches.
   */
  export function isValidEvent(event: string, actions?: string | string[]) {
    const { eventName, payload } = github.context

    if (eventName !== event) {
      return false
    }

    if (actions == null) {
      return true
    }

    const accepted = Array.isArray(actions) ? actions : [actions]
    return accepted.some((candidate) => candidate === payload.action)
  }
}
|
18
tsconfig.json
Normal file
18
tsconfig.json
Normal file
@ -0,0 +1,18 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"module": "commonjs",
|
||||
"moduleResolution": "node",
|
||||
"skipLibCheck": true,
|
||||
"esModuleInterop": true,
|
||||
"noImplicitAny": true,
|
||||
"noEmitOnError": true,
|
||||
"noUnusedLocals": true,
|
||||
"strictNullChecks": true,
|
||||
"resolveJsonModule": true,
|
||||
"experimentalDecorators": true,
|
||||
"outDir": "lib",
|
||||
"target": "es5",
|
||||
"lib": ["dom", "es2015"]
|
||||
},
|
||||
"include": ["src/**/*.ts"]
|
||||
}
|
17
tslint.json
Normal file
17
tslint.json
Normal file
@ -0,0 +1,17 @@
|
||||
{
|
||||
"extends": [
|
||||
"tslint-config-airbnb",
|
||||
"tslint-eslint-rules",
|
||||
"tslint-config-prettier"
|
||||
],
|
||||
"linterOptions": {
|
||||
"exclude": ["./lib/**/*.d.ts"]
|
||||
},
|
||||
"rules": {
|
||||
"no-construct": true,
|
||||
"no-debugger": true,
|
||||
"no-reference": true,
|
||||
"import-name": false,
|
||||
"semicolon": [true, "never"]
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user