diff --git a/.gitignore b/.gitignore index 2f78cf5..f5e7476 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ *.pyc +*.pyo +__pycache__ diff --git a/tools/opensource_tools/README_spdx_match.md b/tools/opensource_tools/README_spdx_match.md new file mode 100644 index 0000000..714bb20 --- /dev/null +++ b/tools/opensource_tools/README_spdx_match.md @@ -0,0 +1,60 @@ +### SPDX License 匹配工具 + +该工具用于从Excel文件和JSON文件中提取、匹配并映射开源许可证信息。其核心功能包括将Excel文件中的`cc_url`字段复制到`match_url`字段中,并将`spdx_fixed_license_name`字段与JSON中的SPDX License键进行匹配,结果输出到Excel文件的`match_license`字段。 + +### 目录结构 + +``` +project_root/ +├── src/ +│ └── spdx_license_matcher.py # SPDX License 匹配工具代码 +├── test/ +│ └── test_spdx_license_matcher.py # SPDX License 匹配工具的测试代码 +└── data/ + ├── oh_spdx_license_match.xlsx # 输入的Excel数据文件 + └── spdx.json # 输入的SPDX License JSON文件 +``` + +### 文件详细说明 + +- **src/spdx_license_matcher.py**:核心脚本文件,包含加载数据、列复制、许可证匹配及结果输出等功能。 +- **test/test_spdx_license_matcher.py**:测试文件,使用`unittest`框架验证工具的核心功能是否正常工作。 +- **data/oh_spdx_license_match.xlsx**和**data/spdx.json**:测试数据文件,分别包含Excel的许可URL数据和SPDX License的JSON映射数据。 + + + +#### 使用说明 + +1. 运行脚本需要传递三个参数:输入的Excel文件路径、JSON文件路径和输出Excel文件路径。 +2. 命令示例如下: + +``` +python src/spdx_license_matcher.py data/oh_spdx_license_match.xlsx data/spdx.json data/output.xlsx +``` + +### + +#### 测试步骤 + +1. 在项目根目录运行测试命令: + +``` +python -m unittest discover -s test +``` + +2. 测试输出: + - 测试代码会自动验证主要功能,包括列复制、许可证匹配、数据保存等。 + - 若所有测试通过,将显示“OK”。 + +### 注意事项 + +- **SPDX 数据格式**:JSON文件应为“许可证全称 → SPDX标识符”的键值映射(格式参见`data/spdx.json`)。 +- **Excel 文件格式**:输入Excel文件应包含`cc_url`和`spdx_fixed_license_name`列。 +- **测试文件**:测试中创建的临时文件会在测试结束时自动删除。 + +------ + +### 常见问题 + +1. 许可证名称无法匹配:请检查输入Excel文件的`spdx_fixed_license_name`字段内容,确保名称拼写正确,并与JSON文件的键名称一致。 +2. 
多项许可匹配:对于含有多个以“分号”分隔的许可证名称,脚本会逐个进行匹配,并将结果以“分号”分隔的方式填入`match_license`列中。 diff --git a/tools/opensource_tools/data/oh_spdx_license_match.xlsx b/tools/opensource_tools/data/oh_spdx_license_match.xlsx new file mode 100644 index 0000000..0728f43 Binary files /dev/null and b/tools/opensource_tools/data/oh_spdx_license_match.xlsx differ diff --git a/tools/opensource_tools/data/spdx.json b/tools/opensource_tools/data/spdx.json new file mode 100644 index 0000000..7fdff77 --- /dev/null +++ b/tools/opensource_tools/data/spdx.json @@ -0,0 +1,640 @@ +{ + "3D Slicer License v1.0": "3D-Slicer-1.0", + "3dfx Glide License": "Glide", + "Abstyles License": "Abstyles", + "Academic Free License v1.1": "AFL-1.1", + "Academic Free License v1.2": "AFL-1.2", + "Academic Free License v2.0": "AFL-2.0", + "Academic Free License v2.1": "AFL-2.1", + "Academic Free License v3.0": "AFL-3.0", + "Academy of Motion Picture Arts and Sciences BSD": "AMPAS", + "AdaCore Doc License": "AdaCore-doc", + "Adaptive Public License 1.0": "APL-1.0", + "Adobe Display PostScript License": "Adobe-Display-PostScript", + "Adobe Glyph List License": "Adobe-Glyph", + "Adobe Postscript AFM License": "APAFML", + "Adobe Systems Incorporated Source Code License Agreement": "Adobe-2006", + "Adobe Utopia Font License": "Adobe-Utopia", + "Affero General Public License v1.0 only": "AGPL-1.0-only", + "Affero General Public License v1.0 or later": "AGPL-1.0-or-later", + "Afmparse License": "Afmparse", + "Aladdin Free Public License": "Aladdin", + "Amazon Digital Services License": "ADSL", + "AMD newlib License": "AMD-newlib", + "AMD's plpa_map.c License": "AMDPLPA", + "AML glslang variant License": "AML-glslang", + "ANTLR Software Rights Notice": "ANTLR-PD", + "ANTLR Software Rights Notice with license fallback": "ANTLR-PD-fallback", + "Any OSI License": "any-OSI", + "Apache License 1.0": "Apache-1.0", + "Apache License 1.1": "Apache-1.1", + "Apache License 2.0": "Apache-2.0", + "App::s2p License": "App-s2p", + "Apple MIT 
License": "AML", + "Apple Public Source License 1.0": "APSL-1.0", + "Apple Public Source License 1.1": "APSL-1.1", + "Apple Public Source License 1.2": "APSL-1.2", + "Apple Public Source License 2.0": "APSL-2.0", + "Arphic Public License": "Arphic-1999", + "Artistic License 1.0": "Artistic-1.0", + "Artistic License 1.0 (Perl)": "Artistic-1.0-Perl", + "Artistic License 1.0 w/clause 8": "Artistic-1.0-cl8", + "Artistic License 2.0": "Artistic-2.0", + "ASWF Digital Assets License 1.1": "ASWF-Digital-Assets-1.1", + "ASWF Digital Assets License version 1.0": "ASWF-Digital-Assets-1.0", + "Attribution Assurance License": "AAL", + "Baekmuk License": "Baekmuk", + "Bahyph License": "Bahyph", + "Barr License": "Barr", + "bcrypt Solar Designer License": "bcrypt-Solar-Designer", + "Beerware License": "Beerware", + "Bitstream Charter Font License": "Bitstream-Charter", + "Bitstream Vera Font License": "Bitstream-Vera", + "BitTorrent Open Source License v1.0": "BitTorrent-1.0", + "BitTorrent Open Source License v1.1": "BitTorrent-1.1", + "Blue Oak Model License 1.0.0": "BlueOak-1.0.0", + "Boehm-Demers-Weiser GC License": "Boehm-GC", + "Boost Software License 1.0": "BSL-1.0", + "Borceux license": "Borceux", + "Brian Gladman 2-Clause License": "Brian-Gladman-2-Clause", + "Brian Gladman 3-Clause License": "Brian-Gladman-3-Clause", + "BSD 1-Clause License": "BSD-1-Clause", + "BSD 2-Clause \"Simplified\" License": "BSD-2-Clause", + "BSD 2-Clause - first lines requirement": "BSD-2-Clause-first-lines", + "BSD 2-Clause - Ian Darwin variant": "BSD-2-Clause-Darwin", + "BSD 2-Clause with views sentence": "BSD-2-Clause-Views", + "BSD 3-Clause \"New\" or \"Revised\" License": "BSD-3-Clause", + "BSD 3-Clause acpica variant": "BSD-3-Clause-acpica", + "BSD 3-Clause Clear License": "BSD-3-Clause-Clear", + "BSD 3-Clause Flex variant": "BSD-3-Clause-flex", + "BSD 3-Clause Modification": "BSD-3-Clause-Modification", + "BSD 3-Clause No Military License": "BSD-3-Clause-No-Military-License", + "BSD 
3-Clause No Nuclear License": "BSD-3-Clause-No-Nuclear-License", + "BSD 3-Clause No Nuclear License 2014": "BSD-3-Clause-No-Nuclear-License-2014", + "BSD 3-Clause No Nuclear Warranty": "BSD-3-Clause-No-Nuclear-Warranty", + "BSD 3-Clause Open MPI variant": "BSD-3-Clause-Open-MPI", + "BSD 3-Clause Sun Microsystems": "BSD-3-Clause-Sun", + "BSD 4 Clause Shortened": "BSD-4-Clause-Shortened", + "BSD 4-Clause \"Original\" or \"Old\" License": "BSD-4-Clause", + "BSD 4.3 RENO License": "BSD-4.3RENO", + "BSD 4.3 TAHOE License": "BSD-4.3TAHOE", + "BSD Advertising Acknowledgement License": "BSD-Advertising-Acknowledgement", + "BSD Protection License": "BSD-Protection", + "BSD Source Code Attribution": "BSD-Source-Code", + "BSD Source Code Attribution - beginning of file variant": "BSD-Source-beginning-file", + "BSD with attribution": "BSD-3-Clause-Attribution", + "BSD with Attribution and HPND disclaimer": "BSD-Attribution-HPND-disclaimer", + "BSD Zero Clause License": "0BSD", + "BSD-2-Clause Plus Patent License": "BSD-2-Clause-Patent", + "BSD-4-Clause (University of California-Specific)": "BSD-4-Clause-UC", + "BSD-Inferno-Nettverk": "BSD-Inferno-Nettverk", + "Business Source License 1.1": "BUSL-1.1", + "bzip2 and libbzip2 License v1.0.6": "bzip2-1.0.6", + "Caldera License": "Caldera", + "Caldera License (without preamble)": "Caldera-no-preamble", + "Catharon License": "Catharon", + "CeCILL Free Software License Agreement v1.0": "CECILL-1.0", + "CeCILL Free Software License Agreement v1.1": "CECILL-1.1", + "CeCILL Free Software License Agreement v2.0": "CECILL-2.0", + "CeCILL Free Software License Agreement v2.1": "CECILL-2.1", + "CeCILL-B Free Software License Agreement": "CECILL-B", + "CeCILL-C Free Software License Agreement": "CECILL-C", + "CERN Open Hardware Licence v1.1": "CERN-OHL-1.1", + "CERN Open Hardware Licence v1.2": "CERN-OHL-1.2", + "CERN Open Hardware Licence Version 2 - Permissive": "CERN-OHL-P-2.0", + "CERN Open Hardware Licence Version 2 - Strongly 
Reciprocal": "CERN-OHL-S-2.0", + "CERN Open Hardware Licence Version 2 - Weakly Reciprocal": "CERN-OHL-W-2.0", + "CFITSIO License": "CFITSIO", + "check-cvs License": "check-cvs", + "Checkmk License": "checkmk", + "Clarified Artistic License": "ClArtistic", + "Clips License": "Clips", + "CMU Mach - no notices-in-documentation variant": "CMU-Mach-nodoc", + "CMU License": "MIT-CMU", + "CMU Mach License": "CMU-Mach", + "CNRI Jython License": "CNRI-Jython", + "CNRI Python License": "CNRI-Python", + "CNRI Python Open Source GPL Compatible License Agreement": "CNRI-Python-GPL-Compatible", + "Code Project Open License 1.02": "CPOL-1.02", + "Common Development and Distribution License 1.0": "CDDL-1.0", + "Common Development and Distribution License 1.1": "CDDL-1.1", + "Common Documentation License 1.0": "CDL-1.0", + "Common Lisp LOOP License": "LOOP", + "Common Public Attribution License 1.0": "CPAL-1.0", + "Common Public License 1.0": "CPL-1.0", + "Common Vulnerability Enumeration ToU License": "cve-tou", + "Community Data License Agreement Permissive 1.0": "CDLA-Permissive-1.0", + "Community Data License Agreement Permissive 2.0": "CDLA-Permissive-2.0", + "Community Data License Agreement Sharing 1.0": "CDLA-Sharing-1.0", + "Community Specification License 1.0": "Community-Spec-1.0", + "Computational Use of Data Agreement v1.0": "C-UDA-1.0", + "Computer Associates Trusted Open Source License 1.1": "CATOSL-1.1", + "Condor Public License v1.1": "Condor-1.1", + "Copyfree Open Innovation License": "COIL-1.0", + "copyleft-next 0.3.0": "copyleft-next-0.3.0", + "copyleft-next 0.3.1": "copyleft-next-0.3.1", + "Cornell Lossless JPEG License": "Cornell-Lossless-JPEG", + "Creative Commons Attribution 1.0 Generic": "CC-BY-1.0", + "Creative Commons Attribution 2.0 Generic": "CC-BY-2.0", + "Creative Commons Attribution 2.5 Australia": "CC-BY-2.5-AU", + "Creative Commons Attribution 2.5 Generic": "CC-BY-2.5", + "Creative Commons Attribution 3.0 Australia": "CC-BY-3.0-AU", + "Creative 
Commons Attribution 3.0 Austria": "CC-BY-3.0-AT", + "Creative Commons Attribution 3.0 Germany": "CC-BY-3.0-DE", + "Creative Commons Attribution 3.0 IGO": "CC-BY-3.0-IGO", + "Creative Commons Attribution 3.0 Netherlands": "CC-BY-3.0-NL", + "Creative Commons Attribution 3.0 United States": "CC-BY-3.0-US", + "Creative Commons Attribution 3.0 Unported": "CC-BY-3.0", + "Creative Commons Attribution 4.0 International": "CC-BY-4.0", + "Creative Commons Attribution No Derivatives 1.0 Generic": "CC-BY-ND-1.0", + "Creative Commons Attribution No Derivatives 2.0 Generic": "CC-BY-ND-2.0", + "Creative Commons Attribution No Derivatives 2.5 Generic": "CC-BY-ND-2.5", + "Creative Commons Attribution No Derivatives 3.0 Germany": "CC-BY-ND-3.0-DE", + "Creative Commons Attribution No Derivatives 3.0 Unported": "CC-BY-ND-3.0", + "Creative Commons Attribution No Derivatives 4.0 International": "CC-BY-ND-4.0", + "Creative Commons Attribution Non Commercial 1.0 Generic": "CC-BY-NC-1.0", + "Creative Commons Attribution Non Commercial 2.0 Generic": "CC-BY-NC-2.0", + "Creative Commons Attribution Non Commercial 2.5 Generic": "CC-BY-NC-2.5", + "Creative Commons Attribution Non Commercial 3.0 Germany": "CC-BY-NC-3.0-DE", + "Creative Commons Attribution Non Commercial 3.0 Unported": "CC-BY-NC-3.0", + "Creative Commons Attribution Non Commercial 4.0 International": "CC-BY-NC-4.0", + "Creative Commons Attribution Non Commercial No Derivatives 1.0 Generic": "CC-BY-NC-ND-1.0", + "Creative Commons Attribution Non Commercial No Derivatives 2.0 Generic": "CC-BY-NC-ND-2.0", + "Creative Commons Attribution Non Commercial No Derivatives 2.5 Generic": "CC-BY-NC-ND-2.5", + "Creative Commons Attribution Non Commercial No Derivatives 3.0 Germany": "CC-BY-NC-ND-3.0-DE", + "Creative Commons Attribution Non Commercial No Derivatives 3.0 IGO": "CC-BY-NC-ND-3.0-IGO", + "Creative Commons Attribution Non Commercial No Derivatives 3.0 Unported": "CC-BY-NC-ND-3.0", + "Creative Commons Attribution Non Commercial No 
Derivatives 4.0 International": "CC-BY-NC-ND-4.0", + "Creative Commons Attribution Non Commercial Share Alike 1.0 Generic": "CC-BY-NC-SA-1.0", + "Creative Commons Attribution Non Commercial Share Alike 2.0 England and Wales": "CC-BY-NC-SA-2.0-UK", + "Creative Commons Attribution Non Commercial Share Alike 2.0 Generic": "CC-BY-NC-SA-2.0", + "Creative Commons Attribution Non Commercial Share Alike 2.0 Germany": "CC-BY-NC-SA-2.0-DE", + "Creative Commons Attribution Non Commercial Share Alike 2.5 Generic": "CC-BY-NC-SA-2.5", + "Creative Commons Attribution Non Commercial Share Alike 3.0 Germany": "CC-BY-NC-SA-3.0-DE", + "Creative Commons Attribution Non Commercial Share Alike 3.0 IGO": "CC-BY-NC-SA-3.0-IGO", + "Creative Commons Attribution Non Commercial Share Alike 3.0 Unported": "CC-BY-NC-SA-3.0", + "Creative Commons Attribution Non Commercial Share Alike 4.0 International": "CC-BY-NC-SA-4.0", + "Creative Commons Attribution Share Alike 1.0 Generic": "CC-BY-SA-1.0", + "Creative Commons Attribution Share Alike 2.0 England and Wales": "CC-BY-SA-2.0-UK", + "Creative Commons Attribution Share Alike 2.0 Generic": "CC-BY-SA-2.0", + "Creative Commons Attribution Share Alike 2.1 Japan": "CC-BY-SA-2.1-JP", + "Creative Commons Attribution Share Alike 2.5 Generic": "CC-BY-SA-2.5", + "Creative Commons Attribution Share Alike 3.0 Austria": "CC-BY-SA-3.0-AT", + "Creative Commons Attribution Share Alike 3.0 Germany": "CC-BY-SA-3.0-DE", + "Creative Commons Attribution Share Alike 3.0 Unported": "CC-BY-SA-3.0", + "Creative Commons Attribution Share Alike 4.0 International": "CC-BY-SA-4.0", + "Creative Commons Attribution-NonCommercial-ShareAlike 2.0 France": "CC-BY-NC-SA-2.0-FR", + "Creative Commons Attribution-ShareAlike 3.0 IGO": "CC-BY-SA-3.0-IGO", + "Creative Commons Public Domain Dedication and Certification": "CC-PDDC", + "Creative Commons Zero v1.0 Universal": "CC0-1.0", + "Cronyx License": "Cronyx", + "Crossword License": "Crossword", + "Cryptographic Autonomy License 1.0": 
"CAL-1.0", + "Cryptographic Autonomy License 1.0 (Combined Work Exception)": "CAL-1.0-Combined-Work-Exception", + "CrystalStacker License": "CrystalStacker", + "CUA Office Public License v1.0": "CUA-OPL-1.0", + "Cube License": "Cube", + "curl License": "curl", + "Data licence Germany – attribution – version 2.0": "DL-DE-BY-2.0", + "Data licence Germany – zero – version 2.0": "DL-DE-ZERO-2.0", + "David M. Gay dtoa License": "dtoa", + "DEC 3-Clause License": "DEC-3-Clause", + "Detection Rule License 1.0": "DRL-1.0", + "Detection Rule License 1.1": "DRL-1.1", + "Deutsche Freie Software Lizenz": "D-FSL-1.0", + "diffmark license": "diffmark", + "Do What The F*ck You Want To Public License": "WTFPL", + "DOC License": "DOC", + "DocBook Schema License": "DocBook-Schema", + "DocBook XML License": "DocBook-XML", + "Dotseqn License": "Dotseqn", + "DSDP License": "DSDP", + "dvipdfm License": "dvipdfm", + "Eclipse Public License 1.0": "EPL-1.0", + "Eclipse Public License 2.0": "EPL-2.0", + "Educational Community License v1.0": "ECL-1.0", + "Educational Community License v2.0": "ECL-2.0", + "eGenix.com Public License 1.1.0": "eGenix", + "Eiffel Forum License v1.0": "EFL-1.0", + "Eiffel Forum License v2.0": "EFL-2.0", + "Elastic License 2.0": "Elastic-2.0", + "Enlightenment License (e16)": "MIT-advertising", + "enna License": "MIT-enna", + "Entessa Public License v1.0": "Entessa", + "EPICS Open License": "EPICS", + "Erlang Public License v1.1": "ErlPL-1.1", + "Etalab Open License 2.0": "etalab-2.0", + "EU DataGrid Software License": "EUDatagrid", + "European Union Public License 1.0": "EUPL-1.0", + "European Union Public License 1.1": "EUPL-1.1", + "European Union Public License 1.2": "EUPL-1.2", + "Eurosym License": "Eurosym", + "Fair License": "Fair", + "feh License": "MIT-feh", + "Ferguson Twofish License": "Ferguson-Twofish", + "Frameworx Open License 1.0": "Frameworx-1.0", + "Fraunhofer FDK AAC Codec Library": "FDK-AAC", + "FreeBSD Documentation License": "FreeBSD-DOC", + 
"FreeImage Public License v1.0": "FreeImage", + "Freetype Project License": "FTL", + "FSF All Permissive License": "FSFAP", + "FSF All Permissive License (without Warranty)": "FSFAP-no-warranty-disclaimer", + "FSF Unlimited License": "FSFUL", + "FSF Unlimited License (With License Retention and Warranty Disclaimer)": "FSFULLRWD", + "FSF Unlimited License (with License Retention)": "FSFULLR", + "Furuseth License": "Furuseth", + "Fuzzy Bitmap License": "FBM", + "fwlw License": "fwlw", + "GD License": "GD", + "Giftware License": "Giftware", + "GL2PS License": "GL2PS", + "Glulxe License": "Glulxe", + "Gnome GCR Documentation License": "GCR-docs", + "GNU Affero General Public License v3.0 only": "AGPL-3.0-only", + "GNU Affero General Public License v3.0 or later": "AGPL-3.0-or-later", + "GNU Free Documentation License v1.1 only": "GFDL-1.1-only", + "GNU Free Documentation License v1.1 only - invariants": "GFDL-1.1-invariants-only", + "GNU Free Documentation License v1.1 only - no invariants": "GFDL-1.1-no-invariants-only", + "GNU Free Documentation License v1.1 or later": "GFDL-1.1-or-later", + "GNU Free Documentation License v1.1 or later - invariants": "GFDL-1.1-invariants-or-later", + "GNU Free Documentation License v1.1 or later - no invariants": "GFDL-1.1-no-invariants-or-later", + "GNU Free Documentation License v1.2 only": "GFDL-1.2-only", + "GNU Free Documentation License v1.2 only - invariants": "GFDL-1.2-invariants-only", + "GNU Free Documentation License v1.2 only - no invariants": "GFDL-1.2-no-invariants-only", + "GNU Free Documentation License v1.2 or later": "GFDL-1.2-or-later", + "GNU Free Documentation License v1.2 or later - invariants": "GFDL-1.2-invariants-or-later", + "GNU Free Documentation License v1.2 or later - no invariants": "GFDL-1.2-no-invariants-or-later", + "GNU Free Documentation License v1.3 only": "GFDL-1.3-only", + "GNU Free Documentation License v1.3 only - invariants": "GFDL-1.3-invariants-only", + "GNU Free Documentation License v1.3 
only - no invariants": "GFDL-1.3-no-invariants-only", + "GNU Free Documentation License v1.3 or later": "GFDL-1.3-or-later", + "GNU Free Documentation License v1.3 or later - invariants": "GFDL-1.3-invariants-or-later", + "GNU Free Documentation License v1.3 or later - no invariants": "GFDL-1.3-no-invariants-or-later", + "GNU General Public License v1.0 only": "GPL-1.0-only", + "GNU General Public License v1.0 or later": "GPL-1.0-or-later", + "GNU General Public License v2.0 only": "GPL-2.0-only", + "GNU General Public License v2.0 or later": "GPL-2.0-or-later", + "GNU General Public License v2.0 WITH Linux-syscall-note": "GPL-2.0-with-Linux-syscall-note", + "GNU General Public License v3.0 only": "GPL-3.0-only", + "GNU General Public License v3.0 or later": "GPL-3.0-or-later", + "GNU Lesser General Public License v2.1 only": "LGPL-2.1-only", + "GNU Lesser General Public License v2.1 or later": "LGPL-2.1-or-later", + "GNU Lesser General Public License v3.0 only": "LGPL-3.0-only", + "GNU Lesser General Public License v3.0 or later": "LGPL-3.0-or-later", + "GNU Library General Public License v2 only": "LGPL-2.0-only", + "GNU Library General Public License v2 or later": "LGPL-2.0-or-later", + "gnuplot License": "gnuplot", + "Good Luck With That Public License": "GLWTPL", + "Graphics Gems License": "Graphics-Gems", + "gSOAP Public License v1.3b": "gSOAP-1.3b", + "gtkbook License": "gtkbook", + "Gutmann License": "Gutmann", + "Haskell Language Report License": "HaskellReport", + "hdparm License": "hdparm", + "Hewlett-Packard 1986 License": "HP-1986", + "Hewlett-Packard 1989 License": "HP-1989", + "Hewlett-Packard BSD variant license": "BSD-3-Clause-HP", + "HIDAPI License": "HIDAPI", + "Hippocratic License 2.1": "Hippocratic-2.1", + "Historical Permission Notice and Disclaimer": "HPND", + "Historical Permission Notice and Disclaimer - INRIA-IMAG variant": "HPND-INRIA-IMAG", + "Historical Permission Notice and Disclaimer - DEC variant": "HPND-DEC", + "Historical 
Permission Notice and Disclaimer - documentation sell variant": "HPND-doc-sell", + "Historical Permission Notice and Disclaimer - documentation variant": "HPND-doc", + "Historical Permission Notice and Disclaimer - Fenneberg-Livingston variant": "HPND-Fenneberg-Livingston", + "Historical Permission Notice and Disclaimer - Intel variant": "HPND-Intel", + "Historical Permission Notice and Disclaimer - Kevlin Henney variant": "HPND-Kevlin-Henney", + "Historical Permission Notice and Disclaimer - Markus Kuhn variant": "HPND-Markus-Kuhn", + "Historical Permission Notice and Disclaimer - merchantability variant": "HPND-merchantability-variant", + "Historical Permission Notice and Disclaimer - Netrek variant": "HPND-Netrek", + "Historical Permission Notice and Disclaimer - Pbmplus variant": "HPND-Pbmplus", + "Historical Permission Notice and Disclaimer - sell regexpr variant": "HPND-sell-regexpr", + "Historical Permission Notice and Disclaimer - sell variant": "HPND-sell-variant", + "Historical Permission Notice and Disclaimer - sell xserver variant with MIT disclaimer": "HPND-sell-MIT-disclaimer-xserver", + "Historical Permission Notice and Disclaimer - University of California variant": "HPND-UC", + "Historical Permission Notice and Disclaimer - University of California, US export warning": "HPND-UC-export-US", + "Historical Permission Notice and Disclaimer with MIT disclaimer": "HPND-MIT-disclaimer", + "HPND sell variant with MIT disclaimer": "HPND-sell-variant-MIT-disclaimer", + "HPND sell variant with MIT disclaimer - reverse": "HPND-sell-variant-MIT-disclaimer-rev", + "HPND with US Government export control and 2 disclaimers": "HPND-export2-US", + "HPND with US Government export control warning": "HPND-export-US", + "HPND with US Government export control warning and acknowledgment": "HPND-export-US-acknowledgement", + "HPND with US Government export control warning and modification rqmt": "HPND-export-US-modify", + "HTML Tidy License": "HTMLTIDY", + "IBM PowerPC 
Initialization and Boot Software": "IBM-pibs", + "IBM Public License v1.0": "IPL-1.0", + "ICU License": "ICU", + "IEC Code Components End-user licence agreement": "IEC-Code-Components-EULA", + "ImageMagick License": "ImageMagick", + "iMatix Standard Function Library Agreement": "iMatix", + "Imlib2 License": "Imlib2", + "Independent JPEG Group License": "IJG", + "Independent JPEG Group License - short": "IJG-short", + "Info-ZIP License": "Info-ZIP", + "Inner Net License v2.0": "Inner-Net-2.0", + "Intel ACPI Software License Agreement": "Intel-ACPI", + "Intel Open Source License": "Intel", + "Interbase Public License v1.0": "Interbase-1.0", + "IPA Font License": "IPA", + "ISC License": "ISC", + "ISC Veillard variant": "ISC-Veillard", + "Jam License": "Jam", + "Japan Network Information Center License": "JPNIC", + "JasPer License": "JasPer-2.0", + "JPL Image Use Policy": "JPL-image", + "JSON License": "JSON", + "Kastrup License": "Kastrup", + "Kazlib License": "Kazlib", + "Knuth CTAN License": "Knuth-CTAN", + "LaTeX Project Public License v1.0": "LPPL-1.0", + "LaTeX Project Public License v1.1": "LPPL-1.1", + "LaTeX Project Public License v1.2": "LPPL-1.2", + "LaTeX Project Public License v1.3a": "LPPL-1.3a", + "LaTeX Project Public License v1.3c": "LPPL-1.3c", + "Latex2e License": "Latex2e", + "Latex2e with translated notice permission": "Latex2e-translated-notice", + "Lawrence Berkeley National Labs BSD variant license": "BSD-3-Clause-LBNL", + "Leptonica License": "Leptonica", + "Lesser General Public License For Linguistic Resources": "LGPLLR", + "libpng License": "Libpng", + "libselinux public domain notice": "libselinux-1.0", + "libtiff License": "libtiff", + "libutil David Nugent License": "libutil-David-Nugent", + "Licence Art Libre 1.2": "LAL-1.2", + "Licence Art Libre 1.3": "LAL-1.3", + "Licence Libre du Québec – Permissive version 1.1": "LiLiQ-P-1.1", + "Licence Libre du Québec – Réciprocité forte version 1.1": "LiLiQ-Rplus-1.1", + "Licence Libre du Québec – 
Réciprocité version 1.1": "LiLiQ-R-1.1", + "Linux Kernel Variant of OpenIB.org license": "Linux-OpenIB", + "Linux man-pages - 1 paragraph": "Linux-man-pages-1-para", + "Linux man-pages Copyleft": "Linux-man-pages-copyleft", + "Linux man-pages Copyleft - 2 paragraphs": "Linux-man-pages-copyleft-2-para", + "Linux man-pages Copyleft Variant": "Linux-man-pages-copyleft-var", + "LLVM Exception": "LLVM-exception", + "LPD Documentation License": "LPD-document", + "lsof License": "lsof", + "Lucent Public License v1.02": "LPL-1.02", + "Lucent Public License Version 1.0": "LPL-1.0", + "Lucida Bitmap Fonts License": "Lucida-Bitmap-Fonts", + "LZMA SDK License (versions 9.11 to 9.20)": "LZMA-SDK-9.11-to-9.20", + "LZMA SDK License (versions 9.22 and beyond)": "LZMA-SDK-9.22", + "Mackerras 3-Clause - acknowledgment variant": "Mackerras-3-Clause-acknowledgment", + "Mackerras 3-Clause License": "Mackerras-3-Clause", + "magaz License": "magaz", + "mailprio License": "mailprio", + "MakeIndex License": "MakeIndex", + "Martin Birgmeier License": "Martin-Birgmeier", + "Matrix Template Library License": "MTLL", + "McPhee Slideshow License": "McPhee-slideshow", + "metamail License": "metamail", + "Michigan/Merit Networks License": "UMich-Merit", + "Microsoft Limited Public License": "MS-LPL", + "Microsoft Public License": "MS-PL", + "Microsoft Reciprocal License": "MS-RL", + "Minpack License": "Minpack", + "MIT +no-false-attribs license": "MITNFA", + "MIT Festival Variant": "MIT-Festival", + "MIT Khronos - old variant": "MIT-Khronos-old", + "MIT License": "MIT", + "MIT License Modern Variant": "MIT-Modern-Variant", + "MIT No Attribution": "MIT-0", + "MIT Open Group variant": "MIT-open-group", + "MIT testregex Variant": "MIT-testregex", + "MIT Tom Wu Variant": "MIT-Wu", + "MMIXware License": "MMIXware", + "Motosoto License": "Motosoto", + "Mozilla Public License 1.0": "MPL-1.0", + "Mozilla Public License 1.1": "MPL-1.1", + "Mozilla Public License 2.0": "MPL-2.0", + "Mozilla Public License 
2.0 (no copyleft exception)": "MPL-2.0-no-copyleft-exception", + "MPEG Software Simulation": "MPEG-SSG", + "mpi Permissive License": "mpi-permissive", + "mpich2 License": "mpich2", + "mplus Font License": "mplus", + "Mulan Permissive Software License, Version 1": "MulanPSL-1.0", + "Mulan Permissive Software License, Version 2": "MulanPSL-2.0", + "Multics License": "Multics", + "Mup License": "Mup", + "Nara Institute of Science and Technology License (2003)": "NAIST-2003", + "NASA Open Source Agreement 1.3": "NASA-1.3", + "Naumen Public License": "Naumen", + "NCBI Public Domain Notice": "NCBI-PD", + "NCL Source Code License": "NCL", + "Net Boolean Public License v1": "NBPL-1.0", + "NetCDF license": "NetCDF", + "Nethack General Public License": "NGPL", + "Netizen Open Source License": "NOSL", + "Netscape Public License v1.0": "NPL-1.0", + "Netscape Public License v1.1": "NPL-1.1", + "Newsletr License": "Newsletr", + "NICTA Public Software License, Version 1.0": "NICTA-1.0", + "NIST Public Domain Notice": "NIST-PD", + "NIST Public Domain Notice with license fallback": "NIST-PD-fallback", + "NIST Software License": "NIST-Software", + "No Limit Public License": "NLPL", + "Nokia Open Source License": "Nokia", + "Non-Commercial Government Licence": "NCGL-UK-2.0", + "Non-Profit Open Software License 3.0": "NPOSL-3.0", + "Norwegian Licence for Open Government Data (NLOD) 1.0": "NLOD-1.0", + "Norwegian Licence for Open Government Data (NLOD) 2.0": "NLOD-2.0", + "Noweb License": "Noweb", + "NRL License": "NRL", + "NTP License": "NTP", + "NTP No Attribution": "NTP-0", + "OAR License": "OAR", + "OCLC Research Public License 2.0": "OCLC-2.0", + "OFFIS License": "OFFIS", + "OGC Software License, Version 1.0": "OGC-1.0", + "Open CASCADE Technology Public License": "OCCT-PL", + "Open Data Commons Attribution License v1.0": "ODC-By-1.0", + "Open Data Commons Open Database License v1.0": "ODbL-1.0", + "Open Data Commons Public Domain Dedication & License 1.0": "PDDL-1.0", + "Open 
Government Licence - Canada": "OGL-Canada-2.0", + "Open Government Licence v1.0": "OGL-UK-1.0", + "Open Government Licence v2.0": "OGL-UK-2.0", + "Open Government Licence v3.0": "OGL-UK-3.0", + "Open Group Test Suite License": "OGTSL", + "Open LDAP Public License 2.2.2": "OLDAP-2.2.2", + "Open LDAP Public License v1.1": "OLDAP-1.1", + "Open LDAP Public License v1.2": "OLDAP-1.2", + "Open LDAP Public License v1.3": "OLDAP-1.3", + "Open LDAP Public License v1.4": "OLDAP-1.4", + "Open LDAP Public License v2.0 (or possibly 2.0A and 2.0B)": "OLDAP-2.0", + "Open LDAP Public License v2.0.1": "OLDAP-2.0.1", + "Open LDAP Public License v2.1": "OLDAP-2.1", + "Open LDAP Public License v2.2": "OLDAP-2.2", + "Open LDAP Public License v2.2.1": "OLDAP-2.2.1", + "Open LDAP Public License v2.3": "OLDAP-2.3", + "Open LDAP Public License v2.4": "OLDAP-2.4", + "Open LDAP Public License v2.5": "OLDAP-2.5", + "Open LDAP Public License v2.6": "OLDAP-2.6", + "Open LDAP Public License v2.7": "OLDAP-2.7", + "Open LDAP Public License v2.8": "OLDAP-2.8", + "Open Logistics Foundation License Version 1.3": "OLFL-1.3", + "Open Market License": "OML", + "Open Public License v1.0": "OPL-1.0", + "Open Publication License v1.0": "OPUBL-1.0", + "Open Software License 1.0": "OSL-1.0", + "Open Software License 1.1": "OSL-1.1", + "Open Software License 2.0": "OSL-2.0", + "Open Software License 2.1": "OSL-2.1", + "Open Software License 3.0": "OSL-3.0", + "Open Use of Data Agreement v1.0": "O-UDA-1.0", + "OpenPBS v2.3 Software License": "OpenPBS-2.3", + "OpenSSL License": "OpenSSL", + "OpenSSL License - standalone": "OpenSSL-standalone", + "OpenVision License": "OpenVision", + "OSET Public License version 2.1": "OSET-PL-2.1", + "PADL License": "PADL", + "Peer Production License": "PPL", + "PHP License v3.0": "PHP-3.0", + "PHP License v3.01": "PHP-3.01", + "Pixar License": "Pixar", + "pkgconf License": "pkgconf", + "Plexus Classworlds License": "Plexus", + "PNG Reference Library version 2": "libpng-2.0", + 
"pnmstitch License": "pnmstitch", + "PolyForm Noncommercial License 1.0.0": "PolyForm-Noncommercial-1.0.0", + "PolyForm Small Business License 1.0.0": "PolyForm-Small-Business-1.0.0", + "PostgreSQL License": "PostgreSQL", + "psfrag License": "psfrag", + "psutils License": "psutils", + "Python ldap License": "python-ldap", + "Python License 2.0": "Python-2.0", + "Python License 2.0.1": "Python-2.0.1", + "Python Software Foundation License 2.0": "PSF-2.0", + "Q Public License 1.0": "QPL-1.0", + "Q Public License 1.0 - INRIA 2004 variant": "QPL-1.0-INRIA-2004", + "Qhull License": "Qhull", + "radvd License": "radvd", + "Rdisc License": "Rdisc", + "RealNetworks Public Source License v1.0": "RPSL-1.0", + "Reciprocal Public License 1.1": "RPL-1.1", + "Reciprocal Public License 1.5": "RPL-1.5", + "Red Hat eCos Public License v1.1": "RHeCos-1.1", + "Ricoh Source Code Public License": "RSCPL", + "RSA Message-Digest License": "RSA-MD", + "Ruby License": "Ruby", + "Ruby pty extension license": "Ruby-pty", + "Sax Public Domain Notice": "SAX-PD", + "Sax Public Domain Notice 2.0": "SAX-PD-2.0", + "Saxpath License": "Saxpath", + "SCEA Shared Source License": "SCEA", + "Scheme Language Report License": "SchemeReport", + "Scheme Widget Library (SWL) Software License Agreement": "SWL", + "Secure Messaging Protocol Public License": "SMPPL", + "Sendmail License": "Sendmail", + "Sendmail License 8.23": "Sendmail-8.23", + "Server Side Public License, v 1": "SSPL-1.0", + "SGI Free Software License B v1.0": "SGI-B-1.0", + "SGI Free Software License B v1.1": "SGI-B-1.1", + "SGI Free Software License B v2.0": "SGI-B-2.0", + "SGI OpenGL License": "SGI-OpenGL", + "SGP4 Permission Notice": "SGP4", + "SIL Open Font License 1.0": "OFL-1.0", + "SIL Open Font License 1.0 with no Reserved Font Name": "OFL-1.0-no-RFN", + "SIL Open Font License 1.0 with Reserved Font Name": "OFL-1.0-RFN", + "SIL Open Font License 1.1": "OFL-1.1", + "SIL Open Font License 1.1 with no Reserved Font Name": 
"OFL-1.1-no-RFN", + "SIL Open Font License 1.1 with Reserved Font Name": "OFL-1.1-RFN", + "Simple Public License 2.0": "SimPL-2.0", + "SL License": "SL", + "Sleepycat License": "Sleepycat", + "SNIA Public License 1.1": "SNIA", + "snprintf License": "snprintf", + "softSurfer License": "softSurfer", + "Solderpad Hardware License v0.5": "SHL-0.5", + "Solderpad Hardware License, Version 0.51": "SHL-0.51", + "Soundex License": "Soundex", + "Spencer License 86": "Spencer-86", + "Spencer License 94": "Spencer-94", + "Spencer License 99": "Spencer-99", + "SQLite Blessing": "blessing", + "SSH OpenSSH license": "SSH-OpenSSH", + "SSH short notice": "SSH-short", + "ssh-keyscan License": "ssh-keyscan", + "SSLeay License - standalone": "SSLeay-standalone", + "Standard ML of New Jersey License": "SMLNJ", + "SugarCRM Public License v1.1.3": "SugarCRM-1.1.3", + "Sun Industry Standards Source License v1.1": "SISSL", + "Sun Industry Standards Source License v1.2": "SISSL-1.2", + "Sun PPP License": "Sun-PPP", + "Sun PPP License (2000)": "Sun-PPP-2000", + "Sun Public License v1.0": "SPL-1.0", + "SunPro License": "SunPro", + "swrule License": "swrule", + "Sybase Open Watcom Public License 1.0": "Watcom-1.0", + "Symlinks License": "Symlinks", + "Systemics BSD variant license": "BSD-Systemics", + "Systemics W3Works BSD variant license": "BSD-Systemics-W3Works", + "Taiwan Open Government Data License, version 1.0": "OGDL-Taiwan-1.0", + "TAPR Open Hardware License v1.0": "TAPR-OHL-1.0", + "TCL/TK License": "TCL", + "TCP Wrappers License": "TCP-wrappers", + "Technische Universitaet Berlin License 1.0": "TU-Berlin-1.0", + "Technische Universitaet Berlin License 2.0": "TU-Berlin-2.0", + "TermReadKey License": "TermReadKey", + "Text-Tabs+Wrap License": "TTWL", + "The MirOS Licence": "MirOS", + "The Parity Public License 6.0.0": "Parity-6.0.0", + "The Parity Public License 7.0.0": "Parity-7.0.0", + "The Unlicense": "Unlicense", + "THOR Public License 1.0": "TPL-1.0", + "threeparttable License": 
"threeparttable", + "Time::ParseDate License": "TPDL", + "TMate Open Source License": "TMate", + "TORQUE v2.5+ Software License v1.1": "TORQUE-1.1", + "Transitive Grace Period Public Licence 1.0": "TGPPL-1.0", + "Trusster Open Source License": "TOSL", + "TTYP0 License": "TTYP0", + "Ubuntu Font Licence v1.0": "Ubuntu-font-1.0", + "UCAR License": "UCAR", + "ulem License": "ulem", + "Unicode License Agreement - Data Files and Software (2015)": "Unicode-DFS-2015", + "Unicode License Agreement - Data Files and Software (2016)": "Unicode-DFS-2016", + "Unicode License v3": "Unicode-3.0", + "Unicode Terms of Use": "Unicode-TOU", + "United Kingdom Open Parliament Licence v3.0": "OPL-UK-3.0", + "Universal Permissive License v1.0": "UPL-1.0", + "University of Illinois/NCSA Open Source License": "NCSA", + "UnixCrypt License": "UnixCrypt", + "Upstream Compatibility License v1.0": "UCL-1.0", + "Utah Raster Toolkit Run Length Encoded License": "URT-RLE", + "Vim License": "Vim", + "VOSTROM Public License for Open Source": "VOSTROM", + "Vovida Software License v1.0": "VSL-1.0", + "W3C Software Notice and Document License (2015-05-13)": "W3C-20150513", + "W3C Software Notice and License (1998-07-20)": "W3C-19980720", + "W3C Software Notice and License (2002-12-31)": "W3C", + "w3m License": "w3m", + "Widget Workshop License": "Widget-Workshop", + "Wsuipa License": "Wsuipa", + "X.Net License": "Xnet", + "X11 License": "X11", + "X11 License Distribution Modification Variant": "X11-distribute-modifications-variant", + "X11 swapped final paragraphs": "X11-swapped", + "Xdebug License v 1.03": "Xdebug-1.03", + "Xerox License": "Xerox", + "Xfig License": "Xfig", + "XFree86 License 1.1": "XFree86-1.1", + "xinetd License": "xinetd", + "xkeyboard-config Zinoviev License": "xkeyboard-config-Zinoviev", + "xlock License": "xlock", + "XPP License": "xpp", + "XSkat License": "XSkat", + "xzoom License": "xzoom", + "Yahoo! Public License v1.0": "YPL-1.0", + "Yahoo!
import json
import re
import sys

import pandas as pd


class SPDXLicenseMatcher:
    """Map license names found in an Excel sheet to SPDX identifiers.

    The Excel sheet is loaded into ``self.df`` and the JSON file (a flat
    ``{"license name": "SPDX id"}`` object) into ``self.spdx_mapping``, keyed
    by the normalized license name.
    """

    # Everything except ASCII letters, digits and spaces is dropped when a
    # license name is normalized.
    _NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9 ]')

    def __init__(self, input_excel_path, input_json_path):
        """Load the Excel sheet and the SPDX JSON mapping.

        Args:
            input_excel_path: Path of the Excel file read with ``pd.read_excel``.
            input_json_path: Path of the JSON file with the name -> id mapping.
        """
        self.df = pd.read_excel(input_excel_path)
        self.spdx_mapping = self._load_spdx_data(input_json_path)

    @staticmethod
    def _load_spdx_data(json_path):
        """Return the JSON mapping re-keyed by normalized license name.

        Keys go through the same normalizer as the looked-up names so both
        sides of the comparison always agree.
        """
        with open(json_path, 'r', encoding='utf-8') as f:
            spdx_data = json.load(f)
        normalize = SPDXLicenseMatcher._normalize_license_name
        return {normalize(key): value for key, value in spdx_data.items()}

    @staticmethod
    def _normalize_license_name(name):
        """Return *name* lower-cased, punctuation-free and whitespace-collapsed.

        Removing punctuation can leave doubled or leading/trailing spaces
        (e.g. ``"A - B"`` or ``" MIT License"``); collapsing them lets such
        names still hit the exact-match path.
        """
        cleaned = SPDXLicenseMatcher._NON_ALNUM_RE.sub('', name).lower()
        return ' '.join(cleaned.split())

    def copy_url_column(self):
        """Copy the ``cc_url`` column into a new ``match_url`` column."""
        self.df['match_url'] = self.df['cc_url']

    def match_license_column(self):
        """Fill ``match_license`` by mapping every ``spdx_fixed_license_name`` cell."""
        self.df['match_license'] = self.df['spdx_fixed_license_name'].apply(self._map_license)

    def _map_license(self, license_names):
        """Map a ``;``-separated list of license names to SPDX identifiers.

        Args:
            license_names: Cell value; a string of one or more names separated
                by ``;``. Non-string values (empty Excel cells are read as
                NaN) are treated as having no license.

        Returns:
            The matched identifiers joined by ``;`` in input order, or
            ``"No Match"`` when nothing matched.
        """
        if not isinstance(license_names, str):
            return "No Match"

        matched_licenses = []
        for name in license_names.split(';'):
            match = self._find_license_match(self._normalize_license_name(name))
            if match:
                matched_licenses.append(match)
        return ';'.join(matched_licenses) if matched_licenses else "No Match"

    def _find_license_match(self, key):
        """Return the SPDX identifier for a normalized *key*, or ``None``.

        An exact lookup is tried first. Otherwise the first mapping entry
        whose normalized name contains every word of *key* (as a substring)
        is returned.
        """
        words = key.split()
        # all() over an empty sequence is True, so an empty key would
        # otherwise "match" the first entry of the table.
        if not words:
            return None

        if key in self.spdx_mapping:
            return self.spdx_mapping[key]

        for spdx_key, spdx_id in self.spdx_mapping.items():
            if all(word in spdx_key for word in words):
                return spdx_id
        return None

    def save_to_excel(self, output_excel_path):
        """Write ``self.df`` to *output_excel_path* without the index column."""
        self.df.to_excel(output_excel_path, index=False)
        print(f"[INFO] Final processed results saved to {output_excel_path}")


def main(input_excel_path, input_json_path, output_excel_path):
    """Run the license matching and write the result to *output_excel_path*."""
    matcher = SPDXLicenseMatcher(input_excel_path, input_json_path)
    # NOTE(review): copy_url_column() is not invoked here, so the output has
    # no match_url column; confirm whether that is intended.
    matcher.match_license_column()
    matcher.save_to_excel(output_excel_path)


if __name__ == "__main__":
    if len(sys.argv) < 4:
        sys.exit(f"Usage: python {sys.argv[0]} <input_excel> <spdx_json> <output_excel>")
    main(sys.argv[1], sys.argv[2], sys.argv[3])
import json
import os
import unittest

import pandas as pd

from src.spdx_license_matcher import SPDXLicenseMatcher


class TestSPDXLicenseMatcher(unittest.TestCase):
    """Exercise loading, URL copying, license matching and saving."""

    @classmethod
    def setUpClass(cls):
        # Create the temporary fixture files shared by all tests.
        cls.test_excel_path = 'test_oh_spdx_license_match.xlsx'
        cls.test_json_path = 'test_spdx.json'
        cls.output_excel_path = 'test_output.xlsx'

        # Excel fixture, including a semicolon-separated license list.
        df = pd.DataFrame({
            'cc_url': ['https://example.com/license1', 'https://example.com/license2'],
            'spdx_fixed_license_name': ['Apache License 2.0', 'Creative Commons Attribution 4.0 International;MIT License']
        })
        df.to_excel(cls.test_excel_path, index=False)

        # JSON fixture: license name -> SPDX identifier.
        spdx_data = {
            "Apache License 2.0": "Apache-2.0",
            "Creative Commons Attribution 4.0 International": "CC-BY-4.0",
            "MIT License": "MIT"
        }
        with open(cls.test_json_path, 'w', encoding='utf-8') as f:
            json.dump(spdx_data, f)

    @classmethod
    def tearDownClass(cls):
        # Remove the temporary files. The output file only exists when
        # test_save_to_excel ran and succeeded, so check before deleting to
        # keep cleanup from raising and masking the real test result.
        for path in (cls.test_excel_path, cls.test_json_path, cls.output_excel_path):
            if os.path.exists(path):
                os.remove(path)

    def setUp(self):
        # Fresh matcher per test so added columns do not leak between tests.
        self.matcher = SPDXLicenseMatcher(self.test_excel_path, self.test_json_path)

    def test_load_data(self):
        # Both inputs must be loaded.
        self.assertIsNotNone(self.matcher.df)
        self.assertGreater(len(self.matcher.spdx_mapping), 0)

    def test_copy_url_column(self):
        # cc_url is copied into match_url.
        self.matcher.copy_url_column()
        self.assertIn('match_url', self.matcher.df.columns)
        self.assertEqual(self.matcher.df['match_url'][0], 'https://example.com/license1')

    def test_match_license_column(self):
        # License matching, including a semicolon-separated list.
        self.matcher.match_license_column()
        self.assertIn('match_license', self.matcher.df.columns)
        self.assertEqual(self.matcher.df['match_license'][0], 'Apache-2.0')
        self.assertEqual(self.matcher.df['match_license'][1], 'CC-BY-4.0;MIT')

    def test_save_to_excel(self):
        # The processed frame is written to disk and can be read back.
        self.matcher.copy_url_column()
        self.matcher.match_license_column()
        self.matcher.save_to_excel(self.output_excel_path)
        self.assertTrue(os.path.exists(self.output_excel_path))
        df_saved = pd.read_excel(self.output_excel_path)
        self.assertIn('match_license', df_saved.columns)
        self.assertEqual(df_saved['match_license'][0], 'Apache-2.0')
        self.assertEqual(df_saved['match_license'][1], 'CC-BY-4.0;MIT')


if __name__ == '__main__':
    unittest.main()