Backed out 2 changesets (bug 1516337) for causing Windows AArch64 build bustages.

Backed out changeset c5e856d5edba (bug 1516337)
Backed out changeset d4dff8b8974e (bug 1516337)
Cosmin Sabou 2019-02-23 22:48:53 +02:00
parent bb77a3853c
commit 1e2976b1e9
371 changed files with 142214 additions and 62399 deletions

.gitignore (vendored, 2 lines changed)

@@ -157,3 +157,5 @@ lextab.py
!.vscode/extensions.json
!.vscode/tasks.json
# Ignore file generated by lalrpop at build time.
third_party/rust/lalrpop/src/parser/lrgrammar.rs

.hgignore

@@ -188,5 +188,8 @@ tps_result\.json
^testing/raptor/raptor/tests/.*.json
^testing/raptor/webext/raptor/auto_gen_test_config.js
# Ignore file generated by lalrpop at build time.
^third_party/rust/lalrpop/src/parser/lrgrammar.rs
# Ignore the build directories of WebRender standalone builds.
gfx/wr/target/

Cargo.lock (generated, 134 lines changed)

@@ -229,7 +229,7 @@ dependencies = [
"peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro2 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"which 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -276,6 +276,15 @@ dependencies = [
"constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "block-buffer"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"arrayref 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "block-buffer"
version = "0.7.0"
@@ -305,6 +314,11 @@ name = "build_const"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byte-tools"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byte-tools"
version = "0.3.0"
@@ -758,6 +772,14 @@ name = "diff"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "digest"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"generic-array 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "digest"
version = "0.8.0"
@@ -778,14 +800,14 @@ dependencies = [
[[package]]
name = "docopt"
version = "1.0.2"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.80 (git+https://github.com/servo/serde?branch=deserialize_from_enums9)",
"strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -827,7 +849,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ena"
version = "0.10.1"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1024,7 +1046,7 @@ dependencies = [
"mozprofile 0.5.0",
"mozrunner 0.9.0",
"mozversion 0.2.0",
"regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.80 (git+https://github.com/servo/serde?branch=deserialize_from_enums9)",
"serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1053,6 +1075,14 @@ dependencies = [
"style_traits 0.0.1",
]
[[package]]
name = "generic-array"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "generic-array"
version = "0.12.0"
@@ -1332,23 +1362,45 @@ dependencies = [
[[package]]
name = "lalrpop"
version = "0.16.2"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ascii-canvas 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"bit-set 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"diff 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"docopt 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"ena 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
"digest 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
"docopt 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)",
"ena 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
"lalrpop-util 0.16.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lalrpop-snap 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lalrpop-util 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.80 (git+https://github.com/servo/serde?branch=deserialize_from_enums9)",
"sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lalrpop-snap"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ascii-canvas 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"bit-set 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"diff 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"ena 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
"lalrpop-util 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
"string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1356,7 +1408,7 @@ dependencies = [
[[package]]
name = "lalrpop-util"
version = "0.16.2"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
@@ -1665,7 +1717,7 @@ dependencies = [
name = "mozversion"
version = "0.2.0"
dependencies = [
"regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rust-ini 0.10.3 (registry+https://github.com/rust-lang/crates.io-index)",
"semver 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -2102,19 +2154,19 @@ dependencies = [
"aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex"
version = "1.0.3"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -2328,6 +2380,17 @@ dependencies = [
"stable_deref_trait 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "sha2"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"block-buffer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"digest 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
"fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "sha2"
version = "0.8.0"
@@ -2428,6 +2491,11 @@ name = "string_cache_shared"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "strsim"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "strsim"
version = "0.7.0"
@@ -2468,7 +2536,7 @@ dependencies = [
"parking_lot 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
"precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"selectors 0.21.0",
"servo_arc 0.1.1",
"smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2523,7 +2591,7 @@ dependencies = [
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"malloc_size_of 0.0.1",
"num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"selectors 0.21.0",
"size_of_test 0.0.1",
"smallvec 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2651,10 +2719,11 @@ dependencies = [
[[package]]
name = "thread_local"
version = "0.3.6"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -2990,7 +3059,7 @@ dependencies = [
"hyper 0.12.7 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.80 (git+https://github.com/servo/serde?branch=deserialize_from_enums9)",
"serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -3005,8 +3074,8 @@ name = "webidl"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lalrpop 0.16.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lalrpop-util 0.16.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lalrpop 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lalrpop-util 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -3241,10 +3310,12 @@ dependencies = [
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum bitreader 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "80b13e2ab064ff3aa0bdbf1eff533f9822dc37899821f5f98c67f263eab51707"
"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
"checksum block-buffer 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a076c298b9ecdb530ed9d967e74a6027d6a7478924520acddcddc24c1c8ab3ab"
"checksum block-buffer 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "49665c62e0e700857531fa5d3763e91b539ff1abeebd56808d378b495870d60d"
"checksum block-padding 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4fc4358306e344bf9775d0197fd00d2603e5afb0771bb353538630f022068ea3"
"checksum boxfnonce 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8380105befe91099e6f69206164072c05bc92427ff6aa8a5171388317346dd75"
"checksum build_const 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e90dc84f5e62d2ebe7676b83c22d33b6db8bd27340fb6ffbff0a364efa0cb9c9"
"checksum byte-tools 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "560c32574a12a89ecd91f5e742165893f86e3ab98d21f8ea548658eb9eef5f40"
"checksum byte-tools 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "980479e6fde23246dfb54d47580d66b4e99202e7579c5eaa9fe10ecb5ebd2182"
"checksum byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "94f88df23a25417badc922ab0f5716cc1330e87f71ddd9203b3a3ccd9cedf75d"
"checksum bytes 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e178b8e0e239e844b083d5a0d4a156b2654e67f9f80144d48398fcd736a24fb8"
@@ -3294,14 +3365,15 @@ dependencies = [
"checksum derive_more 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3f57d78cf3bd45270dad4e70c21ec77a960b36c7a841ff9db76aaa775a8fb871"
"checksum devd-rs 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e7c9ac481c38baf400d3b732e4a06850dfaa491d1b6379a249d9d40d14c2434c"
"checksum diff 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "3c2b69f912779fbb121ceb775d74d51e915af17aaebc38d28a592843a2dd0a3a"
"checksum digest 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "03b072242a8cbaf9c145665af9d250c59af3b958f83ed6824e13533cf76d5b90"
"checksum digest 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "05f47366984d3ad862010e22c7ce81a7dbcaebbdfb37241a620f8b6596ee135c"
"checksum dirs 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "88972de891f6118092b643d85a0b28e0678e0f948d7f879aa32f2d5aafe97d2a"
"checksum docopt 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "db2906c2579b5b7207fc1e328796a9a8835dc44e22dbe8e460b1d636f9a7b225"
"checksum docopt 0.8.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d8acd393692c503b168471874953a2531df0e9ab77d0b6bbc582395743300a4a"
"checksum dtoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "09c3753c3db574d215cba4ea76018483895d7bff25a31b49ba45db21c48e50ab"
"checksum dtoa-short 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "068d4026697c1a18f0b0bb8cfcad1b0c151b90d8edb9bf4c235ad68128920d1d"
"checksum dwrote 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c31c624339dab99c223a4b26c2e803b7c248adaca91549ce654c76f39a03f5c8"
"checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a"
"checksum ena 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "25b4e5febb25f08c49f1b07dc33a182729a6b21edfb562b5aef95f78e0dbe5bb"
"checksum ena 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "88dc8393b3c7352f94092497f6b52019643e493b6b890eb417cdb7c46117e621"
"checksum encoding_c 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "769ecb8b33323998e482b218c0d13cd64c267609023b4b7ec3ee740714c318ee"
"checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"
"checksum env_logger 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0561146661ae44c579e993456bc76d11ce1e0c7d745e57b2fa7146b6e49fa2ad"
@@ -3324,6 +3396,7 @@ dependencies = [
"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
"checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb"
"checksum generic-array 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3c0f28c2f5bfb5960175af447a2da7c18900693738343dc896ffbcabd9839592"
"checksum generic-array 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ef25c5683767570c2bbd7deba372926a55eaae9982d7726ee2a1050239d45b9d"
"checksum gl_generator 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a0ffaf173cf76c73a73e080366bf556b4776ece104b06961766ff11449f38604"
"checksum gleam 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "4b47f5b15742aee359c7895ab98cf2cceecc89bb4feb6f4e42f802d7899877da"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
@@ -3341,8 +3414,9 @@ dependencies = [
"checksum itoa 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c069bbec61e1ca5a596166e55dfe4773ff745c3d16b700013bcaff9a6df2c682"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum khronos_api 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "62237e6d326bd5871cd21469323bf096de81f1618cd82cbaf5d87825335aeb49"
"checksum lalrpop 0.16.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02888049e197dff0c5c9fd503bd2458ea373c5e845c2f5460db1f9e43050d55e"
"checksum lalrpop-util 0.16.2 (registry+https://github.com/rust-lang/crates.io-index)" = "488da0d45c65af229321623c62660627d02b0e7fbc768a4c3fcd121815404ef1"
"checksum lalrpop 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9f7014afd5642680074fd5dcc624d544f9eabfa281cba2c3ac56c3db6d21ad1b"
"checksum lalrpop-snap 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0b85aa455529344133d7ecaaac04c01ed87f459deeaa0fe5422885e2095d8cdc"
"checksum lalrpop-util 0.16.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2400aeebcd11259370d038c24821b93218dd2f33a53f53e9c8fcccca70be6696"
"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1"
"checksum lazycell 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ce12306c4739d86ee97c23139f3a34ddf0387bbf181bc7929d287025a8c3ef6b"
"checksum lazycell 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a6f08839bc70ef4a3fe1d566d5350f519c5912ea86be0df1740a7d247c7fc0ef"
@@ -3414,7 +3488,7 @@ dependencies = [
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
"checksum redox_users 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "214a97e49be64fd2c86f568dd0cb2c757d2cc53de95b273b6ad0a1c908482f26"
"checksum regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b"
"checksum regex 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3d8c9f33201f46669484bacc312b00e7541bed6aaf296dffe2bb4e0ac6b8ce2a"
"checksum regex 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "75ecf88252dce580404a22444fc7d626c01815debba56a7f4f536772a5ff19d3"
"checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db"
"checksum regex-syntax 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8f1ac0f60d675cc6cf13a20ec076568254472551051ad5dd050364d70671bf6b"
"checksum rkv 0.9.3 (registry+https://github.com/rust-lang/crates.io-index)" = "becd7f5278be3b97250a8035455116f9fc63f5fc68cc8293213051d7d751c373"
@@ -3437,6 +3511,7 @@ dependencies = [
"checksum serde_bytes 0.10.4 (registry+https://github.com/rust-lang/crates.io-index)" = "adb6e51a6b3696b301bc221d785f898b4457c619b51d7ce195a6d20baecb37b3"
"checksum serde_derive 1.0.80 (git+https://github.com/servo/serde?branch=deserialize_from_enums9)" = "<none>"
"checksum serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "44dd2cfde475037451fa99b7e5df77aa3cfd1536575fa8e7a538ab36dcde49ae"
"checksum sha2 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9eb6be24e4c23a84d7184280d2722f7f2731fcdd4a9d886efbfe4413e4847ea0"
"checksum sha2 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7b4d8bfd0e469f417657573d8451fb33d16cfe0989359b93baf3a1ffc639543d"
"checksum simd 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0048b17eb9577ac545c61d85c3559b41dfb4cbea41c9bd9ca6a4f73ff05fda84"
"checksum siphasher 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2ffc669b726f2bc9a3bcff66e5e23b56ba6bf70e22a34c3d7b6d0b3450b65b84"
@@ -3449,6 +3524,7 @@ dependencies = [
"checksum string_cache 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "25d70109977172b127fe834e5449e5ab1740b9ba49fa18a2020f509174f25423"
"checksum string_cache_codegen 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1eea1eee654ef80933142157fdad9dd8bc43cf7c74e999e369263496f04ff4da"
"checksum string_cache_shared 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b1884d1bc09741d466d9b14e6d37ac89d6909cbcac41dd9ae982d4d063bbedfc"
"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694"
"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"
"checksum syn 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)" = "91b52877572087400e83d24b9178488541e3d535259e04ff17a63df1e5ceff59"
"checksum syn 0.14.6 (registry+https://github.com/rust-lang/crates.io-index)" = "4e4b5274d4a0a3d2749d5c158dc64d3403e60554dc61194648787ada5212473d"
@@ -3463,7 +3539,7 @@ dependencies = [
"checksum textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c0b59b6b4b44d867f1370ef1bd91bfb262bf07bf0ae65c202ea2fbc16153b693"
"checksum thin-slice 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c"
"checksum thin-vec 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "73fdf4b84c65a85168477b7fb6c498e0716bc9487fba24623389ea7f51708044"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279ef31c19ededf577bfd12dfae728040a21f635b06a24cd670ff510edd38963"
"checksum thread_profiler 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf947d192a9be60ef5131cc7a4648886ba89d712f16700ebbf80c8a69d05d48f"
"checksum threadbound 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d23e87ee7a1ef5bd2d38cef24ff360f6e02beee13c6a7eb64dddde4a3da427a3"
"checksum time 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "d825be0eb33fda1a7e68012d51e9c7f451dc1a69391e7fdc197060bb8c56667b"

block-buffer 0.3.3: .cargo-checksum.json (new file)

@@ -0,0 +1 @@
{"files":{"Cargo.toml":"373908618d7bdf561f84ddc5add92f69dab295c97ab0908d3a4ec428fad23bad","LICENSE-APACHE":"a9040321c3712d8fd0b09cf52b17445de04a23a10165049ae187cd39e5c86be5","LICENSE-MIT":"9e0dfd2dd4173a530e238cb6adb37aa78c34c6bc7444e0e10c1ab5d8881f63ba","src/lib.rs":"bdf23c8a00fb4d51beabeb6600fe45ebf1be618632db885013b6f60a5666c124","src/paddings.rs":"7a18850dab9dca0a3e6cc49d6a94a9566ea2473628f42f726a69f8e07f95872a"},"package":"a076c298b9ecdb530ed9d967e74a6027d6a7478924520acddcddc24c1c8ab3ab"}

block-buffer 0.3.3: Cargo.toml (new file)

@@ -0,0 +1,27 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g. crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "block-buffer"
version = "0.3.3"
authors = ["RustCrypto Developers"]
description = "Fixed size buffer for block processing of data"
documentation = "https://docs.rs/block-buffer"
keywords = ["block", "padding", "pkcs7", "ansix923", "iso7816"]
categories = ["cryptography", "no-std"]
license = "MIT/Apache-2.0"
repository = "https://github.com/RustCrypto/utils"
[dependencies.arrayref]
version = "0.3"
[dependencies.byte-tools]
version = "0.2"

block-buffer 0.3.3: LICENSE-APACHE (new file)

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

block-buffer 0.3.3: LICENSE-MIT (new file)

@@ -0,0 +1,25 @@
Copyright (c) 2017 Artyom Pavlov
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

block-buffer 0.3.3: src/lib.rs (new file)

@@ -0,0 +1,144 @@
#![no_std]
#[macro_use]
extern crate arrayref;
extern crate byte_tools;

use byte_tools::{zero, write_u64_le};

mod paddings;

pub use paddings::*;

macro_rules! impl_buffer {
    ($name:ident, $len:expr) => {
        pub struct $name {
            buffer: [u8; $len],
            pos: usize,
        }

        impl Copy for $name {}

        impl Clone for $name {
            fn clone(&self) -> Self {
                *self
            }
        }

        impl Default for $name {
            fn default() -> Self {
                $name { buffer: [0; $len], pos: 0 }
            }
        }

        impl $name {
            #[inline]
            pub fn input<F: FnMut(&[u8; $len])>(&mut self, mut input: &[u8], mut func: F) {
                // If there is already data in the buffer, copy as much as we can
                // into it and process the data if the buffer becomes full.
                if self.pos != 0 {
                    let rem = self.remaining();
                    if input.len() >= rem {
                        let (l, r) = input.split_at(rem);
                        input = r;
                        self.buffer[self.pos..].copy_from_slice(l);
                        self.pos = 0;
                        func(&self.buffer);
                    } else {
                        let end = self.pos + input.len();
                        self.buffer[self.pos..end].copy_from_slice(input);
                        self.pos = end;
                        return;
                    }
                }

                // While we have at least a full buffer-size chunk's worth of data,
                // process that data without copying it into the buffer.
                while input.len() >= self.size() {
                    let (l, r) = input.split_at(self.size());
                    input = r;
                    func(array_ref!(l, 0, $len));
                }

                // Copy any remaining input data into the buffer. At this point
                // the amount of data left in the input slice is less than the
                // buffer size and the buffer is empty.
                self.buffer[..input.len()].copy_from_slice(input);
                self.pos = input.len();
            }

            #[inline]
            fn digest_pad<F>(&mut self, up_to: usize, func: &mut F)
                where F: FnMut(&[u8; $len])
            {
                self.buffer[self.pos] = 0x80;
                self.pos += 1;
                zero(&mut self.buffer[self.pos..]);
                if self.remaining() < up_to {
                    func(&self.buffer);
                    zero(&mut self.buffer[..self.pos]);
                }
            }

            /// Will pad message with message length in big-endian format
            #[inline]
            pub fn len_padding<F>(&mut self, data_len: u64, mut func: F)
                where F: FnMut(&[u8; $len])
            {
                self.digest_pad(8, &mut func);
                let s = self.size();
                write_u64_le(&mut self.buffer[s-8..], data_len);
                func(&self.buffer);
                self.pos = 0;
            }

            #[inline]
            pub fn len_padding_u128<F>(&mut self, hi: u64, lo: u64, mut func: F)
                where F: FnMut(&[u8; $len])
            {
                self.digest_pad(16, &mut func);
                let s = self.size();
                write_u64_le(&mut self.buffer[s-16..s-8], hi);
                write_u64_le(&mut self.buffer[s-8..], lo);
                func(&self.buffer);
                self.pos = 0;
            }

            #[inline]
            pub fn pad_with<P: Padding>(&mut self) -> &mut [u8; $len] {
                P::pad(&mut self.buffer[..], self.pos);
                self.pos = 0;
                &mut self.buffer
            }

            #[inline]
            pub fn size(&self) -> usize {
                $len
            }

            #[inline]
            pub fn position(&self) -> usize {
                self.pos
            }

            #[inline]
            pub fn remaining(&self) -> usize {
                self.size() - self.pos
            }
        }
    }
}

impl_buffer!(BlockBuffer128, 16);
impl_buffer!(BlockBuffer256, 32);
impl_buffer!(BlockBuffer512, 64);
impl_buffer!(BlockBuffer1024, 128);
impl_buffer!(BlockBuffer576, 72);
impl_buffer!(BlockBuffer832, 104);
impl_buffer!(BlockBuffer1088, 136);
impl_buffer!(BlockBuffer1152, 144);
impl_buffer!(BlockBuffer1344, 168);
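
The `input`/`len_padding` pair above is the crate's whole streaming interface. As an illustration (a sketch, not part of the vendored file), a caller can count the 64-byte blocks a `BlockBuffer512` hands back, including the final padded block:

fn count_blocks(data: &[u8]) -> u64 {
    let mut buf = BlockBuffer512::default();
    let mut blocks = 0u64;
    // `input` invokes the closure once per complete 64-byte block.
    buf.input(data, |_block: &[u8; 64]| blocks += 1);
    // `len_padding` appends 0x80, zero fill, and the encoded bit length,
    // then flushes one final block (two if the length did not fit).
    buf.len_padding((data.len() as u64) * 8, |_block: &[u8; 64]| blocks += 1);
    blocks
}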

block-buffer 0.3.3: src/paddings.rs (new file)

@@ -0,0 +1,129 @@
use byte_tools::{zero, set};

/// Trait for padding messages divided into blocks
pub trait Padding {
    /// Pads `block`, which is filled with data up to `pos`
    fn pad(block: &mut [u8], pos: usize);
}

#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// Error indicating a failed unpadding process
pub struct UnpadError;

/// Trait for extracting the original message from a padded medium
pub trait Unpadding {
    /// Unpads the given `data` by truncating it according to the padding used.
    /// Returns `UnpadError` if the padding is malformed.
    fn unpad(data: &[u8]) -> Result<&[u8], UnpadError>;
}

#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum ZeroPadding {}

impl Padding for ZeroPadding {
    #[inline]
    fn pad(block: &mut [u8], pos: usize) {
        zero(&mut block[pos..])
    }
}

impl Unpadding for ZeroPadding {
    #[inline]
    fn unpad(data: &[u8]) -> Result<&[u8], UnpadError> {
        let mut n = data.len() - 1;
        while n != 0 {
            if data[n] != 0 {
                break;
            }
            n -= 1;
        }
        Ok(&data[..n+1])
    }
}

#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Pkcs7 {}

impl Padding for Pkcs7 {
    #[inline]
    fn pad(block: &mut [u8], pos: usize) {
        let n = block.len() - pos;
        set(&mut block[pos..], n as u8);
    }
}

impl Unpadding for Pkcs7 {
    #[inline]
    fn unpad(data: &[u8]) -> Result<&[u8], UnpadError> {
        if data.is_empty() { return Err(UnpadError); }
        let l = data.len();
        let n = data[l-1];
        // A pad length of zero, or one longer than the data, is malformed.
        if n == 0 || n as usize > l {
            return Err(UnpadError);
        }
        for v in &data[l - n as usize..l-1] {
            if *v != n { return Err(UnpadError); }
        }
        Ok(&data[..l - n as usize])
    }
}

#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum AnsiX923 {}

impl Padding for AnsiX923 {
    #[inline]
    fn pad(block: &mut [u8], pos: usize) {
        let n = block.len() - 1;
        zero(&mut block[pos..n]);
        // The final byte holds the total pad length, length byte included.
        block[n] = (block.len() - pos) as u8;
    }
}

impl Unpadding for AnsiX923 {
    #[inline]
    fn unpad(data: &[u8]) -> Result<&[u8], UnpadError> {
        if data.is_empty() { return Err(UnpadError); }
        let l = data.len();
        let n = data[l-1] as usize;
        // A pad length of zero, or one longer than the data, is malformed.
        if n == 0 || n > l {
            return Err(UnpadError);
        }
        for v in &data[l-n..l-1] {
            if *v != 0 { return Err(UnpadError); }
        }
        Ok(&data[..l-n])
    }
}

#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Iso7816 {}

impl Padding for Iso7816 {
    #[inline]
    fn pad(block: &mut [u8], pos: usize) {
        // ISO 7816-4: a single 0x80 marker byte followed by zeros,
        // matching what `unpad` below scans for.
        block[pos] = 0x80;
        zero(&mut block[pos+1..]);
    }
}

impl Unpadding for Iso7816 {
    fn unpad(data: &[u8]) -> Result<&[u8], UnpadError> {
        if data.is_empty() { return Err(UnpadError); }
        let mut n = data.len() - 1;
        while n != 0 {
            if data[n] != 0 {
                break;
            }
            n -= 1;
        }
        if data[n] != 0x80 { return Err(UnpadError); }
        Ok(&data[..n])
    }
}
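
For concreteness, a `Pkcs7` roundtrip through the trait methods above (an illustrative test, not part of the commit):

#[test]
fn pkcs7_roundtrip() {
    // Pad a 16-byte block holding 12 bytes of data, then recover the message.
    let mut block = [0u8; 16];
    block[..12].copy_from_slice(b"hello world!");
    Pkcs7::pad(&mut block, 12);
    assert_eq!(&block[12..], &[4, 4, 4, 4]); // pad length 16 - 12 = 4
    assert_eq!(Pkcs7::unpad(&block).unwrap(), b"hello world!");
}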

byte-tools 0.2.0: .cargo-checksum.json (new file)

@@ -0,0 +1 @@
{"files":{"Cargo.toml":"af6af6ea1dfa296af5dc58986d1afb46952328588069ec0b08723db439e9972d","LICENSE-APACHE":"a9040321c3712d8fd0b09cf52b17445de04a23a10165049ae187cd39e5c86be5","LICENSE-MIT":"52232c2cee3bb7d8cabe47ef367f1bf8bb607c22bdfca0219d6156cb7f446e9d","src/lib.rs":"9c96cffef7458fc7bd9e4e61270b69d539ff3a9225a0319b7996155c25ff96ab","src/read_single.rs":"3ab78b15754c2a7848a1be871ff6ee2a31a099f8f4f89be44ad210cda0dbcc9a","src/read_slice.rs":"b3790f2fd080db97e239c05c63da123ea375fb9b354dc9cacb859ed9c44f552e","src/write_single.rs":"1cee4f2f5d8690e47840ea7017539ead417a26abc0717137442a6d9d2875afe4","src/write_slice.rs":"de90e6b9cfca67125871bee7cef55c63574b1871a6584e51fc00a97e5877fe69"},"package":"560c32574a12a89ecd91f5e742165893f86e3ab98d21f8ea548658eb9eef5f40"}

byte-tools 0.2.0: Cargo.toml (new file)

@@ -0,0 +1,21 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g. crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "byte-tools"
version = "0.2.0"
authors = ["The Rust-Crypto Project Developers"]
description = "Utility functions for working with bytes"
documentation = "https://docs.rs/byte-tools"
keywords = ["bytes"]
license = "MIT/Apache-2.0"
repository = "https://github.com/RustCrypto/utils"

byte-tools 0.2.0: LICENSE-APACHE (new file)

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

byte-tools 0.2.0: LICENSE-MIT (new file)

@@ -0,0 +1,26 @@
Copyright (c) 2006-2009 Graydon Hoare
Copyright (c) 2009-2013 Mozilla Foundation
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

byte-tools 0.2.0: src/lib.rs (new file)

@@ -0,0 +1,37 @@
#![no_std]

use core::ptr;

mod read_single;
mod write_single;
mod read_slice;
mod write_slice;

pub use read_single::*;
pub use write_single::*;
pub use read_slice::*;
pub use write_slice::*;

/// Copy bytes from src to dest
#[inline]
pub fn copy_memory(src: &[u8], dst: &mut [u8]) {
    assert!(dst.len() >= src.len());
    unsafe {
        let srcp = src.as_ptr();
        let dstp = dst.as_mut_ptr();
        ptr::copy_nonoverlapping(srcp, dstp, src.len());
    }
}

/// Zero all bytes in dst
#[inline]
pub fn zero(dst: &mut [u8]) {
    set(dst, 0);
}

/// Sets all bytes in `dst` equal to `value`
#[inline]
pub fn set(dst: &mut [u8], value: u8) {
    unsafe {
        ptr::write_bytes(dst.as_mut_ptr(), value, dst.len());
    }
}
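
A short sketch (not from the diff) of the three helpers in action:

#[test]
fn memory_helpers() {
    let mut dst = [0u8; 4];
    copy_memory(&[1, 2, 3], &mut dst); // dst = [1, 2, 3, 0]
    set(&mut dst[3..], 0xff);          // dst = [1, 2, 3, 255]
    assert_eq!(dst, [1, 2, 3, 0xff]);
    zero(&mut dst);
    assert_eq!(dst, [0u8; 4]);
}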

byte-tools 0.2.0: src/read_single.rs (new file)

@@ -0,0 +1,38 @@
use core::{mem, ptr};
macro_rules! read_single {
($src:expr, $size:expr, $ty:ty, $which:ident) => ({
assert!($size == mem::size_of::<$ty>());
assert!($size == $src.len());
unsafe {
let mut tmp: $ty = mem::uninitialized();
let p = &mut tmp as *mut _ as *mut u8;
ptr::copy_nonoverlapping($src.as_ptr(), p, $size);
tmp.$which()
}
});
}
/// Read the value of a vector of bytes as a u32 value in little-endian format.
#[inline]
pub fn read_u32_le(src: &[u8]) -> u32 {
read_single!(src, 4, u32, to_le)
}
/// Read the value of a vector of bytes as a u32 value in big-endian format.
#[inline]
pub fn read_u32_be(src: &[u8]) -> u32 {
read_single!(src, 4, u32, to_be)
}
/// Read the value of a vector of bytes as a u64 value in little-endian format.
#[inline]
pub fn read_u64_le(src: &[u8]) -> u64 {
read_single!(src, 8, u64, to_le)
}
/// Read the value of a vector of bytes as a u64 value in big-endian format.
#[inline]
pub fn read_u64_be(src: &[u8]) -> u64 {
read_single!(src, 8, u64, to_be)
}
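As a hedged sanity check on the endianness contract of these readers:

```rust
fn demo() {
    let bytes = [0u8, 0, 0, 1];
    assert_eq!(read_u32_be(&bytes), 1);           // big-endian: most significant byte first
    assert_eq!(read_u32_le(&bytes), 0x0100_0000); // little-endian: least significant byte first
}
```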

View File

@ -0,0 +1,44 @@
use core::ptr;
macro_rules! read_slice {
($src:expr, $dst:expr, $size:expr, $which:ident) => ({
assert_eq!($size*$dst.len(), $src.len());
unsafe {
ptr::copy_nonoverlapping(
$src.as_ptr(),
$dst.as_mut_ptr() as *mut u8,
$src.len());
}
for v in $dst.iter_mut() {
*v = v.$which();
}
});
}
/// Read a vector of bytes into a vector of u32s. The values are read in
/// little-endian format.
#[inline]
pub fn read_u32v_le(dst: &mut [u32], src: &[u8]) {
read_slice!(src, dst, 4, to_le);
}
/// Read a vector of bytes into a vector of u32s. The values are read in
/// big-endian format.
#[inline]
pub fn read_u32v_be(dst: &mut [u32], src: &[u8]) {
read_slice!(src, dst, 4, to_be);
}
/// Read a vector of bytes into a vector of u64s. The values are read in
/// little-endian format.
#[inline]
pub fn read_u64v_le(dst: &mut [u64], src: &[u8]) {
read_slice!(src, dst, 8, to_le);
}
/// Read a vector of bytes into a vector of u64s. The values are read in
/// big-endian format.
#[inline]
pub fn read_u64v_be(dst: &mut [u64], src: &[u8]) {
read_slice!(src, dst, 8, to_be);
}
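A hedged sketch of the slice readers; note the size contract the macro asserts (element size times `dst.len()` must equal `src.len()`):

```rust
fn demo() {
    let src = [1u8, 0, 0, 0, 2, 0, 0, 0]; // two little-endian u32s
    let mut dst = [0u32; 2];
    read_u32v_le(&mut dst, &src);
    assert_eq!(dst, [1, 2]);
}
```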

View File

@ -0,0 +1,39 @@
use core::{mem, ptr};
macro_rules! write_single {
($dst:expr, $n:expr, $size:expr, $which:ident) => ({
assert!($size == $dst.len());
unsafe {
let bytes = mem::transmute::<_, [u8; $size]>($n.$which());
ptr::copy_nonoverlapping((&bytes).as_ptr(), $dst.as_mut_ptr(), $size);
}
});
}
/// Write a u32 into a vector, which must be 4 bytes long. The value is written
/// in little-endian format.
#[inline]
pub fn write_u32_le(dst: &mut [u8], n: u32) {
write_single!(dst, n, 4, to_le);
}
/// Write a u32 into a vector, which must be 4 bytes long. The value is written
/// in big-endian format.
#[inline]
pub fn write_u32_be(dst: &mut [u8], n: u32) {
write_single!(dst, n, 4, to_be);
}
/// Write a u64 into a vector, which must be 8 bytes long. The value is written
/// in little-endian format.
#[inline]
pub fn write_u64_le(dst: &mut [u8], n: u64) {
write_single!(dst, n, 8, to_le);
}
/// Write a u64 into a vector, which must be 8 bytes long. The value is written
/// in big-endian format.
#[inline]
pub fn write_u64_be(dst: &mut [u8], n: u64) {
write_single!(dst, n, 8, to_be);
}
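A hedged sketch of the single-value writers; the destination must be exactly the size of the value:

```rust
fn demo() {
    let mut buf = [0u8; 4];
    write_u32_be(&mut buf, 1);
    assert_eq!(buf, [0, 0, 0, 1]);
    write_u32_le(&mut buf, 1);
    assert_eq!(buf, [1, 0, 0, 0]);
}
```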

View File

@ -0,0 +1,46 @@
use core::{ptr, mem};
macro_rules! write_slice {
($src:expr, $dst:expr, $ty:ty, $size:expr, $which:ident) => ({
assert!($size == mem::size_of::<$ty>());
assert_eq!($dst.len(), $size*$src.len());
unsafe {
ptr::copy_nonoverlapping(
$src.as_ptr() as *const u8,
$dst.as_mut_ptr(),
$dst.len());
let tmp: &mut [$ty] = mem::transmute($dst);
for v in tmp[..$src.len()].iter_mut() {
*v = v.$which();
}
}
});
}
/// Write a vector of u32s into a vector of bytes. The values are written in
/// little-endian format.
#[inline]
pub fn write_u32v_le(dst: &mut [u8], src: &[u32]) {
write_slice!(src, dst, u32, 4, to_le);
}
/// Write a vector of u32s into a vector of bytes. The values are written in
/// big-endian format.
#[inline]
pub fn write_u32v_be(dst: &mut [u8], src: &[u32]) {
write_slice!(src, dst, u32, 4, to_be);
}
/// Write a vector of u64s into a vector of bytes. The values are written in
/// little-endian format.
#[inline]
pub fn write_u64v_le(dst: &mut [u8], src: &[u64]) {
write_slice!(src, dst, u64, 8, to_le);
}
/// Write a vector of u64s into a vector of bytes. The values are written in
/// big-endian format.
#[inline]
pub fn write_u64v_be(dst: &mut [u8], src: &[u64]) {
write_slice!(src, dst, u64, 8, to_be);
}
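A hedged round-trip sketch combining the slice writers above with the slice readers from earlier:

```rust
fn demo() {
    let words = [0xDEAD_BEEFu32, 0xCAFE_BABE];
    let mut bytes = [0u8; 8]; // 4 * words.len()
    write_u32v_be(&mut bytes, &words);
    let mut back = [0u32; 2];
    read_u32v_be(&mut back, &bytes);
    assert_eq!(words, back);
}
```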

View File

@ -0,0 +1 @@
{"files":{"Cargo.toml":"b3667b1e1a3985dd2c9e7873f6945c2d7163ed7da95569f40c2097285a325ec4","LICENSE-APACHE":"a9040321c3712d8fd0b09cf52b17445de04a23a10165049ae187cd39e5c86be5","LICENSE-MIT":"9e0dfd2dd4173a530e238cb6adb37aa78c34c6bc7444e0e10c1ab5d8881f63ba","src/dev.rs":"c824f834fa8b8c729024e4ec61138e89c26a56bfb6b50295600dddb5ff8fff62","src/digest.rs":"6710ac33c80e6159a2396839794fc76a61b94ab573516a69486457b3e291c793","src/errors.rs":"cff5bf2350bc109ad4f08caacf6780ff1e7016d9995f0847e84e96a8e31ab9d5","src/lib.rs":"bf4e93ebd066513001f3d6d77024ae8addf4df4fd89f76549fd1b73df386f3e4"},"package":"03b072242a8cbaf9c145665af9d250c59af3b958f83ed6824e13533cf76d5b90"}

View File

@ -0,0 +1,32 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g. crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "digest"
version = "0.7.6"
authors = ["RustCrypto Developers"]
description = "Traits for cryptographic hash functions"
documentation = "https://docs.rs/digest"
keywords = ["digest", "crypto", "hash"]
categories = ["cryptography", "no-std"]
license = "MIT/Apache-2.0"
repository = "https://github.com/RustCrypto/traits"
[package.metadata.docs.rs]
features = ["std"]
[dependencies.generic-array]
version = "0.9"
[features]
dev = []
std = []
[badges.travis-ci]
repository = "RustCrypto/traits"

View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,25 @@
Copyright (c) 2017 Artyom Pavlov
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

171
third_party/rust/digest-0.7.6/src/dev.rs vendored Normal file
View File

@ -0,0 +1,171 @@
use super::{Digest, Input, VariableOutput, ExtendableOutput, XofReader};
use core::fmt::Debug;
pub struct Test {
pub name: &'static str,
pub input: &'static [u8],
pub output: &'static [u8],
}
#[macro_export]
macro_rules! new_tests {
( $( $name:expr ),* ) => {
[$(
Test {
name: $name,
input: include_bytes!(concat!("data/", $name, ".input.bin")),
output: include_bytes!(concat!("data/", $name, ".output.bin")),
},
)*]
};
( $( $name:expr ),+, ) => (new_tests!($($name),+))
}
pub fn main_test<D: Digest + Debug + Clone>(tests: &[Test]) {
// Test that it works when accepting the message all at once
for t in tests.iter() {
let mut sh = D::default();
sh.input(t.input);
let out = sh.result();
assert_eq!(out[..], t.output[..]);
}
// Test that it works when accepting the message in pieces
for t in tests.iter() {
let mut sh = D::default();
let len = t.input.len();
let mut left = len;
while left > 0 {
let take = (left + 1) / 2;
sh.input(&t.input[len - left..take + len - left]);
left = left - take;
}
let out = sh.result();
assert_eq!(out[..], t.output[..]);
}
}
pub fn variable_test<D>(tests: &[Test])
where D: Input + VariableOutput + Clone + Debug
{
let mut buf = [0u8; 1024];
// Test that it works when accepting the message all at once
for t in tests.iter() {
let mut sh = D::new(t.output.len()).unwrap();
sh.process(t.input);
let out = sh.variable_result(&mut buf[..t.output.len()]).unwrap();
assert_eq!(out[..], t.output[..]);
}
// Test that it works when accepting the message in pieces
for t in tests.iter() {
let mut sh = D::new(t.output.len()).unwrap();
let len = t.input.len();
let mut left = len;
while left > 0 {
let take = (left + 1) / 2;
sh.process(&t.input[len - left..take + len - left]);
left = left - take;
}
let out = sh.variable_result(&mut buf[..t.output.len()]).unwrap();
assert_eq!(out[..], t.output[..]);
}
}
pub fn xof_test<D>(tests: &[Test])
where D: Input + ExtendableOutput + Default + Debug + Clone
{
let mut buf = [0u8; 1024];
// Test that it works when accepting the message all at once
for t in tests.iter() {
let mut sh = D::default();
sh.process(t.input);
let out = &mut buf[..t.output.len()];
sh.xof_result().read(out);
assert_eq!(out[..], t.output[..]);
}
// Test that it works when accepting the message in pieces
for t in tests.iter() {
let mut sh = D::default();
let len = t.input.len();
let mut left = len;
while left > 0 {
let take = (left + 1) / 2;
sh.process(&t.input[len - left..take + len - left]);
left = left - take;
}
let out = &mut buf[..t.output.len()];
sh.xof_result().read(out);
assert_eq!(out[..], t.output[..]);
}
// Test reading from the reader byte by byte
for t in tests.iter() {
let mut sh = D::default();
sh.process(t.input);
let mut reader = sh.xof_result();
let out = &mut buf[..t.output.len()];
for chunk in out.chunks_mut(1) {
reader.read(chunk);
}
assert_eq!(out[..], t.output[..]);
}
}
pub fn one_million_a<D: Digest + Default + Debug + Clone>(expected: &[u8]) {
let mut sh = D::default();
for _ in 0..50000 {
sh.input(&[b'a'; 10]);
}
sh.input(&[b'a'; 500000]);
let out = sh.result();
assert_eq!(out[..], expected[..]);
}
#[macro_export]
macro_rules! bench_digest {
($name:ident, $engine:path, $bs:expr) => {
#[bench]
fn $name(b: &mut Bencher) {
let mut d = <$engine>::default();
let data = [0; $bs];
b.iter(|| {
d.input(&data);
});
b.bytes = $bs;
}
};
($engine:path) => {
extern crate test;
use test::Bencher;
use digest::Digest;
bench_digest!(bench1_16, $engine, 1<<4);
bench_digest!(bench2_64, $engine, 1<<6);
bench_digest!(bench3_256, $engine, 1<<8);
bench_digest!(bench4_1k, $engine, 1<<10);
bench_digest!(bench5_4k, $engine, 1<<12);
bench_digest!(bench6_16k, $engine, 1<<14);
}
}
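A hedged sketch of how a downstream hash crate might wire these helpers into its own tests; `MyHash` and the `data/example.*.bin` fixtures are assumptions, not part of this crate:

```rust
#[macro_use]
extern crate digest;
use digest::dev::{main_test, Test};

#[test]
fn known_answer_tests() {
    // new_tests! loads data/example.input.bin and data/example.output.bin.
    let tests = new_tests!("example");
    main_test::<MyHash>(&tests); // MyHash: an assumed Digest implementor
}
```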

View File

@ -0,0 +1,86 @@
use super::{Input, BlockInput, FixedOutput};
use generic_array::GenericArray;
#[cfg(feature = "std")]
use std::io;
type Output<N> = GenericArray<u8, N>;
/// The `Digest` trait specifies an interface common for digest functions.
///
/// It's a convenience wrapper around `Input`, `FixedOutput`, `BlockInput` and
/// `Default` traits. It also provides additional convenience methods.
pub trait Digest: Input + BlockInput + FixedOutput + Default {
/// Create a new hasher instance
fn new() -> Self {
Self::default()
}
/// Digest input data. This method can be called repeatedly
/// for use with streaming messages.
fn input(&mut self, input: &[u8]) {
self.process(input);
}
/// Retrieve the digest result. This method consumes the digest instance.
fn result(self) -> Output<Self::OutputSize> {
self.fixed_result()
}
/// Convenience function to compute the hash of `data`. It handles
/// hasher creation, data feeding and finalization.
///
/// Example:
///
/// ```rust,ignore
/// println!("{:x}", sha2::Sha256::digest(b"Hello world"));
/// ```
#[inline]
fn digest(data: &[u8]) -> Output<Self::OutputSize> {
let mut hasher = Self::default();
hasher.process(data);
hasher.fixed_result()
}
/// Convenience function to compute the hash of a string. It's equivalent to
/// `digest(input_string.as_bytes())`.
#[inline]
fn digest_str(str: &str) -> Output<Self::OutputSize> {
Self::digest(str.as_bytes())
}
/// Convenience function which takes a `std::io::Read` as a source and computes
/// the value of digest function `D`, e.g. SHA-2, SHA-3, BLAKE2, etc., using
/// 8 KB blocks.
///
/// Usage example:
///
/// ```rust,ignore
/// use std::fs;
/// use sha2::{Sha256, Digest};
///
/// let mut file = fs::File::open("Cargo.toml")?;
/// let result = Sha256::digest_reader(&mut file)?;
/// println!("{:x}", result);
/// ```
#[cfg(feature = "std")]
#[inline]
fn digest_reader(source: &mut io::Read)
-> io::Result<Output<Self::OutputSize>>
{
let mut hasher = Self::default();
let mut buf = [0u8; 8 * 1024];
loop {
let len = match source.read(&mut buf) {
Ok(0) => return Ok(hasher.result()),
Ok(len) => len,
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(e) => Err(e)?,
};
hasher.process(&buf[..len]);
}
}
}
impl<D: Input + FixedOutput + BlockInput + Default> Digest for D {}
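A minimal sketch of driving any such hasher generically through the `Digest` trait above; the concrete hasher type is left as an assumption:

```rust
extern crate digest;
use digest::generic_array::GenericArray;
use digest::Digest;

// Feed a message in pieces and return the fixed-size output.
fn checksum<D: Digest>(chunks: &[&[u8]]) -> GenericArray<u8, D::OutputSize> {
    let mut hasher = D::new(); // Digest::new() defaults to Self::default()
    for chunk in chunks {
        hasher.input(chunk);   // streaming input
    }
    hasher.result()            // consumes the hasher
}
```

With a hypothetical `Sha256: Digest`, this would be called as `checksum::<Sha256>(&[b"hello", b" world"])`.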

View File

@ -0,0 +1,37 @@
use core::fmt;
#[cfg(feature = "std")]
use std::error;
/// The error type for variable hasher initialization
#[derive(Clone, Copy, Debug, Default)]
pub struct InvalidOutputSize;
/// The error type for variable hasher result
#[derive(Clone, Copy, Debug, Default)]
pub struct InvalidBufferLength;
impl fmt::Display for InvalidOutputSize {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("invalid output size")
}
}
impl fmt::Display for InvalidBufferLength {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.write_str("invalid buffer length")
}
}
#[cfg(feature = "std")]
impl error::Error for InvalidOutputSize {
fn description(&self) -> &str {
"invalid output size"
}
}
#[cfg(feature = "std")]
impl error::Error for InvalidBufferLength {
fn description(&self) -> &str {
"invalid buffer size"
}
}

View File

@ -0,0 +1,98 @@
//! This crate provides traits for describing the functionality of cryptographic
//! hash functions.
//!
//! By default, the std functionality in this crate is disabled (e.g. the method
//! for hashing `Read`ers). To enable it, turn on the `std` feature in your
//! `Cargo.toml` for this crate.
#![cfg_attr(not(feature = "std"), no_std)]
pub extern crate generic_array;
#[cfg(feature = "std")]
use std as core;
use generic_array::{GenericArray, ArrayLength};
mod digest;
mod errors;
#[cfg(feature = "dev")]
pub mod dev;
pub use errors::{InvalidOutputSize, InvalidBufferLength};
pub use digest::Digest;
// `process` is chosen so as not to overlap with the `input` method in the
// digest trait; change it once trait aliases are stabilized
/// Trait for processing input data
pub trait Input {
/// Digest input data. This method can be called repeatedly
/// for use with streaming messages.
fn process(&mut self, input: &[u8]);
}
/// Trait to indicate that a digest function processes data in blocks of size
/// `BlockSize`. Its main use is for implementing HMAC generically.
pub trait BlockInput {
type BlockSize: ArrayLength<u8>;
}
/// Trait for returning a digest result with a fixed size
pub trait FixedOutput {
type OutputSize: ArrayLength<u8>;
/// Retrieve the digest result. This method consumes the digest instance.
fn fixed_result(self) -> GenericArray<u8, Self::OutputSize>;
}
/// The error type for variable digest output
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct InvalidLength;
/// Trait for returning a digest result with a variable size
pub trait VariableOutput: core::marker::Sized {
/// Create a new hasher instance with the given output size. Returns
/// `Err(InvalidLength)` if the hasher cannot work with the given output
/// size; an error is always returned if the output size equals zero.
fn new(output_size: usize) -> Result<Self, InvalidLength>;
/// Get the output size of the hasher instance, as provided to the `new` method
fn output_size(&self) -> usize;
/// Retrieve the digest result into the provided buffer. The length of the
/// buffer must equal the output size provided to the `new` method, otherwise
/// `Err(InvalidLength)` will be returned
fn variable_result(self, buffer: &mut [u8]) -> Result<&[u8], InvalidLength>;
}
/// Trait describing readers which are used to extract extendable output
/// from the resulting state of a hash function.
pub trait XofReader {
/// Read output into the `buffer`. Can be called an unlimited number of times.
fn read(&mut self, buffer: &mut [u8]);
}
/// Trait describing the extendable output (XOF) of hash functions. To use it,
/// first obtain a structure implementing `XofReader`, through which the
/// extendable output can be read.
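///
/// A hedged usage sketch, not an API of this crate (`D` is any implementor,
/// e.g. a SHAKE-style hasher):
///
/// ```rust,ignore
/// fn squeeze_64<D: Input + ExtendableOutput + Default>(msg: &[u8]) -> [u8; 64] {
///     let mut hasher = D::default();
///     hasher.process(msg);                  // feed the message
///     let mut reader = hasher.xof_result(); // finalize, obtain the reader
///     let mut out = [0u8; 64];
///     reader.read(&mut out);                // may be called repeatedly
///     out
/// }
/// ```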
pub trait ExtendableOutput {
type Reader: XofReader;
/// Finalize hash function and return XOF reader
fn xof_result(self) -> Self::Reader;
}
/// Macro for defining an opaque `Debug` implementation. It will use the
/// following format: "HasherName { ... }". While it's convenient to have such
/// an impl (e.g. for including hashers in other structs), it could be
/// undesirable to leak internal state, which can happen for example through
/// careless logging.
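///
/// A hedged usage sketch (`ExampleHasher` is a made-up type, not part of this
/// crate):
///
/// ```rust,ignore
/// struct ExampleHasher { state: [u32; 8] }
/// impl_opaque_debug!(ExampleHasher);
/// // format!("{:?}", hasher) now yields "ExampleHasher { ... }"
/// ```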
#[macro_export]
macro_rules! impl_opaque_debug {
($state:ty) => {
impl ::core::fmt::Debug for $state {
fn fmt(&self, f: &mut ::core::fmt::Formatter)
-> Result<(), ::core::fmt::Error>
{
write!(f, concat!(stringify!($state), " {{ ... }}"))
}
}
}
}

View File

@ -1 +1 @@
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"9b11b3f077cb37e9314fd44a9c385662bebd96f6858e0886e28b00ab1beee421","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"9a9d39001433160095de7a297b51052c91c9ef7f25a94d6f67ebe50343977926","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","completions/docopt-wordlist.bash":"213bf1baea244eeb32af3a24a9ad895212cb538e3cdaee3bfed842b11a2a64d8","examples/cargo.rs":"6a5012a3359e574a61607eca0c15add23ea9e312e8f20fb90d6438740483fefd","examples/cp.rs":"35e705c59968c22a965b7ba9afc4b7a3af5d411e929432b2fb6bd2ed08a7c9ce","examples/decode.rs":"85f5033cf6450a771d6be2af819718d316b92fb98b201e247cdbe0eb39039487","examples/hashmap.rs":"9066a7b7192e15b3b667702519645d31926a371bc54ab8d70b211d98458d5a8d","examples/optional_command.rs":"44d8dda079e237ac140b1d81d34d065cb2427a6edb4e60eadaa2c8ceaff0831c","examples/verbose_multiple.rs":"3279c76c7f3bde135deca90085b9f9d5a86ea3bd619e57ddfed35f4200bb5f4a","src/dopt.rs":"df0132f0e4ddc4f0bc6fa5789cf24b5fe01d1a91338dc1431bf93c5a1d6ffc11","src/lib.rs":"e7089315c3ebd4d2774bad8b5a6b2899db6348a44f88dc4253c840bbb389f147","src/parse.rs":"e67d4a5ee95a9fcc1aa5c84e78605f32a1c2bbc5e772de9109ae1ce5fac6f16a","src/synonym.rs":"152b89b6f755222f81ebb63fd3d372d7407aa8046522fc1dcc2e40f417cfc65b","src/test/mod.rs":"1f3eb58d5740f8789dea7bdb2815b1313e948c6f5de9ea6d79cad5bbed484114","src/test/suggestions.rs":"51e044db856a424ef12d2bc2eb541ae922b93d81ac5548767c9c638ccd87d388","src/test/testcases.docopt":"13fcd2948a5625b76f93b98ac7b6cb53ef70c119fc2c5f85d2cb67e56bd4e9c3","src/test/testcases.rs":"cbecfab0c82249a7d8ad193ad5e9e10f45a7a41b37e69cfc025a9cdc6c213f04","src/wordlist.rs":"45ccc3441d1abf072c2079f15b7f5a7af68bd2989c99a8acd5554133fa8db7fa"},"package":"db2906c2579b5b7207fc1e328796a9a8835dc44e22dbe8e460b1d636f9a7b225"}
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"ef181d3a88c48c794a7f1a97974c83045bfa956eb5b1b8e5efc1f8c92938a135","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","Makefile":"db1787c5c7d2daea87d92c0549976a18bbe0601acb2ab5bd8dc5edb9f2b46e63","README.md":"3b46f46ffd466fc3aa36becb0ce194820b4669ca75d0c186620abef6115317e0","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","completions/docopt-wordlist.bash":"213bf1baea244eeb32af3a24a9ad895212cb538e3cdaee3bfed842b11a2a64d8","ctags.rust":"3d128d3cc59f702e68953ba2fe6c3f46bc6991fc575308db060482d5da0c79f3","examples/cargo.rs":"6a5012a3359e574a61607eca0c15add23ea9e312e8f20fb90d6438740483fefd","examples/cp.rs":"35e705c59968c22a965b7ba9afc4b7a3af5d411e929432b2fb6bd2ed08a7c9ce","examples/decode.rs":"85f5033cf6450a771d6be2af819718d316b92fb98b201e247cdbe0eb39039487","examples/hashmap.rs":"9066a7b7192e15b3b667702519645d31926a371bc54ab8d70b211d98458d5a8d","examples/optional_command.rs":"44d8dda079e237ac140b1d81d34d065cb2427a6edb4e60eadaa2c8ceaff0831c","examples/verbose_multiple.rs":"3279c76c7f3bde135deca90085b9f9d5a86ea3bd619e57ddfed35f4200bb5f4a","scripts/mk-testcases":"649f37d391650175c8462171f7a98fce81735c9317630a5eb13db532ddb22976","session.vim":"1d51566b00f8ff2021d56948c1c55f123959f3e24879a6ad9337eccb11fc8fe9","src/dopt.rs":"4bbdd90fca8f71e4d898bc0656d09dce219e255d4b92671716da8fce5180572a","src/lib.rs":"e916a13a1e7f16566b768f4b9906d2d1a7c31a0524767350b1063d9255a03997","src/parse.rs":"e67d4a5ee95a9fcc1aa5c84e78605f32a1c2bbc5e772de9109ae1ce5fac6f16a","src/synonym.rs":"152b89b6f755222f81ebb63fd3d372d7407aa8046522fc1dcc2e40f417cfc65b","src/test/mod.rs":"1f3eb58d5740f8789dea7bdb2815b1313e948c6f5de9ea6d79cad5bbed484114","src/test/suggestions.rs":"51e044db856a424ef12d2bc2eb541ae922b93d81ac5548767c9c638ccd87d388","src/test/testcases.docopt":"13fcd2948a5625b76f93b98ac7b6cb53ef70c119fc2c5f85d2cb67e56bd4e9c3","src/test/testcases.rs":"cbecfab0c82249a7d8ad193ad5e9e10f45a7a41b37e69cfc025a9cdc6c213f04","src/wordlist.rs":"45ccc3441d1abf072c2079f15b7f5a7af68bd2989c99a8acd5554133fa8db7fa"},"package":"d8acd393692c503b168471874953a2531df0e9ab77d0b6bbc582395743300a4a"}

View File

@ -12,9 +12,8 @@
[package]
name = "docopt"
version = "1.0.2"
version = "0.8.3"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
exclude = ["/.travis.yml", "/Makefile", "/ctags.rust", "/scripts/*", "/session.vim"]
description = "Command line argument parsing."
homepage = "https://github.com/docopt/docopt.rs"
documentation = "http://burntsushi.net/rustdoc/docopt/"
@ -36,7 +35,7 @@ doc = false
version = "1"
[dependencies.regex]
version = "1.0.3"
version = "0.2"
[dependencies.serde]
version = "1.0"
@ -45,4 +44,4 @@ version = "1.0"
version = "1.0"
[dependencies.strsim]
version = "0.7"
version = "0.6"

18
third_party/rust/docopt/Makefile vendored Normal file
View File

@ -0,0 +1,18 @@
all:
@echo Nothing to do
docs: $(LIB_FILES)
cargo doc
# WTF is rustdoc doing?
in-dir ./target/doc fix-perms
rscp ./target/doc/* gopher:~/www/burntsushi.net/rustdoc/
src/test/testcases.rs: src/test/testcases.docopt scripts/mk-testcases
./scripts/mk-testcases ./src/test/testcases.docopt > ./src/test/testcases.rs
ctags:
ctags --recurse --options=ctags.rust --languages=Rust
push:
git push github master
git push origin master

View File

@ -26,11 +26,15 @@ This crate is fully compatible with Cargo. Just add it to your `Cargo.toml`:
```toml
[dependencies]
docopt = "1"
docopt = "0.8"
serde = "1.0" # if you're using `derive(Deserialize)`
serde_derive = "1.0" # if you're using `derive(Deserialize)`
```
If you want to use the macro, then add `docopt_macros = "0.8"` as well.
Note that the **`docopt!` macro only works on a nightly Rust compiler** because
it is a compiler plugin.
### Quick example
@ -83,6 +87,49 @@ fn main() {
}
```
Here is the same example, but with the use of the `docopt!` macro, which will
*generate a struct for you*. Note that this uses a compiler plugin, so it only
works on a **nightly Rust compiler**:
```rust
#![feature(plugin)]
#![plugin(docopt_macros)]
#[macro_use]
extern crate serde_derive;
extern crate docopt;
use docopt::Docopt;
docopt!(Args derive Debug, "
Naval Fate.
Usage:
naval_fate.py ship new <name>...
naval_fate.py ship <name> move <x> <y> [--speed=<kn>]
naval_fate.py ship shoot <x> <y>
naval_fate.py mine (set|remove) <x> <y> [--moored | --drifting]
naval_fate.py (-h | --help)
naval_fate.py --version
Options:
-h --help Show this screen.
--version Show version.
--speed=<kn> Speed in knots [default: 10].
--moored Moored (anchored) mine.
--drifting Drifting mine.
");
fn main() {
let args: Args = Args::docopt().deserialize().unwrap_or_else(|e| e.exit());
println!("{:?}", args);
}
```
The `Args` struct has one static method defined for it: `docopt`. The method
returns a normal `Docopt` value, which can be used to set configuration
options and `argv`, and to parse or decode command line arguments.
### Struct field name mapping
@ -98,6 +145,125 @@ build => cmd_build
```
### Data validation example
Here's another example that shows how to specify the types of your arguments:
```rust
#![feature(plugin)]
#![plugin(docopt_macros)]
#[macro_use]
extern crate serde_derive;
extern crate docopt;
docopt!(Args, "Usage: add <x> <y>", arg_x: i32, arg_y: i32);
fn main() {
let args: Args = Args::docopt().deserialize().unwrap_or_else(|e| e.exit());
println!("x: {}, y: {}", args.arg_x, args.arg_y);
}
```
In this example, specific type annotations were added. They will be
automatically inserted into the generated struct. You can override as many (or
as few) fields as you want. If you don't specify a type, then one of `bool`,
`u64`, `String` or `Vec<String>` will be chosen depending on the type of
argument. In this case, both `arg_x` and `arg_y` would have been `String`.
If any value cannot be decoded into a value with the right type, then an error
will be shown to the user.
And of course, you don't need the macro to do this. You can do the same thing
with a manually written struct too.
### Modeling `rustc`
Here's a selected subset for some of `rustc`'s options. This also shows how to
restrict values to a list of choices via an `enum` type and demonstrates more
Docopt features.
```rust
#![feature(plugin)]
#![plugin(docopt_macros)]
#[macro_use]
extern crate serde_derive;
extern crate serde;
extern crate docopt;
use serde::de;
docopt!(Args derive Debug, "
Usage: rustc [options] [--cfg SPEC... -L PATH...] INPUT
rustc (--help | --version)
Options:
-h, --help Show this message.
--version Show the version of rustc.
--cfg SPEC Configure the compilation environment.
-L PATH Add a directory to the library search path.
--emit TYPE Configure the output that rustc will produce.
Valid values: asm, ir, bc, obj, link.
--opt-level LEVEL Optimize with possible levels 0-3.
", flag_opt_level: Option<OptLevel>, flag_emit: Option<Emit>);
#[derive(Deserialize, Debug)]
enum Emit { Asm, Ir, Bc, Obj, Link }
#[derive(Debug)]
enum OptLevel { Zero, One, Two, Three }
impl<'de> de::Deserialize<'de> for OptLevel {
fn deserialize<D>(deserializer: D) -> Result<OptLevel, D::Error>
where D: de::Deserializer<'de>
{
let level = match u8::deserialize(deserializer)? {
0 => OptLevel::Zero,
1 => OptLevel::One,
2 => OptLevel::Two,
3 => OptLevel::Three,
n => {
let value = de::Unexpected::Unsigned(n as u64);
let msg = "expected an integer between 0 and 3";
return Err(de::Error::invalid_value(value, &msg));
}
};
Ok(level)
}
}
fn main() {
let args: Args = Args::docopt().deserialize().unwrap_or_else(|e| e.exit());
println!("{:?}", args);
}
```
### Viewing the generated struct
Generating a struct is pretty magical, but if you want, you can look at it by
expanding all macros. Say you wrote the above example for `Usage: add <x> <y>`
into a file called `add.rs`. Then running:
```bash
rustc -L path/containing/docopt/lib -Z unstable-options --pretty=expanded add.rs
```
will show all macros expanded. The `path/containing/docopt/lib` is usually
`target/debug/deps` or `target/release/deps` in a Cargo project. In the
generated code, you should be able to find the generated struct:
```rust
struct Args {
    pub arg_x: i32,
    pub arg_y: i32,
}
```
### Traditional Docopt API
The reference implementation of Docopt returns a Python dictionary with names

11
third_party/rust/docopt/ctags.rust vendored Normal file
View File

@ -0,0 +1,11 @@
--langdef=Rust
--langmap=Rust:.rs
--regex-Rust=/^[ \t]*(#\[[^\]]\][ \t]*)*(pub[ \t]+)?(extern[ \t]+)?("[^"]+"[ \t]+)?(unsafe[ \t]+)?fn[ \t]+([a-zA-Z0-9_]+)/\6/f,functions,function definitions/
--regex-Rust=/^[ \t]*(pub[ \t]+)?type[ \t]+([a-zA-Z0-9_]+)/\2/T,types,type definitions/
--regex-Rust=/^[ \t]*(pub[ \t]+)?enum[ \t]+([a-zA-Z0-9_]+)/\2/g,enum,enumeration names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?struct[ \t]+([a-zA-Z0-9_]+)/\2/s,structure names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?mod[ \t]+([a-zA-Z0-9_]+)/\2/m,modules,module names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?static[ \t]+([a-zA-Z0-9_]+)/\2/c,consts,static constants/
--regex-Rust=/^[ \t]*(pub[ \t]+)?trait[ \t]+([a-zA-Z0-9_]+)/\2/t,traits,traits/
--regex-Rust=/^[ \t]*(pub[ \t]+)?impl([ \t\n]+<.*>)?[ \t]+([a-zA-Z0-9_]+)/\3/i,impls,trait implementations/
--regex-Rust=/^[ \t]*macro_rules![ \t]+([a-zA-Z0-9_]+)/\1/d,macros,macro definitions/

80
third_party/rust/docopt/scripts/mk-testcases vendored Executable file
View File

@ -0,0 +1,80 @@
#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function
import argparse
import json
import re
retests = re.compile('(.*?)"""(.*?)(r"""|\s*$)', re.DOTALL)
reinvokes = re.compile('(.+?$)(.+?)\s*(\$|\Z)', re.DOTALL | re.MULTILINE)
p = argparse.ArgumentParser(
description="Outputs src/test/testcases.rs to stdout")
p.add_argument("testcases", metavar="FILE",
help="The testcases.docopt language agnostic test suite.")
args = p.parse_args()
with open(args.testcases) as f:
alltests = f.read()
alltests = re.sub('^r"""', '', alltests)
alltests = re.sub('^\s*#.*$', '', alltests, flags=re.MULTILINE)
tests = [] # [{usage, args, expect}] (expect is None ==> user-error)
for m in retests.finditer(alltests):
usage, invokes = m.group(1).strip(), m.group(2).strip()
assert invokes.startswith('$'), 'Bad test: "%s"' % invokes
invokes = re.sub('^\$', '', invokes)
for mi in reinvokes.finditer(invokes):
invoke, expect = mi.group(1).strip(), mi.group(2).strip()
err = expect.startswith('"user-error"')
tests.append({
'usage': usage,
'args': invoke.split()[1:],
'expect': None if err else json.loads(expect),
})
def show_test(i, t):
def show_expect(e):
kvs = []
for k, v in e.iteritems():
kvs.append('("%s", %s)' % (k, show_value(v)))
return ', '.join(kvs)
def show_value(v):
if v is None:
return 'Plain(None)'
elif isinstance(v, basestring):
return 'Plain(Some("%s".to_string()))' % v
elif isinstance(v, bool):
return 'Switch(%s)' % ('true' if v else 'false')
elif isinstance(v, int):
return 'Counted(%d)' % v
elif isinstance(v, list):
elms = ', '.join(['"%s".to_string()' % el for el in v])
return 'List(vec!(%s))' % elms
else:
raise ValueError('Unrecognized value: "%s" (type: %s)'
% (v, type(v)))
args = ', '.join(['"%s"' % arg for arg in t['args']])
if t['expect'] is None:
return 'test_user_error!(test_%d_testcases, "%s", &[%s]);' \
% (i, t['usage'], args)
else:
expect = show_expect(t['expect'])
return 'test_expect!(test_%d_testcases, "%s", &[%s], vec!(%s));' \
% (i, t['usage'], args, expect)
print(
"""// !!! ATTENTION !!!
// This file is automatically generated by `scripts/mk-testcases`.
// Please do not edit this file directly!
use Value::{{Switch, Counted, Plain, List}};
use test::{{get_args, map_from_alist, same_args}};
{tests}
""".format(tests='\n\n'.join([show_test(i, t) for i, t in enumerate(tests)])))

3
third_party/rust/docopt/session.vim vendored Normal file
View File

@ -0,0 +1,3 @@
au BufWritePost *.rs silent!make ctags > /dev/null 2>&1
" let g:syntastic_rust_rustc_fname = "src/lib.rs"
" let g:syntastic_rust_rustc_args = "--no-trans"

View File

@ -307,6 +307,7 @@ impl Docopt {
}
#[doc(hidden)]
// Exposed for use in `docopt_macros`.
pub fn parser(&self) -> &Parser {
&self.p
}

View File

@ -182,6 +182,53 @@
//! assert_eq!(args.flag_emit, Some(Emit::Ir));
//! # }
//! ```
//!
//! # The `docopt!` macro
//!
//! This package comes bundled with an additional crate, `docopt_macros`,
//! which provides a `docopt!` syntax extension. Its purpose is to automate
//! the creation of a Rust struct from a Docopt usage string. In particular,
//! this provides a single point of truth about the definition of command line
//! arguments in your program.
//!
//! Another advantage of using the macro is that errors in your Docopt usage
//! string will be caught at compile time. Stated differently, your program
//! will not compile with an invalid Docopt usage string.
//!
//! The example above using type based decoding can be simplified to this:
//!
//! ```ignore
//! #![feature(plugin)]
//! #![plugin(docopt_macros)]
//!
//! extern crate serde;
//!
//! extern crate docopt;
//!
//! // Write the Docopt usage string with the `docopt!` macro.
//! docopt!(Args, "
//! Usage: cp [-a] <source> <dest>
//! cp [-a] <source>... <dir>
//!
//! Options:
//! -a, --archive Copy everything.
//! ")
//!
//! fn main() {
//! let argv = || vec!["cp", "-a", "file1", "file2", "dest/"];
//!
//! // Your `Args` struct has a single static method defined on it,
//! // `docopt`, which will return a normal `Docopt` value.
//! let args: Args = Args::docopt().deserialize().unwrap_or_else(|e| e.exit());
//!
//! // Now access your argv values.
//! fn s(x: &str) -> String { x.to_string() }
//! assert!(args.flag_archive);
//! assert_eq!(args.arg_source, vec![s("file1"), s("file2")]);
//! assert_eq!(args.arg_dir, s("dest/"));
//! assert_eq!(args.arg_dest, s(""));
//! }
//! ```
#![crate_name = "docopt"]
#![doc(html_root_url = "http://burntsushi.net/rustdoc/docopt")]

View File

@ -1 +1 @@
{"files":{"Cargo.toml":"479607f839ec311f5b48754953c3b33bd2d170d2bcb3008e904bef21ecad7a6d","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0621878e61f0d0fda054bcbe02df75192c28bde1ecc8289cbd86aeba2dd72720","README.md":"4b02d7ebfb188b1f2cbef20ade3082197046ccaa89e49d2bcdef6102d48919e3","measurements.txt":"b209f98f2bc696904a48829e86952f4f09b59e4e685f7c12087c59d05ed31829","src/bitvec.rs":"c6c66c348776ff480b7ff6e4a3e0f64554a4194266f614408b45b5e3c324ec0a","src/lib.rs":"294aabf6fb846dbe35bba837d70ea9115f20cd808995a318c0fccb05f91d096f","src/snapshot_vec.rs":"4935b5eb8292e3b62d662ca01d0baef3d6b341f5479811d837e872ebc3c8518f","src/unify/backing_vec.rs":"0bcc5cd9d7a8bf1fd17e87b6388eeb0f9e3c21ed280fa31ab5dcc4a1ee69fcca","src/unify/mod.rs":"1bed8bd5c8f804fb4c225ed309940ede74b05e58d64f6182ff1ea3895c18a930","src/unify/tests.rs":"b18974faeebdf2c03e82035fe7281bf4db3360ab10ce34b1d3441547836b19f2"},"package":"25b4e5febb25f08c49f1b07dc33a182729a6b21edfb562b5aef95f78e0dbe5bb"}
{"files":{"Cargo.toml":"13e445b6bc53bf1ea2379fd2ec33205daa9b1b74d5a41e4dd9ea8cb966185c5a","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0621878e61f0d0fda054bcbe02df75192c28bde1ecc8289cbd86aeba2dd72720","README.md":"4b02d7ebfb188b1f2cbef20ade3082197046ccaa89e49d2bcdef6102d48919e3","measurements.txt":"b209f98f2bc696904a48829e86952f4f09b59e4e685f7c12087c59d05ed31829","src/bitvec.rs":"c6c66c348776ff480b7ff6e4a3e0f64554a4194266f614408b45b5e3c324ec0a","src/lib.rs":"294aabf6fb846dbe35bba837d70ea9115f20cd808995a318c0fccb05f91d096f","src/snapshot_vec.rs":"abc649bb42dc8592741b02d53ba1ed5f6ad64710b971070872b0c42665d73c93","src/unify/backing_vec.rs":"7d57036ce671169893d069f94454f1c4b95104517ffd62859f180d80cbe490e5","src/unify/mod.rs":"9fc90951778be635fbbf4fba8b3a0a4eb21e2c955660f019377465ac773b9563","src/unify/tests.rs":"b18974faeebdf2c03e82035fe7281bf4db3360ab10ce34b1d3441547836b19f2"},"package":"88dc8393b3c7352f94092497f6b52019643e493b6b890eb417cdb7c46117e621"}

View File

@ -12,7 +12,7 @@
[package]
name = "ena"
version = "0.10.1"
version = "0.9.3"
authors = ["Niko Matsakis <niko@alum.mit.edu>"]
description = "Union-find, congruence closure, and other unification code. Based on code from rustc."
homepage = "https://github.com/nikomatsakis/ena"

View File

@ -75,20 +75,13 @@ pub trait SnapshotVecDelegate {
fn reverse(values: &mut Vec<Self::Value>, action: Self::Undo);
}
// HACK(eddyb) manual impl avoids `Default` bound on `D`.
impl<D: SnapshotVecDelegate> Default for SnapshotVec<D> {
fn default() -> Self {
impl<D: SnapshotVecDelegate> SnapshotVec<D> {
pub fn new() -> SnapshotVec<D> {
SnapshotVec {
values: Vec::new(),
undo_log: Vec::new(),
}
}
}
impl<D: SnapshotVecDelegate> SnapshotVec<D> {
pub fn new() -> Self {
Self::default()
}
pub fn with_capacity(c: usize) -> SnapshotVec<D> {
SnapshotVec {
@ -282,12 +275,8 @@ impl<D: SnapshotVecDelegate> Extend<D::Value> for SnapshotVec<D> {
where
T: IntoIterator<Item = D::Value>,
{
let initial_len = self.values.len();
self.values.extend(iterable);
let final_len = self.values.len();
if self.in_snapshot() {
self.undo_log.extend((initial_len..final_len).map(|len| NewElem(len)));
for item in iterable {
self.push(item);
}
}
}

View File

@ -7,19 +7,18 @@ use std::marker::PhantomData;
use super::{VarValue, UnifyKey, UnifyValue};
#[allow(dead_code)] // rustc BUG
#[allow(type_alias_bounds)]
type Key<S: UnificationStore> = <S as UnificationStore>::Key;
type Key<S> = <S as UnificationStore>::Key;
/// Largely internal trait implemented by the unification table
/// backing store types. The most common such type is `InPlace`,
/// which indicates a standard, mutable unification table.
pub trait UnificationStore:
ops::Index<usize, Output = VarValue<Key<Self>>> + Clone + Default
{
pub trait UnificationStore: ops::Index<usize, Output = VarValue<Key<Self>>> + Clone {
type Key: UnifyKey<Value = Self::Value>;
type Value: UnifyValue;
type Snapshot;
fn new() -> Self;
fn start_snapshot(&mut self) -> Self::Snapshot;
fn rollback_to(&mut self, snapshot: Self::Snapshot);
@ -52,18 +51,16 @@ pub struct InPlace<K: UnifyKey> {
values: sv::SnapshotVec<Delegate<K>>
}
// HACK(eddyb) manual impl avoids `Default` bound on `K`.
impl<K: UnifyKey> Default for InPlace<K> {
fn default() -> Self {
InPlace { values: sv::SnapshotVec::new() }
}
}
impl<K: UnifyKey> UnificationStore for InPlace<K> {
type Key = K;
type Value = K::Value;
type Snapshot = sv::Snapshot;
#[inline]
fn new() -> Self {
InPlace { values: sv::SnapshotVec::new() }
}
#[inline]
fn start_snapshot(&mut self) -> Self::Snapshot {
self.values.start_snapshot()
@ -135,20 +132,17 @@ pub struct Persistent<K: UnifyKey> {
values: DVec<VarValue<K>>
}
// HACK(eddyb) manual impl avoids `Default` bound on `K`.
#[cfg(feature = "persistent")]
impl<K: UnifyKey> Default for Persistent<K> {
fn default() -> Self {
Persistent { values: DVec::new() }
}
}
#[cfg(feature = "persistent")]
impl<K: UnifyKey> UnificationStore for Persistent<K> {
type Key = K;
type Value = K::Value;
type Snapshot = Self;
#[inline]
fn new() -> Self {
Persistent { values: DVec::new() }
}
#[inline]
fn start_snapshot(&mut self) -> Self::Snapshot {
self.clone()

View File

@ -174,20 +174,18 @@ pub struct VarValue<K: UnifyKey> { // FIXME pub
/// cloning the table is an O(1) operation.
/// - This implies that ordinary operations are quite a bit slower though.
/// - Requires the `persistent` feature be selected in your Cargo.toml file.
#[derive(Clone, Debug, Default)]
#[derive(Clone, Debug)]
pub struct UnificationTable<S: UnificationStore> {
/// Indicates the current value of each key.
values: S,
}
/// A unification table that uses an "in-place" vector.
#[allow(type_alias_bounds)]
pub type InPlaceUnificationTable<K: UnifyKey> = UnificationTable<InPlace<K>>;
pub type InPlaceUnificationTable<K> = UnificationTable<InPlace<K>>;
/// A unification table that uses a "persistent" vector.
#[cfg(feature = "persistent")]
#[allow(type_alias_bounds)]
pub type PersistentUnificationTable<K: UnifyKey> = UnificationTable<Persistent<K>>;
pub type PersistentUnificationTable<K> = UnificationTable<Persistent<K>>;
/// At any time, users may snapshot a unification table. The changes
/// made during the snapshot may either be *committed* or *rolled back*.
@ -239,7 +237,9 @@ impl<K: UnifyKey> VarValue<K> {
impl<S: UnificationStore> UnificationTable<S> {
pub fn new() -> Self {
Self::default()
UnificationTable {
values: S::new()
}
}
/// Starts a new snapshot. Each snapshot must be either

View File

@ -0,0 +1 @@
{"files":{"Cargo.toml":"87ff65d640c137c26d338f96e21e769af1e1b2e7fa615b40a1bcc755448bb118","LICENSE":"ad4fcfaf8d5b12b97409c137a03d4a4e4b21024c65c54f976cc3b609c1bd5b0f","README.md":"9a1a45416eac57050036b13df6ec84d21d555e820726af3c782896bd9d37d94b","rustfmt.toml":"2a298b4ce1fe6e16b8f281a0035567b8eb15042ed3062729fd28224f29c2f75a","src/arr.rs":"cc1ea0a9ef6a524b90767cc8a89f6b939394a2948a645ed313c0bf5ce5a258a4","src/hex.rs":"bfbf304fb4dea6f7edc0569b38bf2ac7657ce089c5761891321722509e3b5076","src/impl_serde.rs":"805885478728b3c205b842d46deb377b7dd6dd4c4c50254064431f49f0981a2a","src/impls.rs":"8c54e294a82a2bf344bdcb9949b8a84903fb65698d6b1b1e0ab9f5e7847be64f","src/iter.rs":"e52217f04d0dc046f13ef2e3539b90eabd4d55bb85cf40f76ba0bf86d5e55ef0","src/lib.rs":"da93fa505eee94b40fce0fe98e26ed3bb4d2bc4d4869af01598b6e54fc9c0f8d","tests/hex.rs":"e909bc0564e7d52c5fcf172dfc0fac7085010c6a21d38581bf73a54ab2e256e1","tests/import_name.rs":"1235729ecbde47fc9a38b3bf35c750a53ed55e3cf967c9d2b24fd759dc9e9e0c","tests/mod.rs":"f4100c5338906c038636f98f4d2b3d272f59580662afa89d915eafb96d7bbcf9"},"package":"ef25c5683767570c2bbd7deba372926a55eaae9982d7726ee2a1050239d45b9d"}

View File

@ -0,0 +1,32 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g. crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "generic-array"
version = "0.9.0"
authors = ["Bartłomiej Kamiński <fizyk20@gmail.com>"]
description = "Generic types implementing functionality of arrays"
documentation = "http://fizyk20.github.io/generic-array/generic_array/"
license = "MIT"
repository = "https://github.com/fizyk20/generic-array.git"
[lib]
name = "generic_array"
[dependencies.typenum]
version = "1.9"
[dependencies.serde]
version = "1.0"
optional = true
default-features = false
[dev-dependencies.serde_json]
version = "1.0"

View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2015 Bartłomiej Kamiński
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,34 @@
[![Crates.io](https://img.shields.io/crates/v/generic-array.svg)](https://crates.io/crates/generic-array)
[![Build Status](https://travis-ci.org/fizyk20/generic-array.svg?branch=master)](https://travis-ci.org/fizyk20/generic-array)
# generic-array
This crate implements generic array types for Rust.
[Documentation](http://fizyk20.github.io/generic-array/generic_array/)
## Usage
The Rust arrays `[T; N]` are problematic in that they can't be used generically with respect to `N`, so for example this won't work:
```rust
struct Foo<N> {
data: [i32; N]
}
```
**generic-array** defines a new trait `ArrayLength<T>` and a struct `GenericArray<T, N: ArrayLength<T>>`, which let the above be implemented as:
```rust
struct Foo<N: ArrayLength<i32>> {
data: GenericArray<i32, N>
}
```
To actually define a type implementing `ArrayLength`, you can use unsigned integer types defined in the [typenum](https://github.com/paholg/typenum) crate - for example, `GenericArray<T, U5>` would work almost like `[T; 5]` :)
In version 0.1.1 an `arr!` macro was introduced, allowing for creation of arrays as shown below:
```rust
let array = arr![u32; 1, 2, 3];
assert_eq!(array[2], 3);
```
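As a hedged aside combining the two pieces above (typenum lengths plus `GenericArray`), here is a default-initialized array whose length is fixed at compile time:

```rust
extern crate generic_array;
extern crate typenum;
use generic_array::GenericArray;
use typenum::U5;

fn main() {
    // The length U5 is part of the type; no runtime length checks are needed.
    let buf: GenericArray<u8, U5> = GenericArray::default();
    assert_eq!(buf.len(), 5); // GenericArray dereferences to a slice
}
```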

View File

@ -0,0 +1,3 @@
reorder_imports = true
reorder_imported_names = true
use_try_shorthand = true

View File

@ -0,0 +1,57 @@
//! Implementation for `arr!` macro.
use super::ArrayLength;
use core::ops::Add;
use typenum::U1;
/// Helper trait for `arr!` macro
pub trait AddLength<T, N: ArrayLength<T>>: ArrayLength<T> {
/// Resulting length
type Output: ArrayLength<T>;
}
impl<T, N1, N2> AddLength<T, N2> for N1
where
N1: ArrayLength<T> + Add<N2>,
N2: ArrayLength<T>,
<N1 as Add<N2>>::Output: ArrayLength<T>,
{
type Output = <N1 as Add<N2>>::Output;
}
/// Helper type for `arr!` macro
pub type Inc<T, U> = <U as AddLength<T, U1>>::Output;
#[doc(hidden)]
#[macro_export]
macro_rules! arr_impl {
($T:ty; $N:ty, [$($x:expr),*], []) => ({
unsafe { $crate::transmute::<_, $crate::GenericArray<$T, $N>>([$($x),*]) }
});
($T:ty; $N:ty, [], [$x1:expr]) => (
arr_impl!($T; $crate::arr::Inc<$T, $N>, [$x1 as $T], [])
);
($T:ty; $N:ty, [], [$x1:expr, $($x:expr),+]) => (
arr_impl!($T; $crate::arr::Inc<$T, $N>, [$x1 as $T], [$($x),*])
);
($T:ty; $N:ty, [$($y:expr),+], [$x1:expr]) => (
arr_impl!($T; $crate::arr::Inc<$T, $N>, [$($y),*, $x1 as $T], [])
);
($T:ty; $N:ty, [$($y:expr),+], [$x1:expr, $($x:expr),+]) => (
arr_impl!($T; $crate::arr::Inc<$T, $N>, [$($y),*, $x1 as $T], [$($x),*])
);
}
/// Macro allowing for easy generation of Generic Arrays.
/// Example: `let test = arr![u32; 1, 2, 3];`
#[macro_export]
macro_rules! arr {
($T:ty;) => ({
unsafe { $crate::transmute::<[$T; 0], $crate::GenericArray<$T, $crate::typenum::U0>>([]) }
});
($T:ty; $($x:expr),*) => (
arr_impl!($T; $crate::typenum::U0, [], [$($x),*])
);
($($x:expr,)+) => (arr![$($x),*]);
() => ("""Macro requires a type, e.g. `let array = arr![u32; 1, 2, 3];`")
}

View File

@ -0,0 +1,101 @@
//! Generic arrays are commonly used as return values for hash digests, so
//! it's a good idea to allow hexlifying them easily. This module implements
//! the `std::fmt::LowerHex` and `std::fmt::UpperHex` traits.
//!
//! Example:
//!
//! ```rust
//! # #[macro_use]
//! # extern crate generic_array;
//! # extern crate typenum;
//! # fn main() {
//! let array = arr![u8; 10, 20, 30];
//! assert_eq!(format!("{:x}", array), "0a141e");
//! # }
//! ```
//!
use {ArrayLength, GenericArray};
use core::fmt;
use core::ops::Add;
use core::str;
use typenum::*;
static LOWER_CHARS: &'static [u8] = b"0123456789abcdef";
static UPPER_CHARS: &'static [u8] = b"0123456789ABCDEF";
impl<T: ArrayLength<u8>> fmt::LowerHex for GenericArray<u8, T>
where
T: Add<T>,
<T as Add<T>>::Output: ArrayLength<u8>,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let max_digits = f.precision().unwrap_or_else(|| self.len());
if T::to_usize() < 1024 {
// For small arrays, use a stack-allocated
// buffer of 2x the number of bytes
let mut res = GenericArray::<u8, Sum<T, T>>::default();
for (i, c) in self.iter().take(max_digits).enumerate() {
res[i * 2] = LOWER_CHARS[(c >> 4) as usize];
res[i * 2 + 1] = LOWER_CHARS[(c & 0xF) as usize];
}
f.write_str(
unsafe { str::from_utf8_unchecked(&res[..max_digits * 2]) },
)?;
} else {
// For large arrays, use chunks of up to 1024 bytes (2048 hex chars)
let mut buf = [0u8; 2048];
for chunk in self[..max_digits].chunks(1024) {
for (i, c) in chunk.iter().enumerate() {
buf[i * 2] = LOWER_CHARS[(c >> 4) as usize];
buf[i * 2 + 1] = LOWER_CHARS[(c & 0xF) as usize];
}
f.write_str(unsafe {
str::from_utf8_unchecked(&buf[..chunk.len() * 2])
})?;
}
}
Ok(())
}
}
impl<T: ArrayLength<u8>> fmt::UpperHex for GenericArray<u8, T>
where
T: Add<T>,
<T as Add<T>>::Output: ArrayLength<u8>,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let max_digits = f.precision().unwrap_or_else(|| self.len());
if T::to_usize() < 1024 {
// For small arrays, use a stack-allocated
// buffer of 2x the number of bytes
let mut res = GenericArray::<u8, Sum<T, T>>::default();
for (i, c) in self.iter().take(max_digits).enumerate() {
res[i * 2] = UPPER_CHARS[(c >> 4) as usize];
res[i * 2 + 1] = UPPER_CHARS[(c & 0xF) as usize];
}
f.write_str(
unsafe { str::from_utf8_unchecked(&res[..max_digits * 2]) },
)?;
} else {
// For large arrays, use chunks of up to 1024 bytes (2048 hex chars)
let mut buf = [0u8; 2048];
for chunk in self[..max_digits].chunks(1024) {
for (i, c) in chunk.iter().enumerate() {
buf[i * 2] = UPPER_CHARS[(c >> 4) as usize];
buf[i * 2 + 1] = UPPER_CHARS[(c & 0xF) as usize];
}
f.write_str(unsafe {
str::from_utf8_unchecked(&buf[..chunk.len() * 2])
})?;
}
}
Ok(())
}
}
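As a quick usage sketch (mine; it matches the hex tests later in this diff): both impls hexlify byte arrays, and a format precision truncates the output to that many leading bytes:
```rust
#[macro_use]
extern crate generic_array;

fn main() {
    let ar = arr![u8; 10, 20, 30];
    assert_eq!(format!("{:x}", ar), "0a141e");  // LowerHex
    assert_eq!(format!("{:X}", ar), "0A141E");  // UpperHex
    assert_eq!(format!("{:.2x}", ar), "0a14");  // precision: first 2 bytes only
}
```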

View File

@ -0,0 +1,68 @@
//! Serde serialization/deserialization implementation
use {ArrayLength, GenericArray};
use core::fmt;
use core::marker::PhantomData;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde::de::{self, SeqAccess, Visitor};
impl<T, N> Serialize for GenericArray<T, N>
where
T: Serialize,
N: ArrayLength<T>,
{
#[inline]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.collect_seq(self.iter())
}
}
struct GAVisitor<T, N> {
_t: PhantomData<T>,
_n: PhantomData<N>,
}
impl<'de, T, N> Visitor<'de> for GAVisitor<T, N>
where
T: Deserialize<'de> + Default,
N: ArrayLength<T>,
{
type Value = GenericArray<T, N>;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("struct GenericArray")
}
fn visit_seq<A>(self, mut seq: A) -> Result<GenericArray<T, N>, A::Error>
where
A: SeqAccess<'de>,
{
let mut result = GenericArray::default();
for i in 0..N::to_usize() {
result[i] = seq.next_element()?.ok_or_else(
|| de::Error::invalid_length(i, &self),
)?;
}
Ok(result)
}
}
impl<'de, T, N> Deserialize<'de> for GenericArray<T, N>
where
T: Deserialize<'de> + Default,
N: ArrayLength<T>,
{
fn deserialize<D>(deserializer: D) -> Result<GenericArray<T, N>, D::Error>
where
D: Deserializer<'de>,
{
let visitor = GAVisitor {
_t: PhantomData,
_n: PhantomData,
};
deserializer.deserialize_seq(visitor)
}
}
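A small round-trip sketch (mine, assuming the crate's `serde` feature is enabled and `serde_json` is available; the serde test later in this diff does the same):
```rust
#[macro_use]
extern crate generic_array;
extern crate serde_json;

use generic_array::GenericArray;
use generic_array::typenum::U3;

fn main() {
    // A GenericArray serializes as a plain sequence...
    let array: GenericArray<u8, U3> = arr![u8; 1, 2, 3];
    let json = serde_json::to_string(&array).unwrap();
    assert_eq!(json, "[1,2,3]");
    // ...and deserializing fails with `invalid_length` if the
    // sequence is too short, per the visitor above.
    let back: GenericArray<u8, U3> = serde_json::from_str(&json).unwrap();
    assert_eq!(back, array);
}
```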

View File

@ -0,0 +1,171 @@
use super::{ArrayLength, GenericArray};
use core::borrow::{Borrow, BorrowMut};
use core::cmp::Ordering;
use core::fmt::{self, Debug};
use core::hash::{Hash, Hasher};
impl<T: Default, N> Default for GenericArray<T, N>
where
N: ArrayLength<T>,
{
#[inline]
fn default() -> Self {
Self::generate(|_| T::default())
}
}
impl<T: Clone, N> Clone for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn clone(&self) -> GenericArray<T, N> {
self.map_ref(|x| x.clone())
}
}
impl<T: Copy, N> Copy for GenericArray<T, N>
where
N: ArrayLength<T>,
N::ArrayType: Copy,
{
}
impl<T: PartialEq, N> PartialEq for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn eq(&self, other: &Self) -> bool {
**self == **other
}
}
impl<T: Eq, N> Eq for GenericArray<T, N>
where
N: ArrayLength<T>,
{
}
impl<T: PartialOrd, N> PartialOrd for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn partial_cmp(&self, other: &GenericArray<T, N>) -> Option<Ordering> {
PartialOrd::partial_cmp(self.as_slice(), other.as_slice())
}
}
impl<T: Ord, N> Ord for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn cmp(&self, other: &GenericArray<T, N>) -> Ordering {
Ord::cmp(self.as_slice(), other.as_slice())
}
}
impl<T: Debug, N> Debug for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
self[..].fmt(fmt)
}
}
impl<T, N> Borrow<[T]> for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn borrow(&self) -> &[T] {
&self[..]
}
}
impl<T, N> BorrowMut<[T]> for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn borrow_mut(&mut self) -> &mut [T] {
&mut self[..]
}
}
impl<T, N> AsRef<[T]> for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn as_ref(&self) -> &[T] {
&self[..]
}
}
impl<T, N> AsMut<[T]> for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn as_mut(&mut self) -> &mut [T] {
&mut self[..]
}
}
impl<T: Hash, N> Hash for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn hash<H>(&self, state: &mut H)
where
H: Hasher,
{
Hash::hash(&self[..], state)
}
}
macro_rules! impl_from {
($($n: expr => $ty: ty),*) => {
$(
impl<T> From<[T; $n]> for GenericArray<T, $ty> {
fn from(arr: [T; $n]) -> Self {
use core::mem::{forget, transmute_copy};
let x = unsafe { transmute_copy(&arr) };
forget(arr);
x
}
}
)*
}
}
impl_from! {
1 => ::typenum::U1,
2 => ::typenum::U2,
3 => ::typenum::U3,
4 => ::typenum::U4,
5 => ::typenum::U5,
6 => ::typenum::U6,
7 => ::typenum::U7,
8 => ::typenum::U8,
9 => ::typenum::U9,
10 => ::typenum::U10,
11 => ::typenum::U11,
12 => ::typenum::U12,
13 => ::typenum::U13,
14 => ::typenum::U14,
15 => ::typenum::U15,
16 => ::typenum::U16,
17 => ::typenum::U17,
18 => ::typenum::U18,
19 => ::typenum::U19,
20 => ::typenum::U20,
21 => ::typenum::U21,
22 => ::typenum::U22,
23 => ::typenum::U23,
24 => ::typenum::U24,
25 => ::typenum::U25,
26 => ::typenum::U26,
27 => ::typenum::U27,
28 => ::typenum::U28,
29 => ::typenum::U29,
30 => ::typenum::U30,
31 => ::typenum::U31,
32 => ::typenum::U32
}
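In practice this means `.into()` works from ordinary arrays up to length 32, as in this short sketch (mine; the `test_from` test later in this diff exercises the same impl):
```rust
extern crate generic_array;

use generic_array::GenericArray;
use generic_array::typenum::U4;

fn main() {
    // From<[T; 4]> maps onto GenericArray<T, U4>:
    let ga: GenericArray<u32, U4> = [1, 2, 3, 4].into();
    assert_eq!(ga.as_slice(), &[1, 2, 3, 4][..]);
}
```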

View File

@ -0,0 +1,117 @@
//! `GenericArray` iterator implementation.
use super::{ArrayLength, GenericArray};
use core::{cmp, ptr};
use core::mem::ManuallyDrop;
/// An iterator that moves out of a `GenericArray`
pub struct GenericArrayIter<T, N: ArrayLength<T>> {
// Invariants: index <= index_back <= N
// Only values in array[index..index_back] are alive at any given time.
// Values from array[..index] and array[index_back..] are already moved/dropped.
array: ManuallyDrop<GenericArray<T, N>>,
index: usize,
index_back: usize,
}
impl<T, N> IntoIterator for GenericArray<T, N>
where
N: ArrayLength<T>,
{
type Item = T;
type IntoIter = GenericArrayIter<T, N>;
fn into_iter(self) -> Self::IntoIter {
GenericArrayIter {
array: ManuallyDrop::new(self),
index: 0,
index_back: N::to_usize(),
}
}
}
impl<T, N> Drop for GenericArrayIter<T, N>
where
N: ArrayLength<T>,
{
fn drop(&mut self) {
// Drop values that are still alive.
for p in &mut self.array[self.index..self.index_back] {
unsafe {
ptr::drop_in_place(p);
}
}
}
}
impl<T, N> Iterator for GenericArrayIter<T, N>
where
N: ArrayLength<T>,
{
type Item = T;
fn next(&mut self) -> Option<T> {
if self.len() > 0 {
unsafe {
let p = self.array.get_unchecked(self.index);
self.index += 1;
Some(ptr::read(p))
}
} else {
None
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.len();
(len, Some(len))
}
fn count(self) -> usize {
self.len()
}
fn nth(&mut self, n: usize) -> Option<T> {
// First consume values prior to the nth.
let ndrop = cmp::min(n, self.len());
for p in &mut self.array[self.index..self.index + ndrop] {
self.index += 1;
unsafe {
ptr::drop_in_place(p);
}
}
self.next()
}
fn last(mut self) -> Option<T> {
// Note: the remaining elements will be dropped correctly as `self` leaves scope.
self.next_back()
}
}
impl<T, N> DoubleEndedIterator for GenericArrayIter<T, N>
where
N: ArrayLength<T>,
{
fn next_back(&mut self) -> Option<T> {
if self.len() > 0 {
self.index_back -= 1;
unsafe {
let p = self.array.get_unchecked(self.index_back);
Some(ptr::read(p))
}
} else {
None
}
}
}
impl<T, N> ExactSizeIterator for GenericArrayIter<T, N>
where
N: ArrayLength<T>,
{
fn len(&self) -> usize {
self.index_back - self.index
}
}
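A short usage sketch (mine): `into_iter` consumes the array by value, and the iterator is exact-size and double-ended, per the impls above:
```rust
#[macro_use]
extern crate generic_array;

fn main() {
    let mut it = arr![i32; 1, 2, 3].into_iter();
    assert_eq!(it.len(), 3);              // ExactSizeIterator
    assert_eq!(it.next(), Some(1));       // front
    assert_eq!(it.next_back(), Some(3));  // back (DoubleEndedIterator)
    assert_eq!(it.next(), Some(2));
    assert_eq!(it.next(), None);
}
```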

View File

@ -0,0 +1,464 @@
//! This crate implements a structure that can be used as a generic array type.
//! Core Rust array types `[T; N]` can't be used generically with
//! respect to `N`, so for example this:
//!
//! ```{should_fail}
//! struct Foo<T, N> {
//! data: [T; N]
//! }
//! ```
//!
//! won't work.
//!
//! **generic-array** exports a `GenericArray<T,N>` type, which lets
//! the above be implemented as:
//!
//! ```
//! # use generic_array::{ArrayLength, GenericArray};
//! struct Foo<T, N: ArrayLength<T>> {
//! data: GenericArray<T,N>
//! }
//! ```
//!
//! The `ArrayLength<T>` trait is implemented by default for
//! [unsigned integer types](../typenum/uint/index.html) from
//! [typenum](../typenum/index.html).
//!
//! For ease of use, an `arr!` macro is provided - example below:
//!
//! ```
//! # #[macro_use]
//! # extern crate generic_array;
//! # extern crate typenum;
//! # fn main() {
//! let array = arr![u32; 1, 2, 3];
//! assert_eq!(array[2], 3);
//! # }
//! ```
//#![deny(missing_docs)]
#![no_std]
pub extern crate typenum;
#[cfg(feature = "serde")]
extern crate serde;
mod hex;
mod impls;
#[cfg(feature = "serde")]
pub mod impl_serde;
use core::{mem, ptr, slice};
use core::marker::PhantomData;
use core::mem::ManuallyDrop;
pub use core::mem::transmute;
use core::ops::{Deref, DerefMut};
use typenum::bit::{B0, B1};
use typenum::uint::{UInt, UTerm, Unsigned};
#[cfg_attr(test, macro_use)]
pub mod arr;
pub mod iter;
pub use iter::GenericArrayIter;
/// Trait making `GenericArray` work, marking types to be used as length of an array
pub unsafe trait ArrayLength<T>: Unsigned {
/// Associated type representing the array type for the number
type ArrayType;
}
unsafe impl<T> ArrayLength<T> for UTerm {
#[doc(hidden)]
type ArrayType = ();
}
/// Internal type used to generate a struct of appropriate size
#[allow(dead_code)]
#[repr(C)]
#[doc(hidden)]
pub struct GenericArrayImplEven<T, U> {
parent1: U,
parent2: U,
_marker: PhantomData<T>,
}
impl<T: Clone, U: Clone> Clone for GenericArrayImplEven<T, U> {
fn clone(&self) -> GenericArrayImplEven<T, U> {
GenericArrayImplEven {
parent1: self.parent1.clone(),
parent2: self.parent2.clone(),
_marker: PhantomData,
}
}
}
impl<T: Copy, U: Copy> Copy for GenericArrayImplEven<T, U> {}
/// Internal type used to generate a struct of appropriate size
#[allow(dead_code)]
#[repr(C)]
#[doc(hidden)]
pub struct GenericArrayImplOdd<T, U> {
parent1: U,
parent2: U,
data: T,
}
impl<T: Clone, U: Clone> Clone for GenericArrayImplOdd<T, U> {
fn clone(&self) -> GenericArrayImplOdd<T, U> {
GenericArrayImplOdd {
parent1: self.parent1.clone(),
parent2: self.parent2.clone(),
data: self.data.clone(),
}
}
}
impl<T: Copy, U: Copy> Copy for GenericArrayImplOdd<T, U> {}
unsafe impl<T, N: ArrayLength<T>> ArrayLength<T> for UInt<N, B0> {
#[doc(hidden)]
type ArrayType = GenericArrayImplEven<T, N::ArrayType>;
}
unsafe impl<T, N: ArrayLength<T>> ArrayLength<T> for UInt<N, B1> {
#[doc(hidden)]
type ArrayType = GenericArrayImplOdd<T, N::ArrayType>;
}
/// Struct representing a generic array - `GenericArray<T, N>` works like `[T; N]`.
#[allow(dead_code)]
pub struct GenericArray<T, U: ArrayLength<T>> {
data: U::ArrayType,
}
impl<T, N> Deref for GenericArray<T, N>
where
N: ArrayLength<T>,
{
type Target = [T];
fn deref(&self) -> &[T] {
unsafe { slice::from_raw_parts(self as *const Self as *const T, N::to_usize()) }
}
}
impl<T, N> DerefMut for GenericArray<T, N>
where
N: ArrayLength<T>,
{
fn deref_mut(&mut self) -> &mut [T] {
unsafe { slice::from_raw_parts_mut(self as *mut Self as *mut T, N::to_usize()) }
}
}
struct ArrayBuilder<T, N: ArrayLength<T>> {
array: ManuallyDrop<GenericArray<T, N>>,
position: usize,
}
impl<T, N: ArrayLength<T>> ArrayBuilder<T, N> {
fn new() -> ArrayBuilder<T, N> {
ArrayBuilder {
array: ManuallyDrop::new(unsafe { mem::uninitialized() }),
position: 0,
}
}
fn into_inner(self) -> GenericArray<T, N> {
let array = unsafe { ptr::read(&self.array) };
mem::forget(self);
ManuallyDrop::into_inner(array)
}
}
impl<T, N: ArrayLength<T>> Drop for ArrayBuilder<T, N> {
fn drop(&mut self) {
for value in self.array.iter_mut().take(self.position) {
unsafe {
ptr::drop_in_place(value);
}
}
}
}
struct ArrayConsumer<T, N: ArrayLength<T>> {
array: ManuallyDrop<GenericArray<T, N>>,
position: usize,
}
impl<T, N: ArrayLength<T>> ArrayConsumer<T, N> {
fn new(array: GenericArray<T, N>) -> ArrayConsumer<T, N> {
ArrayConsumer {
array: ManuallyDrop::new(array),
position: 0,
}
}
}
impl<T, N: ArrayLength<T>> Drop for ArrayConsumer<T, N> {
fn drop(&mut self) {
for i in self.position..N::to_usize() {
unsafe {
ptr::drop_in_place(self.array.get_unchecked_mut(i));
}
}
}
}
impl<T, N> GenericArray<T, N>
where
N: ArrayLength<T>,
{
/// Initializes a new `GenericArray` instance using the given function.
///
/// If the generator function panics while initializing the array,
/// any already initialized elements will be dropped.
pub fn generate<F>(f: F) -> GenericArray<T, N>
where
F: Fn(usize) -> T,
{
let mut destination = ArrayBuilder::new();
for (i, dst) in destination.array.iter_mut().enumerate() {
unsafe {
ptr::write(dst, f(i));
}
destination.position += 1;
}
destination.into_inner()
}
/// Map a function over a slice to a `GenericArray`.
///
/// The length of the slice *must* be equal to the length of the array.
#[inline]
pub fn map_slice<S, F: Fn(&S) -> T>(s: &[S], f: F) -> GenericArray<T, N> {
assert_eq!(s.len(), N::to_usize());
Self::generate(|i| f(unsafe { s.get_unchecked(i) }))
}
/// Maps a `GenericArray` to another `GenericArray`.
///
/// If the mapping function panics, any already initialized elements in the new array
/// will be dropped, AND any unused elements in the source array will also be dropped.
pub fn map<U, F>(self, f: F) -> GenericArray<U, N>
where
F: Fn(T) -> U,
N: ArrayLength<U>,
{
let mut source = ArrayConsumer::new(self);
let mut destination = ArrayBuilder::new();
for (dst, src) in destination.array.iter_mut().zip(source.array.iter()) {
unsafe {
ptr::write(dst, f(ptr::read(src)));
}
source.position += 1;
destination.position += 1;
}
destination.into_inner()
}
/// Maps a `GenericArray` to another `GenericArray` by reference.
///
/// If the mapping function panics, any already initialized elements will be dropped.
#[inline]
pub fn map_ref<U, F>(&self, f: F) -> GenericArray<U, N>
where
F: Fn(&T) -> U,
N: ArrayLength<U>,
{
GenericArray::generate(|i| f(unsafe { self.get_unchecked(i) }))
}
/// Combines two `GenericArray` instances and iterates through both of them,
/// initializing a new `GenericArray` with the result of the zipped mapping function.
///
/// If the mapping function panics, any already initialized elements in the new array
/// will be dropped, AND any unused elements in the source arrays will also be dropped.
pub fn zip<B, U, F>(self, rhs: GenericArray<B, N>, f: F) -> GenericArray<U, N>
where
F: Fn(T, B) -> U,
N: ArrayLength<B> + ArrayLength<U>,
{
let mut left = ArrayConsumer::new(self);
let mut right = ArrayConsumer::new(rhs);
let mut destination = ArrayBuilder::new();
for (dst, (lhs, rhs)) in
destination.array.iter_mut().zip(left.array.iter().zip(
right.array.iter(),
))
{
unsafe {
ptr::write(dst, f(ptr::read(lhs), ptr::read(rhs)));
}
destination.position += 1;
left.position += 1;
right.position += 1;
}
destination.into_inner()
}
/// Combines two `GenericArray` instances and iterates through both of them by reference,
/// initializing a new `GenericArray` with the result of the zipped mapping function.
///
/// If the mapping function panics, any already initialized elements will be dropped.
pub fn zip_ref<B, U, F>(&self, rhs: &GenericArray<B, N>, f: F) -> GenericArray<U, N>
where
F: Fn(&T, &B) -> U,
N: ArrayLength<B> + ArrayLength<U>,
{
GenericArray::generate(|i| unsafe {
f(self.get_unchecked(i), rhs.get_unchecked(i))
})
}
/// Extracts a slice containing the entire array.
#[inline]
pub fn as_slice(&self) -> &[T] {
self.deref()
}
/// Extracts a mutable slice containing the entire array.
#[inline]
pub fn as_mut_slice(&mut self) -> &mut [T] {
self.deref_mut()
}
/// Converts a slice to a generic array reference with inferred length.
///
/// Length of the slice must be equal to the length of the array.
#[inline]
pub fn from_slice(slice: &[T]) -> &GenericArray<T, N> {
assert_eq!(slice.len(), N::to_usize());
unsafe { &*(slice.as_ptr() as *const GenericArray<T, N>) }
}
/// Converts a mutable slice to a mutable generic array reference.
///
/// Length of the slice must be equal to the length of the array.
#[inline]
pub fn from_mut_slice(slice: &mut [T]) -> &mut GenericArray<T, N> {
assert_eq!(slice.len(), N::to_usize());
unsafe { &mut *(slice.as_mut_ptr() as *mut GenericArray<T, N>) }
}
}
impl<T: Clone, N> GenericArray<T, N>
where
N: ArrayLength<T>,
{
/// Construct a `GenericArray` from a slice by cloning its content
///
/// Length of the slice must be equal to the length of the array
#[inline]
pub fn clone_from_slice(list: &[T]) -> GenericArray<T, N> {
Self::from_exact_iter(list.iter().cloned()).expect(
"Slice must be the same length as the array",
)
}
}
impl<T, N> GenericArray<T, N>
where
N: ArrayLength<T>,
{
pub fn from_exact_iter<I>(iter: I) -> Option<Self>
where
I: IntoIterator<Item = T>,
<I as IntoIterator>::IntoIter: ExactSizeIterator,
{
let iter = iter.into_iter();
if iter.len() == N::to_usize() {
let mut destination = ArrayBuilder::new();
for (dst, src) in destination.array.iter_mut().zip(iter.into_iter()) {
unsafe {
ptr::write(dst, src);
}
destination.position += 1;
}
let array = unsafe { ptr::read(&destination.array) };
mem::forget(destination);
Some(ManuallyDrop::into_inner(array))
} else {
None
}
}
}
impl<T, N> ::core::iter::FromIterator<T> for GenericArray<T, N>
where
N: ArrayLength<T>,
T: Default,
{
fn from_iter<I>(iter: I) -> GenericArray<T, N>
where
I: IntoIterator<Item = T>,
{
let mut destination = ArrayBuilder::new();
let defaults = ::core::iter::repeat(()).map(|_| T::default());
for (dst, src) in destination.array.iter_mut().zip(
iter.into_iter().chain(defaults),
)
{
unsafe {
ptr::write(dst, src);
}
}
destination.into_inner()
}
}
#[cfg(test)]
mod test {
// Compile with:
// cargo rustc --lib --profile test --release --
// -C target-cpu=native -C opt-level=3 --emit asm
// and view the assembly to make sure test_assembly generates
// SIMD instructions instead of a naive loop.
#[inline(never)]
pub fn black_box<T>(val: T) -> T {
use core::{mem, ptr};
let ret = unsafe { ptr::read_volatile(&val) };
mem::forget(val);
ret
}
#[test]
fn test_assembly() {
let a = black_box(arr![i32; 1, 3, 5, 7]);
let b = black_box(arr![i32; 2, 4, 6, 8]);
let c = a.zip_ref(&b, |l, r| l + r);
assert_eq!(c, arr![i32; 3, 7, 11, 15]);
}
}
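To tie the pieces together, a brief sketch (mine) of the functional constructors defined above; the crate's integration tests later in this diff check the same behavior:
```rust
#[macro_use]
extern crate generic_array;

use generic_array::GenericArray;
use generic_array::typenum::U4;

fn main() {
    // generate builds an array element-by-element; map transforms it.
    let a: GenericArray<i32, U4> = GenericArray::generate(|i| i as i32 * 4);
    assert_eq!(a.map(|x| x - 3), arr![i32; -3, 1, 5, 9]);

    // zip combines two arrays of the same length.
    let b: GenericArray<usize, U4> = GenericArray::generate(|i| i + 1);
    let c: GenericArray<i32, U4> = GenericArray::generate(|i| i as i32 * 4);
    assert_eq!(b.zip(c, |l, r| l as i32 + r), arr![i32; 1, 6, 11, 16]);
}
```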

View File

@ -0,0 +1,44 @@
#[macro_use]
extern crate generic_array;
extern crate typenum;
use generic_array::GenericArray;
use std::str::from_utf8;
use typenum::U2048;
#[test]
fn short_lower_hex() {
let ar = arr![u8; 10, 20, 30];
assert_eq!(format!("{:x}", ar), "0a141e");
}
#[test]
fn short_upper_hex() {
let ar = arr![u8; 30, 20, 10];
assert_eq!(format!("{:X}", ar), "1E140A");
}
#[test]
fn long_lower_hex() {
let ar = GenericArray::<u8, U2048>::default();
assert_eq!(format!("{:x}", ar), from_utf8(&[b'0'; 4096]).unwrap());
}
#[test]
fn long_upper_hex() {
let ar = GenericArray::<u8, U2048>::default();
assert_eq!(format!("{:X}", ar), from_utf8(&[b'0'; 4096]).unwrap());
}
#[test]
fn truncated_lower_hex() {
let ar = arr![u8; 10, 20, 30, 40, 50];
assert_eq!(format!("{:.2x}", ar), "0a14");
}
#[test]
fn truncated_upper_hex() {
let ar = arr![u8; 30, 20, 10, 17, 0];
assert_eq!(format!("{:.4X}", ar), "1E140A11");
}

View File

@ -0,0 +1,10 @@
#[macro_use]
extern crate generic_array as gen_arr;
use gen_arr::typenum;
#[test]
fn test_different_crate_name() {
let _: gen_arr::GenericArray<u32, typenum::U4> = arr![u32; 0, 1, 2, 3];
let _: gen_arr::GenericArray<u32, typenum::U0> = arr![u32;];
}

View File

@ -0,0 +1,169 @@
#![recursion_limit="128"]
#![no_std]
#[macro_use]
extern crate generic_array;
use core::cell::Cell;
use core::ops::Drop;
use generic_array::GenericArray;
use generic_array::typenum::{U1, U3, U4, U97};
#[test]
fn test() {
let mut list97 = [0; 97];
for i in 0..97 {
list97[i] = i as i32;
}
let l: GenericArray<i32, U97> = GenericArray::clone_from_slice(&list97);
assert_eq!(l[0], 0);
assert_eq!(l[1], 1);
assert_eq!(l[32], 32);
assert_eq!(l[56], 56);
}
#[test]
fn test_drop() {
#[derive(Clone)]
struct TestDrop<'a>(&'a Cell<u32>);
impl<'a> Drop for TestDrop<'a> {
fn drop(&mut self) {
self.0.set(self.0.get() + 1);
}
}
let drop_counter = Cell::new(0);
{
let _: GenericArray<TestDrop, U3> =
arr![TestDrop; TestDrop(&drop_counter),
TestDrop(&drop_counter),
TestDrop(&drop_counter)];
}
assert_eq!(drop_counter.get(), 3);
}
#[test]
fn test_arr() {
let test: GenericArray<u32, U3> = arr![u32; 1, 2, 3];
assert_eq!(test[1], 2);
}
#[test]
fn test_copy() {
let test = arr![u32; 1, 2, 3];
let test2 = test;
// if GenericArray is not copy, this should fail as a use of a moved value
assert_eq!(test[1], 2);
assert_eq!(test2[0], 1);
}
#[test]
fn test_iter_flat_map() {
assert!((0..5).flat_map(|i| arr![i32; 2 * i, 2 * i + 1]).eq(0..10));
}
#[derive(Debug, PartialEq, Eq)]
struct NoClone<T>(T);
#[test]
fn test_from_slice() {
let arr = [1, 2, 3, 4];
let gen_arr = GenericArray::<_, U3>::from_slice(&arr[..3]);
assert_eq!(&arr[..3], gen_arr.as_slice());
let arr = [NoClone(1u32), NoClone(2), NoClone(3), NoClone(4)];
let gen_arr = GenericArray::<_, U3>::from_slice(&arr[..3]);
assert_eq!(&arr[..3], gen_arr.as_slice());
}
#[test]
fn test_from_mut_slice() {
let mut arr = [1, 2, 3, 4];
{
let gen_arr = GenericArray::<_, U3>::from_mut_slice(&mut arr[..3]);
gen_arr[2] = 10;
}
assert_eq!(arr, [1, 2, 10, 4]);
let mut arr = [NoClone(1u32), NoClone(2), NoClone(3), NoClone(4)];
{
let gen_arr = GenericArray::<_, U3>::from_mut_slice(&mut arr[..3]);
gen_arr[2] = NoClone(10);
}
assert_eq!(arr, [NoClone(1), NoClone(2), NoClone(10), NoClone(4)]);
}
#[test]
fn test_default() {
let arr = GenericArray::<u8, U1>::default();
assert_eq!(arr[0], 0);
}
#[test]
fn test_from() {
let data = [(1, 2, 3), (4, 5, 6), (7, 8, 9)];
let garray: GenericArray<(usize, usize, usize), U3> = data.into();
assert_eq!(&data, garray.as_slice());
}
#[test]
fn test_unit_macro() {
let arr = arr![f32; 3.14];
assert_eq!(arr[0], 3.14);
}
#[test]
fn test_empty_macro() {
let _arr = arr![f32;];
}
#[test]
fn test_cmp() {
arr![u8; 0x00].cmp(&arr![u8; 0x00]);
}
/// This test should cause a helpful compile error if uncommented.
// #[test]
// fn test_empty_macro2(){
// let arr = arr![];
// }
#[cfg(feature = "serde")]
mod impl_serde {
extern crate serde_json;
use generic_array::GenericArray;
use generic_array::typenum::U6;
#[test]
fn test_serde_implementation() {
let array: GenericArray<f64, U6> = arr![f64; 0.0, 5.0, 3.0, 7.07192, 76.0, -9.0];
let string = serde_json::to_string(&array).unwrap();
assert_eq!(string, "[0.0,5.0,3.0,7.07192,76.0,-9.0]");
let test_array: GenericArray<f64, U6> = serde_json::from_str(&string).unwrap();
assert_eq!(test_array, array);
}
}
#[test]
fn test_map() {
let b: GenericArray<i32, U4> = GenericArray::generate(|i| i as i32 * 4).map(|x| x - 3);
assert_eq!(b, arr![i32; -3, 1, 5, 9]);
}
#[test]
fn test_zip() {
let a: GenericArray<_, U4> = GenericArray::generate(|i| i + 1);
let b: GenericArray<_, U4> = GenericArray::generate(|i| i as i32 * 4);
let c = a.zip(b, |r, l| r as i32 + l);
assert_eq!(c, arr![i32; 1, 6, 11, 16]);
}
#[test]
fn test_from_iter() {
use core::iter::repeat;
let a: GenericArray<_, U4> = repeat(11).take(3).collect();
assert_eq!(a, arr![i32; 11, 11, 11, 0]);
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,65 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g. crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)
[package]
name = "lalrpop-snap"
version = "0.16.0"
authors = ["Niko Matsakis <niko@alum.mit.edu>"]
description = "convenient LR(1) parser generator"
readme = "../README.md"
keywords = ["parser", "generator", "LR", "yacc", "grammar"]
categories = ["parsing"]
license = "Apache-2.0/MIT"
repository = "https://github.com/lalrpop/lalrpop"
[lib]
doctest = false
[dependencies.ascii-canvas]
version = "1.0"
[dependencies.atty]
version = "0.2"
[dependencies.bit-set]
version = "0.5.0"
[dependencies.diff]
version = "0.1.9"
[dependencies.ena]
version = "0.9"
[dependencies.itertools]
version = "0.7"
[dependencies.lalrpop-util]
version = "0.16.0"
[dependencies.petgraph]
version = "0.4.13"
[dependencies.regex]
version = "1"
[dependencies.regex-syntax]
version = "0.4.0"
[dependencies.string_cache]
version = "0.7.1"
[dependencies.term]
version = "0.4.5"
[dependencies.unicode-xid]
version = "0.1"
[dev-dependencies.rand]
version = "0.4"

View File

@ -0,0 +1,182 @@
use build;
use log::Level;
use session::{ColorConfig, Session};
use std::default::Default;
use std::env;
use std::env::current_dir;
use std::error::Error;
use std::path::{Path, PathBuf};
use std::rc::Rc;
/// Configure various aspects of how LALRPOP works.
/// Intended for use within a `build.rs` script.
/// To get the default configuration, use `Configuration::new`.
#[derive(Clone, Default)]
pub struct Configuration {
session: Session,
}
impl Configuration {
/// Creates the default configuration; equivalent to `Configuration::default`.
pub fn new() -> Configuration {
Configuration::default()
}
/// Always use ANSI colors in output, even if output does not appear to be a TTY.
pub fn always_use_colors(&mut self) -> &mut Configuration {
self.session.color_config = ColorConfig::Yes;
self
}
/// Never use ANSI colors in output, even if output appears to be a TTY.
pub fn never_use_colors(&mut self) -> &mut Configuration {
self.session.color_config = ColorConfig::No;
self
}
/// Use ANSI colors in output if output appears to be a TTY, but
/// not otherwise. This is the default.
pub fn use_colors_if_tty(&mut self) -> &mut Configuration {
self.session.color_config = ColorConfig::IfTty;
self
}
/// Specify a custom directory to search for input files. This
/// directory is recursively searched for `.lalrpop` files to be
/// considered as input files. This configuration setting also
/// impacts where output files are placed; paths are made relative
/// to the input path before being resolved relative to the output
/// path. By default, the input directory is the current working
/// directory.
pub fn set_in_dir<P>(&mut self, dir: P) -> &mut Self
where
P: Into<PathBuf>,
{
self.session.in_dir = Some(dir.into());
self
}
/// Specify a custom directory to use when writing output files.
/// By default, the output directory is the same as the input
/// directory.
pub fn set_out_dir<P>(&mut self, dir: P) -> &mut Self
where
P: Into<PathBuf>,
{
self.session.out_dir = Some(dir.into());
self
}
/// Apply `cargo` directory location conventions, by setting the
/// input directory to `src` and the output directory to
/// `$OUT_DIR`.
pub fn use_cargo_dir_conventions(&mut self) -> &mut Self {
self.set_in_dir("src")
.set_out_dir(env::var("OUT_DIR").unwrap());
self
}
/// If true, always convert `.lalrpop` files into `.rs` files, even if the
/// `.rs` file is newer. Default is false.
pub fn force_build(&mut self, val: bool) -> &mut Configuration {
self.session.force_build = val;
self
}
/// If true, emit comments into the generated code. This makes the
/// generated code significantly larger. Default is false.
pub fn emit_comments(&mut self, val: bool) -> &mut Configuration {
self.session.emit_comments = val;
self
}
/// If true, emit report file about generated code.
pub fn emit_report(&mut self, val: bool) -> &mut Configuration {
self.session.emit_report = val;
self
}
/// Minimal logs: only for errors that halt progress.
pub fn log_quiet(&mut self) -> &mut Configuration {
self.session.log.set_level(Level::Taciturn);
self
}
/// Informative logs: give some high-level indications of
/// progress (default).
pub fn log_info(&mut self) -> &mut Configuration {
self.session.log.set_level(Level::Informative);
self
}
/// Verbose logs: more than info, but still not overwhelming.
pub fn log_verbose(&mut self) -> &mut Configuration {
self.session.log.set_level(Level::Verbose);
self
}
/// Debug logs: better redirect this to a file. Intended for
/// debugging LALRPOP itself.
pub fn log_debug(&mut self) -> &mut Configuration {
self.session.log.set_level(Level::Debug);
self
}
/// Enables "unit-testing" configuration. This is only for
/// lalrpop-test.
#[doc(hidden)]
pub fn unit_test(&mut self) -> &mut Configuration {
self.session.unit_test = true;
self
}
/// Process all files according to the `set_in_dir` and
/// `set_out_dir` configuration.
pub fn process(&self) -> Result<(), Box<Error>> {
let root = if let Some(ref d) = self.session.in_dir {
d.as_path()
} else {
Path::new(".")
};
self.process_dir(root)
}
/// Process all files in the current directory, which -- unless you
/// have changed it -- is typically the root of the crate being compiled.
pub fn process_current_dir(&self) -> Result<(), Box<Error>> {
self.process_dir(try!(current_dir()))
}
/// Process all `.lalrpop` files in `path`.
pub fn process_dir<P: AsRef<Path>>(&self, path: P) -> Result<(), Box<Error>> {
let session = Rc::new(self.session.clone());
try!(build::process_dir(session, path));
Ok(())
}
/// Process the given `.lalrpop` file.
pub fn process_file<P: AsRef<Path>>(&self, path: P) -> Result<(), Box<Error>> {
let session = Rc::new(self.session.clone());
try!(build::process_file(session, path));
Ok(())
}
}
/// Process all files in the current directory, which -- unless you
/// have changed it -- is typically the root of the crate being compiled.
///
/// Equivalent to `Configuration::new().process_current_dir()`.
pub fn process_root() -> Result<(), Box<Error>> {
Configuration::new().process_current_dir()
}
/// Deprecated in favor of `Configuration`. Try:
///
/// ```rust
/// Configuration::new().force_build(true).process_current_dir()
/// ```
///
/// instead.
pub fn process_root_unconditionally() -> Result<(), Box<Error>> {
Configuration::new().force_build(true).process_current_dir()
}
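For orientation, a minimal `build.rs` sketch using this API (my example; it assumes the crate is consumed under the public `lalrpop` name, of which `lalrpop-snap` is a snapshot):
```rust
// build.rs
extern crate lalrpop;

fn main() {
    // Search `src` for `.lalrpop` grammars and write the generated
    // parsers to `$OUT_DIR`, per use_cargo_dir_conventions above.
    lalrpop::Configuration::new()
        .use_cargo_dir_conventions()
        .process()
        .unwrap();
}
```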

View File

@ -0,0 +1,422 @@
//! Code for generating action code.
//!
//! From the outside, action fns have one of two forms. If they take
//! symbols as input, e.g. from a production like `X = Y Z => ...`
//! (which takes Y and Z as input), they have this form:
//!
//! ```
//! fn __action17<
//! 'input, // user-declared type parameters (*)
//! >(
//! input: &'input str, // user-declared parameters
//! __0: (usize, usize, usize), // symbols being reduced, if any
//! ...
//! __N: (usize, Foo, usize), // each has a type (L, T, L)
//! ) -> Box<Expr<'input>>
//! ```
//!
//! Otherwise, they have this form:
//!
//! ```
//! fn __action17<
//! 'input, // user-declared type parameters (*)
//! >(
//! input: &'input str, // user-declared parameters
//! __lookbehind: &usize, // value for @R -- "end of previous token"
//! __lookahead: &usize, // value for @L -- "start of next token"
//! ) -> Box<Expr<'input>>
//! ```
//!
//! * -- in this case, those "user-declared" parameters are inserted by
//! the "internal tokenizer".
use grammar::repr as r;
use rust::RustWrite;
use std::io::{self, Write};
pub fn emit_action_code<W: Write>(grammar: &r::Grammar, rust: &mut RustWrite<W>) -> io::Result<()> {
for (i, defn) in grammar.action_fn_defns.iter().enumerate() {
rust!(rust, "");
// we always thread the parameters through to the action code,
// even if they are not used, and hence we need to disable the
// unused variables lint, which otherwise gets very excited.
if !grammar.parameters.is_empty() {
rust!(rust, "#[allow(unused_variables)]");
}
match defn.kind {
r::ActionFnDefnKind::User(ref data) => {
try!(emit_user_action_code(grammar, rust, i, defn, data))
}
r::ActionFnDefnKind::Lookaround(ref variant) => {
try!(emit_lookaround_action_code(grammar, rust, i, defn, variant))
}
r::ActionFnDefnKind::Inline(ref data) => {
try!(emit_inline_action_code(grammar, rust, i, defn, data))
}
}
}
Ok(())
}
fn ret_type_string(grammar: &r::Grammar, defn: &r::ActionFnDefn) -> String {
if defn.fallible {
format!(
"Result<{},{}lalrpop_util::ParseError<{},{},{}>>",
defn.ret_type,
grammar.prefix,
grammar.types.terminal_loc_type(),
grammar.types.terminal_token_type(),
grammar.types.error_type()
)
} else {
format!("{}", defn.ret_type)
}
}
fn emit_user_action_code<W: Write>(
grammar: &r::Grammar,
rust: &mut RustWrite<W>,
index: usize,
defn: &r::ActionFnDefn,
data: &r::UserActionFnDefn,
) -> io::Result<()> {
let ret_type = ret_type_string(grammar, defn);
// For each symbol to be reduced, we will receive
// a (L, T, L) triple where the Ls are locations and
// the T is the data. Ignore the locations and bind
// the data to the name the user gave.
let mut arguments: Vec<String> = data.arg_patterns
.iter()
.zip(
data.arg_types
.iter()
.cloned()
.map(|t| grammar.types.spanned_type(t)),
)
.map(|(p, t)| format!("(_, {}, _): {}", p, t))
.collect();
// If this is a reduce of an empty production, we will
// automatically add position information in the form of
// lookbehind/lookahead values. Otherwise, those values would be
// determined from the arguments themselves.
if data.arg_patterns.is_empty() {
arguments.extend(vec![
format!(
"{}lookbehind: &{}",
grammar.prefix,
grammar.types.terminal_loc_type()
),
format!(
"{}lookahead: &{}",
grammar.prefix,
grammar.types.terminal_loc_type()
),
]);
}
try!(rust.write_fn_header(
grammar,
&r::Visibility::Priv,
format!("{}action{}", grammar.prefix, index),
vec![],
None,
arguments,
ret_type,
vec![]
));
rust!(rust, "{{");
rust!(rust, "{}", data.code);
rust!(rust, "}}");
Ok(())
}
fn emit_lookaround_action_code<W: Write>(
grammar: &r::Grammar,
rust: &mut RustWrite<W>,
index: usize,
_defn: &r::ActionFnDefn,
data: &r::LookaroundActionFnDefn,
) -> io::Result<()> {
try!(rust.write_fn_header(
grammar,
&r::Visibility::Priv,
format!("{}action{}", grammar.prefix, index),
vec![],
None,
vec![
format!(
"{}lookbehind: &{}",
grammar.prefix,
grammar.types.terminal_loc_type()
),
format!(
"{}lookahead: &{}",
grammar.prefix,
grammar.types.terminal_loc_type()
),
],
format!("{}", grammar.types.terminal_loc_type()),
vec![]
));
rust!(rust, "{{");
match *data {
r::LookaroundActionFnDefn::Lookahead => {
// take the lookahead, if any; otherwise, we are
// at EOF, so take the lookbehind (end of last
// pushed token); if that is missing too, then
// supply default.
rust!(rust, "{}lookahead.clone()", grammar.prefix);
}
r::LookaroundActionFnDefn::Lookbehind => {
// take lookbehind or supply default
rust!(rust, "{}lookbehind.clone()", grammar.prefix);
}
}
rust!(rust, "}}");
Ok(())
}
fn emit_inline_action_code<W: Write>(
grammar: &r::Grammar,
rust: &mut RustWrite<W>,
index: usize,
defn: &r::ActionFnDefn,
data: &r::InlineActionFnDefn,
) -> io::Result<()> {
let ret_type = ret_type_string(grammar, defn);
let arg_types: Vec<_> = data.symbols
.iter()
.flat_map(|sym| match *sym {
r::InlinedSymbol::Original(ref s) => vec![s.clone()],
r::InlinedSymbol::Inlined(_, ref syms) => syms.clone(),
})
.map(|s| s.ty(&grammar.types))
.collect();
// this is the number of symbols we expect to be passed in; it is
// distinct from data.symbols.len(), because sometimes we have
// inlined actions with no input symbols
let num_flat_args = arg_types.len();
let mut arguments: Vec<_> = arg_types
.iter()
.map(|&t| grammar.types.spanned_type(t.clone()))
.enumerate()
.map(|(i, t)| format!("{}{}: {}", grammar.prefix, i, t))
.collect();
// If no symbols are being reduced, add in the
// lookbehind/lookahead.
if arguments.len() == 0 {
arguments.extend(vec![
format!(
"{}lookbehind: &{}",
grammar.prefix,
grammar.types.terminal_loc_type()
),
format!(
"{}lookahead: &{}",
grammar.prefix,
grammar.types.terminal_loc_type()
),
]);
}
try!(rust.write_fn_header(
grammar,
&r::Visibility::Priv,
format!("{}action{}", grammar.prefix, index),
vec![],
None,
arguments,
ret_type,
vec![]
));
rust!(rust, "{{");
// For each inlined thing, compute the start/end locations.
// Do this first so that none of the arguments have been moved
// yet and we can easily access their locations.
let mut arg_counter = 0;
let mut temp_counter = 0;
for symbol in &data.symbols {
match *symbol {
r::InlinedSymbol::Original(_) => {
arg_counter += 1;
}
r::InlinedSymbol::Inlined(_, ref syms) => {
if syms.len() > 0 {
// If we are reducing symbols, then start and end
// can be the start/end location of the first/last
// symbol respectively. Easy peezy.
rust!(
rust,
"let {}start{} = {}{}.0.clone();",
grammar.prefix,
temp_counter,
grammar.prefix,
arg_counter
);
let last_arg_index = arg_counter + syms.len() - 1;
rust!(
rust,
"let {}end{} = {}{}.2.clone();",
grammar.prefix,
temp_counter,
grammar.prefix,
last_arg_index
);
} else {
// If we have no symbols, then `arg_counter`
// represents the index of the first symbol after this
// inlined item (if any), and `arg_counter - 1`
// represents the index of the symbol before this
// item.
if arg_counter > 0 {
rust!(
rust,
"let {}start{} = {}{}.2.clone();",
grammar.prefix,
temp_counter,
grammar.prefix,
arg_counter - 1
);
} else if num_flat_args > 0 {
rust!(
rust,
"let {}start{} = {}{}.0.clone();",
grammar.prefix,
temp_counter,
grammar.prefix,
arg_counter
);
} else {
rust!(
rust,
"let {}start{} = {}lookbehind.clone();",
grammar.prefix,
temp_counter,
grammar.prefix
);
}
if arg_counter < num_flat_args {
rust!(
rust,
"let {}end{} = {}{}.0.clone();",
grammar.prefix,
temp_counter,
grammar.prefix,
arg_counter
);
} else if num_flat_args > 0 {
rust!(
rust,
"let {}end{} = {}{}.2.clone();",
grammar.prefix,
temp_counter,
grammar.prefix,
num_flat_args - 1
);
} else {
rust!(
rust,
"let {}end{} = {}lookahead.clone();",
grammar.prefix,
temp_counter,
grammar.prefix
);
}
}
temp_counter += 1;
arg_counter += syms.len();
}
}
}
// Now create temporaries for the inlined things
let mut arg_counter = 0;
let mut temp_counter = 0;
for symbol in &data.symbols {
match *symbol {
r::InlinedSymbol::Original(_) => {
arg_counter += 1;
}
r::InlinedSymbol::Inlined(inlined_action, ref syms) => {
// execute the inlined reduce action
rust!(
rust,
"let {}temp{} = {}action{}(",
grammar.prefix,
temp_counter,
grammar.prefix,
inlined_action.index()
);
for parameter in &grammar.parameters {
rust!(rust, "{},", parameter.name);
}
for i in 0..syms.len() {
rust!(rust, "{}{},", grammar.prefix, arg_counter + i);
}
if syms.len() == 0 {
rust!(rust, "&{}start{},", grammar.prefix, temp_counter);
rust!(rust, "&{}end{},", grammar.prefix, temp_counter);
}
rust!(rust, ");");
// wrap up the inlined value along with its span
rust!(
rust,
"let {}temp{} = ({}start{}, {}temp{}, {}end{});",
grammar.prefix,
temp_counter,
grammar.prefix,
temp_counter,
grammar.prefix,
temp_counter,
grammar.prefix,
temp_counter
);
temp_counter += 1;
arg_counter += syms.len();
}
}
}
rust!(rust, "{}action{}(", grammar.prefix, data.action.index());
for parameter in &grammar.parameters {
rust!(rust, "{},", parameter.name);
}
let mut arg_counter = 0;
let mut temp_counter = 0;
for symbol in &data.symbols {
match *symbol {
r::InlinedSymbol::Original(_) => {
rust!(rust, "{}{},", grammar.prefix, arg_counter);
arg_counter += 1;
}
r::InlinedSymbol::Inlined(_, ref syms) => {
rust!(rust, "{}temp{},", grammar.prefix, temp_counter);
temp_counter += 1;
arg_counter += syms.len();
}
}
}
assert!(data.symbols.len() > 0);
rust!(rust, ")");
rust!(rust, "}}");
Ok(())
}

View File

@ -0,0 +1,84 @@
use std::io::{self, Write};
use term::{self, Attr, Terminal};
use term::color::Color;
/// A `Terminal` that just ignores all attempts at formatting. Used
/// to report errors when no ANSI terminfo is available.
pub struct FakeTerminal<W: Write> {
write: W,
}
impl<W: Write> FakeTerminal<W> {
pub fn new(write: W) -> FakeTerminal<W> {
FakeTerminal { write: write }
}
}
impl<W: Write> Write for FakeTerminal<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.write.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.write.flush()
}
}
impl<W: Write> Terminal for FakeTerminal<W> {
type Output = W;
fn fg(&mut self, _color: Color) -> term::Result<()> {
Ok(())
}
fn bg(&mut self, _color: Color) -> term::Result<()> {
Ok(())
}
fn attr(&mut self, _attr: Attr) -> term::Result<()> {
Ok(())
}
fn supports_attr(&self, _attr: Attr) -> bool {
false
}
fn reset(&mut self) -> term::Result<()> {
Ok(())
}
fn supports_reset(&self) -> bool {
false
}
fn supports_color(&self) -> bool {
false
}
fn cursor_up(&mut self) -> term::Result<()> {
Ok(())
}
fn delete_line(&mut self) -> term::Result<()> {
Ok(())
}
fn carriage_return(&mut self) -> term::Result<()> {
Ok(())
}
fn get_ref(&self) -> &Self::Output {
&self.write
}
fn get_mut(&mut self) -> &mut Self::Output {
&mut self.write
}
fn into_inner(self) -> Self::Output
where
Self: Sized,
{
self.write
}
}

View File

@ -0,0 +1,585 @@
//! Utilities for running in a build script.
use atty;
use file_text::FileText;
use grammar::parse_tree as pt;
use grammar::repr as r;
use lalrpop_util::ParseError;
use lexer::intern_token;
use lr1;
use message::{Content, Message};
use message::builder::InlineBuilder;
use normalize;
use parser;
use rust::RustWrite;
use session::{ColorConfig, Session};
use term;
use tls::Tls;
use tok;
use std::fs;
use std::io::{self, BufRead, Write};
use std::path::{Path, PathBuf};
use std::process::exit;
use std::rc::Rc;
mod action;
mod fake_term;
use self::fake_term::FakeTerminal;
const LALRPOP_VERSION_HEADER: &'static str = concat!(
"// auto-generated: \"",
env!("CARGO_PKG_NAME"),
" ",
env!("CARGO_PKG_VERSION"),
"\""
);
pub fn process_dir<P: AsRef<Path>>(session: Rc<Session>, root_dir: P) -> io::Result<()> {
let lalrpop_files = try!(lalrpop_files(root_dir));
for lalrpop_file in lalrpop_files {
try!(process_file(session.clone(), lalrpop_file));
}
Ok(())
}
pub fn process_file<P: AsRef<Path>>(session: Rc<Session>, lalrpop_file: P) -> io::Result<()> {
let lalrpop_file = lalrpop_file.as_ref();
let rs_file = try!(resolve_rs_file(&session, lalrpop_file));
let report_file = try!(resolve_report_file(&session, lalrpop_file));
process_file_into(session, lalrpop_file, &rs_file, &report_file)
}
fn resolve_rs_file(session: &Session, lalrpop_file: &Path) -> io::Result<PathBuf> {
gen_resolve_file(session, lalrpop_file, "rs")
}
fn resolve_report_file(session: &Session, lalrpop_file: &Path) -> io::Result<PathBuf> {
gen_resolve_file(session, lalrpop_file, "report")
}
fn gen_resolve_file(session: &Session, lalrpop_file: &Path, ext: &str) -> io::Result<PathBuf> {
let in_dir = if let Some(ref d) = session.in_dir {
d.as_path()
} else {
Path::new(".")
};
let out_dir = if let Some(ref d) = session.out_dir {
d.as_path()
} else {
in_dir
};
// If the lalrpop file is not in in_dir, the result is that the
// .rs file is created in the same directory as the lalrpop file
// for compatibility reasons
Ok(out_dir
.join(lalrpop_file.strip_prefix(&in_dir).unwrap_or(lalrpop_file))
.with_extension(ext))
}
fn process_file_into(
session: Rc<Session>,
lalrpop_file: &Path,
rs_file: &Path,
report_file: &Path,
) -> io::Result<()> {
if session.force_build || try!(needs_rebuild(&lalrpop_file, &rs_file)) {
log!(
session,
Informative,
"processing file `{}`",
lalrpop_file.to_string_lossy()
);
if let Some(parent) = rs_file.parent() {
try!(fs::create_dir_all(parent));
}
try!(make_read_only(&rs_file, false));
try!(remove_old_file(&rs_file));
// Load the LALRPOP source text for this file:
let file_text = Rc::new(try!(FileText::from_path(lalrpop_file.to_path_buf())));
// Store the session and file-text in TLS -- this is not
// intended to be used in this high-level code, but it gives
// easy access to this information pervasively in the
// low-level LR(1) and grammar normalization code. This is
// particularly useful for error-reporting.
let _tls = Tls::install(session.clone(), file_text.clone());
// Do the LALRPOP processing itself and write the resulting
// buffer into a file. We use a buffer so that if LR(1)
// generation fails at some point, we don't leave a partial
// file behind.
{
let grammar = try!(parse_and_normalize_grammar(&session, &file_text));
let buffer = try!(emit_recursive_ascent(&session, &grammar, &report_file));
let mut output_file = try!(fs::File::create(&rs_file));
try!(writeln!(output_file, "{}", LALRPOP_VERSION_HEADER));
try!(output_file.write_all(&buffer));
}
try!(make_read_only(&rs_file, true));
}
Ok(())
}
fn remove_old_file(rs_file: &Path) -> io::Result<()> {
match fs::remove_file(rs_file) {
Ok(()) => Ok(()),
Err(e) => {
// Unix reports NotFound, Windows PermissionDenied!
match e.kind() {
io::ErrorKind::NotFound | io::ErrorKind::PermissionDenied => Ok(()),
_ => Err(e),
}
}
}
}
fn needs_rebuild(lalrpop_file: &Path, rs_file: &Path) -> io::Result<bool> {
return match fs::metadata(&rs_file) {
Ok(rs_metadata) => {
let lalrpop_metadata = try!(fs::metadata(&lalrpop_file));
if compare_modification_times(&lalrpop_metadata, &rs_metadata) {
return Ok(true);
}
compare_lalrpop_version(rs_file)
}
Err(e) => match e.kind() {
io::ErrorKind::NotFound => Ok(true),
_ => Err(e),
},
};
#[cfg(unix)]
fn compare_modification_times(
lalrpop_metadata: &fs::Metadata,
rs_metadata: &fs::Metadata,
) -> bool {
use std::os::unix::fs::MetadataExt;
lalrpop_metadata.mtime() >= rs_metadata.mtime()
}
#[cfg(windows)]
fn compare_modification_times(
lalrpop_metadata: &fs::Metadata,
rs_metadata: &fs::Metadata,
) -> bool {
use std::os::windows::fs::MetadataExt;
lalrpop_metadata.last_write_time() >= rs_metadata.last_write_time()
}
#[cfg(not(any(unix, windows)))]
fn compare_modification_times(
lalrpop_metadata: &fs::Metadata,
rs_metadata: &fs::Metadata,
) -> bool {
true
}
fn compare_lalrpop_version(rs_file: &Path) -> io::Result<bool> {
let mut input_str = String::new();
let mut f = io::BufReader::new(try!(fs::File::open(&rs_file)));
try!(f.read_line(&mut input_str));
Ok(input_str.trim() != LALRPOP_VERSION_HEADER)
}
}
fn make_read_only(rs_file: &Path, ro: bool) -> io::Result<()> {
if rs_file.is_file() {
let rs_metadata = try!(fs::metadata(&rs_file));
let mut rs_permissions = rs_metadata.permissions();
rs_permissions.set_readonly(ro);
fs::set_permissions(&rs_file, rs_permissions)
} else {
Ok(())
}
}
fn lalrpop_files<P: AsRef<Path>>(root_dir: P) -> io::Result<Vec<PathBuf>> {
let mut result = vec![];
for entry in try!(fs::read_dir(root_dir)) {
let entry = try!(entry);
let file_type = try!(entry.file_type());
let path = entry.path();
if file_type.is_dir() {
result.extend(try!(lalrpop_files(&path)));
}
if file_type.is_file() && path.extension().is_some()
&& path.extension().unwrap() == "lalrpop"
{
result.push(path);
}
}
Ok(result)
}
fn parse_and_normalize_grammar(session: &Session, file_text: &FileText) -> io::Result<r::Grammar> {
let grammar = match parser::parse_grammar(file_text.text()) {
Ok(grammar) => grammar,
Err(ParseError::InvalidToken { location }) => {
let ch = file_text.text()[location..].chars().next().unwrap();
report_error(
&file_text,
pt::Span(location, location),
&format!("invalid character `{}`", ch),
);
}
Err(ParseError::UnrecognizedToken {
token: None,
expected: _,
}) => {
let len = file_text.text().len();
report_error(
&file_text,
pt::Span(len, len),
&format!("unexpected end of file"),
);
}
Err(ParseError::UnrecognizedToken {
token: Some((lo, _, hi)),
expected,
}) => {
let _ = expected; // didn't implement this yet :)
let text = &file_text.text()[lo..hi];
report_error(
&file_text,
pt::Span(lo, hi),
&format!("unexpected token: `{}`", text),
);
}
Err(ParseError::ExtraToken { token: (lo, _, hi) }) => {
let text = &file_text.text()[lo..hi];
report_error(
&file_text,
pt::Span(lo, hi),
&format!("extra token at end of input: `{}`", text),
);
}
Err(ParseError::User { error }) => {
let string = match error.code {
tok::ErrorCode::UnrecognizedToken => "unrecognized token",
tok::ErrorCode::UnterminatedEscape => "unterminated escape; missing '`'?",
tok::ErrorCode::UnterminatedStringLiteral => {
"unterminated string literal; missing `\"`?"
}
tok::ErrorCode::UnterminatedCharacterLiteral => {
"unterminated character literal; missing `'`?"
}
tok::ErrorCode::UnterminatedAttribute => "unterminated #! attribute; missing `]`?",
tok::ErrorCode::ExpectedStringLiteral => "expected string literal; missing `\"`?",
tok::ErrorCode::UnterminatedCode => {
"unterminated code block; perhaps a missing `;`, `)`, `]` or `}`?"
}
};
report_error(
&file_text,
pt::Span(error.location, error.location + 1),
string,
)
}
};
match normalize::normalize(session, grammar) {
Ok(grammar) => Ok(grammar),
Err(error) => report_error(&file_text, error.span, &error.message),
}
}
fn report_error(file_text: &FileText, span: pt::Span, message: &str) -> ! {
println!("{} error: {}", file_text.span_str(span), message);
let out = io::stderr();
let mut out = out.lock();
file_text.highlight(span, &mut out).unwrap();
exit(1);
}
fn report_messages(messages: Vec<Message>) -> term::Result<()> {
let builder = InlineBuilder::new().begin_paragraphs();
let builder = messages
.into_iter()
.fold(builder, |b, m| b.push(Box::new(m)));
let content = builder.end().end();
report_content(&*content)
}
fn report_content(content: &Content) -> term::Result<()> {
// FIXME -- can we query the size of the terminal somehow?
let canvas = content.emit_to_canvas(80);
let try_colors = match Tls::session().color_config {
ColorConfig::Yes => true,
ColorConfig::No => false,
ColorConfig::IfTty => atty::is(atty::Stream::Stdout),
};
if try_colors {
if let Some(mut stdout) = term::stdout() {
return canvas.write_to(&mut *stdout);
}
}
let stdout = io::stdout();
let mut stdout = FakeTerminal::new(stdout.lock());
canvas.write_to(&mut stdout)
}
fn emit_module_attributes<W: Write>(
grammar: &r::Grammar,
rust: &mut RustWrite<W>,
) -> io::Result<()> {
rust.write_module_attributes(grammar)
}
fn emit_uses<W: Write>(grammar: &r::Grammar, rust: &mut RustWrite<W>) -> io::Result<()> {
rust.write_uses("", grammar)
}
fn emit_recursive_ascent(
session: &Session,
grammar: &r::Grammar,
report_file: &Path,
) -> io::Result<Vec<u8>> {
let mut rust = RustWrite::new(vec![]);
// We generate a module structure like this:
//
// ```
// mod <output-file> {
// // For each public symbol:
// pub fn parse_XYZ();
// mod __XYZ { ... }
//
// // For each bit of action code:
// <action-code>
// }
// ```
//
// Note that the action code goes in the outer module. This is
// intentional because it means that the foo.lalrpop file serves
// as a module in the rust hierarchy, so if the action code
// includes things like `super::` it will resolve in the natural
// way.
try!(emit_module_attributes(grammar, &mut rust));
try!(emit_uses(grammar, &mut rust));
if grammar.start_nonterminals.is_empty() {
println!("Error: no public symbols declared in grammar");
exit(1);
}
for (user_nt, start_nt) in &grammar.start_nonterminals {
// We generate these, so there should always be exactly 1
// production. Otherwise the LR(1) algorithm doesn't know
// where to stop!
assert_eq!(grammar.productions_for(start_nt).len(), 1);
log!(
session,
Verbose,
"Building states for public nonterminal `{}`",
user_nt
);
let _lr1_tls = lr1::Lr1Tls::install(grammar.terminals.clone());
let lr1result = lr1::build_states(&grammar, start_nt.clone());
if session.emit_report {
let mut output_report_file = try!(fs::File::create(&report_file));
try!(lr1::generate_report(&mut output_report_file, &lr1result));
}
let states = match lr1result {
Ok(states) => states,
Err(error) => {
let messages = lr1::report_error(&grammar, &error);
let _ = report_messages(messages);
exit(1) // FIXME -- propagate up instead of calling `exit`
}
};
match grammar.algorithm.codegen {
r::LrCodeGeneration::RecursiveAscent => try!(lr1::codegen::ascent::compile(
&grammar,
user_nt.clone(),
start_nt.clone(),
&states,
"super",
&mut rust,
)),
r::LrCodeGeneration::TableDriven => try!(lr1::codegen::parse_table::compile(
&grammar,
user_nt.clone(),
start_nt.clone(),
&states,
"super",
&mut rust,
)),
r::LrCodeGeneration::TestAll => try!(lr1::codegen::test_all::compile(
&grammar,
user_nt.clone(),
start_nt.clone(),
&states,
&mut rust,
)),
}
rust!(
rust,
"{}use self::{}parse{}::{}Parser;",
grammar.nonterminals[&user_nt].visibility,
grammar.prefix,
start_nt,
user_nt
);
}
if let Some(ref intern_token) = grammar.intern_token {
try!(intern_token::compile(&grammar, intern_token, &mut rust));
rust!(rust, "pub use self::{}intern_token::Token;", grammar.prefix);
}
try!(action::emit_action_code(grammar, &mut rust));
try!(emit_to_triple_trait(grammar, &mut rust));
Ok(rust.into_inner())
}
fn emit_to_triple_trait<W: Write>(grammar: &r::Grammar, rust: &mut RustWrite<W>) -> io::Result<()> {
#![allow(non_snake_case)]
let L = grammar.types.terminal_loc_type();
let T = grammar.types.terminal_token_type();
let E = grammar.types.error_type();
let mut user_type_parameters = String::new();
for type_parameter in &grammar.type_parameters {
user_type_parameters.push_str(&format!("{}, ", type_parameter));
}
rust!(rust, "");
rust!(
rust,
"pub trait {}ToTriple<{}> {{",
grammar.prefix,
user_type_parameters,
);
rust!(rust, "type Error;");
rust!(
rust,
"fn to_triple(value: Self) -> Result<({},{},{}),Self::Error>;",
L,
T,
L,
);
rust!(rust, "}}");
rust!(rust, "");
if grammar.types.opt_terminal_loc_type().is_some() {
rust!(
rust,
"impl<{}> {}ToTriple<{}> for ({}, {}, {}) {{",
user_type_parameters,
grammar.prefix,
user_type_parameters,
L,
T,
L,
);
rust!(rust, "type Error = {};", E);
rust!(
rust,
"fn to_triple(value: Self) -> Result<({},{},{}),{}> {{",
L,
T,
L,
E,
);
rust!(rust, "Ok(value)");
rust!(rust, "}}");
rust!(rust, "}}");
rust!(
rust,
"impl<{}> {}ToTriple<{}> for Result<({}, {}, {}),{}> {{",
user_type_parameters,
grammar.prefix,
user_type_parameters,
L,
T,
L,
E,
);
rust!(rust, "type Error = {};", E);
rust!(
rust,
"fn to_triple(value: Self) -> Result<({},{},{}),{}> {{",
L,
T,
L,
E,
);
rust!(rust, "value");
rust!(rust, "}}");
rust!(rust, "}}");
} else {
rust!(
rust,
"impl<{}> {}ToTriple<{}> for {} {{",
user_type_parameters,
grammar.prefix,
user_type_parameters,
T,
);
rust!(rust, "type Error = {};", E);
rust!(
rust,
"fn to_triple(value: Self) -> Result<((),{},()),{}> {{",
T,
E,
);
rust!(rust, "Ok(((), value, ()))");
rust!(rust, "}}");
rust!(rust, "}}");
rust!(
rust,
"impl<{}> {}ToTriple<{}> for Result<({}),{}> {{",
user_type_parameters,
grammar.prefix,
user_type_parameters,
T,
E,
);
rust!(rust, "type Error = {};", E);
rust!(
rust,
"fn to_triple(value: Self) -> Result<((),{},()),{}> {{",
T,
E,
);
rust!(rust, "value.map(|v| ((), v, ()))");
rust!(rust, "}}");
rust!(rust, "}}");
}
Ok(())
}

View File

@ -0,0 +1,15 @@
use std::collections::BTreeMap;
pub use std::collections::btree_map::Entry;
/// In general, we avoid coding directly against any particular map,
/// but rather build against `util::Map` (and `util::map` to construct
/// an instance). This should be a deterministic map, such that two
/// runs of LALRPOP produce the same output, but otherwise it doesn't
/// matter much. I'd probably prefer to use `HashMap` with an
/// alternative hasher, but that's not stable.
pub type Map<K, V> = BTreeMap<K, V>;
pub fn map<K: Ord, V>() -> Map<K, V> {
Map::<K, V>::default()
}

View File

@ -0,0 +1,7 @@
mod map;
mod multimap;
mod set;
pub use self::map::{map, Entry, Map};
pub use self::multimap::{Collection, Multimap};
pub use self::set::{set, Set};

View File

@ -0,0 +1,140 @@
use std::collections::btree_map;
use std::default::Default;
use std::iter::FromIterator;
use super::map::{map, Map};
use super::set::Set;
pub struct Multimap<K, C: Collection> {
map: Map<K, C>,
}
pub trait Collection: Default {
type Item;
/// Push `item` into the collection and return `true` if
/// the collection changed.
fn push(&mut self, item: Self::Item) -> bool;
}
impl<K: Ord, C: Collection> Multimap<K, C> {
pub fn new() -> Multimap<K, C> {
Multimap { map: map() }
}
pub fn is_empty(&self) -> bool {
self.map.is_empty()
}
/// Push `value` to the collection associated with `key`. Returns
/// true if the collection was changed from the default.
pub fn push(&mut self, key: K, value: C::Item) -> bool {
let mut inserted = false;
let pushed = self.map
.entry(key)
.or_insert_with(|| {
inserted = true;
C::default()
})
.push(value);
inserted || pushed
}
pub fn get(&self, key: &K) -> Option<&C> {
self.map.get(key)
}
pub fn iter(&self) -> btree_map::Iter<K, C> {
self.map.iter()
}
pub fn into_iter(self) -> btree_map::IntoIter<K, C> {
self.map.into_iter()
}
}
impl<K: Ord, C: Collection> IntoIterator for Multimap<K, C> {
type Item = (K, C);
type IntoIter = btree_map::IntoIter<K, C>;
    fn into_iter(self) -> btree_map::IntoIter<K, C> {
        // resolves to the inherent `into_iter` above (inherent methods
        // take precedence over trait methods), so this does not recurse
        self.into_iter()
    }
}
impl<'iter, K: Ord, C: Collection> IntoIterator for &'iter Multimap<K, C> {
type Item = (&'iter K, &'iter C);
type IntoIter = btree_map::Iter<'iter, K, C>;
fn into_iter(self) -> btree_map::Iter<'iter, K, C> {
self.iter()
}
}
impl<K: Ord, C: Collection> FromIterator<(K, C::Item)> for Multimap<K, C> {
fn from_iter<T>(iterator: T) -> Self
where
T: IntoIterator<Item = (K, C::Item)>,
{
let mut map = Multimap::new();
for (key, value) in iterator {
map.push(key, value);
}
map
}
}
impl Collection for () {
type Item = ();
fn push(&mut self, _item: ()) -> bool {
false
}
}
impl<T> Collection for Vec<T> {
type Item = T;
fn push(&mut self, item: T) -> bool {
self.push(item);
true // always changes
}
}
impl<T: Ord> Collection for Set<T> {
type Item = T;
fn push(&mut self, item: T) -> bool {
self.insert(item)
}
}
impl<K: Ord, C: Collection> Default for Multimap<K, C> {
fn default() -> Self {
Multimap::new()
}
}
impl<K: Ord, C: Collection<Item = I>, I> Collection for Multimap<K, C> {
type Item = (K, I);
fn push(&mut self, item: (K, I)) -> bool {
let (key, value) = item;
self.push(key, value)
}
}
#[test]
fn push() {
let mut m: Multimap<u32, Set<char>> = Multimap::new();
assert!(m.push(0, 'a'));
assert!(m.push(0, 'b'));
assert!(!m.push(0, 'b'));
assert!(m.push(1, 'a'));
}
#[test]
fn push_nil() {
let mut m: Multimap<u32, ()> = Multimap::new();
assert!(m.push(0, ()));
assert!(!m.push(0, ()));
assert!(m.push(1, ()));
assert!(!m.push(0, ()));
}
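// Companion to the tests above: with `Vec` as the collection, `push`
// reports a change on every call, so duplicates accumulate (contrast
// with `Set`, which deduplicates).
#[test]
fn push_vec() {
    let mut m: Multimap<u32, Vec<char>> = Multimap::new();
    assert!(m.push(0, 'a'));
    assert!(m.push(0, 'a'));
    assert_eq!(m.get(&0), Some(&vec!['a', 'a']));
}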

View File

@ -0,0 +1,8 @@
use std::collections::BTreeSet;
/// Like `Map`, but for sets.
pub type Set<K> = BTreeSet<K>;
pub fn set<K: Ord>() -> Set<K> {
Set::<K>::default()
}

View File

@ -0,0 +1,144 @@
use grammar::parse_tree as pt;
use std::fmt::{Display, Error, Formatter};
use std::fs::File;
use std::path::PathBuf;
use std::io::{self, Read, Write};
pub struct FileText {
path: PathBuf,
input_str: String,
newlines: Vec<usize>,
}
impl FileText {
pub fn from_path(path: PathBuf) -> io::Result<FileText> {
let mut input_str = String::new();
let mut f = try!(File::open(&path));
try!(f.read_to_string(&mut input_str));
Ok(FileText::new(path, input_str))
}
pub fn new(path: PathBuf, input_str: String) -> FileText {
let newline_indices: Vec<usize> = {
let input_indices = input_str
.as_bytes()
.iter()
.enumerate()
                .filter(|&(_, &b)| b == b'\n')
.map(|(i, _)| i + 1); // index of first char in the line
Some(0).into_iter().chain(input_indices).collect()
};
FileText {
path: path,
input_str: input_str,
newlines: newline_indices,
}
}
#[cfg(test)]
pub fn test() -> FileText {
Self::new(PathBuf::from("test.lalrpop"), String::from(""))
}
pub fn text(&self) -> &String {
&self.input_str
}
pub fn span_str(&self, span: pt::Span) -> String {
let (start_line, start_col) = self.line_col(span.0);
let (end_line, end_col) = self.line_col(span.1);
format!(
"{}:{}:{}: {}:{}",
self.path.display(),
start_line + 1,
start_col + 1,
end_line + 1,
end_col
)
}
fn line_col(&self, pos: usize) -> (usize, usize) {
let num_lines = self.newlines.len();
let line = (0..num_lines)
.filter(|&i| self.newlines[i] > pos)
.map(|i| i - 1)
.next()
.unwrap_or(num_lines - 1);
// offset of the first character in `line`
let line_offset = self.newlines[line];
        // find the column within the line; `line_offset <= pos` holds by
        // construction of `line`, so the subtraction cannot underflow
        let col = pos - line_offset;
(line, col)
}
fn line_text(&self, line_num: usize) -> &str {
let start_offset = self.newlines[line_num];
if line_num == self.newlines.len() - 1 {
&self.input_str[start_offset..]
} else {
let end_offset = self.newlines[line_num + 1];
&self.input_str[start_offset..end_offset - 1]
}
}
pub fn highlight(&self, span: pt::Span, out: &mut Write) -> io::Result<()> {
let (start_line, start_col) = self.line_col(span.0);
let (end_line, end_col) = self.line_col(span.1);
        // use `saturating_sub` below when drawing the underline, as a
        // defensive guard for very narrow spans
// span is within one line:
if start_line == end_line {
let text = self.line_text(start_line);
try!(writeln!(out, " {}", text));
if end_col - start_col <= 1 {
try!(writeln!(out, " {}^", Repeat(' ', start_col)));
} else {
let width = end_col - start_col;
try!(writeln!(
out,
" {}~{}~",
Repeat(' ', start_col),
Repeat('~', width.saturating_sub(2))
));
}
} else {
// span is across many lines, find the maximal width of any of those
let line_strs: Vec<_> = (start_line..end_line + 1)
.map(|i| self.line_text(i))
.collect();
let max_len = line_strs.iter().map(|l| l.len()).max().unwrap();
try!(writeln!(
out,
" {}{}~+",
Repeat(' ', start_col),
Repeat('~', max_len - start_col)
));
for line in &line_strs[..line_strs.len() - 1] {
try!(writeln!(out, "| {0:<1$} |", line, max_len));
}
try!(writeln!(out, "| {}", line_strs[line_strs.len() - 1]));
try!(writeln!(out, "+~{}", Repeat('~', end_col)));
}
Ok(())
}
}
struct Repeat(char, usize);
impl Display for Repeat {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
for _ in 0..self.1 {
try!(write!(fmt, "{}", self.0));
}
Ok(())
}
}
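// A small check of the `line_col` arithmetic above: for "ab\ncd" the
// `newlines` table is `[0, 3]`, so position 3 (the 'c') is line 1,
// column 0; both coordinates are zero-based.
#[test]
fn line_col_basics() {
    let ft = FileText::new(PathBuf::from("x.lalrpop"), String::from("ab\ncd"));
    assert_eq!(ft.line_col(0), (0, 0));
    assert_eq!(ft.line_col(3), (1, 0));
}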

View File

@ -0,0 +1,78 @@
//! Generate valid parse trees.
use grammar::repr::*;
use rand::{self, Rng};
use std::iter::Iterator;
#[derive(PartialEq, Eq)]
pub enum ParseTree {
Nonterminal(NonterminalString, Vec<ParseTree>),
Terminal(TerminalString),
}
pub fn random_parse_tree(grammar: &Grammar, symbol: NonterminalString) -> ParseTree {
let mut gen = Generator {
grammar: grammar,
rng: rand::thread_rng(),
depth: 0,
};
loop {
        // Sometimes the random walk overflows the stack, so we impose a
        // maximum depth; if it is exceeded, we just try again.
if let Some(result) = gen.nonterminal(symbol.clone()) {
return result;
}
gen.depth = 0;
}
}
struct Generator<'grammar> {
grammar: &'grammar Grammar,
rng: rand::ThreadRng,
depth: u32,
}
const MAX_DEPTH: u32 = 10000;
impl<'grammar> Generator<'grammar> {
fn nonterminal(&mut self, nt: NonterminalString) -> Option<ParseTree> {
if self.depth > MAX_DEPTH {
return None;
}
self.depth += 1;
let productions = self.grammar.productions_for(&nt);
let index: usize = self.rng.gen_range(0, productions.len());
let production = &productions[index];
let trees: Option<Vec<_>> = production
.symbols
.iter()
.map(|sym| self.symbol(sym.clone()))
.collect();
trees.map(|trees| ParseTree::Nonterminal(nt, trees))
}
fn symbol(&mut self, symbol: Symbol) -> Option<ParseTree> {
match symbol {
Symbol::Nonterminal(nt) => self.nonterminal(nt),
Symbol::Terminal(t) => Some(ParseTree::Terminal(t)),
}
}
}
impl ParseTree {
pub fn terminals(&self) -> Vec<TerminalString> {
let mut vec = vec![];
self.push_terminals(&mut vec);
vec
}
fn push_terminals(&self, vec: &mut Vec<TerminalString>) {
match *self {
ParseTree::Terminal(ref s) => vec.push(s.clone()),
ParseTree::Nonterminal(_, ref trees) => for tree in trees {
tree.push_terminals(vec);
},
}
}
}

View File

@ -0,0 +1,26 @@
/// Recognized associated type for the token location
pub const LOCATION: &'static str = "Location";
/// Recognized associated type for custom errors
pub const ERROR: &'static str = "Error";
/// The lifetime parameter injected when we do not have an external token enum
pub const INPUT_LIFETIME: &'static str = "'input";
/// The parameter injected when we do not have an external token enum
pub const INPUT_PARAMETER: &'static str = "input";
/// The annotation to request inlining.
pub const INLINE: &'static str = "inline";
/// Annotation to request LALR.
pub const LALR: &'static str = "LALR";
/// Annotation to request recursive-ascent-style code generation.
pub const TABLE_DRIVEN: &'static str = "table_driven";
/// Annotation to request recursive-ascent-style code generation.
pub const RECURSIVE_ASCENT: &'static str = "recursive_ascent";
/// Annotation to request test-all-style code generation.
pub const TEST_ALL: &'static str = "test_all";

View File

@ -0,0 +1,7 @@
//! The grammar definition.
pub mod consts;
pub mod parse_tree;
pub mod pattern;
pub mod repr;
// pub mod token;

File diff suppressed because it is too large

View File

@ -0,0 +1,129 @@
/*!
The definition of patterns is shared between the parse-tree and the
repr, but customized by a type T that represents the different type
representations.
*/
use string_cache::DefaultAtom as Atom;
use grammar::parse_tree::{Path, Span};
use std::fmt::{Display, Error, Formatter};
use util::Sep;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Pattern<T> {
pub span: Span,
pub kind: PatternKind<T>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FieldPattern<T> {
pub field_span: Span,
pub field_name: Atom,
pub pattern: Pattern<T>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PatternKind<T> {
Enum(Path, Vec<Pattern<T>>),
Struct(Path, Vec<FieldPattern<T>>, /* trailing ..? */ bool),
Path(Path),
Tuple(Vec<Pattern<T>>),
TupleStruct(Path, Vec<Pattern<T>>),
Usize(usize),
Underscore,
DotDot,
Choose(T),
CharLiteral(Atom),
}
impl<T> Pattern<T> {
pub fn for_each_binding<U>(&self, map_fn: &mut FnMut(&T) -> U) {
self.map(map_fn);
}
pub fn map<U>(&self, map_fn: &mut FnMut(&T) -> U) -> Pattern<U> {
Pattern {
span: self.span,
kind: self.kind.map(map_fn),
}
}
}
impl<T> PatternKind<T> {
pub fn map<U>(&self, map_fn: &mut FnMut(&T) -> U) -> PatternKind<U> {
match *self {
PatternKind::Path(ref path) => PatternKind::Path(path.clone()),
PatternKind::Enum(ref path, ref pats) => PatternKind::Enum(
path.clone(),
pats.iter().map(|pat| pat.map(map_fn)).collect(),
),
PatternKind::Struct(ref path, ref fields, dotdot) => PatternKind::Struct(
path.clone(),
fields.iter().map(|pat| pat.map(map_fn)).collect(),
dotdot,
),
PatternKind::Tuple(ref pats) => {
PatternKind::Tuple(pats.iter().map(|p| p.map(map_fn)).collect())
}
PatternKind::TupleStruct(ref path, ref pats) => {
PatternKind::TupleStruct(path.clone(), pats.iter().map(|p| p.map(map_fn)).collect())
}
PatternKind::Underscore => PatternKind::Underscore,
PatternKind::DotDot => PatternKind::DotDot,
PatternKind::Usize(n) => PatternKind::Usize(n),
PatternKind::Choose(ref ty) => PatternKind::Choose(map_fn(ty)),
PatternKind::CharLiteral(ref c) => PatternKind::CharLiteral(c.clone()),
}
}
}
impl<T> FieldPattern<T> {
pub fn map<U>(&self, map_fn: &mut FnMut(&T) -> U) -> FieldPattern<U> {
FieldPattern {
field_name: self.field_name.clone(),
field_span: self.field_span,
pattern: self.pattern.map(map_fn),
}
}
}
impl<T: Display> Display for Pattern<T> {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "{}", self.kind)
}
}
impl<T: Display> Display for PatternKind<T> {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self {
PatternKind::Path(ref path) => write!(fmt, "{}", path),
PatternKind::Enum(ref path, ref pats) => write!(fmt, "{}({})", path, Sep(", ", pats)),
PatternKind::Struct(ref path, ref fields, false) => {
write!(fmt, "{} {{ {} }}", path, Sep(", ", fields))
}
PatternKind::Struct(ref path, ref fields, true) if fields.len() == 0 => {
write!(fmt, "{} {{ .. }}", path)
}
PatternKind::Struct(ref path, ref fields, true) => {
write!(fmt, "{} {{ {}, .. }}", path, Sep(", ", fields))
}
PatternKind::Tuple(ref paths) => write!(fmt, "({})", Sep(", ", paths)),
PatternKind::TupleStruct(ref path, ref paths) => {
write!(fmt, "{}({})", path, Sep(", ", paths))
}
PatternKind::Underscore => write!(fmt, "_"),
PatternKind::DotDot => write!(fmt, ".."),
PatternKind::Usize(n) => write!(fmt, "{}", n),
PatternKind::Choose(ref ty) => write!(fmt, "{}", ty),
PatternKind::CharLiteral(ref c) => write!(fmt, "'{}'", c),
}
}
}
impl<T: Display> Display for FieldPattern<T> {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "{}: {}", self.field_name, self.pattern)
}
}
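// A spot check of the `Display` impls above. This assumes `Span` is the
// plain positional tuple struct that its `span.0`/`span.1` uses elsewhere
// in this crate suggest.
#[test]
fn display_empty_tuple_pattern() {
    let unit: Pattern<String> = Pattern {
        span: Span(0, 0),
        kind: PatternKind::Tuple(vec![]),
    };
    assert_eq!(format!("{}", unit), "()");
}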

View File

@ -0,0 +1,600 @@
/*!
* Compiled representation of a grammar. Simplified, normalized
* version of `parse_tree`. The normalization passes produce this
* representation incrementally.
*/
use string_cache::DefaultAtom as Atom;
use grammar::pattern::Pattern;
use message::Content;
use std::fmt::{Debug, Display, Error, Formatter};
use collections::{map, Map};
use util::Sep;
// These concepts we re-use wholesale
pub use grammar::parse_tree::{Annotation, InternToken, NonterminalString, Path, Span,
TerminalLiteral, TerminalString, TypeParameter, Visibility,
WhereClause};
#[derive(Clone, Debug)]
pub struct Grammar {
// a unique prefix that can be appended to identifiers to ensure
// that they do not conflict with any action strings
pub prefix: String,
// algorithm user requested for this parser
pub algorithm: Algorithm,
// true if the grammar mentions the `!` terminal anywhere
pub uses_error_recovery: bool,
// these are the nonterminals that were declared to be public; the
// key is the user's name for the symbol, the value is the
// artificial symbol we introduce, which will always have a single
// production like `Foo' = Foo`.
pub start_nonterminals: Map<NonterminalString, NonterminalString>,
// the "use foo;" statements that the user declared
pub uses: Vec<String>,
// type parameters declared on the grammar, like `grammar<T>;`
pub type_parameters: Vec<TypeParameter>,
// actual parameters declared on the grammar, like the `x: u32` in `grammar(x: u32);`
pub parameters: Vec<Parameter>,
// where clauses declared on the grammar, like `grammar<T> where T: Sized`
pub where_clauses: Vec<WhereClause<TypeRepr>>,
// optional tokenizer DFA; this is only needed if the user did not supply
// an extern token declaration
pub intern_token: Option<InternToken>,
// the grammar proper:
pub action_fn_defns: Vec<ActionFnDefn>,
pub terminals: TerminalSet,
pub nonterminals: Map<NonterminalString, NonterminalData>,
pub token_span: Span,
pub conversions: Map<TerminalString, Pattern<TypeRepr>>,
pub types: Types,
pub module_attributes: Vec<String>,
}
/// For each terminal, we map it to a small integer from 0 to N.
/// This struct contains the mappings to go back and forth.
#[derive(Clone, Debug)]
pub struct TerminalSet {
pub all: Vec<TerminalString>,
pub bits: Map<TerminalString, usize>,
}
#[derive(Clone, Debug)]
pub struct NonterminalData {
pub name: NonterminalString,
pub visibility: Visibility,
pub span: Span,
pub annotations: Vec<Annotation>,
pub productions: Vec<Production>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Algorithm {
pub lalr: bool,
pub codegen: LrCodeGeneration,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum LrCodeGeneration {
TableDriven,
RecursiveAscent,
TestAll,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Parameter {
pub name: Atom,
pub ty: TypeRepr,
}
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Production {
// this overlaps with the key in the hashmap, obviously, but it's
// handy to have it
pub nonterminal: NonterminalString,
pub symbols: Vec<Symbol>,
pub action: ActionFn,
pub span: Span,
}
#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum Symbol {
Nonterminal(NonterminalString),
Terminal(TerminalString),
}
#[derive(Clone, PartialEq, Eq)]
pub struct ActionFnDefn {
pub fallible: bool,
pub ret_type: TypeRepr,
pub kind: ActionFnDefnKind,
}
#[derive(Clone, PartialEq, Eq)]
pub enum ActionFnDefnKind {
User(UserActionFnDefn),
Inline(InlineActionFnDefn),
Lookaround(LookaroundActionFnDefn),
}
/// An action fn written by a user.
#[derive(Clone, PartialEq, Eq)]
pub struct UserActionFnDefn {
pub arg_patterns: Vec<Atom>,
pub arg_types: Vec<TypeRepr>,
pub code: String,
}
/// An action fn generated by the inlining pass. If we were
/// inlining `A = B C D` (with action 44) into `X = Y A Z` (with
/// action 22), this would look something like:
///
/// ```
/// fn __action66(__0: Y, __1: B, __2: C, __3: D, __4: Z) {
/// __action22(__0, __action44(__1, __2, __3), __4)
/// }
/// ```
#[derive(Clone, PartialEq, Eq)]
pub struct InlineActionFnDefn {
/// in the example above, this would be `action22`
pub action: ActionFn,
/// in the example above, this would be `Y, {action44: B, C, D}, Z`
pub symbols: Vec<InlinedSymbol>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum LookaroundActionFnDefn {
Lookahead,
Lookbehind,
}
#[derive(Clone, PartialEq, Eq)]
pub enum InlinedSymbol {
Original(Symbol),
Inlined(ActionFn, Vec<Symbol>),
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum TypeRepr {
Tuple(Vec<TypeRepr>),
Nominal(NominalTypeRepr),
Associated {
type_parameter: Atom,
id: Atom,
},
Lifetime(Atom),
Ref {
lifetime: Option<Atom>,
mutable: bool,
referent: Box<TypeRepr>,
},
}
impl TypeRepr {
pub fn is_unit(&self) -> bool {
match *self {
TypeRepr::Tuple(ref v) => v.is_empty(),
_ => false,
}
}
pub fn usize() -> TypeRepr {
TypeRepr::Nominal(NominalTypeRepr {
path: Path::usize(),
types: vec![],
})
}
pub fn str() -> TypeRepr {
TypeRepr::Nominal(NominalTypeRepr {
path: Path::str(),
types: vec![],
})
}
/// Returns the type parameters (or potential type parameters)
/// referenced by this type. e.g., for the type `&'x X`, would
/// return `[TypeParameter::Lifetime('x), TypeParameter::Id(X)]`.
/// This is later used to prune the type parameters list so that
/// only those that are actually used are included.
pub fn referenced(&self) -> Vec<TypeParameter> {
match *self {
TypeRepr::Tuple(ref tys) => tys.iter().flat_map(|t| t.referenced()).collect(),
TypeRepr::Nominal(ref data) => data.types
.iter()
.flat_map(|t| t.referenced())
.chain(match data.path.as_id() {
Some(id) => vec![TypeParameter::Id(id)],
None => vec![],
})
.collect(),
TypeRepr::Associated {
ref type_parameter, ..
} => vec![TypeParameter::Id(type_parameter.clone())],
TypeRepr::Lifetime(ref l) => vec![TypeParameter::Lifetime(l.clone())],
TypeRepr::Ref {
ref lifetime,
mutable: _,
ref referent,
} => lifetime
.iter()
.map(|id| TypeParameter::Lifetime(id.clone()))
.chain(referent.referenced())
.collect(),
}
}
}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct NominalTypeRepr {
pub path: Path,
pub types: Vec<TypeRepr>,
}
#[derive(Clone, Debug)]
pub struct Types {
terminal_token_type: TypeRepr,
terminal_loc_type: Option<TypeRepr>,
error_type: Option<TypeRepr>,
terminal_types: Map<TerminalString, TypeRepr>,
nonterminal_types: Map<NonterminalString, TypeRepr>,
parse_error_type: TypeRepr,
error_recovery_type: TypeRepr,
}
impl Types {
pub fn new(
prefix: &str,
terminal_loc_type: Option<TypeRepr>,
error_type: Option<TypeRepr>,
terminal_token_type: TypeRepr,
) -> Types {
let mut types = Types {
terminal_loc_type: terminal_loc_type,
error_type: error_type,
terminal_token_type: terminal_token_type,
terminal_types: map(),
nonterminal_types: map(),
// the following two will be overwritten later
parse_error_type: TypeRepr::Tuple(vec![]),
error_recovery_type: TypeRepr::Tuple(vec![]),
};
let args = vec![
types.terminal_loc_type().clone(),
types.terminal_token_type().clone(),
types.error_type(),
];
types.parse_error_type = TypeRepr::Nominal(NominalTypeRepr {
path: Path {
absolute: false,
ids: vec![
Atom::from(format!("{}lalrpop_util", prefix)),
Atom::from("ParseError"),
],
},
types: args.clone(),
});
types.error_recovery_type = TypeRepr::Nominal(NominalTypeRepr {
path: Path {
absolute: false,
ids: vec![
Atom::from(format!("{}lalrpop_util", prefix)),
Atom::from("ErrorRecovery"),
],
},
types: args,
});
types
.terminal_types
.insert(TerminalString::Error, types.error_recovery_type.clone());
types
}
pub fn add_type(&mut self, nt_id: NonterminalString, ty: TypeRepr) {
assert!(self.nonterminal_types.insert(nt_id, ty).is_none());
}
pub fn add_term_type(&mut self, term: TerminalString, ty: TypeRepr) {
assert!(self.terminal_types.insert(term, ty).is_none());
}
pub fn terminal_token_type(&self) -> &TypeRepr {
&self.terminal_token_type
}
pub fn opt_terminal_loc_type(&self) -> Option<&TypeRepr> {
self.terminal_loc_type.as_ref()
}
pub fn terminal_loc_type(&self) -> TypeRepr {
self.terminal_loc_type
.clone()
.unwrap_or_else(|| TypeRepr::Tuple(vec![]))
}
pub fn error_type(&self) -> TypeRepr {
self.error_type.clone().unwrap_or_else(|| TypeRepr::Ref {
lifetime: Some(Atom::from("'static")),
mutable: false,
referent: Box::new(TypeRepr::str()),
})
}
pub fn terminal_type(&self, id: &TerminalString) -> &TypeRepr {
self.terminal_types
.get(&id)
.unwrap_or(&self.terminal_token_type)
}
pub fn terminal_types(&self) -> Vec<TypeRepr> {
self.terminal_types.values().cloned().collect()
}
pub fn lookup_nonterminal_type(&self, id: &NonterminalString) -> Option<&TypeRepr> {
self.nonterminal_types.get(&id)
}
pub fn nonterminal_type(&self, id: &NonterminalString) -> &TypeRepr {
&self.nonterminal_types[&id]
}
pub fn nonterminal_types(&self) -> Vec<TypeRepr> {
self.nonterminal_types.values().cloned().collect()
}
pub fn parse_error_type(&self) -> &TypeRepr {
&self.parse_error_type
}
pub fn error_recovery_type(&self) -> &TypeRepr {
&self.error_recovery_type
}
/// Returns a type `(L, T, L)` where L is the location type and T
/// is the token type.
pub fn triple_type(&self) -> TypeRepr {
self.spanned_type(self.terminal_token_type().clone())
}
/// Returns a type `(L, T, L)` where L is the location type and T
/// is the argument.
pub fn spanned_type(&self, ty: TypeRepr) -> TypeRepr {
let location_type = self.terminal_loc_type();
TypeRepr::Tuple(vec![location_type.clone(), ty, location_type])
}
}
impl Display for Parameter {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "{}: {}", self.name, self.ty)
}
}
impl Display for TypeRepr {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self {
TypeRepr::Tuple(ref types) => write!(fmt, "({})", Sep(", ", types)),
TypeRepr::Nominal(ref data) => write!(fmt, "{}", data),
TypeRepr::Associated {
ref type_parameter,
ref id,
} => write!(fmt, "{}::{}", type_parameter, id),
TypeRepr::Lifetime(ref id) => write!(fmt, "{}", id),
TypeRepr::Ref {
lifetime: None,
mutable: false,
ref referent,
} => write!(fmt, "&{}", referent),
TypeRepr::Ref {
lifetime: Some(ref l),
mutable: false,
ref referent,
} => write!(fmt, "&{} {}", l, referent),
TypeRepr::Ref {
lifetime: None,
mutable: true,
ref referent,
} => write!(fmt, "&mut {}", referent),
TypeRepr::Ref {
lifetime: Some(ref l),
mutable: true,
ref referent,
} => write!(fmt, "&{} mut {}", l, referent),
}
}
}
impl Debug for TypeRepr {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
Display::fmt(self, fmt)
}
}
impl Display for NominalTypeRepr {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
if self.types.len() == 0 {
write!(fmt, "{}", self.path)
} else {
write!(fmt, "{}<{}>", self.path, Sep(", ", &self.types))
}
}
}
impl Debug for NominalTypeRepr {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
Display::fmt(self, fmt)
}
}
#[derive(Copy, Clone, Debug, Hash, PartialOrd, Ord, PartialEq, Eq)]
pub struct ActionFn(u32);
impl ActionFn {
pub fn new(x: usize) -> ActionFn {
ActionFn(x as u32)
}
pub fn index(&self) -> usize {
self.0 as usize
}
}
impl Symbol {
pub fn is_terminal(&self) -> bool {
match *self {
Symbol::Terminal(..) => true,
Symbol::Nonterminal(..) => false,
}
}
pub fn ty<'ty>(&self, t: &'ty Types) -> &'ty TypeRepr {
match *self {
Symbol::Terminal(ref id) => t.terminal_type(id),
Symbol::Nonterminal(ref id) => t.nonterminal_type(id),
}
}
}
impl Display for Symbol {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self {
Symbol::Nonterminal(ref id) => write!(fmt, "{}", id.clone()),
Symbol::Terminal(ref id) => write!(fmt, "{}", id.clone()),
}
}
}
impl Debug for Symbol {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
Display::fmt(self, fmt)
}
}
impl Into<Box<Content>> for Symbol {
fn into(self) -> Box<Content> {
match self {
Symbol::Nonterminal(nt) => nt.into(),
Symbol::Terminal(term) => term.into(),
}
}
}
impl Debug for Production {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(
fmt,
"{} = {} => {:?};",
self.nonterminal,
Sep(", ", &self.symbols),
self.action,
)
}
}
impl Debug for ActionFnDefn {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "{}", self.to_fn_string("_"))
}
}
impl ActionFnDefn {
fn to_fn_string(&self, name: &str) -> String {
match self.kind {
ActionFnDefnKind::User(ref data) => data.to_fn_string(self, name),
ActionFnDefnKind::Inline(ref data) => data.to_fn_string(name),
ActionFnDefnKind::Lookaround(ref data) => format!("{:?}", data),
}
}
}
impl UserActionFnDefn {
fn to_fn_string(&self, defn: &ActionFnDefn, name: &str) -> String {
let arg_strings: Vec<String> = self.arg_patterns
.iter()
.zip(self.arg_types.iter())
.map(|(p, t)| format!("{}: {}", p, t))
.collect();
format!(
"fn {}({}) -> {} {{ {} }}",
name,
Sep(", ", &arg_strings),
defn.ret_type,
self.code,
)
}
}
impl InlineActionFnDefn {
fn to_fn_string(&self, name: &str) -> String {
let arg_strings: Vec<String> = self.symbols
.iter()
.map(|inline_sym| match *inline_sym {
InlinedSymbol::Original(ref s) => format!("{}", s),
InlinedSymbol::Inlined(a, ref s) => format!("{:?}({})", a, Sep(", ", s)),
})
.collect();
format!(
"fn {}(..) {{ {:?}({}) }}",
name,
self.action,
Sep(", ", &arg_strings),
)
}
}
impl Grammar {
pub fn pattern(&self, t: &TerminalString) -> &Pattern<TypeRepr> {
&self.conversions[t]
}
pub fn productions_for(&self, nonterminal: &NonterminalString) -> &[Production] {
match self.nonterminals.get(nonterminal) {
Some(v) => &v.productions[..],
None => &[], // this...probably shouldn't happen actually?
}
}
pub fn user_parameter_refs(&self) -> String {
let mut result = String::new();
for parameter in &self.parameters {
result.push_str(&format!("{}, ", parameter.name));
}
result
}
pub fn action_is_fallible(&self, f: ActionFn) -> bool {
self.action_fn_defns[f.index()].fallible
}
pub fn non_lifetime_type_parameters(&self) -> Vec<&TypeParameter> {
self.type_parameters
.iter()
.filter(|&tp| match *tp {
TypeParameter::Lifetime(_) => false,
TypeParameter::Id(_) => true,
})
.collect()
}
}
impl Default for Algorithm {
fn default() -> Self {
Algorithm {
lalr: false,
codegen: LrCodeGeneration::TableDriven,
}
}
}
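// A spot check of the `Display` impls above; this assumes that `Path`'s
// `Display` renders `Path::usize()` as plain `usize`.
#[test]
fn display_ref_type() {
    let ty = TypeRepr::Ref {
        lifetime: Some(Atom::from("'a")),
        mutable: false,
        referent: Box::new(TypeRepr::usize()),
    };
    assert_eq!(format!("{}", ty), "&'a usize");
}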

View File

@ -0,0 +1,28 @@
use std::collections::HashMap;
use grammar::parse_tree::TypeRef;
use string_cache::DefaultAtom as Atom;
#[cfg(test)]
mod test;
pub struct TokenDefinition {
// if the enum type is `foo::bar::baz<X,Y>` then:
enum_type: TypeRef,
// map from a custom string, like `"("` to a variant name like LPAREN
token_map: HashMap<Atom, Atom>,
}
impl TokenDefinition {
pub fn new(enum_type: TypeRef, token_map: Vec<(Atom, Atom)>) -> TokenDefinition {
TokenDefinition {
enum_type: enum_type,
token_map: token_map.into_iter().collect(),
}
}
pub fn enum_type(&self) -> &TypeRef {
&self.enum_type
}
}

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,41 @@
use std::collections::VecDeque;
use std::fmt::Debug;
use std::hash::Hash;
use collections::{map, Map};
pub struct KernelSet<K: Kernel> {
counter: usize,
kernels: VecDeque<K>,
map: Map<K, K::Index>,
}
pub trait Kernel: Clone + Debug + Hash + Eq + PartialOrd + Ord {
type Index: Copy + Debug;
fn index(c: usize) -> Self::Index;
}
impl<K: Kernel> KernelSet<K> {
pub fn new() -> KernelSet<K> {
KernelSet {
kernels: VecDeque::new(),
map: map(),
counter: 0,
}
}
pub fn add_state(&mut self, kernel: K) -> K::Index {
let kernels = &mut self.kernels;
let counter = &mut self.counter;
*self.map.entry(kernel.clone()).or_insert_with(|| {
let index = *counter;
*counter += 1;
kernels.push_back(kernel);
K::index(index)
})
}
pub fn next(&mut self) -> Option<K> {
self.kernels.pop_front()
}
}
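// A toy `Kernel` impl illustrating the intended protocol: `add_state`
// assigns each distinct kernel a fresh index and enqueues it exactly
// once; `next` then drains kernels in FIFO order.
#[cfg(test)]
mod test {
    use super::{Kernel, KernelSet};
    #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
    struct K(&'static str);
    impl Kernel for K {
        type Index = usize;
        fn index(c: usize) -> usize {
            c
        }
    }
    #[test]
    fn dedup_and_fifo() {
        let mut set: KernelSet<K> = KernelSet::new();
        assert_eq!(set.add_state(K("a")), 0);
        assert_eq!(set.add_state(K("b")), 1);
        assert_eq!(set.add_state(K("a")), 0);
        assert_eq!(set.next(), Some(K("a")));
        assert_eq!(set.next(), Some(K("b")));
        assert_eq!(set.next(), None);
    }
}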

View File

@ -0,0 +1,40 @@
use lexer::dfa::{Kind, NFAIndex, DFA, START};
pub fn interpret<'text>(dfa: &DFA, input: &'text str) -> Option<(NFAIndex, &'text str)> {
let mut longest: Option<(NFAIndex, usize)> = None;
let mut state_index = START;
for (offset, ch) in input.char_indices() {
        let state = dfa.state(state_index);
        let target = state
            .test_edges
.iter()
.filter_map(|&(test, target)| {
if test.contains_char(ch) {
Some(target)
} else {
None
}
})
.next();
if let Some(target) = target {
state_index = target;
} else {
state_index = state.other_edge;
}
match dfa.state(state_index).kind {
Kind::Accepts(nfa) => {
longest = Some((nfa, offset + ch.len_utf8()));
}
Kind::Reject => {
break;
}
Kind::Neither => {}
}
}
longest.map(|(index, offset)| (index, &input[..offset]))
}

View File

@ -0,0 +1,326 @@
//! Constructs a DFA which picks the longest matching regular
//! expression from the input.
use collections::Set;
use kernel_set::{Kernel, KernelSet};
use std::fmt::{Debug, Display, Error, Formatter};
use std::rc::Rc;
use lexer::re;
use lexer::nfa::{self, NFAConstructionError, NFAStateIndex, Test, NFA};
#[cfg(test)]
mod test;
#[cfg(test)]
pub mod interpret;
mod overlap;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct DFA {
pub states: Vec<State>,
}
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq)]
pub struct Precedence(pub usize);
#[derive(Debug)]
pub enum DFAConstructionError {
NFAConstructionError {
index: NFAIndex,
error: NFAConstructionError,
},
/// Either of the two regexs listed could match, and they have equal
/// priority.
Ambiguity { match0: NFAIndex, match1: NFAIndex },
}
pub fn build_dfa(
regexs: &[re::Regex],
precedences: &[Precedence],
) -> Result<DFA, DFAConstructionError> {
assert_eq!(regexs.len(), precedences.len());
let nfas: Vec<_> = try! {
regexs.iter()
.enumerate()
.map(|(i, r)| match NFA::from_re(r) {
Ok(nfa) => Ok(nfa),
Err(e) => Err(DFAConstructionError::NFAConstructionError {
index: NFAIndex(i),
error: e
}),
})
.collect()
};
let builder = DFABuilder {
nfas: &nfas,
precedences: precedences.to_vec(),
};
let dfa = try!(builder.build());
Ok(dfa)
}
struct DFABuilder<'nfa> {
nfas: &'nfa [NFA],
precedences: Vec<Precedence>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct State {
item_set: DFAItemSet,
pub kind: Kind,
pub test_edges: Vec<(Test, DFAStateIndex)>,
pub other_edge: DFAStateIndex,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Kind {
Accepts(NFAIndex),
Reject,
Neither,
}
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct NFAIndex(usize);
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct DFAStateIndex(usize);
type DFAKernelSet = KernelSet<DFAItemSet>;
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct DFAItemSet {
items: Rc<Vec<Item>>,
}
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct Item {
// which regular expression?
nfa_index: NFAIndex,
// what state within the NFA are we at?
nfa_state: NFAStateIndex,
}
const START: DFAStateIndex = DFAStateIndex(0);
impl<'nfa> DFABuilder<'nfa> {
fn build(&self) -> Result<DFA, DFAConstructionError> {
let mut kernel_set = KernelSet::new();
let mut states = vec![];
let start_state_index = self.start_state(&mut kernel_set);
assert_eq!(start_state_index, START);
while let Some(item_set) = kernel_set.next() {
// collect all the specific tests we expect from any of
// the items in this state
let tests: Set<Test> = item_set
.items
.iter()
.flat_map(|&item| {
self.nfa(item)
.edges::<Test>(item.nfa_state)
.map(|edge| edge.label)
})
.collect();
let tests = overlap::remove_overlap(&tests);
// if any NFA is in an accepting state, that makes this
// DFA state an accepting state
let mut all_accepts: Vec<(Precedence, NFAIndex)> = item_set
.items
.iter()
.cloned()
.filter(|&item| self.nfa(item).is_accepting_state(item.nfa_state))
.map(|item| (self.precedences[item.nfa_index.0], item.nfa_index))
.collect();
            // if all NFAs are in a rejecting state, that makes this
            // DFA state a rejecting state
let all_rejects: bool = item_set
.items
.iter()
.all(|&item| self.nfa(item).is_rejecting_state(item.nfa_state));
let kind = if all_rejects || item_set.items.is_empty() {
Kind::Reject
} else if all_accepts.len() == 0 {
Kind::Neither
} else if all_accepts.len() == 1 {
// accepts just one NFA, easy case
Kind::Accepts(all_accepts[0].1)
} else {
                all_accepts.sort(); // sort so that regexes with higher precedence come last
let (best_priority, best_nfa) = all_accepts[all_accepts.len() - 1];
let (next_priority, next_nfa) = all_accepts[all_accepts.len() - 2];
if best_priority == next_priority {
return Err(DFAConstructionError::Ambiguity {
match0: best_nfa,
match1: next_nfa,
});
}
Kind::Accepts(best_nfa)
};
// for each specific test, find what happens if we see a
// character matching that test
let mut test_edges: Vec<(Test, DFAStateIndex)> = tests
.iter()
.map(|&test| {
let items: Vec<_> = item_set
.items
.iter()
.filter_map(|&item| self.accept_test(item, test))
.collect();
// at least one of those items should accept this test
assert!(!items.is_empty());
(test, kernel_set.add_state(self.transitive_closure(items)))
})
.collect();
test_edges.sort();
            // Consider the case where some character does not meet any
            // of the tests. Then we can ignore all the test edges for
            // each of the items and just union all the "other" edges --
            // because if the character did match one of those test
            // edges, that transition is already represented above.
let other_transitions: Vec<_> = item_set
.items
.iter()
.filter_map(|&item| self.accept_other(item))
.collect();
            // we can never enumerate the full character set, so a
            // non-empty (transitively closed) item set always yields at
            // least one "other" transition
assert!(item_set.items.is_empty() || !other_transitions.is_empty());
let other_edge = kernel_set.add_state(self.transitive_closure(other_transitions));
let state = State {
item_set: item_set,
kind: kind,
test_edges: test_edges,
other_edge: other_edge,
};
states.push(state);
}
Ok(DFA { states: states })
}
fn start_state(&self, kernel_set: &mut DFAKernelSet) -> DFAStateIndex {
// starting state is at the beginning of all regular expressions
let items: Vec<_> = (0..self.nfas.len())
.map(|i| Item {
nfa_index: NFAIndex(i),
nfa_state: nfa::START,
})
.collect();
let item_set = self.transitive_closure(items);
kernel_set.add_state(item_set)
}
fn accept_test(&self, item: Item, test: Test) -> Option<Item> {
let nfa = self.nfa(item);
let matching_test = nfa.edges::<Test>(item.nfa_state)
.filter(|edge| edge.label.intersects(test))
.map(|edge| item.to(edge.to));
let matching_other = nfa.edges::<nfa::Other>(item.nfa_state)
.map(|edge| item.to(edge.to));
matching_test.chain(matching_other).next()
}
fn accept_other(&self, item: Item) -> Option<Item> {
let nfa = self.nfa(item);
nfa.edges::<nfa::Other>(item.nfa_state)
.map(|edge| item.to(edge.to))
.next()
}
fn transitive_closure(&self, mut items: Vec<Item>) -> DFAItemSet {
let mut observed: Set<Item> = items.iter().cloned().collect();
let mut counter = 0;
while counter < items.len() {
let item = items[counter];
let derived_states = self.nfa(item)
.edges::<nfa::Noop>(item.nfa_state)
.map(|edge| item.to(edge.to))
.filter(|&item| observed.insert(item));
items.extend(derived_states);
counter += 1;
}
items.sort();
items.dedup();
DFAItemSet {
items: Rc::new(items),
}
}
fn nfa(&self, item: Item) -> &NFA {
&self.nfas[item.nfa_index.0]
}
}
impl Kernel for DFAItemSet {
type Index = DFAStateIndex;
fn index(c: usize) -> DFAStateIndex {
DFAStateIndex(c)
}
}
impl DFA {
fn state(&self, index: DFAStateIndex) -> &State {
&self.states[index.0]
}
}
impl Item {
fn to(&self, s: NFAStateIndex) -> Item {
Item {
nfa_index: self.nfa_index,
nfa_state: s,
}
}
}
impl Debug for DFAStateIndex {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "DFA{}", self.0)
}
}
impl Display for DFAStateIndex {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
Debug::fmt(self, fmt)
}
}
impl NFAIndex {
pub fn index(&self) -> usize {
self.0
}
}
impl DFAStateIndex {
pub fn index(&self) -> usize {
self.0
}
}
impl Debug for Item {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "({:?}:{:?})", self.nfa_index, self.nfa_state)
}
}

View File

@ -0,0 +1,149 @@
//! When we are combining two NFAs, we will grab all the outgoing
//! edges from a set of nodes and wind up with a bunch of potentially
//! overlapping character ranges like:
//!
//! a-z
//! c-l
//! 0-9
//!
//! This module contains code to turn those into non-overlapping ranges like:
//!
//! a-b
//! c-l
//! m-z
//! 0-9
//!
//! Specifically, we want to ensure that the same set of characters is
//! covered as when we started, and that each of the input ranges is
//! covered precisely by some set of ranges in the output.
use collections::Set;
use lexer::nfa::Test;
use std::cmp;
pub fn remove_overlap(ranges: &Set<Test>) -> Vec<Test> {
// We will do this in the dumbest possible way to start. :)
// Maintain a result vector that contains disjoint ranges. To
// insert a new range, we walk over this vector and split things
// up as we go. This algorithm is so naive as to be exponential, I
// think. Sue me.
let mut disjoint_ranges = vec![];
for &range in ranges {
add_range(range, 0, &mut disjoint_ranges);
}
// the algorithm above leaves some empty ranges in for simplicity;
// prune them out.
disjoint_ranges.retain(|r| !r.is_empty());
disjoint_ranges
}
fn add_range(range: Test, start_index: usize, disjoint_ranges: &mut Vec<Test>) {
if range.is_empty() {
return;
}
// Find first overlapping range in `disjoint_ranges`, if any.
match disjoint_ranges[start_index..]
.iter()
.position(|r| r.intersects(range))
{
Some(index) => {
let index = index + start_index;
let overlapping_range = disjoint_ranges[index];
// If the range we are trying to add already exists, we're all done.
if overlapping_range == range {
return;
}
// Otherwise, we want to create three ranges (some of which may
// be empty). e.g. imagine one range is `a-z` and the other
// is `c-l`, we want `a-b`, `c-l`, and `m-z`.
let min_min = cmp::min(range.start, overlapping_range.start);
let mid_min = cmp::max(range.start, overlapping_range.start);
let mid_max = cmp::min(range.end, overlapping_range.end);
let max_max = cmp::max(range.end, overlapping_range.end);
let low_range = Test {
start: min_min,
end: mid_min,
};
let mid_range = Test {
start: mid_min,
end: mid_max,
};
let max_range = Test {
start: mid_max,
end: max_max,
};
assert!(low_range.is_disjoint(mid_range));
assert!(low_range.is_disjoint(max_range));
assert!(mid_range.is_disjoint(max_range));
// Replace the existing range with the low range, and then
// add the mid and max ranges in. (The low range may be
// empty, but we'll prune that out later.)
disjoint_ranges[index] = low_range;
add_range(mid_range, index + 1, disjoint_ranges);
add_range(max_range, index + 1, disjoint_ranges);
}
None => {
// no overlap -- easy case.
disjoint_ranges.push(range);
}
}
}
#[cfg(test)]
macro_rules! test {
($($range:expr,)*) => {
{
use collections::set;
use lexer::nfa::Test;
use std::char;
let mut s = set();
$({ let r = $range; s.insert(Test::exclusive_range(r.start, r.end)); })*
remove_overlap(&s).into_iter()
.map(|r|
char::from_u32(r.start).unwrap() ..
char::from_u32(r.end).unwrap())
.collect::<Vec<_>>()
}
}
}
#[test]
fn alphabet() {
let result = test! {
'a' .. 'z',
'c' .. 'l',
'0' .. '9',
};
assert_eq!(result, vec!['0'..'9', 'a'..'c', 'c'..'l', 'l'..'z']);
}
#[test]
fn repeat() {
let result = test! {
'a' .. 'z',
'c' .. 'l',
'l' .. 'z',
'0' .. '9',
};
assert_eq!(result, vec!['0'..'9', 'a'..'c', 'c'..'l', 'l'..'z']);
}
#[test]
fn stagger() {
let result = test! {
'0' .. '3',
'2' .. '4',
'3' .. '5',
};
assert_eq!(result, vec!['0'..'2', '2'..'3', '3'..'4', '4'..'5']);
}
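// Ranges that never overlap should pass through unchanged; the output
// order follows the sorted iteration of the input set.
#[test]
fn disjoint() {
    let result = test! {
        'a' .. 'c',
        'x' .. 'z',
    };
    assert_eq!(result, vec!['a'..'c', 'x'..'z']);
}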

View File

@ -0,0 +1,76 @@
use lexer::dfa::{self, DFAConstructionError, NFAIndex, Precedence, DFA};
use lexer::dfa::interpret::interpret;
use lexer::re;
pub fn dfa(inputs: &[(&str, Precedence)]) -> Result<DFA, DFAConstructionError> {
let regexs: Result<Vec<_>, _> = inputs.iter().map(|&(s, _)| re::parse_regex(s)).collect();
let regexs = match regexs {
Ok(rs) => rs,
Err(_) => panic!("unexpected parse error"),
};
let precedences: Vec<_> = inputs.iter().map(|&(_, p)| p).collect();
dfa::build_dfa(&regexs, &precedences)
}
const P1: Precedence = Precedence(1);
const P0: Precedence = Precedence(0);
#[test]
fn tokenizer() {
let dfa = dfa(&[
/* 0 */ (r#"class"#, P1),
/* 1 */ (r#"[a-zA-Z_][a-zA-Z0-9_]*"#, P0),
/* 2 */ (r#"[0-9]+"#, P0),
/* 3 */ (r#" +"#, P0),
/* 4 */ (r#">>"#, P0),
/* 5 */ (r#">"#, P0),
]).unwrap();
assert_eq!(interpret(&dfa, "class Foo"), Some((NFAIndex(0), "class")));
assert_eq!(interpret(&dfa, "classz Foo"), Some((NFAIndex(1), "classz")));
assert_eq!(interpret(&dfa, "123"), Some((NFAIndex(2), "123")));
assert_eq!(interpret(&dfa, " classz Foo"), Some((NFAIndex(3), " ")));
assert_eq!(interpret(&dfa, ">"), Some((NFAIndex(5), ">")));
assert_eq!(interpret(&dfa, ">>"), Some((NFAIndex(4), ">>")));
}
#[test]
fn ambiguous_regex() {
// here the keyword and the regex have same precedence, so we have
// an ambiguity
assert!(dfa(&[(r#"class"#, P0), (r#"[a-zA-Z_][a-zA-Z0-9_]*"#, P0)]).is_err());
}
#[test]
fn issue_32() {
assert!(dfa(&[(r#"."#, P0)]).is_ok());
}
#[test]
fn issue_35() {
assert!(dfa(&[(r#".*"#, P0), (r"[-+]?[0-9]*\.?[0-9]+", P0)]).is_err());
}
#[test]
fn alternatives() {
let dfa = dfa(&[(r#"abc|abd"#, P0)]).unwrap();
assert_eq!(interpret(&dfa, "abc"), Some((NFAIndex(0), "abc")));
assert_eq!(interpret(&dfa, "abd"), Some((NFAIndex(0), "abd")));
assert_eq!(interpret(&dfa, "123"), None);
}
#[test]
fn alternatives_extension() {
let dfa = dfa(&[(r#"abc|abcd"#, P0)]).unwrap();
assert_eq!(interpret(&dfa, "abc"), Some((NFAIndex(0), "abc")));
assert_eq!(interpret(&dfa, "abcd"), Some((NFAIndex(0), "abcd")));
assert_eq!(interpret(&dfa, "123"), None);
}
#[test]
fn alternatives_contraction() {
let dfa = dfa(&[(r#"abcd|abc"#, P0)]).unwrap();
assert_eq!(interpret(&dfa, "abc"), Some((NFAIndex(0), "abc")));
assert_eq!(interpret(&dfa, "abcd"), Some((NFAIndex(0), "abcd")));
assert_eq!(interpret(&dfa, "123"), None);
}

View File

@ -0,0 +1,293 @@
/*!
Generates an iterator type `Matcher` that looks roughly like
```ignore
mod intern_token {
extern crate regex as regex;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Token<'input>(pub usize, pub &'input str);
// ~~~~~~ ~~~~~~~~~~~
// token token
// index text
// (type)
impl<'a> fmt::Display for Token<'a> { ... }
pub struct MatcherBuilder {
regex_set: regex::RegexSet,
regex_vec: Vec<regex::Regex>,
}
impl MatcherBuilder {
        fn new() -> MatcherBuilder { ... }
fn matcher<'input, 'builder>(&'builder self, s: &'input str) -> Matcher<'input, 'builder> { ... }
}
pub struct Matcher<'input, 'builder> {
text: &'input str,
consumed: usize,
regex_set: &'builder regex::RegexSet,
regex_vec: &'builder Vec<regex::Regex>,
}
    impl<'input, 'builder> Matcher<'input, 'builder> {
fn tokenize(&self, text: &str) -> Option<(usize, usize)> { ... }
}
    impl<'input, 'builder> Iterator for Matcher<'input, 'builder> {
type Item = Result<(usize, Token<'input>, usize), ParseError>;
// ~~~~~ ~~~~~~~~~~~~~ ~~~~~
// start token end
}
}
```
*/
use lexer::re;
use grammar::parse_tree::InternToken;
use grammar::repr::{Grammar, TerminalLiteral};
use rust::RustWrite;
use std::io::{self, Write};
pub fn compile<W: Write>(
grammar: &Grammar,
intern_token: &InternToken,
out: &mut RustWrite<W>,
) -> io::Result<()> {
let prefix = &grammar.prefix;
rust!(out, "#[cfg_attr(rustfmt, rustfmt_skip)]");
rust!(out, "mod {}intern_token {{", prefix);
rust!(out, "#![allow(unused_imports)]");
try!(out.write_uses("", &grammar));
rust!(out, "extern crate regex as {}regex;", prefix);
rust!(out, "use std::fmt as {}fmt;", prefix);
rust!(out, "");
rust!(
out,
"#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]"
);
rust!(out, "pub struct Token<'input>(pub usize, pub &'input str);");
rust!(out, "impl<'a> {}fmt::Display for Token<'a> {{", prefix);
rust!(
out,
"fn fmt(&self, formatter: &mut {}fmt::Formatter) -> Result<(), {}fmt::Error> {{",
prefix,
prefix
);
rust!(out, "{}fmt::Display::fmt(self.1, formatter)", prefix);
rust!(out, "}}");
rust!(out, "}}");
rust!(out, "");
rust!(out, "pub struct {}MatcherBuilder {{", prefix);
rust!(out, "regex_set: {}regex::RegexSet,", prefix);
rust!(out, "regex_vec: Vec<{}regex::Regex>,", prefix);
rust!(out, "}}");
rust!(out, "");
rust!(out, "impl {}MatcherBuilder {{", prefix);
rust!(out, "pub fn new() -> {}MatcherBuilder {{", prefix);
// create a vector of rust string literals with the text of each
// regular expression
let regex_strings: Vec<String> = {
intern_token
.match_entries
.iter()
.map(|match_entry| match match_entry.match_literal {
TerminalLiteral::Quoted(ref s) => re::parse_literal(&s),
TerminalLiteral::Regex(ref s) => re::parse_regex(&s).unwrap(),
})
.map(|regex| {
// make sure all regex are anchored at the beginning of the input
format!("^({})", regex)
})
.map(|regex_str| {
// create a rust string with text of the regex; the Debug impl
// will add quotes and escape
format!("{:?}", regex_str)
})
.collect()
};
rust!(out, "let {}strs: &[&str] = &[", prefix);
for literal in &regex_strings {
rust!(out, "{},", literal);
}
rust!(out, "];");
rust!(
out,
"let {}regex_set = {}regex::RegexSet::new({}strs).unwrap();",
prefix,
prefix,
prefix
);
rust!(out, "let {}regex_vec = vec![", prefix);
for literal in &regex_strings {
rust!(out, "{}regex::Regex::new({}).unwrap(),", prefix, literal);
}
rust!(out, "];");
rust!(
out,
"{0}MatcherBuilder {{ regex_set: {0}regex_set, regex_vec: {0}regex_vec }}",
prefix
);
rust!(out, "}}"); // fn new()
rust!(
out,
"pub fn matcher<'input, 'builder>(&'builder self, s: &'input str) \
-> {}Matcher<'input, 'builder> {{",
prefix
);
rust!(out, "{}Matcher {{", prefix);
rust!(out, "text: s,");
rust!(out, "consumed: 0,");
rust!(out, "regex_set: &self.regex_set,");
rust!(out, "regex_vec: &self.regex_vec,");
rust!(out, "}}"); // struct literal
rust!(out, "}}"); // fn matcher()
rust!(out, "}}"); // impl MatcherBuilder
rust!(out, "");
rust!(out, "pub struct {}Matcher<'input, 'builder> {{", prefix);
rust!(out, "text: &'input str,"); // remaining input
rust!(out, "consumed: usize,"); // number of chars consumed thus far
rust!(out, "regex_set: &'builder {}regex::RegexSet,", prefix);
rust!(out, "regex_vec: &'builder Vec<{}regex::Regex>,", prefix);
rust!(out, "}}");
rust!(out, "");
rust!(
out,
"impl<'input, 'builder> Iterator for {}Matcher<'input, 'builder> {{",
prefix
);
rust!(
out,
"type Item = Result<(usize, Token<'input>, usize), \
{}lalrpop_util::ParseError<usize,Token<'input>,{}>>;",
prefix,
grammar.types.error_type()
);
rust!(out, "");
rust!(out, "fn next(&mut self) -> Option<Self::Item> {{");
// start by trimming whitespace from left
rust!(out, "let {}text = self.text.trim_left();", prefix);
rust!(
out,
"let {}whitespace = self.text.len() - {}text.len();",
prefix,
prefix
);
rust!(
out,
"let {}start_offset = self.consumed + {}whitespace;",
prefix,
prefix
);
// if nothing left, return None
rust!(out, "if {}text.is_empty() {{", prefix);
rust!(out, "self.text = {}text;", prefix);
rust!(out, "self.consumed = {}start_offset;", prefix);
rust!(out, "None");
rust!(out, "}} else {{");
// otherwise, use regex-set to find list of matching tokens
rust!(
out,
"let {}matches = self.regex_set.matches({}text);",
prefix,
prefix
);
// if nothing matched, return an error
rust!(out, "if !{}matches.matched_any() {{", prefix);
rust!(
out,
"Some(Err({}lalrpop_util::ParseError::InvalidToken {{",
prefix
);
rust!(out, "location: {}start_offset,", prefix);
rust!(out, "}}))");
rust!(out, "}} else {{");
// otherwise, have to find longest, highest-priority match. We have the literals
// sorted in order of increasing precedence, so we'll iterate over them one by one,
// checking if each one matches, and remembering the longest one.
rust!(out, "let mut {}longest_match = 0;", prefix); // length of longest match
rust!(out, "let mut {}index = 0;", prefix); // index of longest match
rust!(
out,
"for {}i in 0 .. {} {{",
prefix,
intern_token.match_entries.len()
);
rust!(out, "if {}matches.matched({}i) {{", prefix, prefix);
    // re-run the regex to find out how long this particular match
    // was, then compare that against the longest match so far. Note
    // the `>=` below: the indices are sorted in order of increasing
    // priority, so among matches of equal length we keep the largest
    // index, i.e., the highest priority. Matches of equal priority
    // cannot both apply here (the DFA construction rejects that as
    // an ambiguity).
rust!(
out,
"let {}match = self.regex_vec[{}i].find({}text).unwrap();",
prefix,
prefix,
prefix
);
rust!(out, "let {}len = {}match.end();", prefix, prefix);
rust!(out, "if {}len >= {}longest_match {{", prefix, prefix);
rust!(out, "{}longest_match = {}len;", prefix, prefix);
rust!(out, "{}index = {}i;", prefix, prefix);
rust!(out, "}}"); // if is longest match
rust!(out, "}}"); // if matches.matched(i)
rust!(out, "}}"); // for loop
// transform the result into the expected return value
rust!(
out,
"let {}result = &{}text[..{}longest_match];",
prefix,
prefix,
prefix
);
rust!(
out,
"let {}remaining = &{}text[{}longest_match..];",
prefix,
prefix,
prefix
);
rust!(
out,
"let {}end_offset = {}start_offset + {}longest_match;",
prefix,
prefix,
prefix
);
rust!(out, "self.text = {}remaining;", prefix);
rust!(out, "self.consumed = {}end_offset;", prefix);
rust!(
out,
"Some(Ok(({}start_offset, Token({}index, {}result), {}end_offset)))",
prefix,
prefix,
prefix,
prefix
);
rust!(out, "}}"); // else
rust!(out, "}}"); // else
rust!(out, "}}"); // fn
rust!(out, "}}"); // impl
rust!(out, "}}"); // mod
Ok(())
}
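// Rough usage sketch of the generated API (assuming a prefix of `__`).
// The builder owns the compiled `RegexSet`/`Regex` values, so a single
// builder can lend out matchers over many inputs without recompiling:
//
//     let builder = __intern_token::__MatcherBuilder::new();
//     let mut matcher = builder.matcher("class Foo");
//     while let Some(Ok((start, token, end))) = matcher.next() {
//         // token.0 is the token index, token.1 the matched text
//     }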

View File

@ -0,0 +1,8 @@
//! Code related to generating tokenizers.
#![allow(dead_code)] // not yet fully activated
pub mod dfa;
pub mod intern_token;
pub mod re;
pub mod nfa;

View File

@ -0,0 +1,72 @@
//! A depth-first interpreter for NFAs.
use lexer::nfa::{NFAStateIndex, Noop, Other, StateKind, Test, NFA, START};
use std::cmp::max;
/// Interpret `nfa` applied to `test`, returning the longest matching
/// string that we can find (if any).
pub fn interpret<'text>(nfa: &NFA, text: &'text str) -> Option<&'text str> {
let mut longest: Option<usize> = None;
let mut stack: Vec<(NFAStateIndex, usize)> = vec![(START, 0)];
while let Some((state, offset)) = stack.pop() {
match nfa.kind(state) {
StateKind::Accept => match longest {
None => longest = Some(offset),
Some(o) => longest = Some(max(o, offset)),
},
StateKind::Reject => {
// the rejection state is a dead-end
continue;
}
StateKind::Neither => {}
}
// transition the no-op edges, to start
for edge in nfa.edges::<Noop>(state) {
push(&mut stack, (edge.to, offset));
}
// check whether there is another character
        let ch = match text[offset..].chars().next() {
            Some(ch) => ch,
            None => continue, // end of input
        };
let offset1 = offset + ch.len_utf8();
// transition test edges
let mut tests = 0;
for edge in nfa.edges::<Test>(state) {
if edge.label.contains_char(ch) {
push(&mut stack, (edge.to, offset1));
tests += 1;
}
}
// should *never* match more than one test, because tests
// ought to be disjoint
assert!(tests <= 1);
// if no tests passed, use the "Other" edge
if tests == 0 {
for edge in nfa.edges::<Other>(state) {
push(&mut stack, (edge.to, offset1));
tests += 1;
}
// should *never* have more than one "otherwise" edge
assert!(tests <= 1);
}
}
longest.map(|offset| &text[..offset])
}
fn push<T: Eq>(v: &mut Vec<T>, t: T) {
if !v.contains(&t) {
v.push(t);
}
}
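// The interpreter returns the *longest* accepted prefix: for example,
// `a+` applied to "aaab" yields "aaa" rather than a shorter accepting
// prefix. (This reuses `re::parse_regex` the same way the DFA tests do.)
#[test]
fn longest_prefix() {
    use lexer::re;
    let regex = re::parse_regex("a+").unwrap();
    let nfa = NFA::from_re(&regex).unwrap();
    assert_eq!(interpret(&nfa, "aaab"), Some("aaa"));
}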

View File

@ -0,0 +1,637 @@
//! The NFA we construct for each regex. Since the states are not
//! really of interest, we represent this just as a vector of labeled
//! edges.
use lexer::re::Regex;
use regex_syntax::{ClassRange, Expr, Repeater};
use std::char;
use std::fmt::{Debug, Error as FmtError, Formatter};
use std::usize;
#[cfg(test)]
mod interpret;
#[cfg(test)]
mod test;
#[derive(Debug)]
pub struct NFA {
states: Vec<State>,
edges: Edges,
}
/// An edge label representing a half-open range of characters,
/// `[start, end)`. Note that this range may contain some endpoints
/// that are not valid unicode, hence we store u32.
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Test {
pub start: u32,
pub end: u32,
}
/// An "epsilon" edge -- no input
#[derive(Debug, PartialEq, Eq)]
pub struct Noop;
/// An "other" edge -- fallback if no other edges apply
#[derive(Debug, PartialEq, Eq)]
pub struct Other;
/// For each state, we just store the indices of the first noop,
/// test, and other edges, or usize::MAX if no such edge. You can
/// then find all edges by enumerating subsequent edges in the
/// vectors until you find one with a different `from` value.
#[derive(Debug)]
pub struct State {
kind: StateKind,
first_noop_edge: usize,
first_test_edge: usize,
first_other_edge: usize,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum StateKind {
Accept,
Reject,
Neither,
}
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct NFAStateIndex(usize);
/// A set of edges for the state machine. Edges are kept sorted by the
/// type of label they have. Within a vector, all edges with the same
/// `from` are grouped together so they can be enumerated later (for
/// now we just ensure this during construction, but one could easily
/// sort).
#[derive(Debug)]
pub struct Edges {
noop_edges: Vec<Edge<Noop>>,
// edges where we are testing the character in some way; for any
// given state, there should not be multiple edges with the same
// test
test_edges: Vec<Edge<Test>>,
// fallback rules if no test_edge applies
other_edges: Vec<Edge<Other>>,
}
#[derive(PartialEq, Eq)]
pub struct Edge<L> {
pub from: NFAStateIndex,
pub label: L,
pub to: NFAStateIndex,
}
pub const ACCEPT: NFAStateIndex = NFAStateIndex(0);
pub const REJECT: NFAStateIndex = NFAStateIndex(1);
pub const START: NFAStateIndex = NFAStateIndex(2);
#[derive(Debug, PartialEq, Eq)]
pub enum NFAConstructionError {
NamedCaptures,
NonGreedy,
WordBoundary,
LineBoundary,
TextBoundary,
ByteRegex,
}
impl NFA {
pub fn from_re(regex: &Regex) -> Result<NFA, NFAConstructionError> {
let mut nfa = NFA::new();
let s0 = try!(nfa.expr(regex, ACCEPT, REJECT));
nfa.push_edge(START, Noop, s0);
Ok(nfa)
}
///////////////////////////////////////////////////////////////////////////
// Public methods for querying an NFA
pub fn edges<L: EdgeLabel>(&self, from: NFAStateIndex) -> EdgeIterator<L> {
let vec = L::vec(&self.edges);
let first = *L::first(&self.states[from.0]);
EdgeIterator {
edges: vec,
from: from,
index: first,
}
}
pub fn kind(&self, from: NFAStateIndex) -> StateKind {
self.states[from.0].kind
}
pub fn is_accepting_state(&self, from: NFAStateIndex) -> bool {
self.states[from.0].kind == StateKind::Accept
}
pub fn is_rejecting_state(&self, from: NFAStateIndex) -> bool {
self.states[from.0].kind == StateKind::Reject
}
///////////////////////////////////////////////////////////////////////////
// Private methods for building an NFA
fn new() -> NFA {
let mut nfa = NFA {
states: vec![],
edges: Edges {
noop_edges: vec![],
test_edges: vec![],
other_edges: vec![],
},
};
// reserve the ACCEPT, REJECT, and START states ahead of time
assert!(nfa.new_state(StateKind::Accept) == ACCEPT);
assert!(nfa.new_state(StateKind::Reject) == REJECT);
assert!(nfa.new_state(StateKind::Neither) == START);
// the ACCEPT state, given another token, becomes a REJECT
nfa.push_edge(ACCEPT, Other, REJECT);
// the REJECT state loops back to itself no matter what
nfa.push_edge(REJECT, Other, REJECT);
nfa
}
fn new_state(&mut self, kind: StateKind) -> NFAStateIndex {
let index = self.states.len();
// these edge indices will be patched later by patch_edges()
self.states.push(State {
kind: kind,
first_noop_edge: usize::MAX,
first_test_edge: usize::MAX,
first_other_edge: usize::MAX,
});
NFAStateIndex(index)
}
// pushes an edge: note that all outgoing edges from a particular
// state should be pushed together, so that the edge vectors are
// suitably sorted
fn push_edge<L: EdgeLabel>(&mut self, from: NFAStateIndex, label: L, to: NFAStateIndex) {
let edge_vec = L::vec_mut(&mut self.edges);
let edge_index = edge_vec.len();
edge_vec.push(Edge {
from: from,
label: label,
to: to,
});
// if this is the first edge from the `from` state, set the
// index
let first_index = L::first_mut(&mut self.states[from.0]);
if *first_index == usize::MAX {
*first_index = edge_index;
} else {
            // otherwise, check that all edges from this state are contiguous
assert_eq!(edge_vec[edge_index - 1].from, from);
}
}
fn expr(
&mut self,
expr: &Expr,
accept: NFAStateIndex,
reject: NFAStateIndex,
) -> Result<NFAStateIndex, NFAConstructionError> {
match *expr {
Expr::Empty => Ok(accept),
Expr::Literal { ref chars, casei } => {
// for e.g. "abc":
// [s0] -a-> [ ] -b-> [ ] -c-> [accept]
// | | |
// +--------+--------+--otherwise-> [reject]
Ok(if casei {
chars.iter().rev().fold(accept, |s, &ch| {
let s1 = self.new_state(StateKind::Neither);
for ch1 in ch.to_lowercase().chain(ch.to_uppercase()) {
self.push_edge(s1, Test::char(ch1), s);
}
self.push_edge(s1, Other, reject);
s1
})
} else {
chars.iter().rev().fold(accept, |s, &ch| {
let s1 = self.new_state(StateKind::Neither);
self.push_edge(s1, Test::char(ch), s);
self.push_edge(s1, Other, reject);
s1
})
})
}
Expr::AnyCharNoNL => {
// [s0] -otherwise-> [accept]
// |
// '\n' etc
// |
// v
// [reject]
let s0 = self.new_state(StateKind::Neither);
for nl_char in "\n\r".chars() {
self.push_edge(s0, Test::char(nl_char), reject);
}
self.push_edge(s0, Other, accept);
Ok(s0)
}
Expr::AnyChar => {
// [s0] -otherwise-> [accept]
let s0 = self.new_state(StateKind::Neither);
self.push_edge(s0, Other, accept);
Ok(s0)
}
Expr::Class(ref class) => {
// [s0] --c0--> [accept]
// | | ^
// | | ... |
// | | |
// | +---cn-------+
// +---------------> [reject]
let s0 = self.new_state(StateKind::Neither);
for &range in class {
let test: Test = range.into();
self.push_edge(s0, test, accept);
}
self.push_edge(s0, Other, reject);
Ok(s0)
}
// currently we don't support any boundaries because
// I was too lazy to code them up or think about them
Expr::StartLine | Expr::EndLine => Err(NFAConstructionError::LineBoundary),
Expr::StartText | Expr::EndText => Err(NFAConstructionError::TextBoundary),
Expr::WordBoundaryAscii
| Expr::NotWordBoundaryAscii
| Expr::WordBoundary
| Expr::NotWordBoundary => Err(NFAConstructionError::WordBoundary),
// currently we treat all groups the same, whether they
// capture or not; but we don't permit named groups,
// in case we want to give them significance in the future
Expr::Group {
ref e,
i: _,
name: None,
} => self.expr(e, accept, reject),
Expr::Group { name: Some(_), .. } => Err(NFAConstructionError::NamedCaptures),
// currently we always report the longest match possible
Expr::Repeat { greedy: false, .. } => Err(NFAConstructionError::NonGreedy),
Expr::Repeat {
ref e,
r: Repeater::ZeroOrOne,
greedy: true,
} => self.optional_expr(e, accept, reject),
Expr::Repeat {
ref e,
r: Repeater::ZeroOrMore,
greedy: true,
} => self.star_expr(e, accept, reject),
Expr::Repeat {
ref e,
r: Repeater::OneOrMore,
greedy: true,
} => self.plus_expr(e, accept, reject),
Expr::Repeat {
ref e,
r: Repeater::Range { min, max: None },
greedy: true,
} => {
// +---min times----+
// | |
//
// [s0] --..e..-- [s1] --..e*..--> [accept]
// | |
// | v
// +-> [reject]
let mut s = try!(self.star_expr(e, accept, reject));
for _ in 0..min {
s = try!(self.expr(e, s, reject));
}
Ok(s)
}
Expr::Repeat {
ref e,
r:
Repeater::Range {
min,
max: Some(max),
},
greedy: true,
} => {
let mut s = accept;
for _ in min..max {
s = try!(self.optional_expr(e, s, reject));
}
for _ in 0..min {
s = try!(self.expr(e, s, reject));
}
Ok(s)
}
Expr::Concat(ref exprs) => {
let mut s = accept;
for expr in exprs.iter().rev() {
s = try!(self.expr(expr, s, reject));
}
Ok(s)
}
Expr::Alternate(ref exprs) => {
// [s0] --exprs[0]--> [accept/reject]
// | ^
// | |
// +----exprs[..]------+
// | |
// | |
// +----exprs[n-1]-----+
let s0 = self.new_state(StateKind::Neither);
let targets: Vec<_> = try!(
exprs
.iter()
.map(|expr| self.expr(expr, accept, reject))
.collect()
);
// push edges from s0 all together so they are
                // adjacent in the edge array
for target in targets {
self.push_edge(s0, Noop, target);
}
Ok(s0)
}
            // If we ever support byte regexes, these
// can be merged in with the cases above.
Expr::AnyByte | Expr::AnyByteNoNL | Expr::ClassBytes(_) | Expr::LiteralBytes { .. } => {
Err(NFAConstructionError::ByteRegex)
}
}
}
fn optional_expr(
&mut self,
expr: &Expr,
accept: NFAStateIndex,
reject: NFAStateIndex,
) -> Result<NFAStateIndex, NFAConstructionError> {
// [s0] ----> [accept]
// | ^
// v |
// [s1] --...----+
// |
// v
// [reject]
let s1 = try!(self.expr(expr, accept, reject));
let s0 = self.new_state(StateKind::Neither);
self.push_edge(s0, Noop, accept); // they might supply nothing
self.push_edge(s0, Noop, s1);
Ok(s0)
}
fn star_expr(
&mut self,
expr: &Expr,
accept: NFAStateIndex,
reject: NFAStateIndex,
) -> Result<NFAStateIndex, NFAConstructionError> {
// [s0] ----> [accept]
// | ^
// | |
// | +----------+
// v |
// [s1] --...----+
// |
// v
// [reject]
let s0 = self.new_state(StateKind::Neither);
let s1 = try!(self.expr(expr, s0, reject));
self.push_edge(s0, Noop, accept);
self.push_edge(s0, Noop, s1);
Ok(s0)
}
fn plus_expr(
&mut self,
expr: &Expr,
accept: NFAStateIndex,
reject: NFAStateIndex,
) -> Result<NFAStateIndex, NFAConstructionError> {
// [accept]
// ^
// |
// +----------+
// v |
// [s0] --...--[s1]
// |
// v
// [reject]
let s1 = self.new_state(StateKind::Neither);
let s0 = try!(self.expr(expr, s1, reject));
self.push_edge(s1, Noop, accept);
self.push_edge(s1, Noop, s0);
Ok(s0)
}
}
pub trait EdgeLabel: Sized {
fn vec_mut(nfa: &mut Edges) -> &mut Vec<Edge<Self>>;
fn vec(nfa: &Edges) -> &Vec<Edge<Self>>;
fn first_mut(state: &mut State) -> &mut usize;
fn first(state: &State) -> &usize;
}
impl EdgeLabel for Noop {
fn vec_mut(nfa: &mut Edges) -> &mut Vec<Edge<Noop>> {
&mut nfa.noop_edges
}
fn first_mut(state: &mut State) -> &mut usize {
&mut state.first_noop_edge
}
fn vec(nfa: &Edges) -> &Vec<Edge<Noop>> {
&nfa.noop_edges
}
fn first(state: &State) -> &usize {
&state.first_noop_edge
}
}
impl EdgeLabel for Other {
fn vec_mut(nfa: &mut Edges) -> &mut Vec<Edge<Other>> {
&mut nfa.other_edges
}
fn first_mut(state: &mut State) -> &mut usize {
&mut state.first_other_edge
}
fn vec(nfa: &Edges) -> &Vec<Edge<Other>> {
&nfa.other_edges
}
fn first(state: &State) -> &usize {
&state.first_other_edge
}
}
impl EdgeLabel for Test {
fn vec_mut(nfa: &mut Edges) -> &mut Vec<Edge<Test>> {
&mut nfa.test_edges
}
fn first_mut(state: &mut State) -> &mut usize {
&mut state.first_test_edge
}
fn vec(nfa: &Edges) -> &Vec<Edge<Test>> {
&nfa.test_edges
}
fn first(state: &State) -> &usize {
&state.first_test_edge
}
}
pub struct EdgeIterator<'nfa, L: EdgeLabel + 'nfa> {
edges: &'nfa [Edge<L>],
from: NFAStateIndex,
index: usize,
}
impl<'nfa, L: EdgeLabel> Iterator for EdgeIterator<'nfa, L> {
type Item = &'nfa Edge<L>;
fn next(&mut self) -> Option<&'nfa Edge<L>> {
let index = self.index;
if index == usize::MAX {
return None;
}
let next_index = index + 1;
if next_index >= self.edges.len() || self.edges[next_index].from != self.from {
self.index = usize::MAX;
} else {
self.index = next_index;
}
Some(&self.edges[index])
}
}
impl Test {
pub fn char(c: char) -> Test {
let c = c as u32;
Test {
start: c,
end: c + 1,
}
}
pub fn inclusive_range(s: char, e: char) -> Test {
Test {
start: s as u32,
end: e as u32 + 1,
}
}
pub fn exclusive_range(s: char, e: char) -> Test {
Test {
start: s as u32,
end: e as u32,
}
}
pub fn is_char(self) -> bool {
self.len() == 1
}
pub fn len(self) -> u32 {
self.end - self.start
}
pub fn contains_u32(self, c: u32) -> bool {
c >= self.start && c < self.end
}
pub fn contains_char(self, c: char) -> bool {
self.contains_u32(c as u32)
}
pub fn intersects(self, r: Test) -> bool {
!self.is_empty() && !r.is_empty()
&& (self.contains_u32(r.start) || r.contains_u32(self.start))
}
pub fn is_disjoint(self, r: Test) -> bool {
!self.intersects(r)
}
pub fn is_empty(self) -> bool {
self.start == self.end
}
}
impl From<ClassRange> for Test {
fn from(range: ClassRange) -> Test {
Test::inclusive_range(range.start, range.end)
}
}
impl Debug for Test {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), FmtError> {
match (char::from_u32(self.start), char::from_u32(self.end)) {
(Some(start), Some(end)) => {
if self.is_char() {
if ".[]()?+*!".contains(start) {
write!(fmt, "\\{}", start)
} else {
write!(fmt, "{}", start)
}
} else {
write!(fmt, "[{:?}..{:?}]", start, end)
}
}
_ => write!(fmt, "[{:?}..{:?}]", self.start, self.end),
}
}
}
impl Debug for NFAStateIndex {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), FmtError> {
write!(fmt, "NFA{}", self.0)
}
}
impl<L: Debug> Debug for Edge<L> {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), FmtError> {
write!(fmt, "{:?} -{:?}-> {:?}", self.from, self.label, self.to)
}
}
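// Illustrative sketch (a hypothetical test, not part of upstream lalrpop):
// `Test` stores a half-open `[start, end)` code-point range, which is why
// `inclusive_range` adds 1 to the end point and `char` is a range of
// length 1. Only constructors defined above are used here.
#[cfg(test)]
mod test_range_sketch {
    use super::Test;

    #[test]
    fn half_open_ranges() {
        let t = Test::inclusive_range('a', 'z');
        assert!(t.contains_char('a'));
        assert!(t.contains_char('z'));
        assert!(!t.contains_char('{')); // '{' is the code point just past 'z'
        assert!(Test::char('m').is_char());
        assert!(Test::exclusive_range('a', 'a').is_empty());
        assert!(Test::char('a').is_disjoint(Test::char('b')));
    }
}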


@@ -0,0 +1,159 @@
use lexer::nfa::{NFAConstructionError, Noop, Other, StateKind, Test, NFA};
use lexer::nfa::interpret::interpret;
use lexer::re;
#[test]
fn edge_iter() {
let mut nfa = NFA::new();
let s0 = nfa.new_state(StateKind::Neither);
let s1 = nfa.new_state(StateKind::Neither);
let s2 = nfa.new_state(StateKind::Neither);
let s3 = nfa.new_state(StateKind::Neither);
nfa.push_edge(s2, Noop, s3);
nfa.push_edge(s0, Noop, s1);
nfa.push_edge(s0, Noop, s3);
nfa.push_edge(s1, Noop, s2);
    // check that if we mixed up the indices between Noop/Other, we'd get the wrong thing here
nfa.push_edge(s0, Other, s2);
let s0_edges: Vec<_> = nfa.edges::<Noop>(s0).map(|e| e.to).collect();
let s1_edges: Vec<_> = nfa.edges::<Noop>(s1).map(|e| e.to).collect();
let s2_edges: Vec<_> = nfa.edges::<Noop>(s2).map(|e| e.to).collect();
let s3_edges: Vec<_> = nfa.edges::<Noop>(s3).map(|e| e.to).collect();
let s0_other_edges: Vec<_> = nfa.edges::<Other>(s0).map(|e| e.to).collect();
let s0_test_edges: Vec<_> = nfa.edges::<Test>(s0).map(|e| e.to).collect();
assert_eq!(s0_edges, &[s1, s3]);
assert_eq!(s1_edges, &[s2]);
assert_eq!(s2_edges, &[s3]);
assert_eq!(s3_edges, &[]);
assert_eq!(s0_other_edges, &[s2]);
assert_eq!(s0_test_edges, &[]);
}
#[test]
fn identifier_regex() {
let ident = re::parse_regex(r#"[a-zA-Z_][a-zA-Z0-9_]*"#).unwrap();
println!("{:#?}", ident);
let nfa = NFA::from_re(&ident).unwrap();
println!("{:#?}", nfa);
assert_eq!(interpret(&nfa, "0123"), None);
assert_eq!(interpret(&nfa, "hello0123"), Some("hello0123"));
assert_eq!(interpret(&nfa, "hello0123 abc"), Some("hello0123"));
assert_eq!(interpret(&nfa, "_0123 abc"), Some("_0123"));
}
#[test]
fn regex_star_group() {
let ident = re::parse_regex(r#"(abc)*"#).unwrap();
let nfa = NFA::from_re(&ident).unwrap();
assert_eq!(interpret(&nfa, "abcabcabcab"), Some("abcabcabc"));
}
#[test]
fn regex_number() {
let num = re::parse_regex(r#"[0-9]+"#).unwrap();
let nfa = NFA::from_re(&num).unwrap();
assert_eq!(interpret(&nfa, "123"), Some("123"));
}
#[test]
fn dot_newline() {
let num = re::parse_regex(r#"."#).unwrap();
let nfa = NFA::from_re(&num).unwrap();
assert_eq!(interpret(&nfa, "\n"), None);
}
#[test]
fn max_range() {
let num = re::parse_regex(r#"ab{2,4}"#).unwrap();
let nfa = NFA::from_re(&num).unwrap();
assert_eq!(interpret(&nfa, "a"), None);
assert_eq!(interpret(&nfa, "ab"), None);
assert_eq!(interpret(&nfa, "abb"), Some("abb"));
assert_eq!(interpret(&nfa, "abbb"), Some("abbb"));
assert_eq!(interpret(&nfa, "abbbb"), Some("abbbb"));
assert_eq!(interpret(&nfa, "abbbbb"), Some("abbbb"));
assert_eq!(interpret(&nfa, "ac"), None);
}
#[test]
fn literal() {
let num = re::parse_regex(r#"(?i:aBCdeF)"#).unwrap();
let nfa = NFA::from_re(&num).unwrap();
assert_eq!(interpret(&nfa, "abcdef"), Some("abcdef"));
assert_eq!(interpret(&nfa, "AbcDEf"), Some("AbcDEf"));
}
// Test that uses of disallowed features trigger errors
// during NFA construction:
#[test]
fn captures() {
let num = re::parse_regex(r#"(aBCdeF)"#).unwrap();
NFA::from_re(&num).unwrap(); // captures are ok
let num = re::parse_regex(r#"(?:aBCdeF)"#).unwrap();
NFA::from_re(&num).unwrap(); // non-captures are ok
let num = re::parse_regex(r#"(?P<foo>aBCdeF)"#).unwrap(); // named captures are not
assert_eq!(
NFA::from_re(&num).unwrap_err(),
NFAConstructionError::NamedCaptures
);
}
#[test]
fn text_boundaries() {
let num = re::parse_regex(r#"^aBCdeF"#).unwrap();
assert_eq!(
NFA::from_re(&num).unwrap_err(),
NFAConstructionError::TextBoundary
);
let num = re::parse_regex(r#"aBCdeF$"#).unwrap();
assert_eq!(
NFA::from_re(&num).unwrap_err(),
NFAConstructionError::TextBoundary
);
}
#[test]
fn line_boundaries() {
let num = re::parse_regex(r#"(?m)^aBCdeF"#).unwrap();
assert_eq!(
NFA::from_re(&num).unwrap_err(),
NFAConstructionError::LineBoundary
);
let num = re::parse_regex(r#"(?m)aBCdeF$"#).unwrap();
assert_eq!(
NFA::from_re(&num).unwrap_err(),
NFAConstructionError::LineBoundary
);
}
#[test]
fn word_boundaries() {
let num = re::parse_regex(r#"\baBCdeF"#).unwrap();
assert_eq!(
NFA::from_re(&num).unwrap_err(),
NFAConstructionError::WordBoundary
);
let num = re::parse_regex(r#"aBCdeF\B"#).unwrap();
assert_eq!(
NFA::from_re(&num).unwrap_err(),
NFAConstructionError::WordBoundary
);
}
#[test]
fn issue_101() {
let num = re::parse_regex(r#"(1|0?)"#).unwrap();
NFA::from_re(&num).unwrap();
}
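// Illustrative sketch (a hypothetical test, assuming the same imports as
// the tests above): the interpreter reports the longest possible match,
// which is also why non-greedy repetitions are rejected with
// `NFAConstructionError::NonGreedy` during construction.
#[test]
fn longest_match_sketch() {
    let re = re::parse_regex(r#"a|ab"#).unwrap();
    let nfa = NFA::from_re(&re).unwrap();
    assert_eq!(interpret(&nfa, "ab"), Some("ab"));
}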


@@ -0,0 +1,23 @@
//! A parser and representation of regular expressions.
use regex_syntax::{self, Error, Expr};
#[cfg(test)]
mod test;
pub type Regex = Expr;
pub type RegexError = Error;
/// Convert a string literal into a parsed regular expression.
pub fn parse_literal(s: &str) -> Regex {
match parse_regex(&regex_syntax::escape(s)) {
Ok(v) => v,
Err(_) => panic!("failed to parse literal regular expression"),
}
}
/// Parse a regular expression like `a+` etc.
pub fn parse_regex(s: &str) -> Result<Regex, RegexError> {
let expr = try!(Expr::parse(s));
Ok(expr)
}


@@ -0,0 +1,11 @@
use super::*;
#[test]
fn parse_unclosed_group() {
parse_regex(r"(123").unwrap_err();
}
#[test]
fn alt_oom() {
parse_regex(r"(%%|[^%])+").unwrap();
}


@@ -0,0 +1,56 @@
// Need this for rusty_peg
#![recursion_limit = "256"]
// I hate this lint.
#![allow(unused_parens)]
// The builtin tests don't cover the CLI and so forth, and it's just
// too darn annoying to try and make them do so.
#![cfg_attr(test, allow(dead_code))]
extern crate ascii_canvas;
extern crate atty;
extern crate bit_set;
extern crate diff;
extern crate ena;
extern crate itertools;
extern crate lalrpop_util;
extern crate petgraph;
extern crate regex;
extern crate regex_syntax;
extern crate string_cache;
extern crate term;
extern crate unicode_xid;
#[cfg(test)]
extern crate rand;
// hoist the modules that define macros up earlier
#[macro_use]
mod rust;
#[macro_use]
mod log;
mod api;
mod build;
mod collections;
mod file_text;
mod grammar;
mod lexer;
mod lr1;
mod message;
mod normalize;
mod parser;
mod kernel_set;
mod session;
mod tls;
mod tok;
mod util;
#[cfg(test)]
mod generate;
#[cfg(test)]
mod test_util;
pub use api::Configuration;
pub use api::process_root;
pub use api::process_root_unconditionally;
use ascii_canvas::style;


@@ -0,0 +1,64 @@
#[derive(Clone)]
pub struct Log {
level: Level,
}
#[derive(Clone, PartialOrd, Ord, PartialEq, Eq)]
pub enum Level {
/// No updates unless an error arises.
Taciturn,
/// Timing and minimal progress.
Informative,
/// More details, but still stuff an end-user is likely to understand.
Verbose,
/// Everything you could ever want and then some more.
Debug,
}
impl Log {
pub fn new(level: Level) -> Log {
Log { level: level }
}
pub fn set_level(&mut self, level: Level) {
self.level = level;
}
pub fn log<M>(&self, level: Level, message: M)
where
M: FnOnce() -> String,
{
if self.level >= level {
println!("{}", message());
}
}
}
macro_rules! log {
($session:expr, $level:ident, $($args:expr),*) => {
$session.log(::log::Level::$level, || ::std::fmt::format(format_args!($($args),*)))
}
}
macro_rules! debug {
($($args:expr),*) => {
log!(::tls::Tls::session(), Debug, $($args),*)
}
}
macro_rules! profile {
($session:expr, $phase_name:expr, $action:expr) => {
{
log!($session, Verbose, "Phase `{}` begun", $phase_name);
let time_stamp = ::std::time::Instant::now();
let result = $action;
let elapsed = time_stamp.elapsed();
log!($session, Verbose, "Phase `{}` completed in {} seconds",
                 $phase_name, elapsed.as_secs() as f64 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0);
result
}
}
}
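// Illustrative usage of the macros above (a sketch; `session` is assumed
// to be a value with a `log` method, such as the crate's `Session`):
//
//     let states = profile! {
//         session,
//         "LR(1) state construction",
//         build_states(&grammar)
//     };
//     log!(session, Verbose, "built {} states", states.len());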


@@ -0,0 +1,357 @@
//! LR(1) state construction algorithm.
use collections::{map, Multimap};
use kernel_set;
use grammar::repr::*;
use lr1::core::*;
use lr1::first;
use lr1::lane_table::*;
use lr1::lookahead::*;
use std::rc::Rc;
use std::env;
use tls::Tls;
#[cfg(test)]
mod test;
fn build_lr1_states_legacy<'grammar>(
grammar: &'grammar Grammar,
start: NonterminalString,
) -> LR1Result<'grammar> {
let eof = TokenSet::eof();
let mut lr1: LR<'grammar, TokenSet> = LR::new(grammar, start, eof);
lr1.set_permit_early_stop(true);
lr1.build_states()
}
type ConstructionFunction<'grammar> =
fn(&'grammar Grammar, NonterminalString) -> LR1Result<'grammar>;
pub fn use_lane_table() -> bool {
match env::var("LALRPOP_LANE_TABLE") {
Ok(ref s) => s != "disabled",
_ => true,
}
}
pub fn build_lr1_states<'grammar>(
grammar: &'grammar Grammar,
start: NonterminalString,
) -> LR1Result<'grammar> {
let (method_name, method_fn) = if use_lane_table() {
("lane", build_lane_table_states as ConstructionFunction)
} else {
("legacy", build_lr1_states_legacy as ConstructionFunction)
};
profile! {
&Tls::session(),
format!("LR(1) state construction ({})", method_name),
{
method_fn(grammar, start)
}
}
}
pub fn build_lr0_states<'grammar>(
grammar: &'grammar Grammar,
start: NonterminalString,
) -> Result<Vec<LR0State<'grammar>>, LR0TableConstructionError<'grammar>> {
let lr1 = LR::new(grammar, start, Nil);
lr1.build_states()
}
pub struct LR<'grammar, L: LookaheadBuild> {
grammar: &'grammar Grammar,
first_sets: first::FirstSets,
start_nt: NonterminalString,
start_lookahead: L,
permit_early_stop: bool,
}
impl<'grammar, L: LookaheadBuild> LR<'grammar, L> {
fn new(grammar: &'grammar Grammar, start_nt: NonterminalString, start_lookahead: L) -> Self {
LR {
grammar: grammar,
first_sets: first::FirstSets::new(grammar),
start_nt: start_nt,
start_lookahead: start_lookahead,
permit_early_stop: false,
}
}
fn set_permit_early_stop(&mut self, v: bool) {
self.permit_early_stop = v;
}
fn build_states(&self) -> Result<Vec<State<'grammar, L>>, TableConstructionError<'grammar, L>> {
let session = Tls::session();
let mut kernel_set = kernel_set::KernelSet::new();
let mut states = vec![];
let mut conflicts = vec![];
// create the starting state
kernel_set.add_state(Kernel::start(self.items(
&self.start_nt,
0,
&self.start_lookahead,
)));
while let Some(Kernel { items: seed_items }) = kernel_set.next() {
let items = self.transitive_closure(seed_items);
let index = StateIndex(states.len());
if index.0 % 5000 == 0 && index.0 > 0 {
log!(session, Verbose, "{} states created so far.", index.0);
}
let mut this_state = State {
index: index,
items: items.clone(),
shifts: map(),
reductions: vec![],
gotos: map(),
};
// group the items that we can transition into by shifting
// over a term or nonterm
let transitions: Multimap<Symbol, Multimap<LR0Item<'grammar>, L>> = items
.vec
.iter()
.filter_map(|item| item.shifted_item())
.map(
|(
symbol,
Item {
production,
index,
lookahead,
},
)| { (symbol, (Item::lr0(production, index), lookahead)) },
)
.collect();
for (symbol, shifted_items) in transitions.into_iter() {
let shifted_items: Vec<Item<'grammar, L>> = shifted_items
.into_iter()
.map(|(lr0_item, lookahead)| lr0_item.with_lookahead(lookahead))
.collect();
// Not entirely obvious: if the original set of items
// is sorted to begin with (and it is), then this new
// set of shifted items is *also* sorted. This is
// because it is produced from the old items by simply
// incrementing the index by 1.
let next_state = kernel_set.add_state(Kernel::shifted(shifted_items));
match symbol {
Symbol::Terminal(s) => {
let prev = this_state.shifts.insert(s, next_state);
assert!(prev.is_none()); // cannot have a shift/shift conflict
}
Symbol::Nonterminal(s) => {
let prev = this_state.gotos.insert(s, next_state);
assert!(prev.is_none());
}
}
}
// finally, consider the reductions
for item in items.vec.iter().filter(|i| i.can_reduce()) {
this_state
.reductions
.push((item.lookahead.clone(), item.production));
}
// check for shift-reduce conflicts (reduce-reduce detected above)
conflicts.extend(L::conflicts(&this_state));
            // record the new state
states.push(this_state);
if self.permit_early_stop && session.stop_after(conflicts.len()) {
log!(
session,
Verbose,
"{} conflicts encountered, stopping.",
conflicts.len()
);
break;
}
}
if !conflicts.is_empty() {
Err(TableConstructionError {
states: states,
conflicts: conflicts,
})
} else {
Ok(states)
}
}
fn items(&self, id: &NonterminalString, index: usize, lookahead: &L) -> Vec<Item<'grammar, L>> {
self.grammar
.productions_for(id)
.iter()
.map(|production| {
debug_assert!(index <= production.symbols.len());
Item {
production: production,
index: index,
lookahead: lookahead.clone(),
}
})
.collect()
}
// expands `state` with epsilon moves
fn transitive_closure(&self, items: Vec<Item<'grammar, L>>) -> Items<'grammar, L> {
let mut stack: Vec<LR0Item<'grammar>> = items.iter().map(|item| item.to_lr0()).collect();
let mut map: Multimap<LR0Item<'grammar>, L> = items
.into_iter()
.map(|item| (item.to_lr0(), item.lookahead))
.collect();
while let Some(item) = stack.pop() {
let lookahead = map.get(&item).unwrap().clone();
let shift_symbol = item.shift_symbol();
// Check whether this is an item where the cursor
// is resting on a non-terminal:
//
// I = ... (*) X z... [lookahead]
//
// The `nt` will be X and the `remainder` will be `z...`.
let (nt, remainder) = match shift_symbol {
None => continue, // requires a reduce
Some((Symbol::Terminal(_), _)) => {
continue; // requires a shift
}
Some((Symbol::Nonterminal(nt), remainder)) => (nt, remainder),
};
// In that case, for each production of `X`, we are also
// in a state where the cursor rests at the start of that production:
//
// X = (*) a... [lookahead']
// X = (*) b... [lookahead']
//
// Here `lookahead'` is computed based on the `remainder` and our
// `lookahead`. In LR1 at least, it is the union of:
//
// (a) FIRST(remainder)
// (b) if remainder may match epsilon, also our lookahead.
for new_item in L::epsilon_moves(self, &nt, remainder, &lookahead) {
let new_item0 = new_item.to_lr0();
if map.push(new_item0, new_item.lookahead) {
stack.push(new_item0);
}
}
}
let final_items = map.into_iter()
.map(|(lr0_item, lookahead)| lr0_item.with_lookahead(lookahead))
.collect();
Items {
vec: Rc::new(final_items),
}
}
}
/// Except for the initial state, the kernel sets always contain
/// a set of "seed" items where something has been pushed (that is,
/// index > 0). In other words, items like this:
///
/// A = ...p (*) ...
///
/// where ...p is non-empty. We now have to expand to include any
/// epsilon moves:
///
/// A = ... (*) B ...
/// B = (*) ... // added by transitive_closure algorithm
///
/// But note that the state is completely identified by its
/// kernel set: the same kernel sets always expand to the
/// same transitive closures, and different kernel sets
/// always expand to different transitive closures. The
/// first point is obvious, but the latter point follows
/// because the transitive closure algorithm only adds
/// items where `index == 0`, and hence it can never add an
/// item found in a kernel set.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
struct Kernel<'grammar, L: LookaheadBuild> {
items: Vec<Item<'grammar, L>>,
}
impl<'grammar, L: LookaheadBuild> Kernel<'grammar, L> {
pub fn start(items: Vec<Item<'grammar, L>>) -> Kernel<'grammar, L> {
// In start state, kernel should have only items with `index == 0`.
debug_assert!(items.iter().all(|item| item.index == 0));
Kernel { items: items }
}
pub fn shifted(items: Vec<Item<'grammar, L>>) -> Kernel<'grammar, L> {
// Assert that this kernel consists only of shifted items
// where `index > 0`. This assertion could cost real time to
// check so only do it in debug mode.
debug_assert!(items.iter().all(|item| item.index > 0));
Kernel { items: items }
}
}
impl<'grammar, L: LookaheadBuild> kernel_set::Kernel for Kernel<'grammar, L> {
type Index = StateIndex;
fn index(c: usize) -> StateIndex {
StateIndex(c)
}
}
pub trait LookaheadBuild: Lookahead {
// Given that there exists an item
//
// X = ... (*) Y ...s [L]
//
// where `nt` is `Y`, `remainder` is `...s`, and `lookahead` is
// `L`, computes the new items resulting from epsilon moves (if
// any). The technique of doing this will depend on the amount of
// lookahead.
//
// For example, if we have an LR0 item, then for each `Y = ...`
// production, we just add an `Y = (*) ...` item. But for LR1
// items, we have to add multiple items where we consider the
// lookahead from `FIRST(...s, L)`.
fn epsilon_moves<'grammar>(
lr: &LR<'grammar, Self>,
nt: &NonterminalString,
remainder: &[Symbol],
lookahead: &Self,
) -> Vec<Item<'grammar, Self>>;
}
impl LookaheadBuild for Nil {
fn epsilon_moves<'grammar>(
lr: &LR<'grammar, Self>,
nt: &NonterminalString,
_remainder: &[Symbol],
lookahead: &Nil,
) -> Vec<LR0Item<'grammar>> {
lr.items(nt, 0, &lookahead)
}
}
impl LookaheadBuild for TokenSet {
fn epsilon_moves<'grammar>(
lr: &LR<'grammar, Self>,
nt: &NonterminalString,
remainder: &[Symbol],
lookahead: &Self,
) -> Vec<LR1Item<'grammar>> {
let first_set = lr.first_sets.first1(remainder, lookahead);
lr.items(nt, 0, &first_set)
}
}
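// Illustrative sketch (hypothetical, using plain std types instead of the
// crate's `TokenSet`): the epsilon-move lookahead rule in miniature. For
// an item `X = ... (*) Y z... [L]`, the items added for `Y` get
// FIRST(z...), plus `L` when `z...` can derive epsilon.
#[cfg(test)]
mod epsilon_lookahead_sketch {
    fn closure_lookahead(
        first_of_remainder: &[char],
        remainder_nullable: bool,
        lookahead: &[char],
    ) -> Vec<char> {
        let mut out: Vec<char> = first_of_remainder.to_vec();
        if remainder_nullable {
            // the remainder can vanish, so the outer lookahead applies too
            out.extend_from_slice(lookahead);
        }
        out.sort();
        out.dedup();
        out
    }

    #[test]
    fn union_when_nullable() {
        assert_eq!(closure_lookahead(&['c'], false, &['$']), vec!['c']);
        assert_eq!(closure_lookahead(&['c'], true, &['$']), vec!['$', 'c']);
    }
}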


@@ -0,0 +1,354 @@
use string_cache::DefaultAtom as Atom;
use generate;
use grammar::repr::*;
use test_util::{compare, expect_debug, normalized_grammar};
use lr1::core::*;
use lr1::interpret::interpret;
use lr1::lookahead::Token;
use lr1::lookahead::Token::EOF;
use lr1::lookahead::TokenSet;
use lr1::tls::Lr1Tls;
use tls::Tls;
use super::{use_lane_table, build_lr0_states, build_lr1_states, LR};
fn nt(t: &str) -> NonterminalString {
NonterminalString(Atom::from(t))
}
const ITERATIONS: usize = 22;
fn random_test<'g>(grammar: &Grammar, states: &'g [LR1State<'g>], start_symbol: NonterminalString) {
for i in 0..ITERATIONS {
let input_tree = generate::random_parse_tree(grammar, start_symbol.clone());
let output_tree = interpret(&states, input_tree.terminals()).unwrap();
println!("test {}", i);
println!("input_tree = {}", input_tree);
println!("output_tree = {}", output_tree);
compare(output_tree, input_tree);
}
}
macro_rules! tokens {
($($x:expr),*) => {
vec![$(TerminalString::quoted(Atom::from($x))),*]
}
}
fn items<'g>(grammar: &'g Grammar, nonterminal: &str, index: usize, la: Token) -> LR1Items<'g> {
let set = TokenSet::from(la);
let lr1: LR<TokenSet> = LR::new(&grammar, nt(nonterminal), set.clone());
let items = lr1.transitive_closure(lr1.items(&nt(nonterminal), index, &set));
items
}
#[test]
fn start_state() {
let grammar = normalized_grammar(
r#"
grammar;
extern { enum Tok { "C" => .., "D" => .. } }
A = B "C";
B: Option<u32> = {
"D" => Some(1),
() => None
};
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
let items = items(&grammar, "A", 0, EOF);
expect_debug(
items.vec,
r#"[
A = (*) B "C" [EOF],
B = (*) ["C"],
B = (*) "D" ["C"]
]"#,
);
}
#[test]
fn start_state_1() {
let grammar = normalized_grammar(
r#"
grammar;
extern { enum Tok { "B1" => .., "C1" => .. } }
A = B C;
B: Option<u32> = {
"B1" => Some(1),
() => None
};
C: Option<u32> = {
"C1" => Some(1),
() => None
};
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
expect_debug(
items(&grammar, "A", 0, EOF).vec,
r#"[
A = (*) B C [EOF],
B = (*) ["C1", EOF],
B = (*) "B1" ["C1", EOF]
]"#,
);
expect_debug(
items(&grammar, "A", 1, EOF).vec,
r#"[
A = B (*) C [EOF],
C = (*) [EOF],
C = (*) "C1" [EOF]
]"#,
);
}
#[test]
fn expr_grammar1() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r#"
grammar;
extern { enum Tok { "-" => .., "N" => .., "(" => .., ")" => .. } }
S: () =
E => ();
E: () = {
E "-" T => (),
T => ()
};
T: () = {
"N" => (),
"(" E ")" => ()
};
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
    // for now, just test that the process does not result in an error
// and yields expected number of states.
let states = build_lr1_states(&grammar, nt("S")).unwrap();
println!("{:#?}", states);
assert_eq!(states.len(), if use_lane_table() { 9 } else { 16 });
// execute it on some sample inputs.
let tree = interpret(&states, tokens!["N", "-", "(", "N", "-", "N", ")"]).unwrap();
assert_eq!(
&format!("{}", tree)[..],
r#"[S: [E: [E: [T: "N"]], "-", [T: "(", [E: [E: [T: "N"]], "-", [T: "N"]], ")"]]]"#
);
// incomplete:
assert!(interpret(&states, tokens!["N", "-", "(", "N", "-", "N"]).is_err());
// incomplete:
assert!(interpret(&states, tokens!["N", "-"]).is_err());
// unexpected character:
assert!(interpret(&states, tokens!["N", "-", ")", "N", "-", "N", "("]).is_err());
// parens first:
let tree = interpret(&states, tokens!["(", "N", "-", "N", ")", "-", "N"]).unwrap();
println!("{}", tree);
assert_eq!(
&format!("{}", tree)[..],
r#"[S: [E: [E: [T: "(", [E: [E: [T: "N"]], "-", [T: "N"]], ")"]], "-", [T: "N"]]]"#
);
// run some random tests
random_test(&grammar, &states, nt("S"));
}
#[test]
fn shift_reduce_conflict1() {
let _tls = Tls::test();
// This grammar gets a shift-reduce conflict because if the input
// is "&" (*) "L", then we see two possibilities, and we must decide
// between them:
//
// "&" (*) "L" E
// | | |
// +-------+--|
// |
// E
//
// or
//
// "&" (*) "L"
// | |
// | OPT_L E
// | | |
// +---+---+----+
// |
// E
//
// to some extent this may be a false conflict, in that inlined
// rules would address it, but it's an interesting one for
// producing a useful error message.
let grammar = normalized_grammar(
r#"
grammar;
extern { enum Tok { "L" => .., "&" => .., } }
E: () = {
"L",
"&" OPT_L E
};
OPT_L: () = {
(),
"L"
};
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
assert!(build_lr1_states(&grammar, nt("E")).is_err());
}
/// One of the few grammars that IS LR(0).
#[test]
fn lr0_expr_grammar_with_explicit_eof() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r#"
grammar;
S: () = E "$";
E: () = {
E "-" T,
T,
};
T: () = {
"N",
"(" E ")",
};
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
    // for now, just test that the process does not result in an error
// and yields expected number of states.
let states = build_lr0_states(&grammar, nt("S")).unwrap();
assert_eq!(states.len(), 10);
}
/// Without the artificial '$', the grammar is not LR(0).
#[test]
fn lr0_expr_grammar_with_implicit_eof() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r#"
grammar;
S: () = E;
E: () = {
E "-" T,
T,
};
T: () = {
"N",
"(" E ")",
};
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
build_lr0_states(&grammar, nt("S")).unwrap_err();
}
/// When we moved to storing items as (lr0 -> TokenSet) pairs, a bug
/// in the transitive closure routine could cause us to have `(Foo,
/// S0)` and `(Foo, S1)` as distinct items instead of `(Foo, S0|S1)`.
#[test]
fn issue_144() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r##"
grammar;
pub ForeignItem: () = {
AttrsAndVis "item_foreign_fn",
AttrsAndVis "unsafe" "item_foreign_fn",
};
AttrsAndVis: () = {
MaybeOuterAttrs visibility,
};
MaybeOuterAttrs: () = {
OuterAttrs,
(),
};
visibility: () = {
"pub",
(),
};
OuterAttrs: () = {
OuterAttr,
OuterAttrs OuterAttr,
};
OuterAttr: () = {
"#" "[" "]",
};
Ident: () = {
"IDENT",
};
ty: () = {
"ty"
};
"##,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
build_lr1_states(&grammar, nt("ForeignItem")).unwrap();
}
// Not sure if this is the right spot
#[test]
fn match_grammar() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r#"
grammar;
match {
r"(?i)select" => SELECT
} else {
_
}
pub Query = SELECT r"[a-z]+";
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
let states = build_lr0_states(&grammar, nt("Query")).expect("build states");
println!("states: {:?}", states);
}


@@ -0,0 +1,163 @@
//! Mega naive LALR(1) generation algorithm.
use collections::{map, Map, Multimap};
use itertools::Itertools;
use lr1::build;
use lr1::core::*;
use lr1::lookahead::*;
use grammar::repr::*;
use std::rc::Rc;
use std::mem;
use tls::Tls;
#[cfg(test)]
mod test;
// Intermediate LALR(1) state. Identical to an LR(1) state, but that
// the items can be pushed to. We initially create these with an empty
// set of actions, as well.
struct LALR1State<'grammar> {
pub index: StateIndex,
pub items: Vec<LR1Item<'grammar>>,
pub shifts: Map<TerminalString, StateIndex>,
pub reductions: Multimap<&'grammar Production, TokenSet>,
pub gotos: Map<NonterminalString, StateIndex>,
}
pub fn build_lalr_states<'grammar>(
grammar: &'grammar Grammar,
start: NonterminalString,
) -> LR1Result<'grammar> {
// First build the LR(1) states
let lr_states = try!(build::build_lr1_states(grammar, start));
// With lane table, there is no reason to do state collapse
// for LALR. In fact, LALR is pointless!
if build::use_lane_table() {
println!("Warning: Now that the new lane-table algorithm is the default,");
println!(" #[lalr] mode has no effect and can be removed.");
return Ok(lr_states);
}
profile! {
&Tls::session(),
"LALR(1) state collapse",
collapse_to_lalr_states(&lr_states)
}
}
pub fn collapse_to_lalr_states<'grammar>(lr_states: &[LR1State<'grammar>]) -> LR1Result<'grammar> {
// Now compress them. This vector stores, for each state, the
// LALR(1) state to which we will remap it.
let mut remap: Vec<_> = (0..lr_states.len()).map(|_| StateIndex(0)).collect();
let mut lalr1_map: Map<Vec<LR0Item>, StateIndex> = map();
let mut lalr1_states: Vec<LALR1State> = vec![];
for (lr1_index, lr1_state) in lr_states.iter().enumerate() {
let lr0_kernel: Vec<_> = lr1_state
.items
.vec
.iter()
.map(|item| item.to_lr0())
.dedup()
.collect();
let lalr1_index = *lalr1_map.entry(lr0_kernel).or_insert_with(|| {
let index = StateIndex(lalr1_states.len());
lalr1_states.push(LALR1State {
index: index,
items: vec![],
shifts: map(),
reductions: Multimap::new(),
gotos: map(),
});
index
});
lalr1_states[lalr1_index.0]
.items
.extend(lr1_state.items.vec.iter().cloned());
remap[lr1_index] = lalr1_index;
}
// The reduction process can leave us with multiple
// overlapping LR(0) items, whose lookaheads must be
// unioned. e.g. we may now have:
//
// X = "(" (*) ")" ["Foo"]
// X = "(" (*) ")" ["Bar"]
//
// which we will convert to:
//
// X = "(" (*) ")" ["Foo", "Bar"]
for lalr1_state in &mut lalr1_states {
let items = mem::replace(&mut lalr1_state.items, vec![]);
let items: Multimap<LR0Item<'grammar>, TokenSet> = items
.into_iter()
.map(
|Item {
production,
index,
lookahead,
}| { (Item::lr0(production, index), lookahead) },
)
.collect();
lalr1_state.items = items
.into_iter()
.map(|(lr0_item, lookahead)| lr0_item.with_lookahead(lookahead))
.collect();
}
// Now that items are fully built, create the actions
for (lr1_index, lr1_state) in lr_states.iter().enumerate() {
let lalr1_index = remap[lr1_index];
let lalr1_state = &mut lalr1_states[lalr1_index.0];
for (terminal, &lr1_state) in &lr1_state.shifts {
let target_state = remap[lr1_state.0];
let prev = lalr1_state.shifts.insert(terminal.clone(), target_state);
assert!(prev.unwrap_or(target_state) == target_state);
}
for (nt, lr1_state) in &lr1_state.gotos {
let target_state = remap[lr1_state.0];
let prev = lalr1_state.gotos.insert(nt.clone(), target_state);
assert!(prev.unwrap_or(target_state) == target_state); // as above
}
for &(ref token_set, production) in &lr1_state.reductions {
lalr1_state.reductions.push(production, token_set.clone());
}
}
// Finally, create the new states and detect conflicts
let lr1_states: Vec<_> = lalr1_states
.into_iter()
.map(|lr| State {
index: lr.index,
items: Items {
vec: Rc::new(lr.items),
},
shifts: lr.shifts,
reductions: lr.reductions.into_iter().map(|(p, ts)| (ts, p)).collect(),
gotos: lr.gotos,
})
.collect();
let conflicts: Vec<_> = lr1_states
.iter()
.flat_map(|s| TokenSet::conflicts(s))
.collect();
if !conflicts.is_empty() {
Err(TableConstructionError {
states: lr1_states,
conflicts: conflicts,
})
} else {
Ok(lr1_states)
}
}
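// Illustrative sketch (hypothetical, using std types instead of the
// crate's `Map`/`Multimap`): the heart of the collapse is keying states
// by their LR(0) kernel and unioning the lookaheads of identical LR(0)
// items, as in the `["Foo", "Bar"]` example above.
#[cfg(test)]
mod collapse_sketch {
    use std::collections::BTreeMap;

    fn union_lookaheads(items: Vec<(&'static str, char)>) -> BTreeMap<&'static str, Vec<char>> {
        let mut merged: BTreeMap<&'static str, Vec<char>> = BTreeMap::new();
        for (lr0_item, lookahead) in items {
            let entry = merged.entry(lr0_item).or_insert_with(Vec::new);
            if !entry.contains(&lookahead) {
                entry.push(lookahead); // union, ignoring duplicates
            }
        }
        merged
    }

    #[test]
    fn unions_lookaheads() {
        let item = r#"X = "(" (*) ")""#;
        let merged = union_lookaheads(vec![(item, 'F'), (item, 'B')]);
        assert_eq!(merged[item], vec!['F', 'B']);
    }
}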


@@ -0,0 +1,49 @@
use string_cache::DefaultAtom as Atom;
use grammar::repr::*;
use lr1::tls::Lr1Tls;
use test_util::normalized_grammar;
use tls::Tls;
use super::build_lalr_states;
use super::super::interpret::interpret;
fn nt(t: &str) -> NonterminalString {
NonterminalString(Atom::from(t))
}
macro_rules! tokens {
($($x:expr),*) => {
vec![$(TerminalString::quoted(Atom::from($x))),*]
}
}
#[test]
fn figure9_23() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r#"
grammar;
extern { enum Tok { "-" => .., "N" => .., "(" => .., ")" => .. } }
S: () = E => ();
E: () = {
E "-" T => (),
T => ()
};
T: () = {
"N" => (),
"(" E ")" => ()
};
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
let states = build_lalr_states(&grammar, nt("S")).unwrap();
println!("{:#?}", states);
let tree = interpret(&states, tokens!["N", "-", "(", "N", "-", "N", ")"]).unwrap();
assert_eq!(
&format!("{:?}", tree)[..],
r#"[S: [E: [E: [T: "N"]], "-", [T: "(", [E: [E: [T: "N"]], "-", [T: "N"]], ")"]]]"#
);
}

File diff suppressed because it is too large.


@@ -0,0 +1,279 @@
//! Base helper routines for a code generator.
use grammar::repr::*;
use lr1::core::*;
use rust::RustWrite;
use std::io::{self, Write};
use util::Sep;
/// Base struct for various kinds of code generator. The flavor of
/// code generator is customized by supplying distinct types for `C`
/// (e.g., `self::ascent::RecursiveAscent`).
pub struct CodeGenerator<'codegen, 'grammar: 'codegen, W: Write + 'codegen, C> {
/// the complete grammar
pub grammar: &'grammar Grammar,
/// some suitable prefix to separate our identifiers from the user's
pub prefix: &'grammar str,
/// types from the grammar
pub types: &'grammar Types,
/// the start symbol S the user specified
pub user_start_symbol: NonterminalString,
/// the synthetic start symbol S' that we specified
pub start_symbol: NonterminalString,
/// the vector of states
pub states: &'codegen [LR1State<'grammar>],
/// where we write output
pub out: &'codegen mut RustWrite<W>,
/// where to find the action routines (typically `super`)
pub action_module: String,
    /// custom fields for the specific kind of code generator
/// (recursive ascent, table-driven, etc)
pub custom: C,
pub repeatable: bool,
}
impl<'codegen, 'grammar, W: Write, C> CodeGenerator<'codegen, 'grammar, W, C> {
pub fn new(
grammar: &'grammar Grammar,
user_start_symbol: NonterminalString,
start_symbol: NonterminalString,
states: &'codegen [LR1State<'grammar>],
out: &'codegen mut RustWrite<W>,
repeatable: bool,
action_module: &str,
custom: C,
) -> Self {
CodeGenerator {
grammar: grammar,
prefix: &grammar.prefix,
types: &grammar.types,
states: states,
user_start_symbol: user_start_symbol,
start_symbol: start_symbol,
out: out,
custom: custom,
repeatable: repeatable,
action_module: action_module.to_string(),
}
}
pub fn write_parse_mod<F>(&mut self, body: F) -> io::Result<()>
where
F: FnOnce(&mut Self) -> io::Result<()>,
{
rust!(self.out, "");
rust!(self.out, "#[cfg_attr(rustfmt, rustfmt_skip)]");
rust!(self.out, "mod {}parse{} {{", self.prefix, self.start_symbol);
// these stylistic lints are annoying for the generated code,
// which doesn't follow conventions:
rust!(
self.out,
"#![allow(non_snake_case, non_camel_case_types, unused_mut, unused_variables, \
unused_imports, unused_parens)]"
);
rust!(self.out, "");
try!(self.write_uses());
try!(body(self));
rust!(self.out, "}}");
Ok(())
}
pub fn write_uses(&mut self) -> io::Result<()> {
try!(
self.out
.write_uses(&format!("{}::", self.action_module), &self.grammar)
);
if self.grammar.intern_token.is_some() {
rust!(
self.out,
"use {}::{}intern_token::Token;",
self.action_module,
self.prefix
);
} else {
rust!(
self.out,
"use {}::{}ToTriple;",
self.action_module,
self.prefix
);
}
Ok(())
}
pub fn start_parser_fn(&mut self) -> io::Result<()> {
let error_type = self.types.error_type();
let parse_error_type = self.types.parse_error_type();
let (type_parameters, parameters, mut where_clauses);
let intern_token = self.grammar.intern_token.is_some();
if intern_token {
// if we are generating the tokenizer, we just need the
// input, and that has already been added as one of the
// user parameters
type_parameters = vec![];
parameters = vec![];
where_clauses = vec![];
} else {
// otherwise, we need an iterator of type `TOKENS`
let mut user_type_parameters = String::new();
for type_parameter in &self.grammar.type_parameters {
user_type_parameters.push_str(&format!("{}, ", type_parameter));
}
type_parameters = vec![
format!(
"{}TOKEN: {}ToTriple<{}Error={}>",
self.prefix, self.prefix, user_type_parameters, error_type
),
format!(
"{}TOKENS: IntoIterator<Item={}TOKEN>",
self.prefix, self.prefix
),
];
parameters = vec![format!("{}tokens0: {}TOKENS", self.prefix, self.prefix)];
where_clauses = vec![];
if self.repeatable {
where_clauses.push(format!("{}TOKENS: Clone", self.prefix));
}
}
rust!(
self.out,
"{}struct {}Parser {{",
self.grammar.nonterminals[&self.start_symbol].visibility,
self.user_start_symbol
);
if intern_token {
rust!(
self.out,
"builder: {1}::{0}intern_token::{0}MatcherBuilder,",
self.prefix,
self.action_module
);
}
rust!(self.out, "_priv: (),");
rust!(self.out, "}}");
rust!(self.out, "");
rust!(self.out, "impl {}Parser {{", self.user_start_symbol);
rust!(
self.out,
"{}fn new() -> {}Parser {{",
self.grammar.nonterminals[&self.start_symbol].visibility,
self.user_start_symbol
);
if intern_token {
rust!(
self.out,
"let {0}builder = {1}::{0}intern_token::{0}MatcherBuilder::new();",
self.prefix,
self.action_module
);
}
rust!(self.out, "{}Parser {{", self.user_start_symbol);
if intern_token {
rust!(self.out, "builder: {}builder,", self.prefix);
}
rust!(self.out, "_priv: (),");
rust!(self.out, "}}"); // Parser
rust!(self.out, "}}"); // new()
rust!(self.out, "");
rust!(self.out, "#[allow(dead_code)]");
try!(self.out.write_fn_header(
self.grammar,
&self.grammar.nonterminals[&self.start_symbol].visibility,
"parse".to_owned(),
type_parameters,
Some("&self".to_owned()),
parameters,
format!(
"Result<{}, {}>",
self.types.nonterminal_type(&self.start_symbol),
parse_error_type
),
where_clauses
));
rust!(self.out, "{{");
Ok(())
}
pub fn define_tokens(&mut self) -> io::Result<()> {
if self.grammar.intern_token.is_some() {
// if we are generating the tokenizer, create a matcher as our input iterator
rust!(
self.out,
"let mut {}tokens = self.builder.matcher(input);",
self.prefix
);
} else {
// otherwise, convert one from the `IntoIterator`
// supplied, using the `ToTriple` trait which inserts
// errors/locations etc if none are given
let clone_call = if self.repeatable { ".clone()" } else { "" };
rust!(
self.out,
"let {}tokens = {}tokens0{}.into_iter();",
self.prefix,
self.prefix,
clone_call
);
rust!(
self.out,
"let mut {}tokens = {}tokens.map(|t| {}ToTriple::to_triple(t));",
self.prefix,
self.prefix,
self.prefix
);
}
Ok(())
}
pub fn end_parser_fn(&mut self) -> io::Result<()> {
rust!(self.out, "}}"); // fn
rust!(self.out, "}}"); // impl
Ok(())
}
/// Returns phantom data type that captures the user-declared type
/// parameters in a phantom-data. This helps with ensuring that
/// all type parameters are constrained, even if they are not
/// used.
pub fn phantom_data_type(&self) -> String {
format!(
"::std::marker::PhantomData<({})>",
Sep(", ", &self.grammar.non_lifetime_type_parameters())
)
}
/// Returns expression that captures the user-declared type
/// parameters in a phantom-data. This helps with ensuring that
/// all type parameters are constrained, even if they are not
/// used.
pub fn phantom_data_expr(&self) -> String {
format!(
"::std::marker::PhantomData::<({})>",
Sep(", ", &self.grammar.non_lifetime_type_parameters())
)
}
}
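// Illustrative sketch (hypothetical, not generated code): why the
// phantom-data helpers above exist. A struct that names a type parameter
// only in its methods is rejected by rustc with E0392 ("parameter `T` is
// never used"); a `PhantomData<T>` field constrains `T` without storing
// a value of that type.
#[cfg(test)]
mod phantom_sketch {
    struct GeneratedParser<T> {
        _priv: (),
        // constrains `T` even though no real field uses it
        _phantom: ::std::marker::PhantomData<T>,
    }

    impl<T> GeneratedParser<T> {
        fn new() -> Self {
            GeneratedParser {
                _priv: (),
                _phantom: ::std::marker::PhantomData,
            }
        }
    }

    #[test]
    fn constructs() {
        let _parser: GeneratedParser<u32> = GeneratedParser::new();
    }
}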


@@ -0,0 +1,4 @@
pub mod ascent;
mod base;
pub mod parse_table;
pub mod test_all;

File diff suppressed because it is too large.


@@ -0,0 +1,147 @@
//! A compiler from an LR(1) table to a [recursive ascent] parser.
//!
//! [recursive ascent]: https://en.wikipedia.org/wiki/Recursive_ascent_parser
use grammar::repr::{Grammar, NonterminalString, TypeParameter};
use lr1::core::*;
use rust::RustWrite;
use std::io::{self, Write};
use util::Sep;
use super::base::CodeGenerator;
pub fn compile<'grammar, W: Write>(
grammar: &'grammar Grammar,
user_start_symbol: NonterminalString,
start_symbol: NonterminalString,
states: &[LR1State<'grammar>],
out: &mut RustWrite<W>,
) -> io::Result<()> {
let mut ascent =
CodeGenerator::new_test_all(grammar, user_start_symbol, start_symbol, states, out);
ascent.write()
}
struct TestAll;
impl<'ascent, 'grammar, W: Write> CodeGenerator<'ascent, 'grammar, W, TestAll> {
fn new_test_all(
grammar: &'grammar Grammar,
user_start_symbol: NonterminalString,
start_symbol: NonterminalString,
states: &'ascent [LR1State<'grammar>],
out: &'ascent mut RustWrite<W>,
) -> Self {
CodeGenerator::new(
grammar,
user_start_symbol,
start_symbol,
states,
out,
true,
"super",
TestAll,
)
}
fn write(&mut self) -> io::Result<()> {
self.write_parse_mod(|this| {
try!(this.write_parser_fn());
rust!(this.out, "#[cfg_attr(rustfmt, rustfmt_skip)]");
rust!(this.out, "mod {}ascent {{", this.prefix);
try!(super::ascent::compile(
this.grammar,
this.user_start_symbol.clone(),
this.start_symbol.clone(),
this.states,
"super::super::super",
this.out
));
let pub_use = format!(
"{}use self::{}parse{}::{}Parser;",
this.grammar.nonterminals[&this.user_start_symbol].visibility,
this.prefix,
this.start_symbol,
this.user_start_symbol
);
rust!(this.out, "{}", pub_use);
rust!(this.out, "}}");
rust!(this.out, "#[cfg_attr(rustfmt, rustfmt_skip)]");
rust!(this.out, "mod {}parse_table {{", this.prefix);
try!(super::parse_table::compile(
this.grammar,
this.user_start_symbol.clone(),
this.start_symbol.clone(),
this.states,
"super::super::super",
this.out
));
rust!(this.out, "{}", pub_use);
rust!(this.out, "}}");
Ok(())
})
}
fn write_parser_fn(&mut self) -> io::Result<()> {
try!(self.start_parser_fn());
if self.grammar.intern_token.is_some() {
rust!(self.out, "let _ = self.builder;");
}
// parse input using both methods:
try!(self.call_delegate("ascent"));
try!(self.call_delegate("parse_table"));
// check that result is the same either way:
rust!(
self.out,
"assert_eq!({}ascent, {}parse_table);",
self.prefix,
self.prefix
);
rust!(self.out, "return {}ascent;", self.prefix);
try!(self.end_parser_fn());
Ok(())
}
fn call_delegate(&mut self, delegate: &str) -> io::Result<()> {
let non_lifetimes: Vec<_> = self.grammar
.type_parameters
.iter()
.filter(|&tp| match *tp {
TypeParameter::Lifetime(_) => false,
TypeParameter::Id(_) => true,
})
.cloned()
.collect();
let parameters = if non_lifetimes.is_empty() {
String::new()
} else {
format!("::<{}>", Sep(", ", &non_lifetimes))
};
rust!(
self.out,
"let {}{} = {}{}::{}Parser::new().parse{}(",
self.prefix,
delegate,
self.prefix,
delegate,
self.user_start_symbol,
parameters
);
for parameter in &self.grammar.parameters {
rust!(self.out, "{},", parameter.name);
}
if self.grammar.intern_token.is_none() {
rust!(self.out, "{}tokens0.clone(),", self.prefix);
}
rust!(self.out, ");");
Ok(())
}
}


@@ -0,0 +1,333 @@
//! Core LR(1) types.
use collections::Map;
use grammar::repr::*;
use itertools::Itertools;
use std::fmt::{Debug, Display, Error, Formatter};
use std::rc::Rc;
use util::Prefix;
use super::lookahead::*;
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Item<'grammar, L: Lookahead> {
pub production: &'grammar Production,
/// the dot comes before `index`, so `index` would be 1 for X = A (*) B C
pub index: usize,
pub lookahead: L,
}
pub type LR0Item<'grammar> = Item<'grammar, Nil>;
pub type LR1Item<'grammar> = Item<'grammar, TokenSet>;
impl<'grammar> Item<'grammar, Nil> {
pub fn lr0(production: &'grammar Production, index: usize) -> Self {
Item {
production: production,
index: index,
lookahead: Nil,
}
}
}
impl<'grammar, L: Lookahead> Item<'grammar, L> {
pub fn with_lookahead<L1: Lookahead>(&self, l: L1) -> Item<'grammar, L1> {
Item {
production: self.production,
index: self.index,
lookahead: l,
}
}
pub fn prefix(&self) -> &'grammar [Symbol] {
&self.production.symbols[..self.index]
}
pub fn symbol_sets(&self) -> SymbolSets<'grammar> {
let symbols = &self.production.symbols;
if self.can_shift() {
SymbolSets {
prefix: &symbols[..self.index],
cursor: Some(&symbols[self.index]),
suffix: &symbols[self.index + 1..],
}
} else {
SymbolSets {
prefix: &symbols[..self.index],
cursor: None,
suffix: &[],
}
}
}
pub fn to_lr0(&self) -> LR0Item<'grammar> {
Item {
production: self.production,
index: self.index,
lookahead: Nil,
}
}
pub fn can_shift(&self) -> bool {
self.index < self.production.symbols.len()
}
pub fn can_shift_nonterminal(&self, nt: &NonterminalString) -> bool {
match self.shift_symbol() {
Some((Symbol::Nonterminal(shifted), _)) => shifted == *nt,
_ => false,
}
}
pub fn can_shift_terminal(&self, term: &TerminalString) -> bool {
match self.shift_symbol() {
Some((Symbol::Terminal(shifted), _)) => shifted == *term,
_ => false,
}
}
pub fn can_reduce(&self) -> bool {
self.index == self.production.symbols.len()
}
pub fn shifted_item(&self) -> Option<(Symbol, Item<'grammar, L>)> {
if self.can_shift() {
Some((
self.production.symbols[self.index].clone(),
Item {
production: self.production,
index: self.index + 1,
lookahead: self.lookahead.clone(),
},
))
} else {
None
}
}
pub fn shift_symbol(&self) -> Option<(Symbol, &[Symbol])> {
if self.can_shift() {
Some((
self.production.symbols[self.index].clone(),
&self.production.symbols[self.index + 1..],
))
} else {
None
}
}
}
#[derive(Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct StateIndex(pub usize);
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct Items<'grammar, L: Lookahead> {
pub vec: Rc<Vec<Item<'grammar, L>>>,
}
#[allow(dead_code)]
pub type LR0Items<'grammar> = Items<'grammar, Nil>;
#[allow(dead_code)]
pub type LR1Items<'grammar> = Items<'grammar, TokenSet>;
#[derive(Clone, Debug)]
pub struct State<'grammar, L: Lookahead> {
pub index: StateIndex,
pub items: Items<'grammar, L>,
pub shifts: Map<TerminalString, StateIndex>,
pub reductions: Vec<(L, &'grammar Production)>,
pub gotos: Map<NonterminalString, StateIndex>,
}
pub type LR0State<'grammar> = State<'grammar, Nil>;
pub type LR1State<'grammar> = State<'grammar, TokenSet>;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Action<'grammar> {
Shift(TerminalString, StateIndex),
Reduce(&'grammar Production),
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Conflict<'grammar, L> {
// when in this state...
pub state: StateIndex,
// with the following lookahead...
pub lookahead: L,
// we can reduce...
pub production: &'grammar Production,
// but we can also...
pub action: Action<'grammar>,
}
#[allow(dead_code)]
pub type LR0Conflict<'grammar> = Conflict<'grammar, Nil>;
pub type LR1Conflict<'grammar> = Conflict<'grammar, TokenSet>;
#[derive(Debug)]
pub struct TableConstructionError<'grammar, L: Lookahead> {
// LR(1) state set, possibly incomplete if construction is
// configured to terminate early.
pub states: Vec<State<'grammar, L>>,
// Conflicts (non-empty) found in those states.
pub conflicts: Vec<Conflict<'grammar, L>>,
}
pub type LR0TableConstructionError<'grammar> = TableConstructionError<'grammar, Nil>;
pub type LR1TableConstructionError<'grammar> = TableConstructionError<'grammar, TokenSet>;
pub type LRResult<'grammar, L> =
Result<Vec<State<'grammar, L>>, TableConstructionError<'grammar, L>>;
pub type LR1Result<'grammar> = LRResult<'grammar, TokenSet>;
impl<'grammar, L: Lookahead> Debug for Item<'grammar, L> {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
try!(write!(
fmt,
"{} ={} (*){}",
self.production.nonterminal,
Prefix(" ", &self.production.symbols[..self.index]),
Prefix(" ", &self.production.symbols[self.index..])
));
self.lookahead.fmt_as_item_suffix(fmt)
}
}
impl Display for Token {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
match *self {
Token::EOF => write!(fmt, "EOF"),
Token::Error => write!(fmt, "Error"),
Token::Terminal(ref s) => write!(fmt, "{}", s),
}
}
}
impl Debug for Token {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "{}", self)
}
}
impl Debug for StateIndex {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "S{}", self.0)
}
}
impl Display for StateIndex {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
write!(fmt, "{}", self.0)
}
}
impl<'grammar, L: Lookahead> State<'grammar, L> {
/// Returns the set of symbols which must appear on the stack to
/// be in this state. This is the *maximum* prefix of any item,
/// basically.
pub fn max_prefix(&self) -> &'grammar [Symbol] {
// Each state fn takes as argument the longest prefix of any
// item. Note that all items must have compatible prefixes.
let prefix = self.items
.vec
.iter()
.map(|item| item.prefix())
.max_by_key(|symbols| symbols.len())
.unwrap();
debug_assert!(
self.items
.vec
.iter()
.all(|item| prefix.ends_with(&item.production.symbols[..item.index]))
);
prefix
}
/// Returns the set of symbols from the stack that must be popped
/// for this state to return. If we have a state like:
///
/// ```
/// X = A B C (*) C
/// Y = B C (*) C
/// C = (*) ...
/// ```
///
/// This would return `[B, C]`. For every state other than the
/// start state, this will return a list of length at least 1.
/// For the start state, returns `[]`.
pub fn will_pop(&self) -> &'grammar [Symbol] {
let prefix = self.items
.vec
.iter()
.filter(|item| item.index > 0)
.map(|item| item.prefix())
.min_by_key(|symbols| symbols.len())
.unwrap_or(&[]);
debug_assert!(
self.items
.vec
.iter()
.filter(|item| item.index > 0)
.all(|item| item.prefix().ends_with(prefix))
);
prefix
}
pub fn will_push(&self) -> &[Symbol] {
self.items
.vec
.iter()
.filter(|item| item.index > 0)
.map(|item| &item.production.symbols[item.index..])
.min_by_key(|symbols| symbols.len())
.unwrap_or(&[])
}
/// Returns the type of nonterminal that this state will produce;
/// if `None` is returned, then this state may produce more than
/// one kind of nonterminal.
///
/// FIXME -- currently, the start state returns `None` instead of
/// the goal symbol.
pub fn will_produce(&self) -> Option<NonterminalString> {
let mut returnable_nonterminals: Vec<_> = self.items
.vec
.iter()
.filter(|item| item.index > 0)
.map(|item| item.production.nonterminal.clone())
.dedup()
.collect();
if returnable_nonterminals.len() == 1 {
returnable_nonterminals.pop()
} else {
None
}
}
}
/// `A = B C (*) D E F` or `A = B C (*)`
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct SymbolSets<'grammar> {
pub prefix: &'grammar [Symbol], // both cases, [B, C]
pub cursor: Option<&'grammar Symbol>, // first [D], second []
pub suffix: &'grammar [Symbol], // first [E, F], second []
}
impl<'grammar> SymbolSets<'grammar> {
pub fn new() -> Self {
SymbolSets {
prefix: &[],
cursor: None,
suffix: &[],
}
}
}


@@ -0,0 +1,839 @@
//! Error reporting. For now very stupid and simplistic.
use collections::{set, Set};
use lr1::trace::Tracer;
use lr1::core::*;
use lr1::example::{Example, ExampleStyles, ExampleSymbol};
use lr1::first::FirstSets;
use lr1::lookahead::{Token, TokenSet};
use itertools::Itertools;
use grammar::repr::*;
use message::Message;
use message::builder::{BodyCharacter, Builder, Character, MessageBuilder};
use tls::Tls;
#[cfg(test)]
mod test;
pub fn report_error(grammar: &Grammar, error: &LR1TableConstructionError) -> Vec<Message> {
let mut cx = ErrorReportingCx::new(grammar, &error.states, &error.conflicts);
cx.report_errors()
}
struct ErrorReportingCx<'cx, 'grammar: 'cx> {
grammar: &'grammar Grammar,
first_sets: FirstSets,
states: &'cx [LR1State<'grammar>],
conflicts: &'cx [LR1Conflict<'grammar>],
}
#[derive(Debug)]
enum ConflictClassification {
/// The grammar is ambiguous. This means we have two examples of
/// precisely the same set of symbols which can be reduced in two
/// distinct ways.
Ambiguity { action: Example, reduce: Example },
/// The grammar is ambiguous, and moreover it looks like a
/// precedence error. This means that the reduction is to a
/// nonterminal `T` and the shift is some symbol sandwiched
/// between two instances of `T`.
Precedence {
shift: Example,
reduce: Example,
nonterminal: NonterminalString,
},
/// Suggest inlining `nonterminal`. Makes sense if there are two
/// levels in the reduction tree in both examples, and the suffix
/// after the inner reduction is the same in all cases.
SuggestInline {
shift: Example,
reduce: Example,
nonterminal: NonterminalString,
},
/// Like the previous, but suggest replacing `nonterminal` with
/// `symbol?`. Makes sense if the thing to be inlined consists of
/// two alternatives, `X = symbol | ()`.
SuggestQuestion {
shift: Example,
reduce: Example,
nonterminal: NonterminalString,
symbol: Symbol,
},
/// Can't say much beyond that a conflict occurred.
InsufficientLookahead { action: Example, reduce: Example },
/// Really can't say *ANYTHING*.
Naive,
}
type TokenConflict<'grammar> = Conflict<'grammar, Token>;
impl<'cx, 'grammar> ErrorReportingCx<'cx, 'grammar> {
fn new(
grammar: &'grammar Grammar,
states: &'cx [LR1State<'grammar>],
conflicts: &'cx [LR1Conflict<'grammar>],
) -> Self {
ErrorReportingCx {
grammar: grammar,
first_sets: FirstSets::new(grammar),
states: states,
conflicts: conflicts,
}
}
fn report_errors(&mut self) -> Vec<Message> {
token_conflicts(self.conflicts)
.iter()
.map(|conflict| self.report_error(conflict))
.collect()
}
fn report_error(&mut self, conflict: &TokenConflict<'grammar>) -> Message {
match self.classify(conflict) {
ConflictClassification::Ambiguity { action, reduce } => {
self.report_error_ambiguity(conflict, action, reduce)
}
ConflictClassification::Precedence {
shift,
reduce,
nonterminal,
} => self.report_error_precedence(conflict, shift, reduce, nonterminal),
ConflictClassification::SuggestInline {
shift,
reduce,
nonterminal,
} => self.report_error_suggest_inline(conflict, shift, reduce, nonterminal),
ConflictClassification::SuggestQuestion {
shift,
reduce,
nonterminal,
symbol,
} => self.report_error_suggest_question(conflict, shift, reduce, nonterminal, symbol),
ConflictClassification::InsufficientLookahead { action, reduce } => {
self.report_error_insufficient_lookahead(conflict, action, reduce)
}
ConflictClassification::Naive => self.report_error_naive(conflict),
}
}
fn report_error_ambiguity_core(
&self,
conflict: &TokenConflict<'grammar>,
shift: Example,
reduce: Example,
) -> Builder<BodyCharacter> {
let styles = ExampleStyles::ambig();
MessageBuilder::new(conflict.production.span)
.heading()
.text("Ambiguous grammar detected")
.end()
.body()
.begin_lines()
.wrap_text("The following symbols can be reduced in two ways:")
.push(reduce.to_symbol_list(reduce.symbols.len(), styles))
.end()
.begin_lines()
.wrap_text("They could be reduced like so:")
.push(reduce.into_picture(styles))
.end()
.begin_lines()
.wrap_text("Alternatively, they could be reduced like so:")
.push(shift.into_picture(styles))
.end()
}
fn report_error_ambiguity(
&self,
conflict: &TokenConflict<'grammar>,
shift: Example,
reduce: Example,
) -> Message {
self.report_error_ambiguity_core(conflict, shift, reduce)
.wrap_text(
"LALRPOP does not yet support ambiguous grammars. \
See the LALRPOP manual for advice on \
making your grammar unambiguous.",
)
.end()
.end()
}
fn report_error_precedence(
&self,
conflict: &TokenConflict<'grammar>,
shift: Example,
reduce: Example,
nonterminal: NonterminalString,
) -> Message {
self.report_error_ambiguity_core(conflict, shift, reduce)
.begin_wrap()
.text("Hint:")
.styled(Tls::session().hint_text)
.text("This looks like a precedence error related to")
.push(nonterminal)
.verbatimed()
.punctuated(".")
.text("See the LALRPOP manual for advice on encoding precedence.")
.end()
.end()
.end()
}
fn report_error_not_lr1_core(
&self,
conflict: &TokenConflict<'grammar>,
action: Example,
reduce: Example,
) -> Builder<BodyCharacter> {
let styles = ExampleStyles::new();
let builder = MessageBuilder::new(conflict.production.span)
.heading()
.text("Local ambiguity detected")
.end()
.body();
let builder = builder
.begin_lines()
.begin_wrap()
.text("The problem arises after having observed the following symbols")
.text("in the input:")
.end()
.push(if action.cursor >= reduce.cursor {
action.to_symbol_list(action.cursor, styles)
} else {
reduce.to_symbol_list(reduce.cursor, styles)
})
.begin_wrap();
let builder = match conflict.lookahead {
Token::Terminal(ref term) => builder
.text("At that point, if the next token is a")
.push(term.clone())
.verbatimed()
.styled(Tls::session().cursor_symbol)
.punctuated(","),
Token::Error => builder.text("If an error has been found,"),
Token::EOF => builder.text("If the end of the input is reached,"),
};
let builder = builder
.text("then the parser can proceed in two different ways.")
.end()
.end();
let builder = self.describe_reduce(builder, styles, conflict.production, reduce, "First");
match conflict.action {
Action::Shift(ref lookahead, _) => {
self.describe_shift(builder, styles, lookahead.clone(), action, "Alternatively")
}
Action::Reduce(production) => {
self.describe_reduce(builder, styles, production, action, "Alternatively")
}
}
}
fn describe_shift<C: Character>(
&self,
builder: Builder<C>,
styles: ExampleStyles,
lookahead: TerminalString,
example: Example,
intro_word: &str,
) -> Builder<C> {
// A shift example looks like:
//
// ...p1 ...p2 (*) L ...s2 ...s1
// |     |               |     |
// |     +-NT1-----------+     |
// |                           |
// |            ...            |
// |                           |
// +-NT2-----------------------+
let nt1 = example.reductions[0].nonterminal.clone();
builder
.begin_lines()
.begin_wrap()
.text(intro_word)
.punctuated(",")
.text("the parser could shift the")
.push(lookahead)
.verbatimed()
.text("token and later use it to construct a")
.push(nt1)
.verbatimed()
.punctuated(".")
.text("This might then yield a parse tree like")
.end()
.push(example.into_picture(styles))
.end()
}
fn describe_reduce<C: Character>(
&self,
builder: Builder<C>,
styles: ExampleStyles,
production: &Production,
example: Example,
intro_word: &str,
) -> Builder<C> {
builder
.begin_lines()
.begin_wrap()
.text(intro_word)
.punctuated(",")
.text("the parser could execute the production at")
.push(production.span)
.punctuated(",")
.text("which would consume the top")
.text(production.symbols.len())
.text("token(s) from the stack")
.text("and produce a")
.push(production.nonterminal.clone())
.verbatimed()
.punctuated(".")
.text("This might then yield a parse tree like")
.end()
.push(example.into_picture(styles))
.end()
}
fn report_error_suggest_inline(
&self,
conflict: &TokenConflict<'grammar>,
shift: Example,
reduce: Example,
nonterminal: NonterminalString,
) -> Message {
let builder = self.report_error_not_lr1_core(conflict, shift, reduce);
builder
.begin_wrap()
.text("Hint:")
.styled(Tls::session().hint_text)
.text("It appears you could resolve this problem by adding")
.text("the annotation `#[inline]` to the definition of")
.push(nonterminal)
.verbatimed()
.punctuated(".")
.text("For more information, see the section on inlining")
.text("in the LALRPOP manual.")
.end()
.end()
.end()
}
fn report_error_suggest_question(
&self,
conflict: &TokenConflict<'grammar>,
shift: Example,
reduce: Example,
nonterminal: NonterminalString,
symbol: Symbol,
) -> Message {
let builder = self.report_error_not_lr1_core(conflict, shift, reduce);
builder
.begin_wrap()
.text("Hint:")
.styled(Tls::session().hint_text)
.text("It appears you could resolve this problem by replacing")
.text("uses of")
.push(nonterminal.clone())
.verbatimed()
.text("with")
.text(symbol) // intentionally disable coloring here, looks better
.adjacent_text("`", "?`")
.text("(or, alternatively, by adding the annotation `#[inline]` \
to the definition of")
.push(nonterminal)
.punctuated(").")
.text("For more information, see the section on inlining")
.text("in the LALROP manual.")
.end()
.end()
.end()
}
fn report_error_insufficient_lookahead(
&self,
conflict: &TokenConflict<'grammar>,
action: Example,
reduce: Example,
) -> Message {
// The reduce example will look something like:
//
//
// ...p1 ...p2 (*) L ...s2 ...s1
// |     |               |     |
// |     +-NT1-----------+     |
// |     |               |     |
// |     +-...-----------+     |
// |     |               |     |
// |     +-NTn-----------+     |
// |                           |
// +-NTn+1---------------------+
//
// To solve the conflict, essentially, the user needs to
// modify the grammar so that `NTn` does not appear with `L`
// in its follow-set. How to guide them in this?
let builder = self.report_error_not_lr1_core(conflict, action, reduce);
builder
.wrap_text(
"See the LALRPOP manual for advice on \
making your grammar LR(1).",
)
.end()
.end()
}
/// Naive error reporting. This is a fallback path which (I think)
/// never actually executes.
fn report_error_naive(&self, conflict: &TokenConflict<'grammar>) -> Message {
let mut builder = MessageBuilder::new(conflict.production.span)
.heading()
.text("Conflict detected")
.end()
.body()
.begin_lines()
.wrap_text("when in this state:")
.indented();
for item in self.states[conflict.state.0].items.vec.iter() {
builder = builder.text(format!("{:?}", item));
}
let mut builder = builder
.end()
.begin_wrap()
.text(format!("and looking at a token `{:?}`", conflict.lookahead))
.text("we can reduce to a")
.push(conflict.production.nonterminal.clone())
.verbatimed();
builder = match conflict.action {
Action::Shift(..) => builder.text("but we can also shift"),
Action::Reduce(prod) => builder
.text("but we can also reduce to a")
.text(prod.nonterminal.clone())
.verbatimed(),
};
builder.end().end().end()
}
fn classify(&mut self, conflict: &TokenConflict<'grammar>) -> ConflictClassification {
// Find examples from the conflicting action (either a shift
// or a reduce).
let mut action_examples = match conflict.action {
Action::Shift(..) => self.shift_examples(conflict),
Action::Reduce(production) => {
self.reduce_examples(conflict.state, production, conflict.lookahead.clone())
}
};
// Find examples from the conflicting reduce.
let mut reduce_examples = self.reduce_examples(
conflict.state,
conflict.production,
conflict.lookahead.clone(),
);
// Prefer shorter examples to longer ones.
action_examples.sort_by(|e, f| e.symbols.len().cmp(&f.symbols.len()));
reduce_examples.sort_by(|e, f| e.symbols.len().cmp(&f.symbols.len()));
// This really shouldn't happen, but if we've failed to come
// up with examples, then report a "naive" error.
if action_examples.is_empty() || reduce_examples.is_empty() {
return ConflictClassification::Naive;
}
if let Some(classification) =
self.try_classify_ambiguity(conflict, &action_examples, &reduce_examples)
{
return classification;
}
if let Some(classification) =
self.try_classify_question(conflict, &action_examples, &reduce_examples)
{
return classification;
}
if let Some(classification) =
self.try_classify_inline(conflict, &action_examples, &reduce_examples)
{
return classification;
}
// Give up. Just grab an example from each and pair them up.
// If there aren't even two examples, something's pretty
// bogus, but we'll just call it naive.
action_examples
.into_iter()
.zip(reduce_examples)
.next()
.map(
|(action, reduce)| ConflictClassification::InsufficientLookahead {
action: action,
reduce: reduce,
},
)
.unwrap_or(ConflictClassification::Naive)
}
fn try_classify_ambiguity(
&self,
conflict: &TokenConflict<'grammar>,
action_examples: &[Example],
reduce_examples: &[Example],
) -> Option<ConflictClassification> {
action_examples
.iter()
.cartesian_product(reduce_examples)
.filter(|&(action, reduce)| action.symbols == reduce.symbols)
.filter(|&(action, reduce)| action.cursor == reduce.cursor)
.map(|(action, reduce)| {
// Consider whether to call this a precedence
// error. We do this if we are stuck between reducing
// `T = T S T` and shifting `S`.
if let Action::Shift(ref term, _) = conflict.action {
let nt = &conflict.production.nonterminal;
if conflict.production.symbols.len() == 3
&& conflict.production.symbols[0] == Symbol::Nonterminal(nt.clone())
&& conflict.production.symbols[1] == Symbol::Terminal(term.clone())
&& conflict.production.symbols[2] == Symbol::Nonterminal(nt.clone())
{
return ConflictClassification::Precedence {
shift: action.clone(),
reduce: reduce.clone(),
nonterminal: nt.clone(),
};
}
}
ConflictClassification::Ambiguity {
action: action.clone(),
reduce: reduce.clone(),
}
})
.next()
}
fn try_classify_question(
&self,
conflict: &TokenConflict<'grammar>,
action_examples: &[Example],
reduce_examples: &[Example],
) -> Option<ConflictClassification> {
// If we get a shift/reduce conflict and the reduce
// is of a nonterminal like:
//
// T = { () | U }
//
// then suggest replacing T with U?. I'm being a bit lenient
// here since I do not KNOW that it will help, but it often
// does, and it's better style anyhow.
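// For instance (echoing the `suggest_question_conflict` test in the
// test module), the nonterminal
//
//     OPT_L: () = {
//         (),
//         "L"
//     };
//
// has exactly this shape, so we would suggest writing `"L"?` in place
// of `OPT_L`.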
if let Action::Reduce(_) = conflict.action {
return None;
}
debug!(
"try_classify_question: action_examples={:?}",
action_examples
);
debug!(
"try_classify_question: reduce_examples={:?}",
reduce_examples
);
let nt = &conflict.production.nonterminal;
let nt_productions = self.grammar.productions_for(nt);
if nt_productions.len() == 2 {
for &(i, j) in &[(0, 1), (1, 0)] {
if nt_productions[i].symbols.is_empty() && nt_productions[j].symbols.len() == 1 {
return Some(ConflictClassification::SuggestQuestion {
shift: action_examples[0].clone(),
reduce: reduce_examples[0].clone(),
nonterminal: nt.clone(),
symbol: nt_productions[j].symbols[0].clone(),
});
}
}
}
None
}
fn try_classify_inline(
&self,
conflict: &TokenConflict<'grammar>,
action_examples: &[Example],
reduce_examples: &[Example],
) -> Option<ConflictClassification> {
// Inlining can help resolve a shift/reduce conflict because
// it defers the need to reduce. In particular, if we inlined
// all the reductions up until the last one, then we would be
// able to *shift* the lookahead instead of having to reduce.
// This can be helpful if we can see that shifting would let
// us delay reducing until the lookahead diverges.
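// A hedged illustration (echoing the `suggest_inline_conflict` test in
// the test module): inlining `Path` into `ImportDecl` lets the parser
// shift the `"."` after a `Path` and decide one token later whether it
// is followed by `"*"` or by an `Ident`, instead of committing to a
// reduction of `Path` first.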
// Only applicable to shift/reduce:
if let Action::Reduce(_) = conflict.action {
return None;
}
// FIXME: The logic here finds the first example where inline
// would help; but maybe we want to restrict it to cases
// where inlining would help *all* the examples...?
action_examples
.iter()
.cartesian_product(reduce_examples)
.filter_map(|(shift, reduce)| {
if self.try_classify_inline_example(shift, reduce) {
let nt = &reduce.reductions[0].nonterminal;
Some(ConflictClassification::SuggestInline {
shift: shift.clone(),
reduce: reduce.clone(),
nonterminal: nt.clone(),
})
} else {
None
}
})
.next()
}
fn try_classify_inline_example<'ex>(&self, shift: &Example, reduce: &Example) -> bool {
debug!("try_classify_inline_example({:?}, {:?})", shift, reduce);
// In the case of shift, the example will look like
//
// ```
// ... ... (*) L ...s1 ...
// |   |             |   |
// |   +-R0----------+   |
// |        ...          |
// +-Rn------------------+
// ```
//
// We want to extract the symbols ...s1: these are the
// things we are able to shift before being forced to
// make our next hard decision (to reduce R0 or not).
let shift_upcoming = &shift.symbols[shift.cursor + 1..shift.reductions[0].end];
debug!(
"try_classify_inline_example: shift_upcoming={:?}",
shift_upcoming
);
// For the reduce, the example might look like
//
// ```
// ... ... (*)   ...s  ...
// |   |   |         |   |
// |   |   +-R0-+        |
// |   |     ...     |   |
// |   +--Ri---------+   |
// |         ...         |
// +-R(i+1)--------------+
// ```
//
// where Ri is the last reduction that requires
// shifting no additional symbols. In this case, if we
// inlined R0...Ri, then we know we can shift L.
let r0_end = reduce.reductions[0].end;
let i = reduce.reductions.iter().position(|r| r.end != r0_end);
let i = match i {
Some(v) => v,
None => return false,
};
let ri = &reduce.reductions[i];
let reduce_upcoming = &reduce.symbols[r0_end..ri.end];
debug!(
"try_classify_inline_example: reduce_upcoming={:?} i={:?}",
reduce_upcoming, i
);
// For now, we only suggest inlining a single nonterminal,
// mostly because I am too lazy to tweak the suggestion struct
// and error messages (but the rest of the code below doesn't
// make this assumption for the most part).
if i != 1 {
return false;
}
// Make sure that all the things we are suggesting inlining
// are distinct so that we are not introducing a cycle.
let mut duplicates = set();
if reduce.reductions[0..i + 1]
.iter()
.any(|r| !duplicates.insert(r.nonterminal.clone()))
{
return false;
}
// Compare the two suffixes to see whether they
// diverge at some point.
shift_upcoming
.iter()
.zip(reduce_upcoming)
.filter_map(|(shift_sym, reduce_sym)| match (shift_sym, reduce_sym) {
(&ExampleSymbol::Symbol(ref shift_sym), &ExampleSymbol::Symbol(ref reduce_sym)) => {
if shift_sym == reduce_sym {
// same symbol on both; we'll be able to shift them
None
} else {
// different symbols: for this to work, must
// have disjoint first sets. Note that we
// consider a suffix matching epsilon to be
// potentially overlapping, though we could
// supply the actual lookahead for more precision.
let shift_first = self.first_sets.first0(&[shift_sym.clone()]);
let reduce_first = self.first_sets.first0(&[reduce_sym.clone()]);
if shift_first.is_disjoint(&reduce_first) {
Some(true)
} else {
Some(false)
}
}
}
_ => {
// We don't expect to encounter any
// epsilons here, since those only
// occur with an empty reduce at the
// top level.
Some(false)
}
})
.next()
.unwrap_or(false)
}
fn shift_examples(&self, conflict: &TokenConflict<'grammar>) -> Vec<Example> {
log!(Tls::session(), Verbose, "Gathering shift examples");
let state = &self.states[conflict.state.0];
let conflicting_items = self.conflicting_shift_items(state, conflict);
conflicting_items
.into_iter()
.flat_map(|item| {
let tracer = Tracer::new(&self.first_sets, self.states);
let shift_trace = tracer.backtrace_shift(conflict.state, item);
let local_examples: Vec<Example> = shift_trace.lr0_examples(item).collect();
local_examples
})
.collect()
}
fn reduce_examples(
&self,
state: StateIndex,
production: &'grammar Production,
lookahead: Token,
) -> Vec<Example> {
log!(Tls::session(), Verbose, "Gathering reduce examples");
let item = Item {
production: production,
index: production.symbols.len(),
lookahead: TokenSet::from(lookahead),
};
let tracer = Tracer::new(&self.first_sets, self.states);
let reduce_trace = tracer.backtrace_reduce(state, item.to_lr0());
reduce_trace.lr1_examples(&self.first_sets, &item).collect()
}
fn conflicting_shift_items(
&self,
state: &LR1State<'grammar>,
conflict: &TokenConflict<'grammar>,
) -> Set<LR0Item<'grammar>> {
// Lookahead must be a terminal, not EOF.
// Find an item J like `Bar = ... (*) L ...`.
let lookahead = Symbol::Terminal(conflict.lookahead.unwrap_terminal().clone());
state
.items
.vec
.iter()
.filter(|i| i.can_shift())
.filter(|i| i.production.symbols[i.index] == lookahead)
.map(|i| i.to_lr0())
.collect()
}
}
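/// Expands each conflict, whose lookahead is a full `TokenSet`, into
/// one `TokenConflict` per lookahead token, so that conflicts can be
/// reported token by token.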
fn token_conflicts<'grammar>(
conflicts: &[Conflict<'grammar, TokenSet>],
) -> Vec<TokenConflict<'grammar>> {
conflicts
.iter()
.flat_map(|conflict| {
conflict.lookahead.iter().map(move |token| Conflict {
state: conflict.state,
lookahead: token,
production: conflict.production,
action: conflict.action.clone(),
})
})
.collect()
}
//fn choose_example<'grammar>(states: &[State<'grammar>],
// lookahead: Token,
// conflict: &TokenConflict<'grammar>)
//{
// // Whenever we have a conflict in state S, there is always:
// // - a given lookahead L that permits some reduction, due to
// // an item I like `Foo = ... (*) [L]`
// // - another action that conflicts with R1.
// //
// // The backtrace code can give context to this item `I`, but the
// // problem is that it often results in many different contexts,
// // and we need to try and narrow those down to the one that will
// // help the user understand the problem.
// //
// // For that, we turn to the conflicting action, which can either be
// // a shift or reduce. Let's consider those two cases.
// //
// // ### Shift
// //
// // If the conflicting action is a shift, then there is at least
// // one item J in the state S like `Bar = ... (*) L ...`. We can
// // produce a backtrace from J and enumerate examples. We want to
// // find a pair of examples from I and J that share a common
// // prefix.
// //
// // ### Reduce
// //
// // If the conflicting action is a reduce, then there is at least
// // one item J in S like `Bar = ... (*) [L]`. We can produce a
// // backtrace for J and then search for an example that shares a
// // common prefix.
//
//}
//
//fn conflicting_item<'grammar>(state: &State<'grammar>,
// lookahead: Token,
// conflict: &TokenConflict<'grammar>)
// -> Item<'grammar>
//{
// match conflict.action {
// Action::Shift(_) => {
// }
// Action::Reduce(production) => {
// // Must be at least some other item J in S like `Bar = ... (*) [L]`.
// state.items.vec.iter()
// .filter(|i| i.can_reduce())
// .filter(|i| i.lookahead == lookahead)
// .filter(|i| i.production == production)
// .cloned()
// .next()
// .unwrap()
// }
// }
//}

View File

@ -0,0 +1,187 @@
use string_cache::DefaultAtom as Atom;
use grammar::repr::*;
use lr1::build_states;
use lr1::tls::Lr1Tls;
use test_util::normalized_grammar;
use tls::Tls;
use super::{ConflictClassification, ErrorReportingCx};
fn nt(t: &str) -> NonterminalString {
NonterminalString(Atom::from(t))
}
#[test]
fn priority_conflict() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r#"
grammar;
pub Ty: () = {
"int" => (),
"bool" => (),
<t1:Ty> "->" <t2:Ty> => (),
};
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
let err = build_states(&grammar, nt("Ty")).unwrap_err();
let mut cx = ErrorReportingCx::new(&grammar, &err.states, &err.conflicts);
let conflicts = super::token_conflicts(&err.conflicts);
let conflict = &conflicts[0];
println!("conflict={:?}", conflict);
match cx.classify(conflict) {
ConflictClassification::Precedence {
shift,
reduce,
nonterminal,
} => {
println!(
"shift={:#?}, reduce={:#?}, nonterminal={:?}",
shift, reduce, nonterminal
);
assert_eq!(shift.symbols.len(), 5); // Ty "->" Ty "->" Ty
assert_eq!(shift.cursor, 3); // Ty "->" Ty (*) "->" Ty
assert_eq!(shift.symbols, reduce.symbols);
assert_eq!(shift.cursor, reduce.cursor);
assert_eq!(nonterminal, nt("Ty"));
}
r => panic!("wrong classification {:#?}", r),
}
}
#[test]
fn expr_braced_conflict() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r#"
grammar;
pub Expr: () = {
"Id" => (),
"Id" "{" "}" => (),
"Expr" "+" "Id" => (),
"if" Expr "{" "}" => (),
};
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
let err = build_states(&grammar, nt("Expr")).unwrap_err();
let mut cx = ErrorReportingCx::new(&grammar, &err.states, &err.conflicts);
let conflicts = super::token_conflicts(&err.conflicts);
let conflict = &conflicts[0];
println!("conflict={:?}", conflict);
match cx.classify(conflict) {
ConflictClassification::InsufficientLookahead { .. } => {}
r => panic!("wrong classification {:#?}", r),
}
}
#[test]
fn suggest_question_conflict() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r#"
grammar;
pub E: () = {
"L",
"&" OPT_L E
};
OPT_L: () = {
(),
"L"
};
"#,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
let err = build_states(&grammar, nt("E")).unwrap_err();
let mut cx = ErrorReportingCx::new(&grammar, &err.states, &err.conflicts);
let conflicts = super::token_conflicts(&err.conflicts);
let conflict = &conflicts[0];
println!("conflict={:?}", conflict);
match cx.classify(conflict) {
ConflictClassification::SuggestQuestion {
shift: _,
reduce: _,
nonterminal,
symbol,
} => {
assert_eq!(nonterminal, nt("OPT_L"));
assert_eq!(
symbol,
Symbol::Terminal(TerminalString::quoted(Atom::from("L")))
);
}
r => panic!("wrong classification {:#?}", r),
}
}
#[test]
fn suggest_inline_conflict() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r##"
grammar;
pub ImportDecl: () = {
"import" <Path> ";" => (),
"import" <Path> "." "*" ";" => (),
};
Path: () = {
<head: Ident> <tail: ("." <Ident>)*> => ()
};
Ident = r#"[a-zA-Z][a-zA-Z0-9]*"#;
"##,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
let err = build_states(&grammar, nt("ImportDecl")).unwrap_err();
let mut cx = ErrorReportingCx::new(&grammar, &err.states, &err.conflicts);
let conflicts = super::token_conflicts(&err.conflicts);
let conflict = &conflicts[0];
println!("conflict={:?}", conflict);
match cx.classify(conflict) {
ConflictClassification::SuggestInline {
shift: _,
reduce: _,
nonterminal,
} => {
assert_eq!(nonterminal, nt("Path"));
}
r => panic!("wrong classification {:#?}", r),
}
}
/// This example used to cause an out-of-bounds error.
#[test]
fn issue_249() {
let _tls = Tls::test();
let grammar = normalized_grammar(
r##"
grammar;
pub Func = StructDecl* VarDecl*;
StructDecl = "<" StructParameter* ">";
StructParameter = "may_dangle"?;
VarDecl = "let";
"##,
);
let _lr1_tls = Lr1Tls::install(grammar.terminals.clone());
let err = build_states(&grammar, nt("Func")).unwrap_err();
let mut cx = ErrorReportingCx::new(&grammar, &err.states, &err.conflicts);
let conflicts = super::token_conflicts(&err.conflicts);
for conflict in &conflicts {
println!("conflict={:?}", conflict);
cx.classify(conflict);
}
}

View File

@ -0,0 +1,409 @@
//! Code to compute example inputs given a backtrace.
use ascii_canvas::AsciiView;
use message::Content;
use message::builder::InlineBuilder;
use grammar::repr::*;
use std::fmt::{Debug, Error, Formatter};
use style::Style;
use tls::Tls;
#[cfg(test)]
mod test;
/// An "example" input and the way it was derived. This can be
/// serialized into useful text. For example, it might represent
/// something like this:
///
/// ```
///        Looking at
///            |
///            v
/// Ty "->" Ty "->" Ty
/// |        |       |
/// +-Ty-----+       |
/// |                |
/// +-Ty-------------+
/// ```
///
/// The top-line is the `symbols` vector. The groupings below are
/// stored in the `reductions` vector, in order from smallest to
/// largest (they are always properly nested). The `cursor` field
/// indicates the current lookahead token.
///
/// The `symbols` vector is actually `Option<Symbol>` to account
/// for empty reductions:
///
/// ```
/// A       B
/// | |   | |
/// | +-Y-+ |
/// +-Z-----+
/// ```
///
/// The "empty space" between A and B would be represented as `None`.
#[derive(Clone, Debug)]
pub struct Example {
pub symbols: Vec<ExampleSymbol>,
pub cursor: usize,
pub reductions: Vec<Reduction>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ExampleSymbol {
Symbol(Symbol),
Epsilon,
}
#[derive(Copy, Clone, Default)]
pub struct ExampleStyles {
pub before_cursor: Style,
pub on_cursor: Style,
pub after_cursor: Style,
}
#[derive(Clone, Debug)]
pub struct Reduction {
pub start: usize,
pub end: usize,
pub nonterminal: NonterminalString,
}
impl Example {
/// Length of each symbol. Each will need *at least* that amount
/// of space. :) Measure in characters, under the assumption of a
/// mono-spaced font. Also add a final `0` marker which will serve
/// as the end position.
fn lengths(&self) -> Vec<usize> {
self.symbols
.iter()
.map(|s| match *s {
ExampleSymbol::Symbol(ref s) => format!("{}", s).chars().count(),
ExampleSymbol::Epsilon => 1, // display as " "
})
.chain(Some(0))
.collect()
}
/// Extract a prefix of the list of symbols from this `Example`
/// and make a styled list of them, like:
///
/// Ty "->" Ty -> "Ty"
pub fn to_symbol_list(&self, length: usize, styles: ExampleStyles) -> Box<Content> {
let mut builder = InlineBuilder::new().begin_spaced();
for (index, symbol) in self.symbols[..length].iter().enumerate() {
let style = if index < self.cursor {
styles.before_cursor
} else if index > self.cursor {
styles.after_cursor
} else {
match *symbol {
ExampleSymbol::Symbol(Symbol::Terminal(_)) => styles.on_cursor,
ExampleSymbol::Symbol(Symbol::Nonterminal(_)) => styles.after_cursor,
ExampleSymbol::Epsilon => styles.after_cursor,
}
};
if let &ExampleSymbol::Symbol(ref s) = symbol {
builder = builder.push(s.clone()).styled(style);
}
}
builder.end().indented().end()
}
/// Render the example into a styled diagram suitable for
/// embedding in an error message.
pub fn into_picture(self, styles: ExampleStyles) -> Box<Content> {
let lengths = self.lengths();
let positions = self.positions(&lengths);
InlineBuilder::new()
.push(Box::new(ExamplePicture {
example: self,
positions: positions,
styles: styles,
}))
.indented()
.end()
}
fn starting_positions(&self, lengths: &[usize]) -> Vec<usize> {
lengths
.iter()
.scan(0, |counter, &len| {
let start = *counter;
// Leave space for "NT " (if "NT" is the name
// of the nonterminal).
*counter = start + len + 1;
Some(start)
})
.collect()
}
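// For example, for the `Ty "->" Ty "->" Ty` picture above (assuming
// each `"->"` displays with its quotes), `lengths()` would yield
// `[2, 4, 2, 4, 2, 0]` and the starting positions come out as
// `[0, 3, 8, 11, 16, 19]`: each symbol starts one column after the
// previous one ends.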
/// Start index where each symbol in the example should appear,
/// measured in characters. These are spaced to leave enough room
/// for the reductions below.
fn positions(&self, lengths: &[usize]) -> Vec<usize> {
// Initially, position each symbol with one space in between,
// like:
//
// X Y Z
let mut positions = self.starting_positions(lengths);
// Adjust spacing to account for the nonterminal labels
// we will have to add. It will display
// like this:
//
// A1 B2 C3 D4 E5 F6
// |         |
// +-Label---+
//
// But if the label is long we may have to adjust the spacing
// of the covered items (here, we changed them to two spaces,
// except the first gap, which got 3 spaces):
//
// A1   B2  C3  D4 E5 F6
// |             |
// +-LongLabel22-+
for &Reduction {
start,
end,
ref nonterminal,
} in &self.reductions
{
let nt_len = format!("{}", nonterminal).chars().count();
// Number of symbols we are reducing. This should always
// be non-zero because even in the case of an \epsilon
// rule, we ought to have a `None` entry in the symbol array.
let num_syms = end - start;
assert!(num_syms > 0);
// Let's use the expansion from above as our running example.
// We start out with positions like this:
//
// A1 B2 C3 D4 E5 F6
// |         |
// +-LongLabel22-+
//
// But we want LongLabel to end at D4. No good.
// Start of first symbol to be reduced. Here, 0.
//
// A1 B2 C3 D4
// ^ here
let start_position = positions[start];
// End of last symbol to be reduced. Here, 11.
//
// A1 B2 C3 D4 E5
//             ^ positions[end]
//            ^ here -- positions[end] - 1
let end_position = positions[end] - 1;
// We need space to draw `+-Label-+` between
// start_position and end_position.
let required_len = nt_len + 4; // here, 15
let actual_len = end_position - start_position; // here, 11
if required_len < actual_len {
continue; // Got enough space, all set.
}
// Have to add `difference` characters altogether.
let difference = required_len - actual_len; // here, 4
// Increment over everything that is not part of this nonterminal.
// In the example above, that is E5 and F6.
shift(&mut positions[end..], difference);
if num_syms > 1 {
// If there is just one symbol being reduced here,
// then we have shifted over the things that follow
// it, and we are done. This would be a case like:
//
// X          Y Z
// |       |
// +-Label-+
//
// (which maybe ought to be rendered slightly
// differently).
//
// But if there are multiple symbols, we're not quite
// done, because there would be an unsightly gap:
//
//   (gaps)
//  |  |  |
//  v  v  v
// A1 B2 C3 D4     E5 F6
// |             |
// +-LongLabel22-+
//
// we'd like to make things line up, so we have to
// distribute that extra space internally by
// increasing the "gaps" (marked above) as evenly as
// possible (basically, full justification).
//
// We do this by dividing up the spaces evenly and
// then taking the remainder `N` and distributing 1
// extra to the first N.
let num_gaps = num_syms - 1; // number of gaps we can adjust. Here, 3.
let amount = difference / num_gaps; // what to add to each gap. Here, 1.
let extra = difference % num_gaps; // the remainder. Here, 1.
// For the first `extra` symbols, give them amount + 1
// extra space. After that, just amount. (O(n^2). Sue me.)
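// In the running example, difference = 4 and num_gaps = 3, so
// amount = 1 and extra = 1: the first gap grows by 2 columns and
// the other two by 1 each, accounting for all 4 extra columns.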
for i in 0..extra {
shift(&mut positions[start + 1 + i..end], amount + 1);
}
for i in extra..num_gaps {
shift(&mut positions[start + 1 + i..end], amount);
}
}
}
positions
}
#[cfg(test)]
pub fn paint_unstyled(&self) -> Vec<::ascii_canvas::Row> {
use std::default::Default;
let this = self.clone();
let content = this.into_picture(ExampleStyles::default());
let min_width = content.min_width();
let canvas = content.emit_to_canvas(min_width);
canvas.to_strings()
}
fn paint_on(&self, styles: &ExampleStyles, positions: &[usize], view: &mut AsciiView) {
// Draw the brackets for each reduction:
for (index, reduction) in self.reductions.iter().enumerate() {
let start_column = positions[reduction.start];
let end_column = positions[reduction.end] - 1;
let row = 1 + index;
view.draw_vertical_line(0..row + 1, start_column);
view.draw_vertical_line(0..row + 1, end_column - 1);
view.draw_horizontal_line(row, start_column..end_column);
}
// Write the labels for each reduction. Do this after the
// brackets so that ascii canvas can convert `|` to `+`
// without interfering with the text (in case of weird overlap).
let session = Tls::session();
for (index, reduction) in self.reductions.iter().enumerate() {
let column = positions[reduction.start] + 2;
let row = 1 + index;
view.write_chars(
row,
column,
reduction.nonterminal.to_string().chars(),
session.nonterminal_symbol,
);
}
// Write the labels on top:
// A1 B2 C3 D4 E5 F6
self.paint_symbols_on(&self.symbols, &positions, styles, view);
}
fn paint_symbols_on(
&self,
symbols: &[ExampleSymbol],
positions: &[usize],
styles: &ExampleStyles,
view: &mut AsciiView,
) {
let session = Tls::session();
for (index, ex_symbol) in symbols.iter().enumerate() {
let style = if index < self.cursor {
styles.before_cursor
} else if index == self.cursor {
// Only display actual terminals in the "on-cursor"
// font, because it might be misleading to show a
// nonterminal that way. Really it'd be nice to expand
// so that the cursor is always a terminal.
match *ex_symbol {
ExampleSymbol::Symbol(Symbol::Terminal(_)) => styles.on_cursor,
_ => styles.after_cursor,
}
} else {
styles.after_cursor
};
let column = positions[index];
match *ex_symbol {
ExampleSymbol::Symbol(Symbol::Terminal(ref term)) => {
view.write_chars(
0,
column,
term.to_string().chars(),
style.with(session.terminal_symbol),
);
}
ExampleSymbol::Symbol(Symbol::Nonterminal(ref nt)) => {
view.write_chars(
0,
column,
nt.to_string().chars(),
style.with(session.nonterminal_symbol),
);
}
ExampleSymbol::Epsilon => {}
}
}
}
}
struct ExamplePicture {
example: Example,
positions: Vec<usize>,
styles: ExampleStyles,
}
impl Content for ExamplePicture {
fn min_width(&self) -> usize {
*self.positions.last().unwrap()
}
fn emit(&self, view: &mut AsciiView) {
self.example.paint_on(&self.styles, &self.positions, view);
}
fn into_wrap_items(self: Box<Self>, wrap_items: &mut Vec<Box<Content>>) {
wrap_items.push(self);
}
}
impl Debug for ExamplePicture {
fn fmt(&self, fmt: &mut Formatter) -> Result<(), Error> {
Debug::fmt(&self.example, fmt)
}
}
fn shift(positions: &mut [usize], amount: usize) {
for position in positions {
*position += amount;
}
}
impl ExampleStyles {
pub fn ambig() -> Self {
let session = Tls::session();
ExampleStyles {
before_cursor: session.ambig_symbols,
on_cursor: session.ambig_symbols,
after_cursor: session.ambig_symbols,
}
}
pub fn new() -> Self {
let session = Tls::session();
ExampleStyles {
before_cursor: session.observed_symbols,
on_cursor: session.cursor_symbol,
after_cursor: session.unobserved_symbols,
}
}
}

Some files were not shown because too many files have changed in this diff.