From 7c9919256c599cb02d386132b91d149825c1f712 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 28 Jun 2021 10:49:41 +0800 Subject: [PATCH 001/131] begin refactoring --- Cargo.lock | 534 +++++----- nac3core/Cargo.toml | 2 + nac3core/src/expression_inference.rs | 922 ------------------ nac3core/src/lib.rs | 586 +---------- .../context/inference_context.rs | 2 +- nac3core/src/{ => typecheck}/context/mod.rs | 0 .../context/top_level_context.rs | 2 +- .../src/{ => typecheck}/inference_core.rs | 8 +- nac3core/src/{ => typecheck}/magic_methods.rs | 26 +- nac3core/src/typecheck/mod.rs | 6 + nac3core/src/{ => typecheck}/primitives.rs | 2 +- nac3core/src/{ => typecheck}/typedef.rs | 0 12 files changed, 309 insertions(+), 1781 deletions(-) delete mode 100644 nac3core/src/expression_inference.rs rename nac3core/src/{ => typecheck}/context/inference_context.rs (99%) rename nac3core/src/{ => typecheck}/context/mod.rs (100%) rename nac3core/src/{ => typecheck}/context/top_level_context.rs (99%) rename nac3core/src/{ => typecheck}/inference_core.rs (98%) rename nac3core/src/{ => typecheck}/magic_methods.rs (66%) create mode 100644 nac3core/src/typecheck/mod.rs rename nac3core/src/{ => typecheck}/primitives.rs (99%) rename nac3core/src/{ => typecheck}/typedef.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index f16dee50..93a535ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,31 +1,30 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. [[package]] -name = "aho-corasick" -version = "0.7.15" +name = "ahash" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98" +dependencies = [ + "getrandom 0.2.3", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" dependencies = [ "memchr", ] -[[package]] -name = "arrayref" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" - -[[package]] -name = "arrayvec" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" - [[package]] name = "ascii-canvas" -version = "2.0.0" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff8eb72df928aafb99fe5d37b383f2fe25bd2a765e3e5f7c365916b6f2463a29" +checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6" dependencies = [ "term", ] @@ -47,12 +46,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" -[[package]] -name = "base64" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" - [[package]] name = "bit-set" version = "0.5.2" @@ -69,60 +62,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" [[package]] -name = "blake2b_simd" -version = "0.5.11" +name = "bitflags" +version = "1.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587" -dependencies = [ - "arrayref", - "arrayvec", - "constant_time_eq", -] - -[[package]] -name = "block-buffer" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" -dependencies = [ - "block-padding", - "byte-tools", - "byteorder", - "generic-array", -] - -[[package]] -name = "block-padding" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" -dependencies = [ - "byte-tools", -] - -[[package]] -name = "byte-tools" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" - -[[package]] -name = "byteorder" -version = "1.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" [[package]] name = "cc" -version = "1.0.66" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48" - -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787" [[package]] name = "cfg-if" @@ -131,27 +80,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "constant_time_eq" -version = "0.1.5" +name = "codespan-reporting" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "crossbeam-utils" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" dependencies = [ - "autocfg", - "cfg-if 1.0.0", - "lazy_static", + "termcolor", + "unicode-width", ] [[package]] -name = "ctor" -version = "0.1.16" +name = "crunchy" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fbaabec2c953050352311293be5c6aba8e141ba19d6811862b232d6fd020484" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "ctor" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e98e2ad1a782e33928b96fc3948e7c355e5af34ba4de7670fe8bac2a3b2006d" dependencies = [ "quote", "syn", @@ -164,37 +112,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e25ea47919b1560c4e3b7fe0aaab9becf5b84a10325ddf7db0f0ba5e1026499" [[package]] -name = "digest" -version = "0.8.1" +name = "dirs-next" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" dependencies = [ 
- "generic-array", + "cfg-if", + "dirs-sys-next", ] [[package]] -name = "dirs" -version = "1.0.5" +name = "dirs-sys-next" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", "redox_users", "winapi", ] -[[package]] -name = "docopt" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f525a586d310c87df72ebcd98009e57f1cc030c8c268305287a476beb653969" -dependencies = [ - "lazy_static", - "regex", - "serde", - "strsim", -] - [[package]] name = "either" version = "1.6.1" @@ -210,12 +147,6 @@ dependencies = [ "log", ] -[[package]] -name = "fake-simd" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" - [[package]] name = "fixedbitset" version = "0.2.0" @@ -223,23 +154,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" [[package]] -name = "generic-array" -version = "0.12.3" +name = "getrandom" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c68f0274ae0e023facc3c97b2e00f076be70e254bc851d972503b328db79b2ec" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ - "typenum", + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", ] [[package]] name = "getrandom" -version = "0.1.15" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ - "cfg-if 0.1.10", + "cfg-if", "libc", - "wasi", + "wasi 0.10.2+wasi-snapshot-preview1", ] [[package]] @@ -261,18 +194,18 @@ checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" [[package]] name = "hermit-abi" -version = "0.1.17" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "indexmap" -version = "1.6.1" +version = "1.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1fa934250de4de8aef298d81c729a7d33d8c239daa3a7575e6b92bfc7313b" +checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3" dependencies = [ "autocfg", "hashbrown", @@ -304,7 +237,7 @@ dependencies = [ [[package]] name = "inkwell" version = "0.1.0" -source = "git+https://github.com/TheDan64/inkwell#3eab4db479c2ca9d20b191f431a6d36835093108" +source = "git+https://github.com/TheDan64/inkwell#aa4de1d78471a3d2f0fda1f56801177ddf80f3bf" dependencies = [ "either", "inkwell_internals", @@ -317,8 +250,8 @@ dependencies = [ [[package]] name = "inkwell_internals" -version = "0.2.0" -source = "git+https://github.com/TheDan64/inkwell#3eab4db479c2ca9d20b191f431a6d36835093108" +version = "0.3.0" +source = "git+https://github.com/TheDan64/inkwell#aa4de1d78471a3d2f0fda1f56801177ddf80f3bf" dependencies = [ "proc-macro2", "quote", @@ -331,7 +264,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -358,43 +291,44 @@ dependencies = [ [[package]] name = "itertools" -version = "0.9.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" dependencies = [ "either", ] [[package]] name = "lalrpop" -version = "0.19.1" +version = "0.19.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60fb56191fb8ed5311597e5750debe6779c9fdb487dbaa5ff302592897d7a2c8" +checksum = "b15174f1c529af5bf1283c3bc0058266b483a67156f79589fab2a25e23cf8988" dependencies = [ "ascii-canvas", "atty", "bit-set", "diff", - "docopt", "ena", "itertools", "lalrpop-util", "petgraph", + "pico-args", "regex", "regex-syntax", - "serde", - "serde_derive", - "sha2", "string_cache", "term", + "tiny-keccak", "unicode-xid", ] [[package]] name = "lalrpop-util" -version = "0.19.1" +version = "0.19.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6771161eff561647fad8bb7e745e002c304864fb8f436b52b30acda51fca4408" +checksum = "d3e58cce361efcc90ba8a0a5f982c741ff86b603495bb15a998412e957dcd278" +dependencies = [ + "regex", +] [[package]] name = "lazy_static" @@ -404,15 +338,15 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.81" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" +checksum = "12b8adadd720df158f4d70dfe7ccc6adb0472d7c55ca83445f6a5ab3e36f8fb6" [[package]] name = "llvm-sys" -version = "100.2.0" +version = "100.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9109e19fbfac3458f2970189719fa19f1007c6fd4e08c44fdebf4be0ddbe261d" +checksum = "15d9c00ce56221b2150e2d4d51887ff139fce5a0e50346c744861d1e66d2f7c4" dependencies = [ "cc", "lazy_static", @@ -423,34 +357,35 @@ dependencies = [ [[package]] name = "lock_api" -version = "0.4.2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312" +checksum = "0382880606dff6d15c9476c416d18690b72742aa7b605bb6dd6ec9030fbf07eb" dependencies = [ "scopeguard", ] [[package]] name = "log" -version = "0.4.11" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if 0.1.10", + "cfg-if", ] [[package]] name = "memchr" -version = "2.3.4" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" +checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" [[package]] name = "nac3core" version = "0.1.0" dependencies = [ + "codespan-reporting", "inkwell", - "num-bigint", + "num-bigint 0.3.2", "num-traits", "rustpython-parser", ] @@ -482,9 +417,20 @@ checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" [[package]] name = "num-bigint" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5e9a41747ae4633fce5adffb4d2e81ffc5e89593cb19917f8fb2cc5ff76507bf" +checksum = "7d0a3d5e207573f948a9e5376662aa743a2ea13f7c50a554d7af443a73fbfeba" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e0d047c1062aa51e256408c560894e5251f08925980e53cf1aa5bd00eec6512" dependencies = [ "autocfg", "num-integer", @@ -512,15 +458,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.5.2" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" - -[[package]] -name = "opaque-debug" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" +checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" [[package]] name = "parking_lot" @@ -535,11 +475,11 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.8.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7c6d9b8427445284a09c55be860a15855ab580a417ccad9da88f5a06787ced0" +checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "instant", "libc", "redox_syscall", @@ -576,6 +516,41 @@ dependencies = [ "indexmap", ] +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros", + "phf_shared", + "proc-macro-hack", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "phf_shared" version = "0.8.0" @@ -585,6 +560,18 @@ dependencies = [ "siphasher", ] +[[package]] +name = "pico-args" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468" + +[[package]] +name = "ppv-lite86" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" + [[package]] name = "precomputed-hash" version = "0.1.1" @@ -599,9 +586,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.24" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038" dependencies = [ "unicode-xid", ] @@ -646,84 +633,132 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.7" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" dependencies = [ "proc-macro2", ] [[package]] -name = "redox_syscall" -version = "0.1.57" +name = "rand" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha", + "rand_core", + "rand_hc", + "rand_pcg", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core", +] + +[[package]] +name = "redox_syscall" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee" +dependencies = [ + "bitflags", +] [[package]] name = "redox_users" -version = "0.3.5" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d" +checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" dependencies = [ - "getrandom", + "getrandom 0.2.3", "redox_syscall", - "rust-argon2", ] [[package]] name = "regex" -version = "1.4.2" +version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" dependencies = [ "aho-corasick", "memchr", "regex-syntax", - "thread_local", ] [[package]] name = "regex-syntax" -version = "0.6.21" +version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189" - -[[package]] -name = "rust-argon2" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb" -dependencies = [ - "base64", - "blake2b_simd", - "constant_time_eq", - "crossbeam-utils", -] +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" [[package]] name = "rustpython-ast" version = "0.1.0" -source = "git+https://github.com/RustPython/RustPython#b01cca97f4f94af6c1e15f161283f1ac9e0617b1" +source = "git+https://github.com/RustPython/RustPython#bee5794b6e2b777ee343c7277954b73d06b5cb7d" dependencies = [ - "num-bigint", + "num-bigint 0.4.0", ] [[package]] name = "rustpython-parser" version = "0.1.2" -source = 
"git+https://github.com/RustPython/RustPython#b01cca97f4f94af6c1e15f161283f1ac9e0617b1" +source = "git+https://github.com/RustPython/RustPython#bee5794b6e2b777ee343c7277954b73d06b5cb7d" dependencies = [ + "ahash", "lalrpop", "lalrpop-util", "log", - "num-bigint", + "num-bigint 0.4.0", "num-traits", + "phf", "rustpython-ast", "unic-emoji-char", "unic-ucd-ident", "unicode_names2", ] +[[package]] +name = "rustversion" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61b3909d758bb75c79f23d4736fac9433868679d3ad2ea7a61e3c25cfda9a088" + [[package]] name = "scopeguard" version = "1.1.0" @@ -745,49 +780,17 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" -[[package]] -name = "serde" -version = "1.0.118" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06c64263859d87aa2eb554587e2d23183398d617427327cf2b3d0ed8c69e4800" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.118" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c84d3526699cd55261af4b941e4e725444df67aa4f9e6a3564f18030d12672df" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "sha2" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a256f46ea78a0c0d9ff00077504903ac881a1dafdc20da66545699e7776b3e69" -dependencies = [ - "block-buffer", - "digest", - "fake-simd", - "opaque-debug", -] - [[package]] name = "siphasher" -version = "0.3.3" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7" +checksum = "cbce6d4507c7e4a3962091436e56e95290cb71fa302d0d270e32130b75fbff27" [[package]] name = "smallvec" -version = "1.5.1" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae524f056d7d770e174287294f562e95044c68e88dec909a00d2094805db9d75" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" [[package]] name = "string_cache" @@ -799,20 +802,13 @@ dependencies = [ "new_debug_unreachable", "phf_shared", "precomputed-hash", - "serde", ] -[[package]] -name = "strsim" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" - [[package]] name = "syn" -version = "1.0.54" +version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2af957a63d6bd42255c359c93d9bfdb97076bd3b820897ce55ffbfbf107f44" +checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7" dependencies = [ "proc-macro2", "quote", @@ -821,29 +817,32 @@ dependencies = [ [[package]] name = "term" -version = "0.5.2" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edd106a334b7657c10b7c540a0106114feadeb4dc314513e97df481d5d966f42" +checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" dependencies = [ - "byteorder", - "dirs", + "dirs-next", + "rustversion", "winapi", ] [[package]] -name = "thread_local" -version = "1.0.1" +name = "termcolor" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" +checksum = 
"2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" dependencies = [ - "lazy_static", + "winapi-util", ] [[package]] -name = "typenum" -version = "1.12.0" +name = "tiny-keccak" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] [[package]] name = "unic-char-property" @@ -898,10 +897,16 @@ dependencies = [ ] [[package]] -name = "unicode-xid" -version = "0.2.1" +name = "unicode-width" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" [[package]] name = "unicode_names2" @@ -915,12 +920,24 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f14ee04d9415b52b3aeab06258a3f07093182b88ba0f9b8d203f211a7a7d41c7" +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + [[package]] name = "wasi" version = "0.9.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + [[package]] name = "winapi" version = "0.3.9" @@ -937,6 +954,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 62af7c29..04025e53 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -9,3 +9,5 @@ num-bigint = "0.3" num-traits = "0.2" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } +codespan-reporting = "0.11.1" + diff --git a/nac3core/src/expression_inference.rs b/nac3core/src/expression_inference.rs deleted file mode 100644 index fafe04e5..00000000 --- a/nac3core/src/expression_inference.rs +++ /dev/null @@ -1,922 +0,0 @@ -use crate::context::InferenceContext; -use crate::inference_core::resolve_call; -use crate::magic_methods::*; -use crate::primitives::*; -use crate::typedef::{Type, TypeEnum::*}; -use rustpython_parser::ast::{ - Comparison, Comprehension, ComprehensionKind, Expression, ExpressionType, Operator, - UnaryOperator, -}; -use std::convert::TryInto; - -type ParserResult = Result, String>; - -pub fn infer_expr<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - expr: &'b 
Expression, -) -> ParserResult { - match &expr.node { - ExpressionType::Number { value } => infer_constant(ctx, value), - ExpressionType::Identifier { name } => infer_identifier(ctx, name), - ExpressionType::List { elements } => infer_list(ctx, elements), - ExpressionType::Tuple { elements } => infer_tuple(ctx, elements), - ExpressionType::Attribute { value, name } => infer_attribute(ctx, value, name), - ExpressionType::BoolOp { values, .. } => infer_bool_ops(ctx, values), - ExpressionType::Binop { a, b, op } => infer_bin_ops(ctx, op, a, b), - ExpressionType::Unop { op, a } => infer_unary_ops(ctx, op, a), - ExpressionType::Compare { vals, ops } => infer_compare(ctx, vals, ops), - ExpressionType::Call { - args, - function, - keywords, - } => { - if !keywords.is_empty() { - Err("keyword is not supported".into()) - } else { - infer_call(ctx, &args, &function) - } - } - ExpressionType::Subscript { a, b } => infer_subscript(ctx, a, b), - ExpressionType::IfExpression { test, body, orelse } => { - infer_if_expr(ctx, &test, &body, orelse) - } - ExpressionType::Comprehension { kind, generators } => match kind.as_ref() { - ComprehensionKind::List { element } => { - if generators.len() == 1 { - infer_list_comprehension(ctx, element, &generators[0]) - } else { - Err("only 1 generator statement is supported".into()) - } - } - _ => Err("only list comprehension is supported".into()), - }, - ExpressionType::True | ExpressionType::False => Ok(Some(ctx.get_primitive(BOOL_TYPE))), - _ => Err("not supported".into()), - } -} - -fn infer_constant( - ctx: &mut InferenceContext, - value: &rustpython_parser::ast::Number, -) -> ParserResult { - use rustpython_parser::ast::Number; - match value { - Number::Integer { value } => { - let int32: Result = value.try_into(); - if int32.is_ok() { - Ok(Some(ctx.get_primitive(INT32_TYPE))) - } else { - Err("integer out of range".into()) - } - } - Number::Float { .. } => Ok(Some(ctx.get_primitive(FLOAT_TYPE))), - _ => Err("not supported".into()), - } -} - -fn infer_identifier(ctx: &mut InferenceContext, name: &str) -> ParserResult { - Ok(Some(ctx.resolve(name)?)) -} - -fn infer_list<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - elements: &'b [Expression], -) -> ParserResult { - if elements.is_empty() { - return Ok(Some(ParametricType(LIST_TYPE, vec![BotType.into()]).into())); - } - - let mut types = elements.iter().map(|v| infer_expr(ctx, v)); - - let head = types.next().unwrap()?; - if head.is_none() { - return Err("list elements must have some type".into()); - } - for v in types { - // TODO: try virtual type... - if v? != head { - return Err("inhomogeneous list is not allowed".into()); - } - } - Ok(Some(ParametricType(LIST_TYPE, vec![head.unwrap()]).into())) -} - -fn infer_tuple<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - elements: &'b [Expression], -) -> ParserResult { - let types: Result>, String> = - elements.iter().map(|v| infer_expr(ctx, v)).collect(); - if let Some(t) = types? 
{ - Ok(Some(ParametricType(TUPLE_TYPE, t).into())) - } else { - Err("tuple elements must have some type".into()) - } -} - -fn infer_attribute<'a>( - ctx: &mut InferenceContext<'a>, - value: &'a Expression, - name: &str, -) -> ParserResult { - let value = infer_expr(ctx, value)?.ok_or_else(|| "no value".to_string())?; - if let TypeVariable(_) = value.as_ref() { - return Err("no fields for type variable".into()); - } - - value - .get_base(ctx) - .and_then(|b| b.fields.get(name).cloned()) - .map_or_else(|| Err("no such field".to_string()), |v| Ok(Some(v))) -} - -fn infer_bool_ops<'a>(ctx: &mut InferenceContext<'a>, values: &'a [Expression]) -> ParserResult { - assert_eq!(values.len(), 2); - let left = infer_expr(ctx, &values[0])?.ok_or_else(|| "no value".to_string())?; - let right = infer_expr(ctx, &values[1])?.ok_or_else(|| "no value".to_string())?; - - let b = ctx.get_primitive(BOOL_TYPE); - if left == b && right == b { - Ok(Some(b)) - } else { - Err("bool operands must be bool".into()) - } -} - -fn infer_bin_ops<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - op: &Operator, - left: &'b Expression, - right: &'b Expression, -) -> ParserResult { - let left = infer_expr(ctx, left)?.ok_or_else(|| "no value".to_string())?; - let right = infer_expr(ctx, right)?.ok_or_else(|| "no value".to_string())?; - let fun = binop_name(op); - resolve_call(ctx, Some(left), fun, &[right]) -} - -fn infer_unary_ops<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - op: &UnaryOperator, - obj: &'b Expression, -) -> ParserResult { - let ty = infer_expr(ctx, obj)?.ok_or_else(|| "no value".to_string())?; - if let UnaryOperator::Not = op { - if ty == ctx.get_primitive(BOOL_TYPE) { - Ok(Some(ty)) - } else { - Err("logical not must be applied to bool".into()) - } - } else { - resolve_call(ctx, Some(ty), unaryop_name(op), &[]) - } -} - -fn infer_compare<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - vals: &'b [Expression], - ops: &'b [Comparison], -) -> ParserResult { - let types: Result>, _> = vals.iter().map(|v| infer_expr(ctx, v)).collect(); - let types = types?; - if types.is_none() { - return Err("comparison operands must have type".into()); - } - let types = types.unwrap(); - let boolean = ctx.get_primitive(BOOL_TYPE); - let left = &types[..types.len() - 1]; - let right = &types[1..]; - - for ((a, b), op) in left.iter().zip(right.iter()).zip(ops.iter()) { - let fun = comparison_name(op).ok_or_else(|| "unsupported comparison".to_string())?; - let ty = resolve_call(ctx, Some(a.clone()), fun, &[b.clone()])?; - if ty.is_none() || ty.unwrap() != boolean { - return Err("comparison result must be boolean".into()); - } - } - Ok(Some(boolean)) -} - -fn infer_call<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - args: &'b [Expression], - function: &'b Expression, -) -> ParserResult { - // TODO: special handling for int64 constant - let types: Result>, _> = args.iter().map(|v| infer_expr(ctx, v)).collect(); - let types = types?; - if types.is_none() { - return Err("function params must have type".into()); - } - - let (obj, fun) = match &function.node { - ExpressionType::Identifier { name } => (None, name), - ExpressionType::Attribute { value, name } => ( - Some(infer_expr(ctx, &value)?.ok_or_else(|| "no value".to_string())?), - name, - ), - _ => return Err("not supported".into()), - }; - resolve_call(ctx, obj, fun.as_str(), &types.unwrap()) -} - -fn infer_subscript<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - a: &'b Expression, - b: &'b Expression, -) -> ParserResult { - let a = infer_expr(ctx, a)?.ok_or_else(|| "no 
value".to_string())?; - let t = if let ParametricType(LIST_TYPE, ls) = a.as_ref() { - ls[0].clone() - } else { - return Err("subscript is not supported for types other than list".into()); - }; - - match &b.node { - ExpressionType::Slice { elements } => { - let int32 = ctx.get_primitive(INT32_TYPE); - let types: Result>, _> = elements - .iter() - .map(|v| { - if let ExpressionType::None = v.node { - Ok(Some(int32.clone())) - } else { - infer_expr(ctx, v) - } - }) - .collect(); - let types = types?.ok_or_else(|| "slice must have type".to_string())?; - if types.iter().all(|v| v == &int32) { - Ok(Some(a)) - } else { - Err("slice must be int32 type".into()) - } - } - _ => { - let b = infer_expr(ctx, b)?.ok_or_else(|| "no value".to_string())?; - if b == ctx.get_primitive(INT32_TYPE) { - Ok(Some(t)) - } else { - Err("index must be either slice or int32".into()) - } - } - } -} - -fn infer_if_expr<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - test: &'b Expression, - body: &'b Expression, - orelse: &'b Expression, -) -> ParserResult { - let test = infer_expr(ctx, test)?.ok_or_else(|| "no value".to_string())?; - if test != ctx.get_primitive(BOOL_TYPE) { - return Err("test should be bool".into()); - } - - let body = infer_expr(ctx, body)?; - let orelse = infer_expr(ctx, orelse)?; - if body.as_ref() == orelse.as_ref() { - Ok(body) - } else { - Err("divergent type".into()) - } -} - -fn infer_simple_binding<'a: 'b, 'b>( - ctx: &mut InferenceContext<'b>, - name: &'a Expression, - ty: Type, -) -> Result<(), String> { - match &name.node { - ExpressionType::Identifier { name } => { - if name == "_" { - Ok(()) - } else if ctx.defined(name.as_str()) { - Err("duplicated naming".into()) - } else { - ctx.assign(name.as_str(), ty)?; - Ok(()) - } - } - ExpressionType::Tuple { elements } => { - if let ParametricType(TUPLE_TYPE, ls) = ty.as_ref() { - if elements.len() == ls.len() { - for (a, b) in elements.iter().zip(ls.iter()) { - infer_simple_binding(ctx, a, b.clone())?; - } - Ok(()) - } else { - Err("different length".into()) - } - } else { - Err("not supported".into()) - } - } - _ => Err("not supported".into()), - } -} - -fn infer_list_comprehension<'b: 'a, 'a>( - ctx: &mut InferenceContext<'a>, - element: &'b Expression, - comprehension: &'b Comprehension, -) -> ParserResult { - if comprehension.is_async { - return Err("async is not supported".into()); - } - - let iter = infer_expr(ctx, &comprehension.iter)?.ok_or_else(|| "no value".to_string())?; - if let ParametricType(LIST_TYPE, ls) = iter.as_ref() { - ctx.with_scope(|ctx| { - infer_simple_binding(ctx, &comprehension.target, ls[0].clone())?; - - let boolean = ctx.get_primitive(BOOL_TYPE); - for test in comprehension.ifs.iter() { - let result = - infer_expr(ctx, test)?.ok_or_else(|| "no value in test".to_string())?; - if result != boolean { - return Err("test must be bool".into()); - } - } - let result = infer_expr(ctx, element)?.ok_or_else(|| "no value")?; - Ok(Some(ParametricType(LIST_TYPE, vec![result]).into())) - }) - .1 - } else { - Err("iteration is supported for list only".into()) - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::context::*; - use crate::typedef::*; - use rustpython_parser::parser::parse_expression; - use std::collections::HashMap; - use std::rc::Rc; - - fn get_inference_context(ctx: TopLevelContext) -> InferenceContext { - InferenceContext::new(ctx, Box::new(|_| Err("unbounded identifier".into()))) - } - - #[test] - fn test_constants() { - let ctx = basic_ctx(); - let mut ctx = get_inference_context(ctx); - - let ast = 
parse_expression("123").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT32_TYPE)); - - let ast = parse_expression("2147483647").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT32_TYPE)); - - let ast = parse_expression("2147483648").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("integer out of range".into())); - // - // let ast = parse_expression("2147483648").unwrap(); - // let result = infer_expr(&mut ctx, &ast); - // assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT64_TYPE)); - - // let ast = parse_expression("9223372036854775807").unwrap(); - // let result = infer_expr(&mut ctx, &ast); - // assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT64_TYPE)); - - // let ast = parse_expression("9223372036854775808").unwrap(); - // let result = infer_expr(&mut ctx, &ast); - // assert_eq!(result, Err("integer out of range".into())); - - let ast = parse_expression("123.456").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(FLOAT_TYPE)); - - let ast = parse_expression("True").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(BOOL_TYPE)); - - let ast = parse_expression("False").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(BOOL_TYPE)); - } - - #[test] - fn test_identifier() { - let ctx = basic_ctx(); - let mut ctx = get_inference_context(ctx); - ctx.assign("abc", ctx.get_primitive(INT32_TYPE)).unwrap(); - - let ast = parse_expression("abc").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT32_TYPE)); - - let ast = parse_expression("ab").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("unbounded identifier".into())); - } - - #[test] - fn test_list() { - let mut ctx = basic_ctx(); - ctx.add_fn( - "foo", - FnDef { - args: vec![], - result: None, - }, - ); - let mut ctx = get_inference_context(ctx); - ctx.assign("abc", ctx.get_primitive(INT32_TYPE)).unwrap(); - // def is reserved... 
- ctx.assign("efg", ctx.get_primitive(INT32_TYPE)).unwrap(); - ctx.assign("xyz", ctx.get_primitive(FLOAT_TYPE)).unwrap(); - - let ast = parse_expression("[]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!( - result.unwrap().unwrap(), - ParametricType(LIST_TYPE, vec![BotType.into()]).into() - ); - - let ast = parse_expression("[abc]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!( - result.unwrap().unwrap(), - ParametricType(LIST_TYPE, vec![ctx.get_primitive(INT32_TYPE)]).into() - ); - - let ast = parse_expression("[abc, efg]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!( - result.unwrap().unwrap(), - ParametricType(LIST_TYPE, vec![ctx.get_primitive(INT32_TYPE)]).into() - ); - - let ast = parse_expression("[abc, efg, xyz]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("inhomogeneous list is not allowed".into())); - - let ast = parse_expression("[foo()]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("list elements must have some type".into())); - } - - #[test] - fn test_tuple() { - let mut ctx = basic_ctx(); - ctx.add_fn( - "foo", - FnDef { - args: vec![], - result: None, - }, - ); - let mut ctx = get_inference_context(ctx); - ctx.assign("abc", ctx.get_primitive(INT32_TYPE)).unwrap(); - ctx.assign("efg", ctx.get_primitive(FLOAT_TYPE)).unwrap(); - - let ast = parse_expression("(abc, efg)").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!( - result.unwrap().unwrap(), - ParametricType( - TUPLE_TYPE, - vec![ctx.get_primitive(INT32_TYPE), ctx.get_primitive(FLOAT_TYPE)] - ) - .into() - ); - - let ast = parse_expression("(abc, efg, foo())").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("tuple elements must have some type".into())); - } - - #[test] - fn test_attribute() { - let mut ctx = basic_ctx(); - ctx.add_fn( - "none", - FnDef { - args: vec![], - result: None, - }, - ); - let int32 = ctx.get_primitive(INT32_TYPE); - let float = ctx.get_primitive(FLOAT_TYPE); - - let foo = ctx.add_class(ClassDef { - base: TypeDef { - name: "Foo", - fields: HashMap::new(), - methods: HashMap::new(), - }, - parents: vec![], - }); - let foo_def = ctx.get_class_def_mut(foo); - foo_def.base.fields.insert("a", int32.clone()); - foo_def.base.fields.insert("b", ClassType(foo).into()); - foo_def.base.fields.insert("c", int32.clone()); - - let bar = ctx.add_class(ClassDef { - base: TypeDef { - name: "Bar", - fields: HashMap::new(), - methods: HashMap::new(), - }, - parents: vec![], - }); - let bar_def = ctx.get_class_def_mut(bar); - bar_def.base.fields.insert("a", int32); - bar_def.base.fields.insert("b", ClassType(bar).into()); - bar_def.base.fields.insert("c", float); - - let v0 = ctx.add_variable(VarDef { - name: "v0", - bound: vec![], - }); - - let v1 = ctx.add_variable(VarDef { - name: "v1", - bound: vec![ClassType(foo).into(), ClassType(bar).into()], - }); - - let mut ctx = get_inference_context(ctx); - ctx.assign("foo", Rc::new(ClassType(foo))).unwrap(); - ctx.assign("bar", Rc::new(ClassType(bar))).unwrap(); - ctx.assign("foobar", Rc::new(VirtualClassType(foo))) - .unwrap(); - ctx.assign("v0", ctx.get_variable(v0)).unwrap(); - ctx.assign("v1", ctx.get_variable(v1)).unwrap(); - ctx.assign("bot", Rc::new(BotType)).unwrap(); - - let ast = parse_expression("foo.a").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT32_TYPE)); - - let ast = parse_expression("foo.d").unwrap(); - let 
result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no such field".into())); - - let ast = parse_expression("foobar.a").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT32_TYPE)); - - let ast = parse_expression("v0.a").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no fields for type variable".into())); - - let ast = parse_expression("v1.a").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no fields for type variable".into())); - - let ast = parse_expression("none().a").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no value".into())); - - let ast = parse_expression("bot.a").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no such field".into())); - } - - #[test] - fn test_bool_ops() { - let mut ctx = basic_ctx(); - ctx.add_fn( - "none", - FnDef { - args: vec![], - result: None, - }, - ); - let mut ctx = get_inference_context(ctx); - - let ast = parse_expression("True and False").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(BOOL_TYPE)); - - let ast = parse_expression("True and none()").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no value".into())); - - let ast = parse_expression("True and 123").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("bool operands must be bool".into())); - } - - #[test] - fn test_bin_ops() { - let mut ctx = basic_ctx(); - let v0 = ctx.add_variable(VarDef { - name: "v0", - bound: vec![ctx.get_primitive(INT32_TYPE), ctx.get_primitive(INT64_TYPE)], - }); - let mut ctx = get_inference_context(ctx); - ctx.assign("a", TypeVariable(v0).into()).unwrap(); - - let ast = parse_expression("1 + 2 + 3").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT32_TYPE)); - - let ast = parse_expression("a + a + a").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("not supported".into())); - } - - #[test] - fn test_unary_ops() { - let mut ctx = basic_ctx(); - let v0 = ctx.add_variable(VarDef { - name: "v0", - bound: vec![ctx.get_primitive(INT32_TYPE), ctx.get_primitive(INT64_TYPE)], - }); - let mut ctx = get_inference_context(ctx); - ctx.assign("a", TypeVariable(v0).into()).unwrap(); - - let ast = parse_expression("-(123)").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT32_TYPE)); - - let ast = parse_expression("-a").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("not supported".into())); - - let ast = parse_expression("not True").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(BOOL_TYPE)); - - let ast = parse_expression("not (1)").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("logical not must be applied to bool".into())); - } - - #[test] - fn test_compare() { - let mut ctx = basic_ctx(); - let v0 = ctx.add_variable(VarDef { - name: "v0", - bound: vec![ctx.get_primitive(INT32_TYPE), ctx.get_primitive(INT64_TYPE)], - }); - let mut ctx = get_inference_context(ctx); - ctx.assign("a", TypeVariable(v0).into()).unwrap(); - - let ast = parse_expression("a == a == a").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("not supported".into())); - - let ast = 
parse_expression("a == a == 1").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("not supported".into())); - - let ast = parse_expression("True > False").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no such function".into())); - - let ast = parse_expression("True in False").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("unsupported comparison".into())); - } - - #[test] - fn test_call() { - let mut ctx = basic_ctx(); - ctx.add_fn( - "none", - FnDef { - args: vec![], - result: None, - }, - ); - - let foo = ctx.add_class(ClassDef { - base: TypeDef { - name: "Foo", - fields: HashMap::new(), - methods: HashMap::new(), - }, - parents: vec![], - }); - let foo_def = ctx.get_class_def_mut(foo); - foo_def.base.methods.insert( - "a", - FnDef { - args: vec![], - result: Some(Rc::new(ClassType(foo))), - }, - ); - - let bar = ctx.add_class(ClassDef { - base: TypeDef { - name: "Bar", - fields: HashMap::new(), - methods: HashMap::new(), - }, - parents: vec![], - }); - let bar_def = ctx.get_class_def_mut(bar); - bar_def.base.methods.insert( - "a", - FnDef { - args: vec![], - result: Some(Rc::new(ClassType(bar))), - }, - ); - - let v0 = ctx.add_variable(VarDef { - name: "v0", - bound: vec![], - }); - let v1 = ctx.add_variable(VarDef { - name: "v1", - bound: vec![ClassType(foo).into(), ClassType(bar).into()], - }); - let v2 = ctx.add_variable(VarDef { - name: "v2", - bound: vec![ - ClassType(foo).into(), - ClassType(bar).into(), - ctx.get_primitive(INT32_TYPE), - ], - }); - let mut ctx = get_inference_context(ctx); - ctx.assign("foo", Rc::new(ClassType(foo))).unwrap(); - ctx.assign("bar", Rc::new(ClassType(bar))).unwrap(); - ctx.assign("foobar", Rc::new(VirtualClassType(foo))) - .unwrap(); - ctx.assign("v0", ctx.get_variable(v0)).unwrap(); - ctx.assign("v1", ctx.get_variable(v1)).unwrap(); - ctx.assign("v2", ctx.get_variable(v2)).unwrap(); - ctx.assign("bot", Rc::new(BotType)).unwrap(); - - let ast = parse_expression("foo.a()").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ClassType(foo).into()); - - let ast = parse_expression("v1.a()").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("not supported".into())); - - let ast = parse_expression("foobar.a()").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ClassType(foo).into()); - - let ast = parse_expression("none().a()").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no value".into())); - - let ast = parse_expression("bot.a()").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("not supported".into())); - - let ast = parse_expression("[][0].a()").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("not supported".into())); - } - - #[test] - fn infer_subscript() { - let mut ctx = basic_ctx(); - ctx.add_fn( - "none", - FnDef { - args: vec![], - result: None, - }, - ); - let mut ctx = get_inference_context(ctx); - - let ast = parse_expression("[1, 2, 3][0]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT32_TYPE)); - - let ast = parse_expression("[[1]][0][0]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT32_TYPE)); - - let ast = parse_expression("[1, 2, 3][1:2]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!( - 
result.unwrap().unwrap(), - ParametricType(LIST_TYPE, vec![ctx.get_primitive(INT32_TYPE)]).into() - ); - - let ast = parse_expression("[1, 2, 3][1:2:2]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!( - result.unwrap().unwrap(), - ParametricType(LIST_TYPE, vec![ctx.get_primitive(INT32_TYPE)]).into() - ); - - let ast = parse_expression("[1, 2, 3][1:1.2]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("slice must be int32 type".into())); - - let ast = parse_expression("[1, 2, 3][1:none()]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("slice must have type".into())); - - let ast = parse_expression("[1, 2, 3][1.2]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("index must be either slice or int32".into())); - - let ast = parse_expression("[1, 2, 3][none()]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no value".into())); - - let ast = parse_expression("none()[1.2]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no value".into())); - - let ast = parse_expression("123[1]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!( - result, - Err("subscript is not supported for types other than list".into()) - ); - } - - #[test] - fn test_if_expr() { - let mut ctx = basic_ctx(); - ctx.add_fn( - "none", - FnDef { - args: vec![], - result: None, - }, - ); - let mut ctx = get_inference_context(ctx); - - let ast = parse_expression("1 if True else 0").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), ctx.get_primitive(INT32_TYPE)); - - let ast = parse_expression("none() if True else none()").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap(), None); - - let ast = parse_expression("none() if 1 else none()").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("test should be bool".into())); - - let ast = parse_expression("1 if True else none()").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("divergent type".into())); - } - - #[test] - fn test_list_comp() { - let mut ctx = basic_ctx(); - ctx.add_fn( - "none", - FnDef { - args: vec![], - result: None, - }, - ); - let int32 = ctx.get_primitive(INT32_TYPE); - let mut ctx = get_inference_context(ctx); - ctx.assign("z", int32.clone()).unwrap(); - - let ast = parse_expression("[x for x in [(1, 2), (2, 3), (3, 4)]][0]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!( - result.unwrap().unwrap(), - ParametricType(TUPLE_TYPE, vec![int32.clone(), int32.clone()]).into() - ); - - let ast = parse_expression("[x for (x, y) in [(1, 2), (2, 3), (3, 4)]][0]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), int32); - - let ast = - parse_expression("[x for (x, y) in [(1, 2), (2, 3), (3, 4)] if x > 0][0]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result.unwrap().unwrap(), int32); - - let ast = parse_expression("[x for (x, y) in [(1, 2), (2, 3), (3, 4)] if x][0]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("test must be bool".into())); - - let ast = parse_expression("[y for x in []][0]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("unbounded identifier".into())); - - let ast = parse_expression("[none() for x in []][0]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("no 
value".into())); - - let ast = parse_expression("[z for z in []][0]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!(result, Err("duplicated naming".into())); - - let ast = parse_expression("[x for x in [] for y in []]").unwrap(); - let result = infer_expr(&mut ctx, &ast); - assert_eq!( - result, - Err("only 1 generator statement is supported".into()) - ); - } -} diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index 5cc15c3c..a87ce31c 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -5,589 +5,5 @@ extern crate num_bigint; extern crate inkwell; extern crate rustpython_parser; -pub mod expression_inference; -pub mod inference_core; -mod magic_methods; -pub mod primitives; -pub mod typedef; -pub mod context; +mod typecheck; -use std::error::Error; -use std::fmt; -use std::path::Path; -use std::collections::HashMap; - -use num_traits::cast::ToPrimitive; - -use rustpython_parser::ast; - -use inkwell::OptimizationLevel; -use inkwell::builder::Builder; -use inkwell::context::Context; -use inkwell::module::Module; -use inkwell::targets::*; -use inkwell::types; -use inkwell::types::BasicType; -use inkwell::values; -use inkwell::{IntPredicate, FloatPredicate}; -use inkwell::basic_block; -use inkwell::passes; - - -#[derive(Debug)] -enum CompileErrorKind { - Unsupported(&'static str), - MissingTypeAnnotation, - UnknownTypeAnnotation, - IncompatibleTypes, - UnboundIdentifier, - BreakOutsideLoop, - Internal(&'static str) -} - -impl fmt::Display for CompileErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - CompileErrorKind::Unsupported(feature) - => write!(f, "The following Python feature is not supported by NAC3: {}", feature), - CompileErrorKind::MissingTypeAnnotation - => write!(f, "Missing type annotation"), - CompileErrorKind::UnknownTypeAnnotation - => write!(f, "Unknown type annotation"), - CompileErrorKind::IncompatibleTypes - => write!(f, "Incompatible types"), - CompileErrorKind::UnboundIdentifier - => write!(f, "Unbound identifier"), - CompileErrorKind::BreakOutsideLoop - => write!(f, "Break outside loop"), - CompileErrorKind::Internal(details) - => write!(f, "Internal compiler error: {}", details), - } - } -} - -#[derive(Debug)] -pub struct CompileError { - location: ast::Location, - kind: CompileErrorKind, -} - -impl fmt::Display for CompileError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}, at {}", self.kind, self.location) - } -} - -impl Error for CompileError {} - -type CompileResult = Result; - -pub struct CodeGen<'ctx> { - context: &'ctx Context, - module: Module<'ctx>, - pass_manager: passes::PassManager>, - builder: Builder<'ctx>, - current_source_location: ast::Location, - namespace: HashMap>, - break_bb: Option>, -} - -impl<'ctx> CodeGen<'ctx> { - pub fn new(context: &'ctx Context) -> CodeGen<'ctx> { - let module = context.create_module("kernel"); - - let pass_manager = passes::PassManager::create(&module); - pass_manager.add_instruction_combining_pass(); - pass_manager.add_reassociate_pass(); - pass_manager.add_gvn_pass(); - pass_manager.add_cfg_simplification_pass(); - pass_manager.add_basic_alias_analysis_pass(); - pass_manager.add_promote_memory_to_register_pass(); - pass_manager.add_instruction_combining_pass(); - pass_manager.add_reassociate_pass(); - pass_manager.initialize(); - - let i32_type = context.i32_type(); - let fn_type = i32_type.fn_type(&[i32_type.into()], false); - module.add_function("output", fn_type, None); - - CodeGen { - context, module, 
pass_manager, - builder: context.create_builder(), - current_source_location: ast::Location::default(), - namespace: HashMap::new(), - break_bb: None, - } - } - - fn set_source_location(&mut self, location: ast::Location) { - self.current_source_location = location; - } - - fn compile_error(&self, kind: CompileErrorKind) -> CompileError { - CompileError { - location: self.current_source_location, - kind - } - } - - fn get_basic_type(&self, name: &str) -> CompileResult> { - match name { - "bool" => Ok(self.context.bool_type().into()), - "int32" => Ok(self.context.i32_type().into()), - "int64" => Ok(self.context.i64_type().into()), - "float32" => Ok(self.context.f32_type().into()), - "float64" => Ok(self.context.f64_type().into()), - _ => Err(self.compile_error(CompileErrorKind::UnknownTypeAnnotation)) - } - } - - fn compile_function_def( - &mut self, - name: &str, - args: &ast::Parameters, - body: &ast::Suite, - decorator_list: &[ast::Expression], - returns: &Option, - is_async: bool, - ) -> CompileResult> { - if is_async { - return Err(self.compile_error(CompileErrorKind::Unsupported("async functions"))) - } - for decorator in decorator_list.iter() { - self.set_source_location(decorator.location); - if let ast::ExpressionType::Identifier { name } = &decorator.node { - if name != "kernel" && name != "portable" { - return Err(self.compile_error(CompileErrorKind::Unsupported("custom decorators"))) - } - } else { - return Err(self.compile_error(CompileErrorKind::Unsupported("decorator must be an identifier"))) - } - } - - let args_type = args.args.iter().map(|val| { - self.set_source_location(val.location); - if let Some(annotation) = &val.annotation { - if let ast::ExpressionType::Identifier { name } = &annotation.node { - Ok(self.get_basic_type(&name)?) - } else { - Err(self.compile_error(CompileErrorKind::Unsupported("type annotation must be an identifier"))) - } - } else { - Err(self.compile_error(CompileErrorKind::MissingTypeAnnotation)) - } - }).collect::>>()?; - let return_type = if let Some(returns) = returns { - self.set_source_location(returns.location); - if let ast::ExpressionType::Identifier { name } = &returns.node { - if name == "None" { None } else { Some(self.get_basic_type(name)?) 
} - } else { - return Err(self.compile_error(CompileErrorKind::Unsupported("type annotation must be an identifier"))) - } - } else { - None - }; - - let fn_type = match return_type { - Some(ty) => ty.fn_type(&args_type, false), - None => self.context.void_type().fn_type(&args_type, false) - }; - - let function = self.module.add_function(name, fn_type, None); - let basic_block = self.context.append_basic_block(function, "entry"); - self.builder.position_at_end(basic_block); - - for (n, arg) in args.args.iter().enumerate() { - let param = function.get_nth_param(n as u32).unwrap(); - let alloca = self.builder.build_alloca(param.get_type(), &arg.arg); - self.builder.build_store(alloca, param); - self.namespace.insert(arg.arg.clone(), alloca); - } - - self.compile_suite(body, return_type)?; - - Ok(function) - } - - fn compile_expression( - &mut self, - expression: &ast::Expression - ) -> CompileResult> { - self.set_source_location(expression.location); - - match &expression.node { - ast::ExpressionType::True => Ok(self.context.bool_type().const_int(1, false).into()), - ast::ExpressionType::False => Ok(self.context.bool_type().const_int(0, false).into()), - ast::ExpressionType::Number { value: ast::Number::Integer { value } } => { - let mut bits = value.bits(); - if value.sign() == num_bigint::Sign::Minus { - bits += 1; - } - match bits { - 0..=32 => Ok(self.context.i32_type().const_int(value.to_i32().unwrap() as _, true).into()), - 33..=64 => Ok(self.context.i64_type().const_int(value.to_i64().unwrap() as _, true).into()), - _ => Err(self.compile_error(CompileErrorKind::Unsupported("integers larger than 64 bits"))) - } - }, - ast::ExpressionType::Number { value: ast::Number::Float { value } } => { - Ok(self.context.f64_type().const_float(*value).into()) - }, - ast::ExpressionType::Identifier { name } => { - match self.namespace.get(name) { - Some(value) => Ok(self.builder.build_load(*value, name).into()), - None => Err(self.compile_error(CompileErrorKind::UnboundIdentifier)) - } - }, - ast::ExpressionType::Unop { op, a } => { - let a = self.compile_expression(&a)?; - match (op, a) { - (ast::UnaryOperator::Pos, values::BasicValueEnum::IntValue(a)) - => Ok(a.into()), - (ast::UnaryOperator::Pos, values::BasicValueEnum::FloatValue(a)) - => Ok(a.into()), - (ast::UnaryOperator::Neg, values::BasicValueEnum::IntValue(a)) - => Ok(self.builder.build_int_neg(a, "tmpneg").into()), - (ast::UnaryOperator::Neg, values::BasicValueEnum::FloatValue(a)) - => Ok(self.builder.build_float_neg(a, "tmpneg").into()), - (ast::UnaryOperator::Inv, values::BasicValueEnum::IntValue(a)) - => Ok(self.builder.build_not(a, "tmpnot").into()), - (ast::UnaryOperator::Not, values::BasicValueEnum::IntValue(a)) => { - // boolean "not" - if a.get_type().get_bit_width() != 1 { - Err(self.compile_error(CompileErrorKind::Unsupported("unimplemented unary operation"))) - } else { - Ok(self.builder.build_not(a, "tmpnot").into()) - } - }, - _ => Err(self.compile_error(CompileErrorKind::Unsupported("unimplemented unary operation"))), - } - }, - ast::ExpressionType::Binop { a, op, b } => { - let a = self.compile_expression(&a)?; - let b = self.compile_expression(&b)?; - if a.get_type() != b.get_type() { - return Err(self.compile_error(CompileErrorKind::IncompatibleTypes)); - } - use ast::Operator::*; - match (op, a, b) { - (Add, values::BasicValueEnum::IntValue(a), values::BasicValueEnum::IntValue(b)) - => Ok(self.builder.build_int_add(a, b, "tmpadd").into()), - (Sub, values::BasicValueEnum::IntValue(a), values::BasicValueEnum::IntValue(b)) - 
=> Ok(self.builder.build_int_sub(a, b, "tmpsub").into()), - (Mult, values::BasicValueEnum::IntValue(a), values::BasicValueEnum::IntValue(b)) - => Ok(self.builder.build_int_mul(a, b, "tmpmul").into()), - - (Add, values::BasicValueEnum::FloatValue(a), values::BasicValueEnum::FloatValue(b)) - => Ok(self.builder.build_float_add(a, b, "tmpadd").into()), - (Sub, values::BasicValueEnum::FloatValue(a), values::BasicValueEnum::FloatValue(b)) - => Ok(self.builder.build_float_sub(a, b, "tmpsub").into()), - (Mult, values::BasicValueEnum::FloatValue(a), values::BasicValueEnum::FloatValue(b)) - => Ok(self.builder.build_float_mul(a, b, "tmpmul").into()), - - (Div, values::BasicValueEnum::FloatValue(a), values::BasicValueEnum::FloatValue(b)) - => Ok(self.builder.build_float_div(a, b, "tmpdiv").into()), - (FloorDiv, values::BasicValueEnum::IntValue(a), values::BasicValueEnum::IntValue(b)) - => Ok(self.builder.build_int_signed_div(a, b, "tmpdiv").into()), - _ => Err(self.compile_error(CompileErrorKind::Unsupported("unimplemented binary operation"))), - } - }, - ast::ExpressionType::Compare { vals, ops } => { - let mut vals = vals.iter(); - let mut ops = ops.iter(); - - let mut result = None; - let mut a = self.compile_expression(vals.next().unwrap())?; - loop { - if let Some(op) = ops.next() { - let b = self.compile_expression(vals.next().unwrap())?; - if a.get_type() != b.get_type() { - return Err(self.compile_error(CompileErrorKind::IncompatibleTypes)); - } - let this_result = match (a, b) { - (values::BasicValueEnum::IntValue(a), values::BasicValueEnum::IntValue(b)) => { - match op { - ast::Comparison::Equal - => self.builder.build_int_compare(IntPredicate::EQ, a, b, "tmpeq"), - ast::Comparison::NotEqual - => self.builder.build_int_compare(IntPredicate::NE, a, b, "tmpne"), - ast::Comparison::Less - => self.builder.build_int_compare(IntPredicate::SLT, a, b, "tmpslt"), - ast::Comparison::LessOrEqual - => self.builder.build_int_compare(IntPredicate::SLE, a, b, "tmpsle"), - ast::Comparison::Greater - => self.builder.build_int_compare(IntPredicate::SGT, a, b, "tmpsgt"), - ast::Comparison::GreaterOrEqual - => self.builder.build_int_compare(IntPredicate::SGE, a, b, "tmpsge"), - _ => return Err(self.compile_error(CompileErrorKind::Unsupported("special comparison"))), - } - }, - (values::BasicValueEnum::FloatValue(a), values::BasicValueEnum::FloatValue(b)) => { - match op { - ast::Comparison::Equal - => self.builder.build_float_compare(FloatPredicate::OEQ, a, b, "tmpoeq"), - ast::Comparison::NotEqual - => self.builder.build_float_compare(FloatPredicate::UNE, a, b, "tmpune"), - ast::Comparison::Less - => self.builder.build_float_compare(FloatPredicate::OLT, a, b, "tmpolt"), - ast::Comparison::LessOrEqual - => self.builder.build_float_compare(FloatPredicate::OLE, a, b, "tmpole"), - ast::Comparison::Greater - => self.builder.build_float_compare(FloatPredicate::OGT, a, b, "tmpogt"), - ast::Comparison::GreaterOrEqual - => self.builder.build_float_compare(FloatPredicate::OGE, a, b, "tmpoge"), - _ => return Err(self.compile_error(CompileErrorKind::Unsupported("special comparison"))), - } - }, - _ => return Err(self.compile_error(CompileErrorKind::Unsupported("comparison of non-numerical types"))), - }; - match result { - Some(last) => { - result = Some(self.builder.build_and(last, this_result, "tmpand")); - } - None => { - result = Some(this_result); - } - } - a = b; - } else { - return Ok(result.unwrap().into()) - } - } - }, - ast::ExpressionType::Call { function, args, keywords } => { - if !keywords.is_empty() { - 
return Err(self.compile_error(CompileErrorKind::Unsupported("keyword arguments"))) - } - let args = args.iter().map(|val| self.compile_expression(val)) - .collect::>>()?; - self.set_source_location(expression.location); - if let ast::ExpressionType::Identifier { name } = &function.node { - match (name.as_str(), args[0]) { - ("int32", values::BasicValueEnum::IntValue(a)) => { - let nbits = a.get_type().get_bit_width(); - if nbits < 32 { - Ok(self.builder.build_int_s_extend(a, self.context.i32_type(), "tmpsext").into()) - } else if nbits > 32 { - Ok(self.builder.build_int_truncate(a, self.context.i32_type(), "tmptrunc").into()) - } else { - Ok(a.into()) - } - }, - ("int64", values::BasicValueEnum::IntValue(a)) => { - let nbits = a.get_type().get_bit_width(); - if nbits < 64 { - Ok(self.builder.build_int_s_extend(a, self.context.i64_type(), "tmpsext").into()) - } else { - Ok(a.into()) - } - }, - ("int32", values::BasicValueEnum::FloatValue(a)) => { - Ok(self.builder.build_float_to_signed_int(a, self.context.i32_type(), "tmpfptosi").into()) - }, - ("int64", values::BasicValueEnum::FloatValue(a)) => { - Ok(self.builder.build_float_to_signed_int(a, self.context.i64_type(), "tmpfptosi").into()) - }, - ("float32", values::BasicValueEnum::IntValue(a)) => { - Ok(self.builder.build_signed_int_to_float(a, self.context.f32_type(), "tmpsitofp").into()) - }, - ("float64", values::BasicValueEnum::IntValue(a)) => { - Ok(self.builder.build_signed_int_to_float(a, self.context.f64_type(), "tmpsitofp").into()) - }, - ("float32", values::BasicValueEnum::FloatValue(a)) => { - if a.get_type() == self.context.f64_type() { - Ok(self.builder.build_float_trunc(a, self.context.f32_type(), "tmptrunc").into()) - } else { - Ok(a.into()) - } - }, - ("float64", values::BasicValueEnum::FloatValue(a)) => { - if a.get_type() == self.context.f32_type() { - Ok(self.builder.build_float_ext(a, self.context.f64_type(), "tmpext").into()) - } else { - Ok(a.into()) - } - }, - - ("output", values::BasicValueEnum::IntValue(a)) => { - let fn_value = self.module.get_function("output").unwrap(); - Ok(self.builder.build_call(fn_value, &[a.into()], "call") - .try_as_basic_value().left().unwrap()) - }, - _ => Err(self.compile_error(CompileErrorKind::Unsupported("unrecognized call"))) - } - } else { - return Err(self.compile_error(CompileErrorKind::Unsupported("function must be an identifier"))) - } - }, - _ => return Err(self.compile_error(CompileErrorKind::Unsupported("unimplemented expression"))), - } - } - - fn compile_statement( - &mut self, - statement: &ast::Statement, - return_type: Option - ) -> CompileResult<()> { - self.set_source_location(statement.location); - - use ast::StatementType::*; - match &statement.node { - Assign { targets, value } => { - let value = self.compile_expression(value)?; - for target in targets.iter() { - self.set_source_location(target.location); - if let ast::ExpressionType::Identifier { name } = &target.node { - let builder = &self.builder; - let target = self.namespace.entry(name.clone()).or_insert_with( - || builder.build_alloca(value.get_type(), name)); - if target.get_type() != value.get_type().ptr_type(inkwell::AddressSpace::Generic) { - return Err(self.compile_error(CompileErrorKind::IncompatibleTypes)); - } - builder.build_store(*target, value); - } else { - return Err(self.compile_error(CompileErrorKind::Unsupported("assignment target must be an identifier"))) - } - } - }, - Expression { expression } => { self.compile_expression(expression)?; }, - If { test, body, orelse } => { - let test = 
self.compile_expression(test)?; - if test.get_type() != self.context.bool_type().into() { - return Err(self.compile_error(CompileErrorKind::IncompatibleTypes)); - } - - let parent = self.builder.get_insert_block().unwrap().get_parent().unwrap(); - let then_bb = self.context.append_basic_block(parent, "then"); - let else_bb = self.context.append_basic_block(parent, "else"); - let cont_bb = self.context.append_basic_block(parent, "ifcont"); - self.builder.build_conditional_branch(test.into_int_value(), then_bb, else_bb); - - self.builder.position_at_end(then_bb); - self.compile_suite(body, return_type)?; - self.builder.build_unconditional_branch(cont_bb); - - self.builder.position_at_end(else_bb); - if let Some(orelse) = orelse { - self.compile_suite(orelse, return_type)?; - } - self.builder.build_unconditional_branch(cont_bb); - self.builder.position_at_end(cont_bb); - }, - While { test, body, orelse } => { - let parent = self.builder.get_insert_block().unwrap().get_parent().unwrap(); - let test_bb = self.context.append_basic_block(parent, "test"); - self.builder.build_unconditional_branch(test_bb); - self.builder.position_at_end(test_bb); - let test = self.compile_expression(test)?; - if test.get_type() != self.context.bool_type().into() { - return Err(self.compile_error(CompileErrorKind::IncompatibleTypes)); - } - - let then_bb = self.context.append_basic_block(parent, "then"); - let else_bb = self.context.append_basic_block(parent, "else"); - let cont_bb = self.context.append_basic_block(parent, "ifcont"); - self.builder.build_conditional_branch(test.into_int_value(), then_bb, else_bb); - - self.break_bb = Some(cont_bb); - - self.builder.position_at_end(then_bb); - self.compile_suite(body, return_type)?; - self.builder.build_unconditional_branch(test_bb); - - self.builder.position_at_end(else_bb); - if let Some(orelse) = orelse { - self.compile_suite(orelse, return_type)?; - } - self.builder.build_unconditional_branch(cont_bb); - self.builder.position_at_end(cont_bb); - - self.break_bb = None; - }, - Break => { - if let Some(bb) = self.break_bb { - self.builder.build_unconditional_branch(bb); - let parent = self.builder.get_insert_block().unwrap().get_parent().unwrap(); - let unreachable_bb = self.context.append_basic_block(parent, "unreachable"); - self.builder.position_at_end(unreachable_bb); - } else { - return Err(self.compile_error(CompileErrorKind::BreakOutsideLoop)); - } - } - Return { value: Some(value) } => { - if let Some(return_type) = return_type { - let value = self.compile_expression(value)?; - if value.get_type() != return_type { - return Err(self.compile_error(CompileErrorKind::IncompatibleTypes)); - } - self.builder.build_return(Some(&value)); - } else { - return Err(self.compile_error(CompileErrorKind::IncompatibleTypes)); - } - }, - Return { value: None } => { - if !return_type.is_none() { - return Err(self.compile_error(CompileErrorKind::IncompatibleTypes)); - } - self.builder.build_return(None); - }, - Pass => (), - _ => return Err(self.compile_error(CompileErrorKind::Unsupported("special statement"))), - } - Ok(()) - } - - fn compile_suite( - &mut self, - suite: &ast::Suite, - return_type: Option - ) -> CompileResult<()> { - for statement in suite.iter() { - self.compile_statement(statement, return_type)?; - } - Ok(()) - } - - pub fn compile_toplevel(&mut self, statement: &ast::Statement) -> CompileResult<()> { - self.set_source_location(statement.location); - if let ast::StatementType::FunctionDef { - is_async, - name, - args, - body, - decorator_list, - returns, - 
} = &statement.node { - let function = self.compile_function_def(name, args, body, decorator_list, returns, *is_async)?; - self.pass_manager.run_on(&function); - Ok(()) - } else { - Err(self.compile_error(CompileErrorKind::Internal("top-level is not a function definition"))) - } - } - - pub fn print_ir(&self) { - self.module.print_to_stderr(); - } - - pub fn output(&self, filename: &str) { - //let triple = TargetTriple::create("riscv32-none-linux-gnu"); - let triple = TargetMachine::get_default_triple(); - let target = Target::from_triple(&triple) - .expect("couldn't create target from target triple"); - - let target_machine = target - .create_target_machine( - &triple, - "", - "", - OptimizationLevel::Default, - RelocMode::Default, - CodeModel::Default, - ) - .expect("couldn't create target machine"); - - target_machine - .write_to_file(&self.module, FileType::Object, Path::new(filename)) - .expect("couldn't write module to file"); - } -} diff --git a/nac3core/src/context/inference_context.rs b/nac3core/src/typecheck/context/inference_context.rs similarity index 99% rename from nac3core/src/context/inference_context.rs rename to nac3core/src/typecheck/context/inference_context.rs index e1fbff2f..b4d2644f 100644 --- a/nac3core/src/context/inference_context.rs +++ b/nac3core/src/typecheck/context/inference_context.rs @@ -1,5 +1,5 @@ use super::TopLevelContext; -use crate::typedef::*; +use super::super::typedef::*; use std::boxed::Box; use std::collections::HashMap; diff --git a/nac3core/src/context/mod.rs b/nac3core/src/typecheck/context/mod.rs similarity index 100% rename from nac3core/src/context/mod.rs rename to nac3core/src/typecheck/context/mod.rs diff --git a/nac3core/src/context/top_level_context.rs b/nac3core/src/typecheck/context/top_level_context.rs similarity index 99% rename from nac3core/src/context/top_level_context.rs rename to nac3core/src/typecheck/context/top_level_context.rs index 004b271e..d7c4ca23 100644 --- a/nac3core/src/context/top_level_context.rs +++ b/nac3core/src/typecheck/context/top_level_context.rs @@ -1,4 +1,4 @@ -use crate::typedef::*; +use super::super::typedef::*; use std::collections::HashMap; use std::rc::Rc; diff --git a/nac3core/src/inference_core.rs b/nac3core/src/typecheck/inference_core.rs similarity index 98% rename from nac3core/src/inference_core.rs rename to nac3core/src/typecheck/inference_core.rs index 3b6b7d06..5190a164 100644 --- a/nac3core/src/inference_core.rs +++ b/nac3core/src/typecheck/inference_core.rs @@ -1,5 +1,5 @@ -use crate::context::InferenceContext; -use crate::typedef::{TypeEnum::*, *}; +use super::context::InferenceContext; +use super::typedef::{TypeEnum::*, *}; use std::collections::HashMap; fn find_subst( @@ -168,8 +168,8 @@ pub fn resolve_call( #[cfg(test)] mod tests { use super::*; - use crate::context::TopLevelContext; - use crate::primitives::*; + use super::super::context::TopLevelContext; + use super::super::primitives::*; use std::rc::Rc; fn get_inference_context(ctx: TopLevelContext) -> InferenceContext { diff --git a/nac3core/src/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs similarity index 66% rename from nac3core/src/magic_methods.rs rename to nac3core/src/typecheck/magic_methods.rs index b0c248b4..7e2955f5 100644 --- a/nac3core/src/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -1,4 +1,4 @@ -use rustpython_parser::ast::{Comparison, Operator, UnaryOperator}; +use rustpython_parser::ast::{Cmpop, Operator, Unaryop}; pub fn binop_name(op: &Operator) -> &'static str { match op { @@ 
-36,23 +36,23 @@ pub fn binop_assign_name(op: &Operator) -> &'static str { } } -pub fn unaryop_name(op: &UnaryOperator) -> &'static str { +pub fn unaryop_name(op: &Unaryop) -> &'static str { match op { - UnaryOperator::Pos => "__pos__", - UnaryOperator::Neg => "__neg__", - UnaryOperator::Not => "__not__", - UnaryOperator::Inv => "__inv__", + Unaryop::UAdd => "__pos__", + Unaryop::USub => "__neg__", + Unaryop::Not => "__not__", + Unaryop::Invert => "__inv__", } } -pub fn comparison_name(op: &Comparison) -> Option<&'static str> { +pub fn comparison_name(op: &Cmpop) -> Option<&'static str> { match op { - Comparison::Less => Some("__lt__"), - Comparison::LessOrEqual => Some("__le__"), - Comparison::Greater => Some("__gt__"), - Comparison::GreaterOrEqual => Some("__ge__"), - Comparison::Equal => Some("__eq__"), - Comparison::NotEqual => Some("__ne__"), + Cmpop::Lt => Some("__lt__"), + Cmpop::LtE => Some("__le__"), + Cmpop::Gt => Some("__gt__"), + Cmpop::GtE => Some("__ge__"), + Cmpop::Eq => Some("__eq__"), + Cmpop::NotEq => Some("__ne__"), _ => None, } } diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs new file mode 100644 index 00000000..952907c2 --- /dev/null +++ b/nac3core/src/typecheck/mod.rs @@ -0,0 +1,6 @@ +pub mod inference_core; +pub mod magic_methods; +pub mod primitives; +pub mod typedef; +pub mod context; + diff --git a/nac3core/src/primitives.rs b/nac3core/src/typecheck/primitives.rs similarity index 99% rename from nac3core/src/primitives.rs rename to nac3core/src/typecheck/primitives.rs index e7777491..cd6a36b5 100644 --- a/nac3core/src/primitives.rs +++ b/nac3core/src/typecheck/primitives.rs @@ -1,5 +1,5 @@ use super::typedef::{TypeEnum::*, *}; -use crate::context::*; +use super::context::*; use std::collections::HashMap; pub const TUPLE_TYPE: ParamId = ParamId(0); diff --git a/nac3core/src/typedef.rs b/nac3core/src/typecheck/typedef.rs similarity index 100% rename from nac3core/src/typedef.rs rename to nac3core/src/typecheck/typedef.rs From ed04cef431a2e705729a762fd58410b7f3070f13 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 28 Jun 2021 14:48:04 +0800 Subject: [PATCH 002/131] added symbol resolver... 
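This patch introduces a SymbolResolver trait that the inference context queries for identifiers it cannot resolve locally. As a rough illustration of how an embedder might implement that trait, the sketch below fills it with a simple table-backed resolver; the struct name, field names and import paths are assumptions made for the example and are not part of the patch itself:

    use std::collections::HashMap;
    // Paths below are assumed for illustration; the items come from the new
    // typecheck::symbol_resolver, typecheck::location and typecheck::typedef modules.
    use nac3core::typecheck::location::Location;
    use nac3core::typecheck::symbol_resolver::{SymbolResolver, SymbolType, SymbolValue};
    use nac3core::typecheck::typedef::Type;

    /// Illustrative resolver backed by pre-collected tables of global symbols.
    struct TableResolver {
        identifiers: HashMap<String, Type>,
        type_names: HashMap<String, Type>,
        locations: HashMap<String, Location>,
    }

    impl SymbolResolver for TableResolver {
        fn get_symbol_type(&self, name: &str) -> Option<SymbolType> {
            // Prefer value bindings over type names; clone the stored type handle.
            self.identifiers
                .get(name)
                .map(|t| SymbolType::Identifier(t.clone()))
                .or_else(|| {
                    self.type_names
                        .get(name)
                        .map(|t| SymbolType::TypeName(t.clone()))
                })
        }

        fn get_symbol_value(&self, _name: &str) -> Option<SymbolValue> {
            // Compile-time constant values are not modelled in this sketch.
            None
        }

        fn get_symbol_location(&self, name: &str) -> Option<Location> {
            // Location derives Copy in this patch, so it can be returned by value.
            self.locations.get(name).copied()
        }
    }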
--- Cargo.lock | 47 +++--------- nac3core/Cargo.toml | 2 +- nac3core/src/lib.rs | 1 + ...top_level_context.rs => global_context.rs} | 35 ++------- .../typecheck/context/inference_context.rs | 71 ++++++++----------- nac3core/src/typecheck/context/mod.rs | 4 +- nac3core/src/typecheck/inference_core.rs | 4 +- nac3core/src/typecheck/location.rs | 31 ++++++++ nac3core/src/typecheck/mod.rs | 5 +- nac3core/src/typecheck/primitives.rs | 4 +- nac3core/src/typecheck/symbol_resolver.rs | 23 ++++++ 11 files changed, 108 insertions(+), 119 deletions(-) rename nac3core/src/typecheck/context/{top_level_context.rs => global_context.rs} (74%) create mode 100644 nac3core/src/typecheck/location.rs create mode 100644 nac3core/src/typecheck/symbol_resolver.rs diff --git a/Cargo.lock b/Cargo.lock index 93a535ea..a3b73348 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,16 +79,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "codespan-reporting" -version = "0.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width", -] - [[package]] name = "crunchy" version = "0.2.2" @@ -221,6 +211,15 @@ dependencies = [ "proc-macro-hack", ] +[[package]] +name = "indoc" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a75aeaaef0ce18b58056d306c27b07436fbb34b8816c53094b76dd81803136" +dependencies = [ + "unindent", +] + [[package]] name = "indoc-impl" version = "0.3.6" @@ -383,7 +382,7 @@ checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" name = "nac3core" version = "0.1.0" dependencies = [ - "codespan-reporting", + "indoc 1.0.3", "inkwell", "num-bigint 0.3.2", "num-traits", @@ -600,7 +599,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf6bbbe8f70d179260b3728e5d04eb012f4f0c7988e58c11433dd689cecaa72e" dependencies = [ "ctor", - "indoc", + "indoc 0.3.6", "inventory", "libc", "parking_lot", @@ -826,15 +825,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "termcolor" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" -dependencies = [ - "winapi-util", -] - [[package]] name = "tiny-keccak" version = "2.0.2" @@ -896,12 +886,6 @@ dependencies = [ "unic-common", ] -[[package]] -name = "unicode-width" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" - [[package]] name = "unicode-xid" version = "0.2.2" @@ -954,15 +938,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 04025e53..af84b255 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -9,5 +9,5 @@ num-bigint = "0.3" num-traits = "0.2" inkwell = { git = "https://github.com/TheDan64/inkwell", 
branch = "master", features = ["llvm10-0"] } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } -codespan-reporting = "0.11.1" +indoc = "1.0" diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index a87ce31c..ff64158d 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -4,6 +4,7 @@ extern crate num_bigint; extern crate inkwell; extern crate rustpython_parser; +extern crate indoc; mod typecheck; diff --git a/nac3core/src/typecheck/context/top_level_context.rs b/nac3core/src/typecheck/context/global_context.rs similarity index 74% rename from nac3core/src/typecheck/context/top_level_context.rs rename to nac3core/src/typecheck/context/global_context.rs index d7c4ca23..b322d7ea 100644 --- a/nac3core/src/typecheck/context/top_level_context.rs +++ b/nac3core/src/typecheck/context/global_context.rs @@ -5,7 +5,7 @@ use std::rc::Rc; /// Structure for storing top-level type definitions. /// Used for collecting type signature from source code. /// Can be converted to `InferenceContext` for type inference in functions. -pub struct TopLevelContext<'a> { +pub struct GlobalContext<'a> { /// List of primitive definitions. pub(super) primitive_defs: Vec>, /// List of class definitions. @@ -16,61 +16,39 @@ pub struct TopLevelContext<'a> { pub(super) var_defs: Vec>, /// Function name to signature mapping. pub(super) fn_table: HashMap<&'a str, FnDef>, - /// Type name to type mapping. - pub(super) sym_table: HashMap<&'a str, Type>, primitives: Vec, variables: Vec, } -impl<'a> TopLevelContext<'a> { - pub fn new(primitive_defs: Vec>) -> TopLevelContext { - let mut sym_table = HashMap::new(); +impl<'a> GlobalContext<'a> { + pub fn new(primitive_defs: Vec>) -> GlobalContext { let mut primitives = Vec::new(); for (i, t) in primitive_defs.iter().enumerate() { primitives.push(TypeEnum::PrimitiveType(PrimitiveId(i)).into()); - sym_table.insert(t.name, TypeEnum::PrimitiveType(PrimitiveId(i)).into()); } - TopLevelContext { + GlobalContext { primitive_defs, class_defs: Vec::new(), parametric_defs: Vec::new(), var_defs: Vec::new(), fn_table: HashMap::new(), - sym_table, primitives, variables: Vec::new(), } } pub fn add_class(&mut self, def: ClassDef<'a>) -> ClassId { - self.sym_table.insert( - def.base.name, - TypeEnum::ClassType(ClassId(self.class_defs.len())).into(), - ); self.class_defs.push(def); ClassId(self.class_defs.len() - 1) } pub fn add_parametric(&mut self, def: ParametricDef<'a>) -> ParamId { - let params = def - .params - .iter() - .map(|&v| Rc::new(TypeEnum::TypeVariable(v))) - .collect(); - self.sym_table.insert( - def.base.name, - TypeEnum::ParametricType(ParamId(self.parametric_defs.len()), params).into(), - ); self.parametric_defs.push(def); ParamId(self.parametric_defs.len() - 1) } pub fn add_variable(&mut self, def: VarDef<'a>) -> VariableId { - self.sym_table.insert( - def.name, - TypeEnum::TypeVariable(VariableId(self.var_defs.len())).into(), - ); self.add_variable_private(def) } @@ -128,9 +106,4 @@ impl<'a> TopLevelContext<'a> { pub fn get_variable(&self, id: VariableId) -> Type { self.variables.get(id.0).unwrap().clone() } - - pub fn get_type(&self, name: &str) -> Option { - // TODO: handle parametric types - self.sym_table.get(name).cloned() - } } diff --git a/nac3core/src/typecheck/context/inference_context.rs b/nac3core/src/typecheck/context/inference_context.rs index b4d2644f..ac643315 100644 --- a/nac3core/src/typecheck/context/inference_context.rs +++ b/nac3core/src/typecheck/context/inference_context.rs @@ -1,22 +1,22 @@ -use 
super::TopLevelContext; +use super::super::symbol_resolver::*; use super::super::typedef::*; +use super::GlobalContext; use std::boxed::Box; use std::collections::HashMap; struct ContextStack<'a> { /// stack level, starts from 0 level: u32, - /// stack of variable definitions containing (id, def, level) where `def` is the original - /// definition in `level-1`. - var_defs: Vec<(usize, VarDef<'a>, u32)>, /// stack of symbol definitions containing (name, level) where `level` is the smallest level /// where the name is assigned a value sym_def: Vec<(&'a str, u32)>, } pub struct InferenceContext<'a> { - /// top level context - top_level: TopLevelContext<'a>, + /// global context + global: GlobalContext<'a>, + /// per source symbol resolver + resolver: Box, /// list of primitive instances primitives: Vec, @@ -26,8 +26,6 @@ pub struct InferenceContext<'a> { /// an identifier might be defined earlier but has no value (for some code path), thus not /// readable. sym_table: HashMap<&'a str, (Type, bool)>, - /// resolution function reference, that may resolve unbounded identifiers to some type - resolution_fn: Box Result>, /// stack stack: ContextStack<'a>, } @@ -35,25 +33,21 @@ pub struct InferenceContext<'a> { // non-trivial implementations here impl<'a> InferenceContext<'a> { /// return a new `InferenceContext` from `TopLevelContext` and resolution function. - pub fn new( - top_level: TopLevelContext, - resolution_fn: Box Result>, - ) -> InferenceContext { - let primitives = (0..top_level.primitive_defs.len()) + pub fn new(global: GlobalContext, resolver: Box) -> InferenceContext { + let primitives = (0..global.primitive_defs.len()) .map(|v| TypeEnum::PrimitiveType(PrimitiveId(v)).into()) .collect(); - let variables = (0..top_level.var_defs.len()) + let variables = (0..global.var_defs.len()) .map(|v| TypeEnum::TypeVariable(VariableId(v)).into()) .collect(); InferenceContext { - top_level, + global, + resolver, primitives, variables, sym_table: HashMap::new(), - resolution_fn, stack: ContextStack { level: 0, - var_defs: Vec::new(), sym_def: Vec::new(), }, } @@ -61,7 +55,6 @@ impl<'a> InferenceContext<'a> { /// execute the function with new scope. /// variable assignment would be limited within the scope (not readable outside), and type - /// variable type guard would be limited within the scope. /// returns the list of variables assigned within the scope, and the result of the function pub fn with_scope(&mut self, f: F) -> (Vec<&'a str>, R) where @@ -70,15 +63,6 @@ impl<'a> InferenceContext<'a> { self.stack.level += 1; let result = f(self); self.stack.level -= 1; - while !self.stack.var_defs.is_empty() { - let (_, _, level) = self.stack.var_defs.last().unwrap(); - if *level > self.stack.level { - let (id, def, _) = self.stack.var_defs.pop().unwrap(); - self.top_level.var_defs[id] = def; - } else { - break; - } - } let mut poped_names = Vec::new(); while !self.stack.sym_def.is_empty() { let (_, level) = self.stack.sym_def.last().unwrap(); @@ -126,19 +110,16 @@ impl<'a> InferenceContext<'a> { if *x { Ok(t.clone()) } else { - Err("may not have value".into()) + Err("unbounded identifier".into()) } } else { - self.resolution_fn.as_mut()(name) + match self.resolver.get_symbol_type(name) { + Some(SymbolType::Identifier(t)) => Ok(t), + Some(SymbolType::TypeName(_)) => Err("is not a value".into()), + _ => Err("unbounded identifier".into()), + } } } - - /// restrict the bound of a type variable by replacing its definition. 
- /// used for implementing type guard - pub fn restrict(&mut self, id: VariableId, mut def: VarDef<'a>) { - std::mem::swap(self.top_level.var_defs.get_mut(id.0).unwrap(), &mut def); - self.stack.var_defs.push((id.0, def, self.stack.level)); - } } // trivial getters: @@ -151,22 +132,26 @@ impl<'a> InferenceContext<'a> { } pub fn get_fn_def(&self, name: &str) -> Option<&FnDef> { - self.top_level.fn_table.get(name) + self.global.fn_table.get(name) } pub fn get_primitive_def(&self, id: PrimitiveId) -> &TypeDef { - self.top_level.primitive_defs.get(id.0).unwrap() + self.global.primitive_defs.get(id.0).unwrap() } pub fn get_class_def(&self, id: ClassId) -> &ClassDef { - self.top_level.class_defs.get(id.0).unwrap() + self.global.class_defs.get(id.0).unwrap() } pub fn get_parametric_def(&self, id: ParamId) -> &ParametricDef { - self.top_level.parametric_defs.get(id.0).unwrap() + self.global.parametric_defs.get(id.0).unwrap() } pub fn get_variable_def(&self, id: VariableId) -> &VarDef { - self.top_level.var_defs.get(id.0).unwrap() + self.global.var_defs.get(id.0).unwrap() } - pub fn get_type(&self, name: &str) -> Option { - self.top_level.get_type(name) + pub fn get_type(&self, name: &str) -> Result { + match self.resolver.get_symbol_type(name) { + Some(SymbolType::TypeName(t)) => Ok(t), + Some(SymbolType::Identifier(_)) => Err("not a type".into()), + _ => Err("unbounded identifier".into()), + } } } diff --git a/nac3core/src/typecheck/context/mod.rs b/nac3core/src/typecheck/context/mod.rs index f59140d9..3a5d8d11 100644 --- a/nac3core/src/typecheck/context/mod.rs +++ b/nac3core/src/typecheck/context/mod.rs @@ -1,4 +1,4 @@ mod inference_context; -mod top_level_context; +mod global_context; pub use inference_context::InferenceContext; -pub use top_level_context::TopLevelContext; +pub use global_context::GlobalContext; diff --git a/nac3core/src/typecheck/inference_core.rs b/nac3core/src/typecheck/inference_core.rs index 5190a164..679c04c8 100644 --- a/nac3core/src/typecheck/inference_core.rs +++ b/nac3core/src/typecheck/inference_core.rs @@ -168,11 +168,11 @@ pub fn resolve_call( #[cfg(test)] mod tests { use super::*; - use super::super::context::TopLevelContext; + use super::super::context::GlobalContext; use super::super::primitives::*; use std::rc::Rc; - fn get_inference_context(ctx: TopLevelContext) -> InferenceContext { + fn get_inference_context(ctx: GlobalContext) -> InferenceContext { InferenceContext::new(ctx, Box::new(|_| Err("unbounded identifier".into()))) } diff --git a/nac3core/src/typecheck/location.rs b/nac3core/src/typecheck/location.rs new file mode 100644 index 00000000..0165ef0a --- /dev/null +++ b/nac3core/src/typecheck/location.rs @@ -0,0 +1,31 @@ +use rustpython_parser::ast; +use std::vec::Vec; + +#[derive(Clone, Copy, PartialEq)] +pub struct FileID(u32); + +#[derive(Clone, Copy, PartialEq)] +pub enum Location { + CodeRange(FileID, ast::Location), + Builtin +} + +pub struct FileRegistry { + files: Vec, +} + +impl FileRegistry { + pub fn new() -> FileRegistry { + FileRegistry { files: Vec::new() } + } + + pub fn add_file(&mut self, path: &str) -> FileID { + let index = self.files.len() as u32; + self.files.push(path.to_owned()); + FileID(index) + } + + pub fn query_file(&self, id: FileID) -> &str { + &self.files[id.0 as usize] + } +} diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index 952907c2..a3be5925 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,6 +1,7 @@ +pub mod context; pub mod inference_core; +pub mod 
location; pub mod magic_methods; pub mod primitives; +pub mod symbol_resolver; pub mod typedef; -pub mod context; - diff --git a/nac3core/src/typecheck/primitives.rs b/nac3core/src/typecheck/primitives.rs index cd6a36b5..94e76ee7 100644 --- a/nac3core/src/typecheck/primitives.rs +++ b/nac3core/src/typecheck/primitives.rs @@ -78,7 +78,7 @@ fn impl_order(def: &mut TypeDef, ty: &Type) { def.methods.insert("__ge__", fun); } -pub fn basic_ctx() -> TopLevelContext<'static> { +pub fn basic_ctx() -> GlobalContext<'static> { let primitives = [ TypeDef { name: "bool", @@ -102,7 +102,7 @@ pub fn basic_ctx() -> TopLevelContext<'static> { }, ] .to_vec(); - let mut ctx = TopLevelContext::new(primitives); + let mut ctx = GlobalContext::new(primitives); let b = ctx.get_primitive(BOOL_TYPE); let b_def = ctx.get_primitive_def_mut(BOOL_TYPE); diff --git a/nac3core/src/typecheck/symbol_resolver.rs b/nac3core/src/typecheck/symbol_resolver.rs new file mode 100644 index 00000000..a6eff440 --- /dev/null +++ b/nac3core/src/typecheck/symbol_resolver.rs @@ -0,0 +1,23 @@ +use super::typedef::Type; +use super::location::Location; + +pub enum SymbolType { + TypeName(Type), + Identifier(Type), +} + +pub enum SymbolValue<'a> { + I32(i32), + I64(i64), + Double(f64), + Bool(bool), + Tuple(&'a [SymbolValue<'a>]), + Bytes(&'a [u8]), +} + +pub trait SymbolResolver { + fn get_symbol_type(&self, str: &str) -> Option; + fn get_symbol_value(&self, str: &str) -> Option; + fn get_symbol_location(&self, str: &str) -> Option; + // handle function call etc. +} From e72d96f165a5401fde58e95ff57b5c927396f0c2 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 28 Jun 2021 15:05:05 +0800 Subject: [PATCH 003/131] added location information for diagnostics --- .../typecheck/context/inference_context.rs | 63 ++++++++++--------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/nac3core/src/typecheck/context/inference_context.rs b/nac3core/src/typecheck/context/inference_context.rs index ac643315..44c67b68 100644 --- a/nac3core/src/typecheck/context/inference_context.rs +++ b/nac3core/src/typecheck/context/inference_context.rs @@ -1,6 +1,8 @@ +use super::super::location::{FileID, Location}; use super::super::symbol_resolver::*; use super::super::typedef::*; use super::GlobalContext; +use rustpython_parser::ast; use std::boxed::Box; use std::collections::HashMap; @@ -17,34 +19,28 @@ pub struct InferenceContext<'a> { global: GlobalContext<'a>, /// per source symbol resolver resolver: Box, + /// File ID + file: FileID, - /// list of primitive instances - primitives: Vec, - /// list of variable instances - variables: Vec, /// identifier to (type, readable) mapping. /// an identifier might be defined earlier but has no value (for some code path), thus not /// readable. - sym_table: HashMap<&'a str, (Type, bool)>, + sym_table: HashMap<&'a str, (Type, bool, Location)>, /// stack stack: ContextStack<'a>, } // non-trivial implementations here impl<'a> InferenceContext<'a> { - /// return a new `InferenceContext` from `TopLevelContext` and resolution function. 
- pub fn new(global: GlobalContext, resolver: Box) -> InferenceContext { - let primitives = (0..global.primitive_defs.len()) - .map(|v| TypeEnum::PrimitiveType(PrimitiveId(v)).into()) - .collect(); - let variables = (0..global.var_defs.len()) - .map(|v| TypeEnum::TypeVariable(VariableId(v)).into()) - .collect(); + pub fn new( + global: GlobalContext, + resolver: Box, + file: FileID, + ) -> InferenceContext { InferenceContext { global, resolver, - primitives, - variables, + file, sym_table: HashMap::new(), stack: ContextStack { level: 0, @@ -56,7 +52,7 @@ impl<'a> InferenceContext<'a> { /// execute the function with new scope. /// variable assignment would be limited within the scope (not readable outside), and type /// returns the list of variables assigned within the scope, and the result of the function - pub fn with_scope(&mut self, f: F) -> (Vec<&'a str>, R) + pub fn with_scope(&mut self, f: F) -> (Vec<(&'a str, Type, Location)>, R) where F: FnOnce(&mut Self) -> R, { @@ -68,8 +64,8 @@ impl<'a> InferenceContext<'a> { let (_, level) = self.stack.sym_def.last().unwrap(); if *level > self.stack.level { let (name, _) = self.stack.sym_def.pop().unwrap(); - self.sym_table.remove(name).unwrap(); - poped_names.push(name); + let (t, _, l) = self.sym_table.remove(name).unwrap(); + poped_names.push((name, t, l)); } else { break; } @@ -79,8 +75,8 @@ impl<'a> InferenceContext<'a> { /// assign a type to an identifier. /// may return error if the identifier was defined but with different type - pub fn assign(&mut self, name: &'a str, ty: Type) -> Result { - if let Some((t, x)) = self.sym_table.get_mut(name) { + pub fn assign(&mut self, name: &'a str, ty: Type, loc: ast::Location) -> Result { + if let Some((t, x, _)) = self.sym_table.get_mut(name) { if t == &ty { if !*x { self.stack.sym_def.push((name, self.stack.level)); @@ -92,21 +88,19 @@ impl<'a> InferenceContext<'a> { } } else { self.stack.sym_def.push((name, self.stack.level)); - self.sym_table.insert(name, (ty.clone(), true)); + self.sym_table.insert( + name, + (ty.clone(), true, Location::CodeRange(self.file, loc)), + ); Ok(ty) } } - /// check if an identifier is already defined - pub fn defined(&self, name: &str) -> bool { - self.sym_table.get(name).is_some() - } - /// get the type of an identifier /// may return error if the identifier is not defined, and cannot be resolved with the /// resolution function. 
- pub fn resolve(&mut self, name: &str) -> Result { - if let Some((t, x)) = self.sym_table.get(name) { + pub fn resolve(&self, name: &str) -> Result { + if let Some((t, x, _)) = self.sym_table.get(name) { if *x { Ok(t.clone()) } else { @@ -120,15 +114,24 @@ impl<'a> InferenceContext<'a> { } } } + + pub fn get_location(&self, name: &str) -> Option { + if let Some((_, _, l)) = self.sym_table.get(name) { + Some(*l) + } else { + self.resolver.get_symbol_location(name) + } + } } // trivial getters: impl<'a> InferenceContext<'a> { pub fn get_primitive(&self, id: PrimitiveId) -> Type { - self.primitives.get(id.0).unwrap().clone() + TypeEnum::PrimitiveType(id).into() } + pub fn get_variable(&self, id: VariableId) -> Type { - self.variables.get(id.0).unwrap().clone() + TypeEnum::TypeVariable(id).into() } pub fn get_fn_def(&self, name: &str) -> Option<&FnDef> { From f43e225845bc09ad203b13ccdd7cb40d26141164 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 28 Jun 2021 16:32:46 +0800 Subject: [PATCH 004/131] updated inference_context rules --- nac3core/src/typecheck/context/inference_context.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nac3core/src/typecheck/context/inference_context.rs b/nac3core/src/typecheck/context/inference_context.rs index 44c67b68..235f6370 100644 --- a/nac3core/src/typecheck/context/inference_context.rs +++ b/nac3core/src/typecheck/context/inference_context.rs @@ -64,8 +64,10 @@ impl<'a> InferenceContext<'a> { let (_, level) = self.stack.sym_def.last().unwrap(); if *level > self.stack.level { let (name, _) = self.stack.sym_def.pop().unwrap(); - let (t, _, l) = self.sym_table.remove(name).unwrap(); - poped_names.push((name, t, l)); + let (t, b, l) = self.sym_table.get_mut(name).unwrap(); + // set it to be unreadable + *b = false; + poped_names.push((name, t.clone(), *l)); } else { break; } From eb12f6f08213f7e4e18c25935c63a2963a633a01 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 28 Jun 2021 16:36:24 +0800 Subject: [PATCH 005/131] updated inference_context error msg --- nac3core/src/typecheck/context/inference_context.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nac3core/src/typecheck/context/inference_context.rs b/nac3core/src/typecheck/context/inference_context.rs index 235f6370..d1c76a36 100644 --- a/nac3core/src/typecheck/context/inference_context.rs +++ b/nac3core/src/typecheck/context/inference_context.rs @@ -106,7 +106,7 @@ impl<'a> InferenceContext<'a> { if *x { Ok(t.clone()) } else { - Err("unbounded identifier".into()) + Err("may not be defined".into()) } } else { match self.resolver.get_symbol_type(name) { From 52a82e8a39d702eb74de38fcef65a49c7eebe91e Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 28 Jun 2021 17:17:08 +0800 Subject: [PATCH 006/131] updated readme --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 23c93519..00f7dcaf 100644 --- a/README.md +++ b/README.md @@ -15,20 +15,20 @@ caller to specify which methods should be compiled). After type checking, the compiler would analyse the set of functions/classes that are used and perform code generation. - -Symbol resolver: -- Str -> Nac3Type -- Str -> Value - value could be integer values, boolean values, bytes (for memcpy), function ID (full name + concrete type) ## Current Plan -1. Write out the syntax-directed type checking/inferencing rules. Fix the rule - for type variable instantiation. -2. 
Update the library dependencies and rewrite some of the type checking code. -3. Design the symbol resolver API. -4. Move tests from code to external files to cleanup the code. +Type checking: + +- [x] Basic interface for symbol resolver. +- [x] Track location information in context object (for diagnostics). +- [ ] Refactor old expression and statement type inference code. (anto) +- [ ] Error diagnostics utilities. (pca) +- [ ] Move tests to external files, write scripts for testing. (pca) +- [ ] Implement function type checking (instantiate bounded type parameters), + loop unrolling, type inference for lists with virtual objects. (pca) + From 2985b883519913ab40ee52d86cf8fe50ec52290e Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 30 Jun 2021 16:28:18 +0800 Subject: [PATCH 007/131] refactor for HM style inference... --- nac3core/src/typecheck/context.rs | 163 ++++++ .../src/typecheck/context/global_context.rs | 109 ---- .../typecheck/context/inference_context.rs | 202 ------- nac3core/src/typecheck/context/mod.rs | 4 - nac3core/src/typecheck/inference_core.rs | 525 ------------------ nac3core/src/typecheck/mod.rs | 7 +- nac3core/src/typecheck/primitives.rs | 276 +++++---- nac3core/src/typecheck/typedef.rs | 74 +-- 8 files changed, 335 insertions(+), 1025 deletions(-) create mode 100644 nac3core/src/typecheck/context.rs delete mode 100644 nac3core/src/typecheck/context/global_context.rs delete mode 100644 nac3core/src/typecheck/context/inference_context.rs delete mode 100644 nac3core/src/typecheck/context/mod.rs delete mode 100644 nac3core/src/typecheck/inference_core.rs diff --git a/nac3core/src/typecheck/context.rs b/nac3core/src/typecheck/context.rs new file mode 100644 index 00000000..c1b8bb07 --- /dev/null +++ b/nac3core/src/typecheck/context.rs @@ -0,0 +1,163 @@ +use std::collections::HashMap; + +use super::primitives::get_var; +use super::symbol_resolver::*; +use super::typedef::*; +use rustpython_parser::ast::Location; + +/// Structure for storing top-level type definitions. +/// Used for collecting type signature from source code. +/// Can be converted to `InferenceContext` for type inference in functions. +#[derive(Clone)] +pub struct GlobalContext<'a> { + /// List of type definitions. + pub type_defs: Vec>, + /// List of type variable definitions. 
+ pub var_defs: Vec>, +} + +impl<'a> GlobalContext<'a> { + pub fn new(type_defs: Vec>) -> GlobalContext { + GlobalContext { + type_defs, + var_defs: Vec::new(), + } + } + + pub fn add_type(&mut self, def: TypeDef<'a>) -> TypeId { + self.type_defs.push(def); + TypeId(self.type_defs.len() - 1) + } + + pub fn add_variable(&mut self, def: VarDef<'a>) -> VariableId { + self.var_defs.push(def); + VariableId(self.var_defs.len() - 1) + } + + pub fn get_type_def_mut(&mut self, id: TypeId) -> &mut TypeDef<'a> { + self.type_defs.get_mut(id.0).unwrap() + } + + pub fn get_type_def(&self, id: TypeId) -> &TypeDef { + self.type_defs.get(id.0).unwrap() + } + + pub fn get_var_def(&self, id: VariableId) -> &VarDef { + self.var_defs.get(id.0).unwrap() + } + + pub fn get_var_count(&self) -> usize { + self.var_defs.len() + } +} + +pub struct InferenceContext<'a> { + // a: (i, x) means that a.i = x + pub fields_assignment: HashMap>, + pub constraints: Vec<(Type, Type)>, + global: GlobalContext<'a>, + resolver: Box, + local_identifiers: HashMap<&'a str, Type>, + local_variables: Vec>, + fresh_var_id: usize, +} + +impl<'a> InferenceContext<'a> { + pub fn new( + global: GlobalContext<'a>, + resolver: Box, + ) -> InferenceContext<'a> { + let id = global.get_var_count(); + InferenceContext { + global, + fields_assignment: HashMap::new(), + constraints: Vec::new(), + resolver, + local_identifiers: HashMap::new(), + local_variables: Vec::new(), + fresh_var_id: id, + } + } + + fn get_fresh_var(&mut self) -> VariableId { + self.local_variables.push(VarDef { + name: None, + bound: Vec::new(), + }); + let id = self.fresh_var_id; + self.fresh_var_id += 1; + VariableId(id) + } + + pub fn assign_identifier(&mut self, identifier: &'a str) -> Type { + if let Some(t) = self.local_identifiers.get(identifier) { + t.clone() + } else if let Some(SymbolType::Identifier(t)) = self.resolver.get_symbol_type(identifier) { + t + } else { + get_var(self.get_fresh_var()) + } + } + + pub fn get_identifier_type(&self, identifier: &'a str) -> Result { + if let Some(t) = self.local_identifiers.get(identifier) { + Ok(t.clone()) + } else if let Some(SymbolType::Identifier(t)) = self.resolver.get_symbol_type(identifier) { + Ok(t) + } else { + Err("unbounded identifier".into()) + } + } + + pub fn get_attribute_type( + &mut self, + expr: Type, + identifier: &'a str, + location: Location, + ) -> Result { + match expr.as_ref() { + TypeEnum::TypeVariable(id) => { + if !self.fields_assignment.contains_key(id) { + self.fields_assignment.insert(*id, Vec::new()); + } + let var_id = VariableId(self.fresh_var_id); + let entry = self.fields_assignment.get_mut(&id).unwrap(); + for (attr, t, _) in entry.iter() { + if *attr == identifier { + return Ok(get_var(*t)); + } + } + entry.push((identifier, var_id, location)); + self.local_variables.push(VarDef { + name: None, + bound: Vec::new(), + }); + self.fresh_var_id += 1; + Ok(get_var(var_id)) + } + TypeEnum::ClassType(id, params) => { + let type_def = self.global.get_type_def(*id); + let field = type_def + .base + .fields + .get(identifier) + .map_or_else(|| Err("no such field".to_owned()), |v| Ok(v))?; + // function and tuple can have 0 type variables but with type parameters + // we require other types have the same number of type variables and type + // parameters in order to build a mapping + assert!(type_def.params.len() == 0 || type_def.params.len() == params.len()); + let map = type_def + .params + .clone() + .into_iter() + .zip(params.clone().into_iter()) + .collect(); + Ok(field.subst(&map)) + } + } + } 
+ + pub fn get_type_def(&self, id: TypeId) -> &TypeDef { + self.global.get_type_def(id) + } +} diff --git a/nac3core/src/typecheck/context/global_context.rs b/nac3core/src/typecheck/context/global_context.rs deleted file mode 100644 index b322d7ea..00000000 --- a/nac3core/src/typecheck/context/global_context.rs +++ /dev/null @@ -1,109 +0,0 @@ -use super::super::typedef::*; -use std::collections::HashMap; -use std::rc::Rc; - -/// Structure for storing top-level type definitions. -/// Used for collecting type signature from source code. -/// Can be converted to `InferenceContext` for type inference in functions. -pub struct GlobalContext<'a> { - /// List of primitive definitions. - pub(super) primitive_defs: Vec>, - /// List of class definitions. - pub(super) class_defs: Vec>, - /// List of parametric type definitions. - pub(super) parametric_defs: Vec>, - /// List of type variable definitions. - pub(super) var_defs: Vec>, - /// Function name to signature mapping. - pub(super) fn_table: HashMap<&'a str, FnDef>, - - primitives: Vec, - variables: Vec, -} - -impl<'a> GlobalContext<'a> { - pub fn new(primitive_defs: Vec>) -> GlobalContext { - let mut primitives = Vec::new(); - for (i, t) in primitive_defs.iter().enumerate() { - primitives.push(TypeEnum::PrimitiveType(PrimitiveId(i)).into()); - } - GlobalContext { - primitive_defs, - class_defs: Vec::new(), - parametric_defs: Vec::new(), - var_defs: Vec::new(), - fn_table: HashMap::new(), - primitives, - variables: Vec::new(), - } - } - - pub fn add_class(&mut self, def: ClassDef<'a>) -> ClassId { - self.class_defs.push(def); - ClassId(self.class_defs.len() - 1) - } - - pub fn add_parametric(&mut self, def: ParametricDef<'a>) -> ParamId { - self.parametric_defs.push(def); - ParamId(self.parametric_defs.len() - 1) - } - - pub fn add_variable(&mut self, def: VarDef<'a>) -> VariableId { - self.add_variable_private(def) - } - - pub fn add_variable_private(&mut self, def: VarDef<'a>) -> VariableId { - self.var_defs.push(def); - self.variables - .push(TypeEnum::TypeVariable(VariableId(self.var_defs.len() - 1)).into()); - VariableId(self.var_defs.len() - 1) - } - - pub fn add_fn(&mut self, name: &'a str, def: FnDef) { - self.fn_table.insert(name, def); - } - - pub fn get_fn_def(&self, name: &str) -> Option<&FnDef> { - self.fn_table.get(name) - } - - pub fn get_primitive_def_mut(&mut self, id: PrimitiveId) -> &mut TypeDef<'a> { - self.primitive_defs.get_mut(id.0).unwrap() - } - - pub fn get_primitive_def(&self, id: PrimitiveId) -> &TypeDef { - self.primitive_defs.get(id.0).unwrap() - } - - pub fn get_class_def_mut(&mut self, id: ClassId) -> &mut ClassDef<'a> { - self.class_defs.get_mut(id.0).unwrap() - } - - pub fn get_class_def(&self, id: ClassId) -> &ClassDef { - self.class_defs.get(id.0).unwrap() - } - - pub fn get_parametric_def_mut(&mut self, id: ParamId) -> &mut ParametricDef<'a> { - self.parametric_defs.get_mut(id.0).unwrap() - } - - pub fn get_parametric_def(&self, id: ParamId) -> &ParametricDef { - self.parametric_defs.get(id.0).unwrap() - } - - pub fn get_variable_def_mut(&mut self, id: VariableId) -> &mut VarDef<'a> { - self.var_defs.get_mut(id.0).unwrap() - } - - pub fn get_variable_def(&self, id: VariableId) -> &VarDef { - self.var_defs.get(id.0).unwrap() - } - - pub fn get_primitive(&self, id: PrimitiveId) -> Type { - self.primitives.get(id.0).unwrap().clone() - } - - pub fn get_variable(&self, id: VariableId) -> Type { - self.variables.get(id.0).unwrap().clone() - } -} diff --git a/nac3core/src/typecheck/context/inference_context.rs 
b/nac3core/src/typecheck/context/inference_context.rs deleted file mode 100644 index d1c76a36..00000000 --- a/nac3core/src/typecheck/context/inference_context.rs +++ /dev/null @@ -1,202 +0,0 @@ -use super::super::location::{FileID, Location}; -use super::super::symbol_resolver::*; -use super::super::typedef::*; -use super::GlobalContext; -use rustpython_parser::ast; -use std::boxed::Box; -use std::collections::HashMap; - -struct ContextStack<'a> { - /// stack level, starts from 0 - level: u32, - /// stack of symbol definitions containing (name, level) where `level` is the smallest level - /// where the name is assigned a value - sym_def: Vec<(&'a str, u32)>, -} - -pub struct InferenceContext<'a> { - /// global context - global: GlobalContext<'a>, - /// per source symbol resolver - resolver: Box, - /// File ID - file: FileID, - - /// identifier to (type, readable) mapping. - /// an identifier might be defined earlier but has no value (for some code path), thus not - /// readable. - sym_table: HashMap<&'a str, (Type, bool, Location)>, - /// stack - stack: ContextStack<'a>, -} - -// non-trivial implementations here -impl<'a> InferenceContext<'a> { - pub fn new( - global: GlobalContext, - resolver: Box, - file: FileID, - ) -> InferenceContext { - InferenceContext { - global, - resolver, - file, - sym_table: HashMap::new(), - stack: ContextStack { - level: 0, - sym_def: Vec::new(), - }, - } - } - - /// execute the function with new scope. - /// variable assignment would be limited within the scope (not readable outside), and type - /// returns the list of variables assigned within the scope, and the result of the function - pub fn with_scope(&mut self, f: F) -> (Vec<(&'a str, Type, Location)>, R) - where - F: FnOnce(&mut Self) -> R, - { - self.stack.level += 1; - let result = f(self); - self.stack.level -= 1; - let mut poped_names = Vec::new(); - while !self.stack.sym_def.is_empty() { - let (_, level) = self.stack.sym_def.last().unwrap(); - if *level > self.stack.level { - let (name, _) = self.stack.sym_def.pop().unwrap(); - let (t, b, l) = self.sym_table.get_mut(name).unwrap(); - // set it to be unreadable - *b = false; - poped_names.push((name, t.clone(), *l)); - } else { - break; - } - } - (poped_names, result) - } - - /// assign a type to an identifier. - /// may return error if the identifier was defined but with different type - pub fn assign(&mut self, name: &'a str, ty: Type, loc: ast::Location) -> Result { - if let Some((t, x, _)) = self.sym_table.get_mut(name) { - if t == &ty { - if !*x { - self.stack.sym_def.push((name, self.stack.level)); - } - *x = true; - Ok(ty) - } else { - Err("different types".into()) - } - } else { - self.stack.sym_def.push((name, self.stack.level)); - self.sym_table.insert( - name, - (ty.clone(), true, Location::CodeRange(self.file, loc)), - ); - Ok(ty) - } - } - - /// get the type of an identifier - /// may return error if the identifier is not defined, and cannot be resolved with the - /// resolution function. 
- pub fn resolve(&self, name: &str) -> Result { - if let Some((t, x, _)) = self.sym_table.get(name) { - if *x { - Ok(t.clone()) - } else { - Err("may not be defined".into()) - } - } else { - match self.resolver.get_symbol_type(name) { - Some(SymbolType::Identifier(t)) => Ok(t), - Some(SymbolType::TypeName(_)) => Err("is not a value".into()), - _ => Err("unbounded identifier".into()), - } - } - } - - pub fn get_location(&self, name: &str) -> Option { - if let Some((_, _, l)) = self.sym_table.get(name) { - Some(*l) - } else { - self.resolver.get_symbol_location(name) - } - } -} - -// trivial getters: -impl<'a> InferenceContext<'a> { - pub fn get_primitive(&self, id: PrimitiveId) -> Type { - TypeEnum::PrimitiveType(id).into() - } - - pub fn get_variable(&self, id: VariableId) -> Type { - TypeEnum::TypeVariable(id).into() - } - - pub fn get_fn_def(&self, name: &str) -> Option<&FnDef> { - self.global.fn_table.get(name) - } - pub fn get_primitive_def(&self, id: PrimitiveId) -> &TypeDef { - self.global.primitive_defs.get(id.0).unwrap() - } - pub fn get_class_def(&self, id: ClassId) -> &ClassDef { - self.global.class_defs.get(id.0).unwrap() - } - pub fn get_parametric_def(&self, id: ParamId) -> &ParametricDef { - self.global.parametric_defs.get(id.0).unwrap() - } - pub fn get_variable_def(&self, id: VariableId) -> &VarDef { - self.global.var_defs.get(id.0).unwrap() - } - pub fn get_type(&self, name: &str) -> Result { - match self.resolver.get_symbol_type(name) { - Some(SymbolType::TypeName(t)) => Ok(t), - Some(SymbolType::Identifier(_)) => Err("not a type".into()), - _ => Err("unbounded identifier".into()), - } - } -} - -impl TypeEnum { - pub fn subst(&self, map: &HashMap) -> TypeEnum { - match self { - TypeEnum::TypeVariable(id) => map.get(id).map(|v| v.as_ref()).unwrap_or(self).clone(), - TypeEnum::ParametricType(id, params) => TypeEnum::ParametricType( - *id, - params - .iter() - .map(|v| v.as_ref().subst(map).into()) - .collect(), - ), - _ => self.clone(), - } - } - - pub fn get_subst(&self, ctx: &InferenceContext) -> HashMap { - match self { - TypeEnum::ParametricType(id, params) => { - let vars = &ctx.get_parametric_def(*id).params; - vars.iter() - .zip(params) - .map(|(v, p)| (*v, p.as_ref().clone().into())) - .collect() - } - // if this proves to be slow, we can use option type - _ => HashMap::new(), - } - } - - pub fn get_base<'b: 'a, 'a>(&'a self, ctx: &'b InferenceContext) -> Option<&'b TypeDef> { - match self { - TypeEnum::PrimitiveType(id) => Some(ctx.get_primitive_def(*id)), - TypeEnum::ClassType(id) | TypeEnum::VirtualClassType(id) => { - Some(&ctx.get_class_def(*id).base) - } - TypeEnum::ParametricType(id, _) => Some(&ctx.get_parametric_def(*id).base), - _ => None, - } - } -} diff --git a/nac3core/src/typecheck/context/mod.rs b/nac3core/src/typecheck/context/mod.rs deleted file mode 100644 index 3a5d8d11..00000000 --- a/nac3core/src/typecheck/context/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -mod inference_context; -mod global_context; -pub use inference_context::InferenceContext; -pub use global_context::GlobalContext; diff --git a/nac3core/src/typecheck/inference_core.rs b/nac3core/src/typecheck/inference_core.rs deleted file mode 100644 index 679c04c8..00000000 --- a/nac3core/src/typecheck/inference_core.rs +++ /dev/null @@ -1,525 +0,0 @@ -use super::context::InferenceContext; -use super::typedef::{TypeEnum::*, *}; -use std::collections::HashMap; - -fn find_subst( - ctx: &InferenceContext, - valuation: &Option<(VariableId, Type)>, - sub: &mut HashMap, - mut a: Type, - mut b: Type, -) 
-> Result<(), String> { - // TODO: fix error messages later - if let TypeVariable(id) = a.as_ref() { - if let Some((assumption_id, t)) = valuation { - if assumption_id == id { - a = t.clone(); - } - } - } - - let mut substituted = false; - if let TypeVariable(id) = b.as_ref() { - if let Some(c) = sub.get(&id) { - b = c.clone(); - substituted = true; - } - } - - match (a.as_ref(), b.as_ref()) { - (BotType, _) => Ok(()), - (TypeVariable(id_a), TypeVariable(id_b)) => { - if substituted { - return if id_a == id_b { - Ok(()) - } else { - Err("different variables".to_string()) - }; - } - let v_a = ctx.get_variable_def(*id_a); - let v_b = ctx.get_variable_def(*id_b); - if !v_b.bound.is_empty() { - if v_a.bound.is_empty() { - return Err("unbounded a".to_string()); - } else { - let diff: Vec<_> = v_a - .bound - .iter() - .filter(|x| !v_b.bound.contains(x)) - .collect(); - if !diff.is_empty() { - return Err("different domain".to_string()); - } - } - } - sub.insert(*id_b, a.clone()); - Ok(()) - } - (TypeVariable(id_a), _) => { - let v_a = ctx.get_variable_def(*id_a); - if v_a.bound.len() == 1 && v_a.bound[0].as_ref() == b.as_ref() { - Ok(()) - } else { - Err("different domain".to_string()) - } - } - (_, TypeVariable(id_b)) => { - let v_b = ctx.get_variable_def(*id_b); - if v_b.bound.is_empty() || v_b.bound.contains(&a) { - sub.insert(*id_b, a.clone()); - Ok(()) - } else { - Err("different domain".to_string()) - } - } - (_, VirtualClassType(id_b)) => { - let mut parents; - match a.as_ref() { - ClassType(id_a) => { - parents = [*id_a].to_vec(); - } - VirtualClassType(id_a) => { - parents = [*id_a].to_vec(); - } - _ => { - return Err("cannot substitute non-class type into virtual class".to_string()); - } - }; - while !parents.is_empty() { - if *id_b == parents[0] { - return Ok(()); - } - let c = ctx.get_class_def(parents.remove(0)); - parents.extend_from_slice(&c.parents); - } - Err("not subtype".to_string()) - } - (ParametricType(id_a, param_a), ParametricType(id_b, param_b)) => { - if id_a != id_b || param_a.len() != param_b.len() { - Err("different parametric types".to_string()) - } else { - for (x, y) in param_a.iter().zip(param_b.iter()) { - find_subst(ctx, valuation, sub, x.clone(), y.clone())?; - } - Ok(()) - } - } - (_, _) => { - if a == b { - Ok(()) - } else { - Err("not equal".to_string()) - } - } - } -} - -fn resolve_call_rec( - ctx: &InferenceContext, - valuation: &Option<(VariableId, Type)>, - obj: Option, - func: &str, - args: &[Type], -) -> Result, String> { - let mut subst = obj - .as_ref() - .map(|v| v.get_subst(ctx)) - .unwrap_or_else(HashMap::new); - - let fun = match &obj { - Some(obj) => { - let base = match obj.as_ref() { - PrimitiveType(id) => &ctx.get_primitive_def(*id), - ClassType(id) | VirtualClassType(id) => &ctx.get_class_def(*id).base, - ParametricType(id, _) => &ctx.get_parametric_def(*id).base, - _ => return Err("not supported".to_string()), - }; - base.methods.get(func) - } - None => ctx.get_fn_def(func), - } - .ok_or_else(|| "no such function".to_string())?; - - if args.len() != fun.args.len() { - return Err("incorrect parameter number".to_string()); - } - for (a, b) in args.iter().zip(fun.args.iter()) { - find_subst(ctx, valuation, &mut subst, a.clone(), b.clone())?; - } - let result = fun.result.as_ref().map(|v| v.subst(&subst)); - Ok(result.map(|result| { - if let SelfType = result { - obj.unwrap() - } else { - result.into() - } - })) -} - -pub fn resolve_call( - ctx: &InferenceContext, - obj: Option, - func: &str, - args: &[Type], -) -> Result, String> { - 
resolve_call_rec(ctx, &None, obj, func, args) -} - -#[cfg(test)] -mod tests { - use super::*; - use super::super::context::GlobalContext; - use super::super::primitives::*; - use std::rc::Rc; - - fn get_inference_context(ctx: GlobalContext) -> InferenceContext { - InferenceContext::new(ctx, Box::new(|_| Err("unbounded identifier".into()))) - } - - #[test] - fn test_simple_generic() { - let mut ctx = basic_ctx(); - let v1 = ctx.add_variable(VarDef { - name: "V1", - bound: vec![ctx.get_primitive(INT32_TYPE), ctx.get_primitive(FLOAT_TYPE)], - }); - let v1 = ctx.get_variable(v1); - let v2 = ctx.add_variable(VarDef { - name: "V2", - bound: vec![ - ctx.get_primitive(BOOL_TYPE), - ctx.get_primitive(INT32_TYPE), - ctx.get_primitive(FLOAT_TYPE), - ], - }); - let v2 = ctx.get_variable(v2); - let ctx = get_inference_context(ctx); - - assert_eq!( - resolve_call(&ctx, None, "int32", &[ctx.get_primitive(FLOAT_TYPE)]), - Ok(Some(ctx.get_primitive(INT32_TYPE))) - ); - - assert_eq!( - resolve_call(&ctx, None, "int32", &[ctx.get_primitive(INT32_TYPE)],), - Ok(Some(ctx.get_primitive(INT32_TYPE))) - ); - - assert_eq!( - resolve_call(&ctx, None, "float", &[ctx.get_primitive(INT32_TYPE)]), - Ok(Some(ctx.get_primitive(FLOAT_TYPE))) - ); - - assert_eq!( - resolve_call(&ctx, None, "float", &[ctx.get_primitive(BOOL_TYPE)]), - Err("different domain".to_string()) - ); - - assert_eq!( - resolve_call(&ctx, None, "float", &[]), - Err("incorrect parameter number".to_string()) - ); - - assert_eq!( - resolve_call(&ctx, None, "float", &[v1]), - Ok(Some(ctx.get_primitive(FLOAT_TYPE))) - ); - - assert_eq!( - resolve_call(&ctx, None, "float", &[v2]), - Err("different domain".to_string()) - ); - } - - #[test] - fn test_methods() { - let mut ctx = basic_ctx(); - - let v0 = ctx.add_variable(VarDef { - name: "V0", - bound: vec![], - }); - let v0 = ctx.get_variable(v0); - - let int32 = ctx.get_primitive(INT32_TYPE); - let int64 = ctx.get_primitive(INT64_TYPE); - let ctx = get_inference_context(ctx); - - // simple cases - assert_eq!( - resolve_call(&ctx, Some(int32.clone()), "__add__", &[int32.clone()]), - Ok(Some(int32.clone())) - ); - - assert_ne!( - resolve_call(&ctx, Some(int32.clone()), "__add__", &[int32.clone()]), - Ok(Some(int64.clone())) - ); - - assert_eq!( - resolve_call(&ctx, Some(int32), "__add__", &[int64]), - Err("not equal".to_string()) - ); - - // with type variables - assert_eq!( - resolve_call(&ctx, Some(v0.clone()), "__add__", &[v0.clone()]), - Err("not supported".into()) - ); - } - - #[test] - fn test_multi_generic() { - let mut ctx = basic_ctx(); - let v0 = ctx.add_variable(VarDef { - name: "V0", - bound: vec![], - }); - let v0 = ctx.get_variable(v0); - let v1 = ctx.add_variable(VarDef { - name: "V1", - bound: vec![], - }); - let v1 = ctx.get_variable(v1); - let v2 = ctx.add_variable(VarDef { - name: "V2", - bound: vec![], - }); - let v2 = ctx.get_variable(v2); - let v3 = ctx.add_variable(VarDef { - name: "V3", - bound: vec![], - }); - let v3 = ctx.get_variable(v3); - - ctx.add_fn( - "foo", - FnDef { - args: vec![v0.clone(), v0.clone(), v1.clone()], - result: Some(v0.clone()), - }, - ); - - ctx.add_fn( - "foo1", - FnDef { - args: vec![ParametricType(TUPLE_TYPE, vec![v0.clone(), v0.clone(), v1]).into()], - result: Some(v0), - }, - ); - let ctx = get_inference_context(ctx); - - assert_eq!( - resolve_call(&ctx, None, "foo", &[v2.clone(), v2.clone(), v2.clone()]), - Ok(Some(v2.clone())) - ); - assert_eq!( - resolve_call(&ctx, None, "foo", &[v2.clone(), v2.clone(), v3.clone()]), - Ok(Some(v2.clone())) - ); - 
assert_eq!( - resolve_call(&ctx, None, "foo", &[v2.clone(), v3.clone(), v3.clone()]), - Err("different variables".to_string()) - ); - - assert_eq!( - resolve_call( - &ctx, - None, - "foo1", - &[ParametricType(TUPLE_TYPE, vec![v2.clone(), v2.clone(), v2.clone()]).into()] - ), - Ok(Some(v2.clone())) - ); - assert_eq!( - resolve_call( - &ctx, - None, - "foo1", - &[ParametricType(TUPLE_TYPE, vec![v2.clone(), v2.clone(), v3.clone()]).into()] - ), - Ok(Some(v2.clone())) - ); - assert_eq!( - resolve_call( - &ctx, - None, - "foo1", - &[ParametricType(TUPLE_TYPE, vec![v2, v3.clone(), v3]).into()] - ), - Err("different variables".to_string()) - ); - } - - #[test] - fn test_class_generics() { - let mut ctx = basic_ctx(); - - let list = ctx.get_parametric_def_mut(LIST_TYPE); - let t = Rc::new(TypeVariable(list.params[0])); - list.base.methods.insert( - "head", - FnDef { - args: vec![], - result: Some(t.clone()), - }, - ); - list.base.methods.insert( - "append", - FnDef { - args: vec![t], - result: None, - }, - ); - - let v0 = ctx.add_variable(VarDef { - name: "V0", - bound: vec![], - }); - let v0 = ctx.get_variable(v0); - let v1 = ctx.add_variable(VarDef { - name: "V1", - bound: vec![], - }); - let v1 = ctx.get_variable(v1); - let ctx = get_inference_context(ctx); - - assert_eq!( - resolve_call( - &ctx, - Some(ParametricType(LIST_TYPE, vec![v0.clone()]).into()), - "head", - &[] - ), - Ok(Some(v0.clone())) - ); - assert_eq!( - resolve_call( - &ctx, - Some(ParametricType(LIST_TYPE, vec![v0.clone()]).into()), - "append", - &[v0.clone()] - ), - Ok(None) - ); - assert_eq!( - resolve_call( - &ctx, - Some(ParametricType(LIST_TYPE, vec![v0]).into()), - "append", - &[v1] - ), - Err("different variables".to_string()) - ); - } - - #[test] - fn test_virtual_class() { - let mut ctx = basic_ctx(); - - let foo = ctx.add_class(ClassDef { - base: TypeDef { - name: "Foo", - methods: HashMap::new(), - fields: HashMap::new(), - }, - parents: vec![], - }); - - let foo1 = ctx.add_class(ClassDef { - base: TypeDef { - name: "Foo1", - methods: HashMap::new(), - fields: HashMap::new(), - }, - parents: vec![foo], - }); - - let foo2 = ctx.add_class(ClassDef { - base: TypeDef { - name: "Foo2", - methods: HashMap::new(), - fields: HashMap::new(), - }, - parents: vec![foo1], - }); - - let bar = ctx.add_class(ClassDef { - base: TypeDef { - name: "bar", - methods: HashMap::new(), - fields: HashMap::new(), - }, - parents: vec![], - }); - - ctx.add_fn( - "foo", - FnDef { - args: vec![VirtualClassType(foo).into()], - result: None, - }, - ); - ctx.add_fn( - "foo1", - FnDef { - args: vec![VirtualClassType(foo1).into()], - result: None, - }, - ); - let ctx = get_inference_context(ctx); - - assert_eq!( - resolve_call(&ctx, None, "foo", &[ClassType(foo).into()]), - Ok(None) - ); - - assert_eq!( - resolve_call(&ctx, None, "foo", &[ClassType(foo1).into()]), - Ok(None) - ); - - assert_eq!( - resolve_call(&ctx, None, "foo", &[ClassType(foo2).into()]), - Ok(None) - ); - - assert_eq!( - resolve_call(&ctx, None, "foo", &[ClassType(bar).into()]), - Err("not subtype".to_string()) - ); - - assert_eq!( - resolve_call(&ctx, None, "foo1", &[ClassType(foo1).into()]), - Ok(None) - ); - - assert_eq!( - resolve_call(&ctx, None, "foo1", &[ClassType(foo2).into()]), - Ok(None) - ); - - assert_eq!( - resolve_call(&ctx, None, "foo1", &[ClassType(foo).into()]), - Err("not subtype".to_string()) - ); - - // virtual class substitution - assert_eq!( - resolve_call(&ctx, None, "foo", &[VirtualClassType(foo).into()]), - Ok(None) - ); - assert_eq!( - resolve_call(&ctx, 
None, "foo", &[VirtualClassType(foo1).into()]), - Ok(None) - ); - assert_eq!( - resolve_call(&ctx, None, "foo", &[VirtualClassType(foo2).into()]), - Ok(None) - ); - assert_eq!( - resolve_call(&ctx, None, "foo", &[VirtualClassType(bar).into()]), - Err("not subtype".to_string()) - ); - } -} diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index a3be5925..7ab82585 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,7 +1,6 @@ -pub mod context; -pub mod inference_core; +mod context; pub mod location; -pub mod magic_methods; -pub mod primitives; +mod magic_methods; +mod primitives; pub mod symbol_resolver; pub mod typedef; diff --git a/nac3core/src/typecheck/primitives.rs b/nac3core/src/typecheck/primitives.rs index 94e76ee7..c383e955 100644 --- a/nac3core/src/typecheck/primitives.rs +++ b/nac3core/src/typecheck/primitives.rs @@ -1,184 +1,168 @@ -use super::typedef::{TypeEnum::*, *}; use super::context::*; +use super::typedef::{TypeEnum::*, *}; use std::collections::HashMap; +use std::rc::Rc; -pub const TUPLE_TYPE: ParamId = ParamId(0); -pub const LIST_TYPE: ParamId = ParamId(1); +pub const FUNC_TYPE: TypeId = TypeId(0); +pub const TUPLE_TYPE: TypeId = TypeId(1); +pub const LIST_TYPE: TypeId = TypeId(2); +pub const VIRTUAL_TYPE: TypeId = TypeId(3); +pub const NONE_TYPE: TypeId = TypeId(4); -pub const BOOL_TYPE: PrimitiveId = PrimitiveId(0); -pub const INT32_TYPE: PrimitiveId = PrimitiveId(1); -pub const INT64_TYPE: PrimitiveId = PrimitiveId(2); -pub const FLOAT_TYPE: PrimitiveId = PrimitiveId(3); +pub const BOOL_TYPE: TypeId = TypeId(5); +pub const INT32_TYPE: TypeId = TypeId(6); +pub const INT64_TYPE: TypeId = TypeId(7); +pub const FLOAT_TYPE: TypeId = TypeId(8); -fn impl_math(def: &mut TypeDef, ty: &Type) { - let result = Some(ty.clone()); - let fun = FnDef { - args: vec![ty.clone()], - result: result.clone(), - }; - def.methods.insert("__add__", fun.clone()); - def.methods.insert("__sub__", fun.clone()); - def.methods.insert("__mul__", fun.clone()); - def.methods.insert( - "__neg__", - FnDef { - args: vec![], - result, - }, - ); - def.methods.insert( - "__truediv__", - FnDef { - args: vec![ty.clone()], - result: Some(PrimitiveType(FLOAT_TYPE).into()), - }, - ); - def.methods.insert("__floordiv__", fun.clone()); - def.methods.insert("__mod__", fun.clone()); - def.methods.insert("__pow__", fun); +fn primitive(base: BaseDef) -> TypeDef { + TypeDef { + base, + parents: vec![], + params: vec![], + } } -fn impl_bits(def: &mut TypeDef, ty: &Type) { - let result = Some(ty.clone()); - let fun = FnDef { - args: vec![PrimitiveType(INT32_TYPE).into()], - result, - }; - - def.methods.insert("__lshift__", fun.clone()); - def.methods.insert("__rshift__", fun); - def.methods.insert( - "__xor__", - FnDef { - args: vec![ty.clone()], - result: Some(ty.clone()), - }, - ); +pub fn get_fn(from: Type, to: Type) -> Type { + Rc::new(ClassType(FUNC_TYPE, vec![from, to])) } -fn impl_eq(def: &mut TypeDef, ty: &Type) { - let fun = FnDef { - args: vec![ty.clone()], - result: Some(PrimitiveType(BOOL_TYPE).into()), - }; - - def.methods.insert("__eq__", fun.clone()); - def.methods.insert("__ne__", fun); +pub fn get_tuple(types: &[Type]) -> Type { + Rc::new(ClassType(TUPLE_TYPE, types.to_vec())) } -fn impl_order(def: &mut TypeDef, ty: &Type) { - let fun = FnDef { - args: vec![ty.clone()], - result: Some(PrimitiveType(BOOL_TYPE).into()), - }; +pub fn get_list(t: Type) -> Type { + Rc::new(ClassType(LIST_TYPE, vec![t])) +} - def.methods.insert("__lt__", fun.clone()); - 
def.methods.insert("__gt__", fun.clone()); - def.methods.insert("__le__", fun.clone()); - def.methods.insert("__ge__", fun); +pub fn get_virtual(t: Type) -> Type { + Rc::new(ClassType(VIRTUAL_TYPE, vec![t])) +} + +pub fn get_none() -> Type { + Rc::new(ClassType(NONE_TYPE, Vec::new())) +} + +pub fn get_bool() -> Type { + Rc::new(ClassType(BOOL_TYPE, Vec::new())) +} +pub fn get_int32() -> Type { + Rc::new(ClassType(INT32_TYPE, Vec::new())) +} + +pub fn get_int64() -> Type { + Rc::new(ClassType(INT64_TYPE, Vec::new())) +} + +pub fn get_float() -> Type { + Rc::new(ClassType(FLOAT_TYPE, Vec::new())) +} + +pub fn get_var(id: VariableId) -> Type { + Rc::new(TypeVariable(id)) +} + +fn impl_math(def: &mut BaseDef, ty: &Type) { + let fun = get_fn(ty.clone(), ty.clone()); + def.fields.insert("__add__", fun.clone()); + def.fields.insert("__sub__", fun.clone()); + def.fields.insert("__mul__", fun.clone()); + def.fields.insert("__neg__", get_fn(get_none(), ty.clone())); + def.fields + .insert("__truediv__", get_fn(ty.clone(), get_float())); + def.fields.insert("__floordiv__", fun.clone()); + def.fields.insert("__mod__", fun.clone()); + def.fields.insert("__pow__", fun); +} + +fn impl_bits(def: &mut BaseDef, ty: &Type) { + let fun = get_fn(get_int32(), ty.clone()); + + def.fields.insert("__lshift__", fun.clone()); + def.fields.insert("__rshift__", fun); + def.fields.insert("__xor__", get_fn(ty.clone(), ty.clone())); +} + +fn impl_eq(def: &mut BaseDef, ty: &Type) { + let fun = get_fn(ty.clone(), get_bool()); + + def.fields.insert("__eq__", fun.clone()); + def.fields.insert("__ne__", fun); +} + +fn impl_order(def: &mut BaseDef, ty: &Type) { + let fun = get_fn(ty.clone(), get_bool()); + + def.fields.insert("__lt__", fun.clone()); + def.fields.insert("__gt__", fun.clone()); + def.fields.insert("__le__", fun.clone()); + def.fields.insert("__ge__", fun); } pub fn basic_ctx() -> GlobalContext<'static> { - let primitives = [ - TypeDef { + let mut ctx = GlobalContext::new(vec![ + primitive(BaseDef { + name: "function", + fields: HashMap::new(), + }), + primitive(BaseDef { + name: "tuple", + fields: HashMap::new(), + }), + primitive(BaseDef { + name: "list", + fields: HashMap::new(), + }), + primitive(BaseDef { + name: "virtual", + fields: HashMap::new(), + }), + primitive(BaseDef { + name: "None", + fields: HashMap::new(), + }), + primitive(BaseDef { name: "bool", fields: HashMap::new(), - methods: HashMap::new(), - }, - TypeDef { + }), + primitive(BaseDef { name: "int32", fields: HashMap::new(), - methods: HashMap::new(), - }, - TypeDef { + }), + primitive(BaseDef { name: "int64", fields: HashMap::new(), - methods: HashMap::new(), - }, - TypeDef { + }), + primitive(BaseDef { name: "float", fields: HashMap::new(), - methods: HashMap::new(), - }, - ] - .to_vec(); - let mut ctx = GlobalContext::new(primitives); + }), + ]); - let b = ctx.get_primitive(BOOL_TYPE); - let b_def = ctx.get_primitive_def_mut(BOOL_TYPE); - impl_eq(b_def, &b); - let int32 = ctx.get_primitive(INT32_TYPE); - let int32_def = ctx.get_primitive_def_mut(INT32_TYPE); + let t = ctx.add_variable(VarDef { + name: Some("T"), + bound: vec![], + }); + ctx.get_type_def_mut(LIST_TYPE).params.push(t); + + let b_def = ctx.get_type_def_mut(BOOL_TYPE); + impl_eq(&mut b_def.base, &get_bool()); + let int32 = get_int32(); + let int32_def = &mut ctx.get_type_def_mut(INT32_TYPE).base; impl_math(int32_def, &int32); impl_bits(int32_def, &int32); impl_order(int32_def, &int32); impl_eq(int32_def, &int32); - let int64 = ctx.get_primitive(INT64_TYPE); - let int64_def = 
ctx.get_primitive_def_mut(INT64_TYPE); + let int64 = get_int64(); + let int64_def = &mut ctx.get_type_def_mut(INT64_TYPE).base; impl_math(int64_def, &int64); impl_bits(int64_def, &int64); impl_order(int64_def, &int64); impl_eq(int64_def, &int64); - let float = ctx.get_primitive(FLOAT_TYPE); - let float_def = ctx.get_primitive_def_mut(FLOAT_TYPE); + let float = get_float(); + let float_def = &mut ctx.get_type_def_mut(FLOAT_TYPE).base; impl_math(float_def, &float); impl_order(float_def, &float); impl_eq(float_def, &float); - let t = ctx.add_variable_private(VarDef { - name: "T", - bound: vec![], - }); - - ctx.add_parametric(ParametricDef { - base: TypeDef { - name: "tuple", - fields: HashMap::new(), - methods: HashMap::new(), - }, - // we have nothing for tuple, so no param def - params: vec![], - }); - - ctx.add_parametric(ParametricDef { - base: TypeDef { - name: "list", - fields: HashMap::new(), - methods: HashMap::new(), - }, - params: vec![t], - }); - - let i = ctx.add_variable_private(VarDef { - name: "I", - bound: vec![ - PrimitiveType(INT32_TYPE).into(), - PrimitiveType(INT64_TYPE).into(), - PrimitiveType(FLOAT_TYPE).into(), - ], - }); - let args = vec![TypeVariable(i).into()]; - ctx.add_fn( - "int32", - FnDef { - args: args.clone(), - result: Some(PrimitiveType(INT32_TYPE).into()), - }, - ); - ctx.add_fn( - "int64", - FnDef { - args: args.clone(), - result: Some(PrimitiveType(INT64_TYPE).into()), - }, - ); - ctx.add_fn( - "float", - FnDef { - args, - result: Some(PrimitiveType(FLOAT_TYPE).into()), - }, - ); - ctx } diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index bec61fd1..7c447b61 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -1,60 +1,64 @@ use std::collections::HashMap; +use std::collections::HashSet; use std::rc::Rc; -#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] -pub struct PrimitiveId(pub(crate) usize); - -#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] -pub struct ClassId(pub(crate) usize); - -#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] -pub struct ParamId(pub(crate) usize); - #[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] pub struct VariableId(pub(crate) usize); +#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] +pub struct TypeId(pub(crate) usize); + #[derive(PartialEq, Eq, Clone, Hash, Debug)] pub enum TypeEnum { - BotType, - SelfType, - PrimitiveType(PrimitiveId), - ClassType(ClassId), - VirtualClassType(ClassId), - ParametricType(ParamId, Vec>), + ClassType(TypeId, Vec>), TypeVariable(VariableId), } pub type Type = Rc; #[derive(Clone)] -pub struct FnDef { - // we assume methods first argument to be SelfType, - // so the first argument is not contained here - pub args: Vec, - pub result: Option, +pub struct BaseDef<'a> { + pub name: &'a str, + pub fields: HashMap<&'a str, Type>, } #[derive(Clone)] pub struct TypeDef<'a> { - pub name: &'a str, - pub fields: HashMap<&'a str, Type>, - pub methods: HashMap<&'a str, FnDef>, -} - -#[derive(Clone)] -pub struct ClassDef<'a> { - pub base: TypeDef<'a>, - pub parents: Vec, -} - -#[derive(Clone)] -pub struct ParametricDef<'a> { - pub base: TypeDef<'a>, + pub base: BaseDef<'a>, + pub parents: Vec, pub params: Vec, } #[derive(Clone)] pub struct VarDef<'a> { - pub name: &'a str, + pub name: Option<&'a str>, pub bound: Vec, } + +impl TypeEnum { + pub fn get_vars(&self, vars: &mut HashSet) { + match self { + TypeEnum::TypeVariable(id) => { + vars.insert(*id); + } + TypeEnum::ClassType(_, params) => { + for t in params.iter() { + 
t.get_vars(vars) + } + } + } + } + + pub fn subst(&self, map: &HashMap) -> Type { + match self { + TypeEnum::TypeVariable(id) => map + .get(id) + .cloned() + .unwrap_or_else(|| Rc::new(self.clone())), + TypeEnum::ClassType(id, params) => Rc::new(TypeEnum::ClassType( + *id, + params.iter().map(|t| t.subst(map)).collect(), + )), + } + } +} From 84c980fed30f3f7d0c66ecbb0250d23356980a9b Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 30 Jun 2021 17:18:56 +0800 Subject: [PATCH 008/131] type scheme instantiation --- nac3core/src/typecheck/context.rs | 34 ++++++++++++++++++++++++++++--- nac3core/src/typecheck/mod.rs | 1 + 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/nac3core/src/typecheck/context.rs b/nac3core/src/typecheck/context.rs index c1b8bb07..4c23c0d7 100644 --- a/nac3core/src/typecheck/context.rs +++ b/nac3core/src/typecheck/context.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::collections::HashSet; use super::primitives::get_var; use super::symbol_resolver::*; @@ -89,6 +90,13 @@ impl<'a> InferenceContext<'a> { VariableId(id) } + fn get_fresh_var_with_bound(&mut self, bound: Vec) -> VariableId { + self.local_variables.push(VarDef { name: None, bound }); + let id = self.fresh_var_id; + self.fresh_var_id += 1; + VariableId(id) + } + pub fn assign_identifier(&mut self, identifier: &'a str) -> Type { if let Some(t) = self.local_identifiers.get(identifier) { t.clone() @@ -141,22 +149,42 @@ impl<'a> InferenceContext<'a> { .base .fields .get(identifier) - .map_or_else(|| Err("no such field".to_owned()), |v| Ok(v))?; + .map_or_else(|| Err("no such field".to_owned()), Ok)?; // function and tuple can have 0 type variables but with type parameters // we require other types have the same number of type variables and type // parameters in order to build a mapping - assert!(type_def.params.len() == 0 || type_def.params.len() == params.len()); + assert!(type_def.params.is_empty() || type_def.params.len() == params.len()); let map = type_def .params .clone() .into_iter() .zip(params.clone().into_iter()) .collect(); - Ok(field.subst(&map)) + let field = field.subst(&map); + Ok(self.get_instance(field)) } } } + fn get_instance(&mut self, t: Type) -> Type { + let mut vars = HashSet::new(); + t.get_vars(&mut vars); + + let local_min = self.global.get_var_count(); + let bounded = vars.into_iter().filter(|id| id.0 < local_min); + let map = bounded + .map(|v| { + ( + v, + get_var( + self.get_fresh_var_with_bound(self.global.get_var_def(v).bound.clone()), + ), + ) + }) + .collect(); + t.subst(&map) + } + pub fn get_type_def(&self, id: TypeId) -> &TypeDef { self.global.get_type_def(id) } diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index 7ab82585..19afdd1c 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,3 +1,4 @@ +#![allow(dead_code)] mod context; pub mod location; mod magic_methods; From e554737b685efcfcbdba5a9b6c9f1a7945f2ac63 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 14 Jul 2021 08:12:47 +0800 Subject: [PATCH 009/131] tmp --- Cargo.lock | 39 +++-- nac3core/Cargo.toml | 2 + nac3core/src/lib.rs | 2 + nac3core/src/typecheck/mod.rs | 10 +- nac3core/src/typecheck/typedef.rs | 243 ++++++++++++++++++++++++------ 5 files changed, 237 insertions(+), 59 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a3b73348..c09cc2c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
+version = 3 + [[package]] name = "ahash" version = "0.7.4" @@ -73,6 +75,12 @@ version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787" +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + [[package]] name = "cfg-if" version = "1.0.0" @@ -107,7 +115,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "dirs-sys-next", ] @@ -143,13 +151,22 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" +[[package]] +name = "generational-arena" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d3b771574f62d0548cee0ad9057857e9fc25d7a3335f140c84f6acd0bf601" +dependencies = [ + "cfg-if 0.1.10", +] + [[package]] name = "getrandom" version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "wasi 0.9.0+wasi-snapshot-preview1", ] @@ -160,7 +177,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "libc", "wasi 0.10.2+wasi-snapshot-preview1", ] @@ -236,7 +253,7 @@ dependencies = [ [[package]] name = "inkwell" version = "0.1.0" -source = "git+https://github.com/TheDan64/inkwell#aa4de1d78471a3d2f0fda1f56801177ddf80f3bf" +source = "git+https://github.com/TheDan64/inkwell?branch=master#aa4de1d78471a3d2f0fda1f56801177ddf80f3bf" dependencies = [ "either", "inkwell_internals", @@ -250,7 +267,7 @@ dependencies = [ [[package]] name = "inkwell_internals" version = "0.3.0" -source = "git+https://github.com/TheDan64/inkwell#aa4de1d78471a3d2f0fda1f56801177ddf80f3bf" +source = "git+https://github.com/TheDan64/inkwell?branch=master#aa4de1d78471a3d2f0fda1f56801177ddf80f3bf" dependencies = [ "proc-macro2", "quote", @@ -263,7 +280,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -369,7 +386,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", ] [[package]] @@ -382,6 +399,8 @@ checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" name = "nac3core" version = "0.1.0" dependencies = [ + "ena", + "generational-arena", "indoc 1.0.3", "inkwell", "num-bigint 0.3.2", @@ -478,7 +497,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ - "cfg-if", + "cfg-if 1.0.0", "instant", "libc", "redox_syscall", @@ -729,7 +748,7 @@ checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" [[package]] name = "rustpython-ast" version = "0.1.0" -source = 
"git+https://github.com/RustPython/RustPython#bee5794b6e2b777ee343c7277954b73d06b5cb7d" +source = "git+https://github.com/RustPython/RustPython?branch=master#bee5794b6e2b777ee343c7277954b73d06b5cb7d" dependencies = [ "num-bigint 0.4.0", ] @@ -737,7 +756,7 @@ dependencies = [ [[package]] name = "rustpython-parser" version = "0.1.2" -source = "git+https://github.com/RustPython/RustPython#bee5794b6e2b777ee343c7277954b73d06b5cb7d" +source = "git+https://github.com/RustPython/RustPython?branch=master#bee5794b6e2b777ee343c7277954b73d06b5cb7d" dependencies = [ "ahash", "lalrpop", diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index af84b255..79ad0997 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -10,4 +10,6 @@ num-traits = "0.2" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } indoc = "1.0" +generational-arena = "0.2" +ena = "0.14" diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index ff64158d..9fdca9d4 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -5,6 +5,8 @@ extern crate num_bigint; extern crate inkwell; extern crate rustpython_parser; extern crate indoc; +extern crate generational_arena; +extern crate ena; mod typecheck; diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index 19afdd1c..118a79ab 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] -mod context; -pub mod location; -mod magic_methods; -mod primitives; -pub mod symbol_resolver; +// mod context; +// pub mod location; +// mod magic_methods; +// mod primitives; +// pub mod symbol_resolver; pub mod typedef; diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 7c447b61..a3f14830 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -1,64 +1,219 @@ +use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue}; +use generational_arena::{Arena, Index}; +use std::cell::RefCell; use std::collections::HashMap; -use std::collections::HashSet; -use std::rc::Rc; -#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] -pub struct VariableId(pub(crate) usize); +// Order: +// TVar +// |--> TSeq +// | |--> TTuple +// | `--> TList +// |--> TRecord +// | |--> TObj +// | `--> TVirtual +// `--> TCall +// `--> TFunc -#[derive(PartialEq, Eq, Copy, Clone, Hash, Debug)] -pub struct TypeId(pub(crate) usize); +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +struct Type(u32); -#[derive(PartialEq, Eq, Clone, Hash, Debug)] -pub enum TypeEnum { - ClassType(TypeId, Vec>), - TypeVariable(VariableId), +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +struct TypeIndex(Index); + +impl UnifyValue for TypeIndex { + type Error = NoError; + fn unify_values(_: &Self, value2: &Self) -> Result { + // WARN: depends on the implementation details of ena. + // We do not use this to do unification, instead we perform unification and assign the type + // by `union_value(key, new_value)`, which set the value as `unify_values(key.value, new_value)`. + // So, we need to return the right one. 
+ Ok(*value2) + } } -pub type Type = Rc; - -#[derive(Clone)] -pub struct BaseDef<'a> { - pub name: &'a str, - pub fields: HashMap<&'a str, Type>, +impl UnifyKey for Type { + type Value = TypeIndex; + fn index(&self) -> u32 { + self.0 + } + fn from_index(u: u32) -> Self { + Type(u) + } + fn tag() -> &'static str { + "TypeKey" + } } -#[derive(Clone)] -pub struct TypeDef<'a> { - pub base: BaseDef<'a>, - pub parents: Vec, - pub params: Vec, +type VarMapping = HashMap; + +struct Call { + posargs: Vec, + kwargs: HashMap, + ret: Type, + fn_id: usize, } -#[derive(Clone)] -pub struct VarDef<'a> { - pub name: Option<&'a str>, - pub bound: Vec, +struct FuncArg { + name: String, + ty: Type, + is_optional: bool, +} + +enum TypeEnum { + TVar { + // TODO: upper/lower bound + id: u32, + }, + TSeq { + index: HashMap, + }, + TTuple { + index: HashMap, + }, + TList { + ty: Type, + }, + TRecord { + fields: HashMap, + }, + TObj { + obj_id: usize, + instantiation: VarMapping, + }, + TVirtual { + obj_id: usize, + instantiation: VarMapping, + }, + TCall { + calls: Vec, + }, + TFunc { + args: Vec, + ret: Type, + instantiation: VarMapping, + }, } impl TypeEnum { - pub fn get_vars(&self, vars: &mut HashSet) { + fn get_int(&self) -> i32 { match self { - TypeEnum::TypeVariable(id) => { - vars.insert(*id); - } - TypeEnum::ClassType(_, params) => { - for t in params.iter() { - t.get_vars(vars) - } - } + TypeEnum::TVar { .. } => 1, + TypeEnum::TSeq { .. } => 5, + TypeEnum::TTuple { .. } => 10, + TypeEnum::TList { .. } => 15, + TypeEnum::TRecord { .. } => 7, + TypeEnum::TObj { .. } => 14, + TypeEnum::TVirtual { .. } => 21, + TypeEnum::TCall { .. } => 11, + TypeEnum::TFunc { .. } => 22, } } - pub fn subst(&self, map: &HashMap) -> Type { - match self { - TypeEnum::TypeVariable(id) => map - .get(id) - .cloned() - .unwrap_or_else(|| Rc::new(self.clone())), - TypeEnum::ClassType(id, params) => Rc::new(TypeEnum::ClassType( - *id, - params.iter().map(|t| t.subst(map)).collect(), - )), + // e.g. List <: Var + pub fn kind_le(&self, other: &TypeEnum) -> bool { + let a = self.get_int(); + let b = other.get_int(); + (a % b) == 0 + } +} + +struct ObjDef { + name: String, + fields: HashMap, +} + +struct Unifier { + unification_table: RefCell>, + type_arena: RefCell>, + obj_def_table: Vec, +} + +impl Unifier { + fn unify(&self, a: Type, b: Type) { + let (i_a, i_b) = { + let mut table = self.unification_table.borrow_mut(); + (table.probe_value(a), table.probe_value(b)) + }; + + if i_a == i_b { + return; + } + + let arena = self.type_arena.borrow(); + let mut ty_a = arena.get(i_a.0).unwrap(); + let mut ty_b = arena.get(i_b.0).unwrap(); + + // simplify our pattern matching... + if ty_a.kind_le(ty_b) { + std::mem::swap(&mut ty_a, &mut ty_b); + } + + // TODO: type variables bound check + match (ty_a, ty_b) { + (TypeEnum::TVar { .. }, TypeEnum::TVar { .. }) => { + self.unification_table.borrow_mut().union(a, b); + let old = if self.unification_table.borrow_mut().find(a) == a { + i_b + } else { + i_a + } + .0; + self.type_arena.borrow_mut().remove(old); + } + (TypeEnum::TVar { .. }, _) => { + let mut table = self.unification_table.borrow_mut(); + table.union(a, b); + table.union_value(a, i_b); + // TODO: occur check... + self.type_arena.borrow_mut().remove(i_a.0); + } + (TypeEnum::TSeq { .. }, TypeEnum::TSeq { .. }) => { + let is_a = { + let mut table = self.unification_table.borrow_mut(); + table.union(a, b); + table.find(a) == a + }; + // fighting with the borrow checker... 
+ // we have to manually drop this before we call borrow_mut + std::mem::drop(arena); + let (mut new, old) = { + // the mutable arena would be dropped before calling unify later + let mut arena = self.type_arena.borrow_mut(); + let (ty_a, ty_b) = arena.get2_mut(i_a.0, i_b.0); + let index1 = if let Some(TypeEnum::TSeq { index }) = ty_a { + std::mem::take(index) + } else { + unreachable!() + }; + let index2 = if let Some(TypeEnum::TSeq { index }) = ty_b { + std::mem::take(index) + } else { + unreachable!() + }; + if is_a { + arena.remove(i_b.0); + (index1, index2) + } else { + arena.remove(i_a.0); + (index2, index1) + } + }; + for (key, value) in old.iter() { + if let Some(ty) = new.get(key) { + self.unify(*ty, *value); + } else { + new.insert(*key, *value); + } + } + // put it back + let index = if is_a { i_a } else { i_b }.0; + if let Some(TypeEnum::TSeq { index }) = self.type_arena.borrow_mut().get_mut(index) { + *index = new; + } else { + unreachable!() + } + } + _ => unimplemented!(), } } } From 291e6426992df342d2d9db6db55697e1d1a794d7 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 14 Jul 2021 15:24:00 +0800 Subject: [PATCH 010/131] partial implementation --- nac3core/src/typecheck/typedef.rs | 459 ++++++++++++++++++++++++------ 1 file changed, 379 insertions(+), 80 deletions(-) diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index a3f14830..00264291 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -1,7 +1,9 @@ use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue}; use generational_arena::{Arena, Index}; +use std::borrow::{BorrowMut, Cow}; use std::cell::RefCell; -use std::collections::HashMap; +use std::collections::BTreeMap; +use std::mem::swap; // Order: // TVar @@ -24,9 +26,10 @@ impl UnifyValue for TypeIndex { type Error = NoError; fn unify_values(_: &Self, value2: &Self) -> Result { // WARN: depends on the implementation details of ena. - // We do not use this to do unification, instead we perform unification and assign the type - // by `union_value(key, new_value)`, which set the value as `unify_values(key.value, new_value)`. - // So, we need to return the right one. + // We do not use this to do unification, instead we perform unification + // and assign the type by `union_value(key, new_value)`, which set the + // value as `unify_values(key.value, new_value)`. So, we need to return + // the right one. Ok(*value2) } } @@ -40,15 +43,16 @@ impl UnifyKey for Type { Type(u) } fn tag() -> &'static str { - "TypeKey" + "TypeID" } } -type VarMapping = HashMap; +type Mapping = BTreeMap; +type VarMap = Mapping; struct Call { posargs: Vec, - kwargs: HashMap, + kwargs: BTreeMap, ret: Type, fn_id: usize, } @@ -59,38 +63,44 @@ struct FuncArg { is_optional: bool, } +// We use a lot of `RefCell`s here as we want to simplify our code. +// Pattern: +// 1. Take the complex data structure out +// 2. Drop the arena (required before unification) +// 3. Do unification for each type in the data structure +// 4. Put the complex data structure back... 
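The comment above describes the discipline the `Unifier` below relies on: any borrow of the `RefCell`-guarded arena has to end before `unify` recurses and borrows it again. As a rough, self-contained illustration of that rule (a toy stand-in with a plain `Vec` of maps, not the patch's actual types), the sketch below takes a structure out under a short-lived `borrow_mut()`, lets the borrow end, and only then re-borrows the arena while working through the data; keeping the first borrow alive across the second call would panic at runtime.

use std::cell::RefCell;
use std::collections::BTreeMap;

fn main() {
    let arena: RefCell<Vec<BTreeMap<u32, u32>>> = RefCell::new(vec![
        BTreeMap::from([(0, 10)]),
        BTreeMap::from([(1, 20)]),
    ]);

    // 1. take the structure out under a short-lived mutable borrow...
    let taken = {
        let mut slots = arena.borrow_mut();
        std::mem::take(&mut slots[1])
    };
    // 2. ...and let that borrow end here; holding `slots` across the
    //    `borrow_mut()` calls below would panic at runtime.

    // 3. process each entry, re-borrowing the arena as needed (the real
    //    `unify` recurses at this point).
    for (k, v) in taken {
        arena.borrow_mut()[0].insert(k, v);
    }

    // 4. the merged structure now lives in the surviving slot.
    assert_eq!(arena.borrow()[0], BTreeMap::from([(0, 10), (1, 20)]));
}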
enum TypeEnum { TVar { // TODO: upper/lower bound id: u32, }, TSeq { - index: HashMap, + map: RefCell, }, TTuple { - index: HashMap, + ty: RefCell>, }, TList { ty: Type, }, TRecord { - fields: HashMap, + fields: RefCell>, }, TObj { obj_id: usize, - instantiation: VarMapping, + fields: RefCell>, + params: RefCell, }, TVirtual { - obj_id: usize, - instantiation: VarMapping, + ty: Type, }, TCall { - calls: Vec, + calls: RefCell>, }, TFunc { - args: Vec, + args: RefCell>, ret: Type, - instantiation: VarMapping, + params: RefCell, }, } @@ -115,11 +125,27 @@ impl TypeEnum { let b = other.get_int(); (a % b) == 0 } + + pub fn get_kind_name(&self) -> &'static str { + // this function is for debugging only... + // a proper to_str implementation requires the context + match self { + TypeEnum::TVar { .. } => "TVar", + TypeEnum::TSeq { .. } => "TSeq", + TypeEnum::TTuple { .. } => "TTuple", + TypeEnum::TList { .. } => "TList", + TypeEnum::TRecord { .. } => "TRecord", + TypeEnum::TObj { .. } => "TObj", + TypeEnum::TVirtual { .. } => "TVirtual", + TypeEnum::TCall { .. } => "TCall", + TypeEnum::TFunc { .. } => "TFunc", + } + } } struct ObjDef { name: String, - fields: HashMap, + fields: Mapping, } struct Unifier { @@ -129,14 +155,14 @@ struct Unifier { } impl Unifier { - fn unify(&self, a: Type, b: Type) { - let (i_a, i_b) = { + fn unify(&self, a: Type, b: Type) -> Result<(), String> { + let (mut i_a, mut i_b) = { let mut table = self.unification_table.borrow_mut(); (table.probe_value(a), table.probe_value(b)) }; if i_a == i_b { - return; + return Ok(()); } let arena = self.type_arena.borrow(); @@ -145,75 +171,348 @@ impl Unifier { // simplify our pattern matching... if ty_a.kind_le(ty_b) { - std::mem::swap(&mut ty_a, &mut ty_b); + swap(&mut i_a, &mut i_b); + swap(&mut ty_a, &mut ty_b); } - // TODO: type variables bound check - match (ty_a, ty_b) { - (TypeEnum::TVar { .. }, TypeEnum::TVar { .. }) => { - self.unification_table.borrow_mut().union(a, b); - let old = if self.unification_table.borrow_mut().find(a) == a { - i_b - } else { - i_a - } - .0; - self.type_arena.borrow_mut().remove(old); - } - (TypeEnum::TVar { .. }, _) => { - let mut table = self.unification_table.borrow_mut(); - table.union(a, b); - table.union_value(a, i_b); - // TODO: occur check... - self.type_arena.borrow_mut().remove(i_a.0); - } - (TypeEnum::TSeq { .. }, TypeEnum::TSeq { .. }) => { - let is_a = { - let mut table = self.unification_table.borrow_mut(); - table.union(a, b); - table.find(a) == a - }; - // fighting with the borrow checker... - // we have to manually drop this before we call borrow_mut - std::mem::drop(arena); - let (mut new, old) = { - // the mutable arena would be dropped before calling unify later - let mut arena = self.type_arena.borrow_mut(); - let (ty_a, ty_b) = arena.get2_mut(i_a.0, i_b.0); - let index1 = if let Some(TypeEnum::TSeq { index }) = ty_a { - std::mem::take(index) - } else { - unreachable!() - }; - let index2 = if let Some(TypeEnum::TSeq { index }) = ty_b { - std::mem::take(index) - } else { - unreachable!() - }; - if is_a { - arena.remove(i_b.0); - (index1, index2) - } else { - arena.remove(i_a.0); - (index2, index1) + match ty_a { + TypeEnum::TVar { .. } => { + match ty_b { + TypeEnum::TVar { .. 
} => { + // TODO: type variables bound check + let old = { + let mut table = self.unification_table.borrow_mut(); + table.union(a, b); + if table.find(a) == a { + i_b + } else { + i_a + } + } + .0; + drop(arena); + self.type_arena.borrow_mut().remove(old); } - }; - for (key, value) in old.iter() { - if let Some(ty) = new.get(key) { - self.unify(*ty, *value); - } else { - new.insert(*key, *value); + _ => { + // TODO: type variables bound check and occur check + drop(arena); + self.set_a_to_b(a, b); } } - // put it back - let index = if is_a { i_a } else { i_b }.0; - if let Some(TypeEnum::TSeq { index }) = self.type_arena.borrow_mut().get_mut(index) { - *index = new; + } + TypeEnum::TSeq { map: map1 } => { + match ty_b { + TypeEnum::TSeq { map: map2 } => { + // we get the tables out first. + // unification requires mutable access to the underlying + // structs, so we have to manaully drop the arena first, + // do the unification, and then get a mutable reference + // and put them back... + let mut map1 = map1.take(); + let map2 = map2.take(); + drop(arena); + self.set_a_to_b(a, b); + // unify them to map1 + for (key, value) in map2.iter() { + if let Some(ty) = map1.get(key) { + self.unify(*ty, *value)?; + } else { + map1.insert(*key, *value); + } + } + if let Some(TypeEnum::TSeq { map: mapping }) = + self.type_arena.borrow().get(i_b.0) + { + *mapping.borrow_mut() = map1; + } else { + unreachable!() + } + } + TypeEnum::TTuple { ty: types } => { + let map = map1.take(); + let types = types.take(); + drop(arena); + self.set_a_to_b(a, b); + let len = types.len() as u32; + for (k, v) in map.iter() { + if *k >= len { + return Err(format!( + "Tuple index out of range. (Length: {}, Index: {})", + types.len(), + k + )); + } + self.unify(*v, types[*k as usize])?; + } + + if let Some(TypeEnum::TTuple { ty }) = self.type_arena.borrow().get(i_b.0) { + *ty.borrow_mut() = types; + } else { + unreachable!() + } + } + TypeEnum::TList { ty } => { + let map = map1.take(); + let ty = *ty; + drop(arena); + self.set_a_to_b(a, b); + for v in map.values() { + self.unify(*v, ty)?; + } + } + _ => { + return self.report_kind_error(ty_a, ty_b); + } + } + } + TypeEnum::TTuple { ty: ty1 } => { + if let TypeEnum::TTuple { ty: ty2 } = ty_b { + let ty1 = ty1.take(); + let ty2 = ty2.take(); + if ty1.len() != ty2.len() { + return Err(format!( + "Cannot unify tuples with length {} and {}", + ty1.len(), + ty2.len() + )); + } + drop(arena); + self.set_a_to_b(a, b); + for (a, b) in ty1.iter().zip(ty2.iter()) { + self.unify(*a, *b)?; + } + if let Some(TypeEnum::TTuple { ty }) = + self.type_arena.borrow_mut().get_mut(i_b.0) + { + *ty.borrow_mut().get_mut() = ty1; + } else { + unreachable!() + } } else { - unreachable!() + return self.report_kind_error(ty_a, ty_b); + } + } + TypeEnum::TList { ty: ty1 } => { + if let TypeEnum::TList { ty: ty2 } = ty_b { + let ty1 = *ty1; + let ty2 = *ty2; + drop(arena); + self.set_a_to_b(a, b); + self.unify(ty1, ty2)?; + } else { + return self.report_kind_error(ty_a, ty_b); + } + } + TypeEnum::TRecord { fields: fields1 } => { + match ty_b { + TypeEnum::TRecord { fields: fields2 } => { + let mut fields1 = fields1.take(); + let fields2 = fields2.take(); + drop(arena); + self.set_a_to_b(a, b); + for (key, value) in fields2.iter() { + if let Some(ty) = fields1.get(key) { + self.unify(*ty, *value)?; + } else { + fields1.insert(key.clone(), *value); + } + } + if let Some(TypeEnum::TRecord { fields }) = + self.type_arena.borrow().get(i_b.0) + { + *fields.borrow_mut() = fields1; + } else { + unreachable!() + 
} + } + // obj... + _ => { + return self.report_kind_error(ty_a, ty_b); + } + } + } + _ => unimplemented!(), + } + Ok(()) + } + + fn set_a_to_b(&self, a: Type, b: Type) { + // unify a and b together, and set the value to b's value this would + // also deallocate a's previous value in the arena to save space... + let mut table = self.unification_table.borrow_mut(); + let i_a = table.probe_value(a); + let i_b = table.probe_value(b); + table.union(a, b); + table.union_value(a, i_b); + self.type_arena.borrow_mut().remove(i_a.0); + } + + fn report_kind_error(&self, a: &TypeEnum, b: &TypeEnum) -> Result<(), String> { + Err(format!( + "Cannot unify {} with {}", + a.get_kind_name(), + b.get_kind_name() + )) + } + + fn subst(&self, a: Type, mapping: &VarMap) -> Option { + let index = self.unification_table.borrow_mut().probe_value(a); + let arena = self.type_arena.borrow(); + let ty = arena.get(index.0).unwrap(); + // this function would only be called when we instantiate functions. + // function type signature should ONLY contain concrete types and type + // variables, i.e. things like TRecord, TCall should not occur, and we + // should be safe to not implement the substitution for those variants. + match ty { + TypeEnum::TVar { id } => mapping.get(&id).cloned(), + TypeEnum::TSeq { map } => { + let map = map.take(); + drop(arena); + let new_map = self.subst_map(&map, mapping); + if let Some(TypeEnum::TSeq { map: m }) = self.type_arena.borrow().get(index.0) { + *m.borrow_mut() = map; + } else { + unreachable!(); + }; + new_map.map(|m| { + let index = self + .type_arena + .borrow_mut() + .insert(TypeEnum::TSeq { map: m.into() }); + self.unification_table + .borrow_mut() + .new_key(TypeIndex(index)) + }) + } + TypeEnum::TTuple { ty } => { + let ty = ty.take(); + drop(arena); + let mut new_ty = None; + for (i, t) in ty.iter().enumerate() { + if let Some(t1) = self.subst(*t, mapping) { + if new_ty.is_none() { + new_ty = Some(ty.clone()); + } + new_ty.as_mut().unwrap()[i] = t1; + } + } + if let Some(TypeEnum::TTuple { ty: t }) = self.type_arena.borrow().get(index.0) { + *t.borrow_mut() = ty; + } else { + unreachable!(); + }; + new_ty.map(|t| { + let index = self + .type_arena + .borrow_mut() + .insert(TypeEnum::TTuple { ty: t.into() }); + self.unification_table + .borrow_mut() + .new_key(TypeIndex(index)) + }) + } + TypeEnum::TList { ty } => { + let ty = *ty; + drop(arena); + self.subst(ty, mapping).map(|t| { + let index = self + .type_arena + .borrow_mut() + .insert(TypeEnum::TList { ty: t }); + self.unification_table + .borrow_mut() + .new_key(TypeIndex(index)) + }) + } + TypeEnum::TVirtual { ty } => { + let ty = *ty; + drop(arena); + self.subst(ty, mapping).map(|t| { + let index = self + .type_arena + .borrow_mut() + .insert(TypeEnum::TVirtual { ty: t }); + self.unification_table + .borrow_mut() + .new_key(TypeIndex(index)) + }) + } + TypeEnum::TObj { + obj_id, + fields, + params, + } => { + let obj_id = *obj_id; + let params = params.take(); + let fields = fields.take(); + drop(arena); + let mut new_params = None; + let mut new_fields = None; + // Type variables in field types must be present in the type parameter. + // If the mapping does not contain any type variables in the + // parameter list, we don't need to substitute the fields. + // This is also used to prevent infinite substitution... 
+ let need_subst = params.values().any(|v| { + let index = self.unification_table.borrow_mut().probe_value(*v); + let arena = self.type_arena.borrow(); + let ty = arena.get(index.0).unwrap(); + if let TypeEnum::TVar { id } = ty { + mapping.contains_key(id) + } else { + false + } + }); + if need_subst { + new_params = self + .subst_map(¶ms, mapping) + .or_else(|| Some(params.clone())); + new_fields = self + .subst_map(&fields, mapping) + .or_else(|| Some(fields.clone())); + } + if let Some(TypeEnum::TObj { + params: p, + fields: f, + .. + }) = self.type_arena.borrow().get(index.0) + { + *p.borrow_mut() = params; + *f.borrow_mut() = fields; + } else { + unreachable!(); + }; + if need_subst { + let index = self.type_arena.borrow_mut().insert(TypeEnum::TObj { + obj_id, + params: new_params.unwrap().into(), + fields: new_fields.unwrap().into(), + }); + Some( + self.unification_table + .borrow_mut() + .new_key(TypeIndex(index)), + ) + } else { + None } } _ => unimplemented!(), } } + + fn subst_map(&self, map: &Mapping, mapping: &VarMap) -> Option> + where + K: std::cmp::Ord + std::clone::Clone, + { + let mut map2 = None; + for (k, v) in map.iter() { + if let Some(v1) = self.subst(*v, mapping) { + if map2.is_none() { + map2 = Some(map.clone()); + } + *map2.as_mut().unwrap().get_mut(k).unwrap() = v1; + } + } + map2 + } } From e8c5189fcedff79a2b05604a4edd92e26126e916 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 14 Jul 2021 15:58:58 +0800 Subject: [PATCH 011/131] simplified code with Rc> --- nac3core/src/typecheck/typedef.rs | 292 +++++++++++------------------- 1 file changed, 110 insertions(+), 182 deletions(-) diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 00264291..161eb50d 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -1,9 +1,9 @@ use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue}; use generational_arena::{Arena, Index}; -use std::borrow::{BorrowMut, Cow}; use std::cell::RefCell; use std::collections::BTreeMap; use std::mem::swap; +use std::rc::Rc; // Order: // TVar @@ -63,44 +63,41 @@ struct FuncArg { is_optional: bool, } -// We use a lot of `RefCell`s here as we want to simplify our code. -// Pattern: -// 1. Take the complex data structure out -// 2. Drop the arena (required before unification) -// 3. Do unification for each type in the data structure -// 4. Put the complex data structure back... +// We use a lot of `Rc`/`RefCell`s here as we want to simplify our code. +// We may not really need so much `Rc`s, but we would have to do complicated +// stuffs otherwise. 
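This revision sidesteps the take-out/put-back dance by storing `Rc<RefCell<TypeEnum>>` in the arena: a cheap clone of the handle can outlive the arena borrow. Below is a minimal sketch of that idea, with a plain `Vec` standing in for the `generational_arena::Arena` used here; the names and string values are invented for illustration only.

use std::cell::RefCell;
use std::rc::Rc;

fn main() {
    // A RefCell-guarded "arena" whose entries are shared handles.
    let arena: RefCell<Vec<Rc<RefCell<String>>>> =
        RefCell::new(vec![Rc::new(RefCell::new(String::from("TVar")))]);

    // Clone the handle while the arena borrow is alive...
    let entry = arena.borrow()[0].clone();
    // ...the arena borrow ends with that statement, so later borrow_mut()
    // calls on the arena (e.g. during recursive unification) cannot conflict
    // with this handle.
    entry.borrow_mut().push_str(" -> unified");

    arena.borrow_mut().push(Rc::new(RefCell::new(String::from("TSeq"))));
    assert_eq!(arena.borrow()[0].borrow().as_str(), "TVar -> unified");
}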
enum TypeEnum { TVar { // TODO: upper/lower bound id: u32, }, TSeq { - map: RefCell, + map: VarMap, }, TTuple { - ty: RefCell>, + ty: Vec, }, TList { ty: Type, }, TRecord { - fields: RefCell>, + fields: Mapping, }, TObj { obj_id: usize, - fields: RefCell>, - params: RefCell, + fields: Mapping, + params: VarMap, }, TVirtual { ty: Type, }, TCall { - calls: RefCell>, + calls: Vec, }, TFunc { - args: RefCell>, + args: Vec, ret: Type, - params: RefCell, + params: VarMap, }, } @@ -150,7 +147,7 @@ struct ObjDef { struct Unifier { unification_table: RefCell>, - type_arena: RefCell>, + type_arena: RefCell>>>, obj_def_table: Vec, } @@ -165,19 +162,26 @@ impl Unifier { return Ok(()); } - let arena = self.type_arena.borrow(); - let mut ty_a = arena.get(i_a.0).unwrap(); - let mut ty_b = arena.get(i_b.0).unwrap(); + let (ty_a_cell, ty_b_cell) = { + let arena = self.type_arena.borrow(); + ( + arena.get(i_a.0).unwrap().clone(), + arena.get(i_b.0).unwrap().clone(), + ) + }; + + let mut ty_a = ty_a_cell.borrow(); + let mut ty_b = ty_b_cell.borrow(); // simplify our pattern matching... - if ty_a.kind_le(ty_b) { + if ty_a.kind_le(&ty_b) { swap(&mut i_a, &mut i_b); swap(&mut ty_a, &mut ty_b); } - match ty_a { + match &*ty_a { TypeEnum::TVar { .. } => { - match ty_b { + match *ty_b { TypeEnum::TVar { .. } => { // TODO: type variables bound check let old = { @@ -190,51 +194,37 @@ impl Unifier { } } .0; - drop(arena); self.type_arena.borrow_mut().remove(old); } _ => { // TODO: type variables bound check and occur check - drop(arena); self.set_a_to_b(a, b); } } } TypeEnum::TSeq { map: map1 } => { - match ty_b { + match &*ty_b { TypeEnum::TSeq { map: map2 } => { - // we get the tables out first. - // unification requires mutable access to the underlying - // structs, so we have to manaully drop the arena first, - // do the unification, and then get a mutable reference - // and put them back... - let mut map1 = map1.take(); - let map2 = map2.take(); - drop(arena); self.set_a_to_b(a, b); - // unify them to map1 - for (key, value) in map2.iter() { - if let Some(ty) = map1.get(key) { - self.unify(*ty, *value)?; - } else { - map1.insert(*key, *value); - } - } - if let Some(TypeEnum::TSeq { map: mapping }) = - self.type_arena.borrow().get(i_b.0) + drop(ty_a); + if let TypeEnum::TSeq { map: map1 } = &mut *ty_a_cell.as_ref().borrow_mut() { - *mapping.borrow_mut() = map1; + // unify them to map1 + for (key, value) in map2.iter() { + if let Some(ty) = map1.get(key) { + self.unify(*ty, *value)?; + } else { + map1.insert(*key, *value); + } + } } else { unreachable!() } } TypeEnum::TTuple { ty: types } => { - let map = map1.take(); - let types = types.take(); - drop(arena); self.set_a_to_b(a, b); let len = types.len() as u32; - for (k, v) in map.iter() { + for (k, v) in map1.iter() { if *k >= len { return Err(format!( "Tuple index out of range. 
(Length: {}, Index: {})", @@ -244,31 +234,20 @@ impl Unifier { } self.unify(*v, types[*k as usize])?; } - - if let Some(TypeEnum::TTuple { ty }) = self.type_arena.borrow().get(i_b.0) { - *ty.borrow_mut() = types; - } else { - unreachable!() - } } TypeEnum::TList { ty } => { - let map = map1.take(); - let ty = *ty; - drop(arena); self.set_a_to_b(a, b); - for v in map.values() { - self.unify(*v, ty)?; + for v in map1.values() { + self.unify(*v, *ty)?; } } _ => { - return self.report_kind_error(ty_a, ty_b); + return self.report_kind_error(&*ty_a, &*ty_b); } } } TypeEnum::TTuple { ty: ty1 } => { - if let TypeEnum::TTuple { ty: ty2 } = ty_b { - let ty1 = ty1.take(); - let ty2 = ty2.take(); + if let TypeEnum::TTuple { ty: ty2 } = &*ty_b { if ty1.len() != ty2.len() { return Err(format!( "Cannot unify tuples with length {} and {}", @@ -276,58 +255,44 @@ impl Unifier { ty2.len() )); } - drop(arena); self.set_a_to_b(a, b); for (a, b) in ty1.iter().zip(ty2.iter()) { self.unify(*a, *b)?; } - if let Some(TypeEnum::TTuple { ty }) = - self.type_arena.borrow_mut().get_mut(i_b.0) - { - *ty.borrow_mut().get_mut() = ty1; - } else { - unreachable!() - } } else { - return self.report_kind_error(ty_a, ty_b); + return self.report_kind_error(&*ty_a, &*ty_b); } } TypeEnum::TList { ty: ty1 } => { - if let TypeEnum::TList { ty: ty2 } = ty_b { - let ty1 = *ty1; - let ty2 = *ty2; - drop(arena); + if let TypeEnum::TList { ty: ty2 } = *ty_b { self.set_a_to_b(a, b); - self.unify(ty1, ty2)?; + self.unify(*ty1, ty2)?; } else { - return self.report_kind_error(ty_a, ty_b); + return self.report_kind_error(&*ty_a, &*ty_b); } } - TypeEnum::TRecord { fields: fields1 } => { - match ty_b { + TypeEnum::TRecord { .. } => { + match &*ty_b { TypeEnum::TRecord { fields: fields2 } => { - let mut fields1 = fields1.take(); - let fields2 = fields2.take(); - drop(arena); self.set_a_to_b(a, b); - for (key, value) in fields2.iter() { - if let Some(ty) = fields1.get(key) { - self.unify(*ty, *value)?; - } else { - fields1.insert(key.clone(), *value); - } - } - if let Some(TypeEnum::TRecord { fields }) = - self.type_arena.borrow().get(i_b.0) + drop(ty_a); + if let TypeEnum::TRecord { fields: fields1 } = + &mut *ty_a_cell.as_ref().borrow_mut() { - *fields.borrow_mut() = fields1; + for (key, value) in fields2.iter() { + if let Some(ty) = fields1.get(key) { + self.unify(*ty, *value)?; + } else { + fields1.insert(key.clone(), *value); + } + } } else { unreachable!() } } // obj... _ => { - return self.report_kind_error(ty_a, ty_b); + return self.report_kind_error(&*ty_a, &*ty_b); } } } @@ -357,36 +322,27 @@ impl Unifier { fn subst(&self, a: Type, mapping: &VarMap) -> Option { let index = self.unification_table.borrow_mut().probe_value(a); - let arena = self.type_arena.borrow(); - let ty = arena.get(index.0).unwrap(); + let ty_cell = { + let arena = self.type_arena.borrow(); + arena.get(index.0).unwrap().clone() + }; + let ty = ty_cell.borrow(); // this function would only be called when we instantiate functions. // function type signature should ONLY contain concrete types and type // variables, i.e. things like TRecord, TCall should not occur, and we // should be safe to not implement the substitution for those variants. 
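        // e.g. a signature such as `fn(x: List[T]) -> T` only ever mentions type
        // variables and concrete constructors like `List`, so the variants handled
        // below are the only ones we expect to see here.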
- match ty { + match &*ty { TypeEnum::TVar { id } => mapping.get(&id).cloned(), - TypeEnum::TSeq { map } => { - let map = map.take(); - drop(arena); - let new_map = self.subst_map(&map, mapping); - if let Some(TypeEnum::TSeq { map: m }) = self.type_arena.borrow().get(index.0) { - *m.borrow_mut() = map; - } else { - unreachable!(); - }; - new_map.map(|m| { - let index = self - .type_arena - .borrow_mut() - .insert(TypeEnum::TSeq { map: m.into() }); - self.unification_table - .borrow_mut() - .new_key(TypeIndex(index)) - }) - } + TypeEnum::TSeq { map } => self.subst_map(map, mapping).map(|m| { + let index = self + .type_arena + .borrow_mut() + .insert(Rc::new(TypeEnum::TSeq { map: m }.into())); + self.unification_table + .borrow_mut() + .new_key(TypeIndex(index)) + }), TypeEnum::TTuple { ty } => { - let ty = ty.take(); - drop(arena); let mut new_ty = None; for (i, t) in ty.iter().enumerate() { if let Some(t1) = self.subst(*t, mapping) { @@ -396,58 +352,39 @@ impl Unifier { new_ty.as_mut().unwrap()[i] = t1; } } - if let Some(TypeEnum::TTuple { ty: t }) = self.type_arena.borrow().get(index.0) { - *t.borrow_mut() = ty; - } else { - unreachable!(); - }; new_ty.map(|t| { let index = self .type_arena .borrow_mut() - .insert(TypeEnum::TTuple { ty: t.into() }); - self.unification_table - .borrow_mut() - .new_key(TypeIndex(index)) - }) - } - TypeEnum::TList { ty } => { - let ty = *ty; - drop(arena); - self.subst(ty, mapping).map(|t| { - let index = self - .type_arena - .borrow_mut() - .insert(TypeEnum::TList { ty: t }); - self.unification_table - .borrow_mut() - .new_key(TypeIndex(index)) - }) - } - TypeEnum::TVirtual { ty } => { - let ty = *ty; - drop(arena); - self.subst(ty, mapping).map(|t| { - let index = self - .type_arena - .borrow_mut() - .insert(TypeEnum::TVirtual { ty: t }); + .insert(Rc::new(TypeEnum::TTuple { ty: t }.into())); self.unification_table .borrow_mut() .new_key(TypeIndex(index)) }) } + TypeEnum::TList { ty } => self.subst(*ty, mapping).map(|t| { + let index = self + .type_arena + .borrow_mut() + .insert(Rc::new(TypeEnum::TList { ty: t }.into())); + self.unification_table + .borrow_mut() + .new_key(TypeIndex(index)) + }), + TypeEnum::TVirtual { ty } => self.subst(*ty, mapping).map(|t| { + let index = self + .type_arena + .borrow_mut() + .insert(Rc::new(TypeEnum::TVirtual { ty: t }.into())); + self.unification_table + .borrow_mut() + .new_key(TypeIndex(index)) + }), TypeEnum::TObj { obj_id, fields, params, } => { - let obj_id = *obj_id; - let params = params.take(); - let fields = fields.take(); - drop(arena); - let mut new_params = None; - let mut new_fields = None; // Type variables in field types must be present in the type parameter. // If the mapping does not contain any type variables in the // parameter list, we don't need to substitute the fields. @@ -455,38 +392,29 @@ impl Unifier { let need_subst = params.values().any(|v| { let index = self.unification_table.borrow_mut().probe_value(*v); let arena = self.type_arena.borrow(); - let ty = arena.get(index.0).unwrap(); - if let TypeEnum::TVar { id } = ty { + let ty_cell = arena.get(index.0).unwrap(); + let ty = ty_cell.borrow(); + if let TypeEnum::TVar { id } = &*ty { mapping.contains_key(id) } else { false } }); if need_subst { - new_params = self - .subst_map(¶ms, mapping) - .or_else(|| Some(params.clone())); - new_fields = self - .subst_map(&fields, mapping) - .or_else(|| Some(fields.clone())); - } - if let Some(TypeEnum::TObj { - params: p, - fields: f, - .. 
- }) = self.type_arena.borrow().get(index.0) - { - *p.borrow_mut() = params; - *f.borrow_mut() = fields; - } else { - unreachable!(); - }; - if need_subst { - let index = self.type_arena.borrow_mut().insert(TypeEnum::TObj { - obj_id, - params: new_params.unwrap().into(), - fields: new_fields.unwrap().into(), - }); + let index = self.type_arena.borrow_mut().insert(Rc::new( + TypeEnum::TObj { + obj_id: *obj_id, + params: self + .subst_map(¶ms, mapping) + .or_else(|| Some(params.clone())) + .unwrap(), + fields: self + .subst_map(&fields, mapping) + .or_else(|| Some(fields.clone())) + .unwrap(), + } + .into(), + )); Some( self.unification_table .borrow_mut() From 97fe450a0b726ef9ac2b29e71a4417e35f73c8ac Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 14 Jul 2021 16:40:50 +0800 Subject: [PATCH 012/131] occur check --- nac3core/src/typecheck/typedef.rs | 101 ++++++++++++++++++++++++++++-- 1 file changed, 97 insertions(+), 4 deletions(-) diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 161eb50d..c1f8cff5 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -2,6 +2,7 @@ use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue}; use generational_arena::{Arena, Index}; use std::cell::RefCell; use std::collections::BTreeMap; +use std::iter::{empty, once, Iterator}; use std::mem::swap; use std::rc::Rc; @@ -50,6 +51,7 @@ impl UnifyKey for Type { type Mapping = BTreeMap; type VarMap = Mapping; +#[derive(Clone)] struct Call { posargs: Vec, kwargs: BTreeMap, @@ -57,6 +59,7 @@ struct Call { fn_id: usize, } +#[derive(Clone)] struct FuncArg { name: String, ty: Type, @@ -320,6 +323,64 @@ impl Unifier { )) } + fn occur_check(&self, a: TypeIndex, b: Type) -> Result<(), String> { + let i_b = self.unification_table.borrow_mut().probe_value(b); + if a == i_b { + return Err("Recursive type detected!".to_owned()); + } + let ty = self.type_arena.borrow().get(i_b.0).unwrap().clone(); + let ty = ty.borrow(); + + match &*ty { + TypeEnum::TVar { .. } => { + // TODO: occur check for bounds... + } + TypeEnum::TSeq { map } | TypeEnum::TObj { params: map, .. 
} => { + for t in map.values() { + self.occur_check(a, *t)?; + } + } + TypeEnum::TTuple { ty } => { + for t in ty.iter() { + self.occur_check(a, *t)?; + } + } + TypeEnum::TList { ty } | TypeEnum::TVirtual { ty } => { + self.occur_check(a, *ty)?; + } + TypeEnum::TRecord { fields } => { + for t in fields.values() { + self.occur_check(a, *t)?; + } + } + TypeEnum::TCall { calls } => { + for t in calls + .iter() + .map(|call| { + call.posargs + .iter() + .chain(call.kwargs.values()) + .chain(once(&call.ret)) + }) + .flatten() + { + self.occur_check(a, *t)?; + } + } + TypeEnum::TFunc { args, ret, params } => { + for t in args + .iter() + .map(|v| &v.ty) + .chain(params.values()) + .chain(once(ret)) + { + self.occur_check(a, *t)?; + } + } + }; + Ok(()) + } + fn subst(&self, a: Type, mapping: &VarMap) -> Option { let index = self.unification_table.borrow_mut().probe_value(a); let ty_cell = { @@ -406,12 +467,44 @@ impl Unifier { obj_id: *obj_id, params: self .subst_map(¶ms, mapping) - .or_else(|| Some(params.clone())) - .unwrap(), + .unwrap_or_else(|| params.clone()), fields: self .subst_map(&fields, mapping) - .or_else(|| Some(fields.clone())) - .unwrap(), + .unwrap_or_else(|| fields.clone()), + } + .into(), + )); + Some( + self.unification_table + .borrow_mut() + .new_key(TypeIndex(index)), + ) + } else { + None + } + } + TypeEnum::TFunc { args, ret, params } => { + let new_params = self.subst_map(params, mapping); + let new_ret = self.subst(*ret, mapping); + let mut new_args = None; + for (i, t) in args.iter().enumerate() { + if let Some(t1) = self.subst(t.ty, mapping) { + if new_args.is_none() { + new_args = Some(args.clone()); + } + new_args.as_mut().unwrap()[i] = FuncArg { + name: t.name.clone(), + ty: t1, + is_optional: t.is_optional, + }; + } + } + if new_params.is_some() || new_ret.is_some() || new_args.is_some() { + let index = self.type_arena.borrow_mut().insert(Rc::new( + TypeEnum::TFunc { + params: new_params.unwrap_or_else(|| params.clone()), + ret: new_ret.unwrap_or_else(|| *ret), + args: new_args.unwrap_or_else(|| args.clone()), } .into(), )); From 1df3f4e757c0453a6705907227a4b233271e6eb7 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 14 Jul 2021 17:20:12 +0800 Subject: [PATCH 013/131] most of unification... --- nac3core/src/typecheck/typedef.rs | 96 +++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 25 deletions(-) diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index c1f8cff5..1fb5b932 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -2,7 +2,7 @@ use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue}; use generational_arena::{Arena, Index}; use std::cell::RefCell; use std::collections::BTreeMap; -use std::iter::{empty, once, Iterator}; +use std::iter::once; use std::mem::swap; use std::rc::Rc; @@ -155,7 +155,7 @@ struct Unifier { } impl Unifier { - fn unify(&self, a: Type, b: Type) -> Result<(), String> { + fn unify(&self, mut a: Type, mut b: Type) -> Result<(), String> { let (mut i_a, mut i_b) = { let mut table = self.unification_table.borrow_mut(); (table.probe_value(a), table.probe_value(b)) @@ -165,7 +165,7 @@ impl Unifier { return Ok(()); } - let (ty_a_cell, ty_b_cell) = { + let (mut ty_a_cell, mut ty_b_cell) = { let arena = self.type_arena.borrow(); ( arena.get(i_a.0).unwrap().clone(), @@ -173,15 +173,17 @@ impl Unifier { ) }; - let mut ty_a = ty_a_cell.borrow(); - let mut ty_b = ty_b_cell.borrow(); - - // simplify our pattern matching... 
- if ty_a.kind_le(&ty_b) { - swap(&mut i_a, &mut i_b); - swap(&mut ty_a, &mut ty_b); - } + let (ty_a, ty_b) = { + // simplify our pattern matching... + if ty_a_cell.borrow().kind_le(&ty_b_cell.borrow()) { + swap(&mut a, &mut b); + swap(&mut i_a, &mut i_b); + swap(&mut ty_a_cell, &mut ty_b_cell); + } + (ty_a_cell.borrow(), ty_b_cell.borrow()) + }; + self.occur_check(i_a, b)?; match &*ty_a { TypeEnum::TVar { .. } => { match *ty_b { @@ -195,12 +197,11 @@ impl Unifier { } else { i_a } - } - .0; - self.type_arena.borrow_mut().remove(old); + }; + self.type_arena.borrow_mut().remove(old.0); } _ => { - // TODO: type variables bound check and occur check + // TODO: type variables bound check self.set_a_to_b(a, b); } } @@ -208,7 +209,6 @@ impl Unifier { TypeEnum::TSeq { map: map1 } => { match &*ty_b { TypeEnum::TSeq { map: map2 } => { - self.set_a_to_b(a, b); drop(ty_a); if let TypeEnum::TSeq { map: map1 } = &mut *ty_a_cell.as_ref().borrow_mut() { @@ -223,9 +223,9 @@ impl Unifier { } else { unreachable!() } + self.set_a_to_b(a, b); } TypeEnum::TTuple { ty: types } => { - self.set_a_to_b(a, b); let len = types.len() as u32; for (k, v) in map1.iter() { if *k >= len { @@ -237,12 +237,13 @@ impl Unifier { } self.unify(*v, types[*k as usize])?; } + self.set_a_to_b(a, b); } TypeEnum::TList { ty } => { - self.set_a_to_b(a, b); for v in map1.values() { self.unify(*v, *ty)?; } + self.set_a_to_b(a, b); } _ => { return self.report_kind_error(&*ty_a, &*ty_b); @@ -258,26 +259,25 @@ impl Unifier { ty2.len() )); } - self.set_a_to_b(a, b); - for (a, b) in ty1.iter().zip(ty2.iter()) { - self.unify(*a, *b)?; + for (x, y) in ty1.iter().zip(ty2.iter()) { + self.unify(*x, *y)?; } + self.set_a_to_b(a, b); } else { return self.report_kind_error(&*ty_a, &*ty_b); } } TypeEnum::TList { ty: ty1 } => { if let TypeEnum::TList { ty: ty2 } = *ty_b { - self.set_a_to_b(a, b); self.unify(*ty1, ty2)?; + self.set_a_to_b(a, b); } else { return self.report_kind_error(&*ty_a, &*ty_b); } } - TypeEnum::TRecord { .. } => { + TypeEnum::TRecord { fields: fields1 } => { match &*ty_b { TypeEnum::TRecord { fields: fields2 } => { - self.set_a_to_b(a, b); drop(ty_a); if let TypeEnum::TRecord { fields: fields1 } = &mut *ty_a_cell.as_ref().borrow_mut() @@ -292,13 +292,59 @@ impl Unifier { } else { unreachable!() } + self.set_a_to_b(a, b); + } + TypeEnum::TObj { + fields: fields2, .. + } => { + for (key, value) in fields1.iter() { + if let Some(ty) = fields2.get(key) { + self.unify(*ty, *value)?; + } else { + return Err(format!("No such attribute {}", key)); + } + } + self.set_a_to_b(a, b); + } + TypeEnum::TVirtual { ty } => { + // not sure if this is correct... + self.unify(a, *ty)?; + self.set_a_to_b(a, b); } - // obj... _ => { return self.report_kind_error(&*ty_a, &*ty_b); } } } + TypeEnum::TObj { + obj_id: id1, + params: params1, + .. + } => { + if let TypeEnum::TObj { + obj_id: id2, + params: params2, + .. 
+ } = &*ty_b + { + if id1 != id2 { + return Err(format!("Cannot unify objects with ID {} and {}", id1, id2)); + } + for (x, y) in params1.values().zip(params2.values()) { + self.unify(*x, *y)?; + } + self.set_a_to_b(a, b); + } else { + return self.report_kind_error(&*ty_a, &*ty_b); + } + } + TypeEnum::TVirtual { ty: ty1 } => { + if let TypeEnum::TVirtual { ty: ty2 } = &*ty_b { + self.unify(*ty1, *ty2)?; + } else { + return self.report_kind_error(&*ty_a, &*ty_b); + } + } _ => unimplemented!(), } Ok(()) From d94f25583bdcb4f85b6bae5ccaab7a4cdaba2c5b Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 15 Jul 2021 16:00:23 +0800 Subject: [PATCH 014/131] added tests --- Cargo.lock | 15 ++ nac3core/Cargo.toml | 4 + nac3core/src/typecheck/mod.rs | 1 + nac3core/src/typecheck/test_typedef.rs | 273 +++++++++++++++++++++++++ nac3core/src/typecheck/typedef.rs | 251 +++++++++++++---------- 5 files changed, 439 insertions(+), 105 deletions(-) create mode 100644 nac3core/src/typecheck/test_typedef.rs diff --git a/Cargo.lock b/Cargo.lock index c09cc2c6..afc45460 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -403,9 +403,11 @@ dependencies = [ "generational-arena", "indoc 1.0.3", "inkwell", + "itertools", "num-bigint 0.3.2", "num-traits", "rustpython-parser", + "test-case", ] [[package]] @@ -844,6 +846,19 @@ dependencies = [ "winapi", ] +[[package]] +name = "test-case" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b114ece25254e97bf48dd4bfc2a12bad0647adacfe4cae1247a9ca6ad302cec" +dependencies = [ + "cfg-if 1.0.0", + "proc-macro2", + "quote", + "syn", + "version_check", +] + [[package]] name = "tiny-keccak" version = "2.0.2" diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 79ad0997..d3ae7744 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -13,3 +13,7 @@ indoc = "1.0" generational-arena = "0.2" ena = "0.14" +[dev-dependencies] +test-case = "1.2.0" +itertools = "0.10.1" + diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index 118a79ab..7b30426e 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -4,4 +4,5 @@ // mod magic_methods; // mod primitives; // pub mod symbol_resolver; +mod test_typedef; pub mod typedef; diff --git a/nac3core/src/typecheck/test_typedef.rs b/nac3core/src/typecheck/test_typedef.rs new file mode 100644 index 00000000..4eeb1cd8 --- /dev/null +++ b/nac3core/src/typecheck/test_typedef.rs @@ -0,0 +1,273 @@ +#[cfg(test)] +mod test { + use super::super::typedef::*; + use itertools::Itertools; + use std::collections::HashMap; + use test_case::test_case; + + struct TestEnvironment { + pub unifier: Unifier, + type_mapping: HashMap, + var_max_id: u32, + } + + impl TestEnvironment { + fn new() -> TestEnvironment { + let unifier = Unifier::new(); + let mut type_mapping = HashMap::new(); + let mut var_max_id = 0; + + type_mapping.insert( + "int".into(), + unifier.add_ty(TypeEnum::TObj { + obj_id: 0, + fields: HashMap::new(), + params: HashMap::new(), + }), + ); + type_mapping.insert( + "float".into(), + unifier.add_ty(TypeEnum::TObj { + obj_id: 1, + fields: HashMap::new(), + params: HashMap::new(), + }), + ); + type_mapping.insert( + "bool".into(), + unifier.add_ty(TypeEnum::TObj { + obj_id: 2, + fields: HashMap::new(), + params: HashMap::new(), + }), + ); + let v0 = unifier.add_ty(TypeEnum::TVar { id: 0 }); + var_max_id += 1; + type_mapping.insert( + "Foo".into(), + unifier.add_ty(TypeEnum::TObj { + obj_id: 3, + fields: [("a".into(), v0)].iter().cloned().collect(), + 
params: [(0u32, v0)].iter().cloned().collect(), + }), + ); + + TestEnvironment { + unifier, + type_mapping, + var_max_id, + } + } + + fn get_fresh_var(&mut self) -> Type { + let id = self.var_max_id + 1; + self.var_max_id += 1; + self.unifier.add_ty(TypeEnum::TVar { id }) + } + + fn parse(&self, typ: &str, mapping: &Mapping) -> Type { + let result = self.internal_parse(typ, mapping); + assert!(result.1.is_empty()); + result.0 + } + + fn internal_parse<'a, 'b>( + &'a self, + typ: &'b str, + mapping: &Mapping, + ) -> (Type, &'b str) { + // for testing only, so we can just panic when the input is malformed + let end = typ + .find(|c| ['[', ',', ']', '='].contains(&c)) + .unwrap_or_else(|| typ.len()); + match &typ[..end] { + "Tuple" => { + let mut s = &typ[end..]; + assert!(&s[0..1] == "["); + let mut ty = Vec::new(); + while &s[0..1] != "]" { + let result = self.internal_parse(&s[1..], mapping); + ty.push(result.0); + s = result.1; + } + (self.unifier.add_ty(TypeEnum::TTuple { ty }), &s[1..]) + } + "List" => { + assert!(&typ[end..end + 1] == "["); + let (ty, s) = self.internal_parse(&typ[end + 1..], mapping); + assert!(&s[0..1] == "]"); + (self.unifier.add_ty(TypeEnum::TList { ty }), &s[1..]) + } + "Record" => { + let mut s = &typ[end..]; + assert!(&s[0..1] == "["); + let mut fields = HashMap::new(); + while &s[0..1] != "]" { + let eq = s.find('=').unwrap(); + let key = s[1..eq].to_string(); + let result = self.internal_parse(&s[eq + 1..], mapping); + fields.insert(key, result.0); + s = result.1; + } + (self.unifier.add_ty(TypeEnum::TRecord { fields }), &s[1..]) + } + x => { + let mut s = &typ[end..]; + let ty = mapping.get(x).cloned().unwrap_or_else(|| { + // mapping should be type variables, type_mapping should be concrete types + // we should not resolve the type of type variables. + let mut ty = *self.type_mapping.get(x).unwrap(); + let te = self.unifier.get_ty(ty); + if let TypeEnum::TObj { params, .. } = &*te.as_ref().borrow() { + if !params.is_empty() { + assert!(&s[0..1] == "["); + let mut p = Vec::new(); + while &s[0..1] != "]" { + let result = self.internal_parse(&s[1..], mapping); + p.push(result.0); + s = result.1; + } + s = &s[1..]; + ty = self + .unifier + .subst(ty, ¶ms.keys().cloned().zip(p.into_iter()).collect()) + .unwrap_or(ty); + } + } + ty + }); + (ty, s) + } + } + } + } + + #[test_case(2, + &[("v1", "v2"), ("v2", "float")], + &[("v1", "float"), ("v2", "float")] + ; "simple variable" + )] + #[test_case(2, + &[("v1", "List[v2]"), ("v1", "List[float]")], + &[("v1", "List[float]"), ("v2", "float")] + ; "list element" + )] + #[test_case(3, + &[ + ("v1", "Record[a=v3,b=v3]"), + ("v2", "Record[b=float,c=v3]"), + ("v1", "v2") + ], + &[ + ("v1", "Record[a=float,b=float,c=float]"), + ("v2", "Record[a=float,b=float,c=float]"), + ("v3", "float") + ] + ; "record merge" + )] + #[test_case(3, + &[ + ("v1", "Record[a=float]"), + ("v2", "Foo[v3]"), + ("v1", "v2") + ], + &[ + ("v1", "Foo[float]"), + ("v3", "float") + ] + ; "record obj merge" + )] + fn test_unify( + variable_count: u32, + unify_pairs: &[(&'static str, &'static str)], + verify_pairs: &[(&'static str, &'static str)], + ) { + let unify_count = unify_pairs.len(); + // test all permutations... 
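        // (unification should not depend on the order in which the pairs are fed
        // in, so every ordering is checked against the same expected results)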
+ for perm in unify_pairs.iter().permutations(unify_count) { + let mut env = TestEnvironment::new(); + let mut mapping = HashMap::new(); + for i in 1..=variable_count { + let v = env.get_fresh_var(); + mapping.insert(format!("v{}", i), v); + } + // unification may have side effect when we do type resolution, so freeze the types + // before doing unification. + let mut pairs = Vec::new(); + for (a, b) in perm.iter() { + let t1 = env.parse(a, &mapping); + let t2 = env.parse(b, &mapping); + pairs.push((t1, t2)); + } + for (t1, t2) in pairs { + env.unifier.unify(t1, t2).unwrap(); + } + for (a, b) in verify_pairs.iter() { + let t1 = env.parse(a, &mapping); + let t2 = env.parse(b, &mapping); + assert!(env.unifier.eq(t1, t2)); + } + } + } + + #[test_case(2, + &[ + ("v1", "Tuple[int]"), + ("v2", "List[int]"), + ], + (("v1", "v2"), "Cannot unify TTuple with TList") + ; "kind mismatch" + )] + #[test_case(2, + &[ + ("v1", "Tuple[int]"), + ("v2", "Tuple[float]"), + ], + (("v1", "v2"), "Cannot unify objects with ID 0 and 1") + ; "tuple parameter mismatch" + )] + #[test_case(2, + &[ + ("v1", "Tuple[int,int]"), + ("v2", "Tuple[int]"), + ], + (("v1", "v2"), "Cannot unify tuples with length 1 and 2") + ; "tuple length mismatch" + )] + #[test_case(3, + &[ + ("v1", "Record[a=float,b=int]"), + ("v2", "Foo[v3]"), + ], + (("v1", "v2"), "No such attribute b") + ; "record obj merge" + )] + fn test_invalid_unification( + variable_count: u32, + unify_pairs: &[(&'static str, &'static str)], + errornous_pair: ((&'static str, &'static str), &'static str), + ) { + let mut env = TestEnvironment::new(); + let mut mapping = HashMap::new(); + for i in 1..=variable_count { + let v = env.get_fresh_var(); + mapping.insert(format!("v{}", i), v); + } + // unification may have side effect when we do type resolution, so freeze the types + // before doing unification. 
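        // ("freeze" here means: parse both sides of every pair up front, and only
        // start unifying once all of the types have been constructed)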
+ let mut pairs = Vec::new(); + for (a, b) in unify_pairs.iter() { + let t1 = env.parse(a, &mapping); + let t2 = env.parse(b, &mapping); + pairs.push((t1, t2)); + } + let (t1, t2) = ( + env.parse(errornous_pair.0 .0, &mapping), + env.parse(errornous_pair.0 .1, &mapping), + ); + for (a, b) in pairs { + env.unifier.unify(a, b).unwrap(); + } + assert_eq!(env.unifier.unify(t1, t2), Err(errornous_pair.1.to_string())); + } +} diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 1fb5b932..03b3ae28 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -1,7 +1,7 @@ use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue}; use generational_arena::{Arena, Index}; use std::cell::RefCell; -use std::collections::BTreeMap; +use std::collections::HashMap; use std::iter::once; use std::mem::swap; use std::rc::Rc; @@ -18,10 +18,10 @@ use std::rc::Rc; // `--> TFunc #[derive(Copy, Clone, PartialEq, Eq, Debug)] -struct Type(u32); +pub struct Type(u32); #[derive(Copy, Clone, Debug, PartialEq, Eq)] -struct TypeIndex(Index); +pub struct TypeIndex(Index); impl UnifyValue for TypeIndex { type Error = NoError; @@ -48,19 +48,19 @@ impl UnifyKey for Type { } } -type Mapping = BTreeMap; -type VarMap = Mapping; +pub type Mapping = HashMap; +pub type VarMap = Mapping; #[derive(Clone)] -struct Call { +pub struct Call { posargs: Vec, - kwargs: BTreeMap, + kwargs: HashMap, ret: Type, - fn_id: usize, + fun: RefCell>, } #[derive(Clone)] -struct FuncArg { +pub struct FuncArg { name: String, ty: Type, is_optional: bool, @@ -69,7 +69,7 @@ struct FuncArg { // We use a lot of `Rc`/`RefCell`s here as we want to simplify our code. // We may not really need so much `Rc`s, but we would have to do complicated // stuffs otherwise. -enum TypeEnum { +pub enum TypeEnum { TVar { // TODO: upper/lower bound id: u32, @@ -95,7 +95,7 @@ enum TypeEnum { ty: Type, }, TCall { - calls: Vec, + calls: Vec>, }, TFunc { args: Vec, @@ -143,19 +143,40 @@ impl TypeEnum { } } -struct ObjDef { +pub struct ObjDef { name: String, fields: Mapping, } -struct Unifier { +pub struct Unifier { unification_table: RefCell>, type_arena: RefCell>>>, obj_def_table: Vec, } impl Unifier { - fn unify(&self, mut a: Type, mut b: Type) -> Result<(), String> { + pub fn new() -> Unifier { + Unifier { + unification_table: RefCell::new(InPlaceUnificationTable::new()), + type_arena: RefCell::new(Arena::new()), + obj_def_table: Vec::new(), + } + } + + pub fn add_ty(&self, a: TypeEnum) -> Type { + let index = self.type_arena.borrow_mut().insert(Rc::new(a.into())); + self.unification_table + .borrow_mut() + .new_key(TypeIndex(index)) + } + + pub fn get_ty(&self, a: Type) -> Rc> { + let mut table = self.unification_table.borrow_mut(); + let arena = self.type_arena.borrow(); + arena.get(table.probe_value(a).0).unwrap().clone() + } + + pub fn unify(&self, mut a: Type, mut b: Type) -> Result<(), String> { let (mut i_a, mut i_b) = { let mut table = self.unification_table.borrow_mut(); (table.probe_value(a), table.probe_value(b)) @@ -186,38 +207,21 @@ impl Unifier { self.occur_check(i_a, b)?; match &*ty_a { TypeEnum::TVar { .. } => { - match *ty_b { - TypeEnum::TVar { .. 
} => { - // TODO: type variables bound check - let old = { - let mut table = self.unification_table.borrow_mut(); - table.union(a, b); - if table.find(a) == a { - i_b - } else { - i_a - } - }; - self.type_arena.borrow_mut().remove(old.0); - } - _ => { - // TODO: type variables bound check - self.set_a_to_b(a, b); - } - } + // TODO: type variables bound check... + self.set_a_to_b(a, b); } TypeEnum::TSeq { map: map1 } => { match &*ty_b { - TypeEnum::TSeq { map: map2 } => { - drop(ty_a); - if let TypeEnum::TSeq { map: map1 } = &mut *ty_a_cell.as_ref().borrow_mut() + TypeEnum::TSeq { .. } => { + drop(ty_b); + if let TypeEnum::TSeq { map: map2 } = &mut *ty_b_cell.as_ref().borrow_mut() { // unify them to map1 - for (key, value) in map2.iter() { - if let Some(ty) = map1.get(key) { + for (key, value) in map1.iter() { + if let Some(ty) = map2.get(key) { self.unify(*ty, *value)?; } else { - map1.insert(*key, *value); + map2.insert(*key, *value); } } } else { @@ -277,16 +281,16 @@ impl Unifier { } TypeEnum::TRecord { fields: fields1 } => { match &*ty_b { - TypeEnum::TRecord { fields: fields2 } => { - drop(ty_a); - if let TypeEnum::TRecord { fields: fields1 } = - &mut *ty_a_cell.as_ref().borrow_mut() + TypeEnum::TRecord { .. } => { + drop(ty_b); + if let TypeEnum::TRecord { fields: fields2 } = + &mut *ty_b_cell.as_ref().borrow_mut() { - for (key, value) in fields2.iter() { - if let Some(ty) = fields1.get(key) { + for (key, value) in fields1.iter() { + if let Some(ty) = fields2.get(key) { self.unify(*ty, *value)?; } else { - fields1.insert(key.clone(), *value); + fields2.insert(key.clone(), *value); } } } else { @@ -341,6 +345,7 @@ impl Unifier { TypeEnum::TVirtual { ty: ty1 } => { if let TypeEnum::TVirtual { ty: ty2 } = &*ty_b { self.unify(*ty1, *ty2)?; + self.set_a_to_b(a, b); } else { return self.report_kind_error(&*ty_a, &*ty_b); } @@ -427,7 +432,7 @@ impl Unifier { Ok(()) } - fn subst(&self, a: Type, mapping: &VarMap) -> Option { + pub fn subst(&self, a: Type, mapping: &VarMap) -> Option { let index = self.unification_table.borrow_mut().probe_value(a); let ty_cell = { let arena = self.type_arena.borrow(); @@ -459,34 +464,14 @@ impl Unifier { new_ty.as_mut().unwrap()[i] = t1; } } - new_ty.map(|t| { - let index = self - .type_arena - .borrow_mut() - .insert(Rc::new(TypeEnum::TTuple { ty: t }.into())); - self.unification_table - .borrow_mut() - .new_key(TypeIndex(index)) - }) + new_ty.map(|t| self.add_ty(TypeEnum::TTuple { ty: t })) } - TypeEnum::TList { ty } => self.subst(*ty, mapping).map(|t| { - let index = self - .type_arena - .borrow_mut() - .insert(Rc::new(TypeEnum::TList { ty: t }.into())); - self.unification_table - .borrow_mut() - .new_key(TypeIndex(index)) - }), - TypeEnum::TVirtual { ty } => self.subst(*ty, mapping).map(|t| { - let index = self - .type_arena - .borrow_mut() - .insert(Rc::new(TypeEnum::TVirtual { ty: t }.into())); - self.unification_table - .borrow_mut() - .new_key(TypeIndex(index)) - }), + TypeEnum::TList { ty } => self + .subst(*ty, mapping) + .map(|t| self.add_ty(TypeEnum::TList { ty: t })), + TypeEnum::TVirtual { ty } => self + .subst(*ty, mapping) + .map(|t| self.add_ty(TypeEnum::TVirtual { ty: t })), TypeEnum::TObj { obj_id, fields, @@ -508,23 +493,18 @@ impl Unifier { } }); if need_subst { - let index = self.type_arena.borrow_mut().insert(Rc::new( - TypeEnum::TObj { - obj_id: *obj_id, - params: self - .subst_map(¶ms, mapping) - .unwrap_or_else(|| params.clone()), - fields: self - .subst_map(&fields, mapping) - .unwrap_or_else(|| fields.clone()), - } - .into(), - )); 
- Some( - self.unification_table - .borrow_mut() - .new_key(TypeIndex(index)), - ) + let obj_id = *obj_id; + let params = self + .subst_map(¶ms, mapping) + .unwrap_or_else(|| params.clone()); + let fields = self + .subst_map(&fields, mapping) + .unwrap_or_else(|| fields.clone()); + Some(self.add_ty(TypeEnum::TObj { + obj_id, + params, + fields, + })) } else { None } @@ -546,19 +526,10 @@ impl Unifier { } } if new_params.is_some() || new_ret.is_some() || new_args.is_some() { - let index = self.type_arena.borrow_mut().insert(Rc::new( - TypeEnum::TFunc { - params: new_params.unwrap_or_else(|| params.clone()), - ret: new_ret.unwrap_or_else(|| *ret), - args: new_args.unwrap_or_else(|| args.clone()), - } - .into(), - )); - Some( - self.unification_table - .borrow_mut() - .new_key(TypeIndex(index)), - ) + let params = new_params.unwrap_or_else(|| params.clone()); + let ret = new_ret.unwrap_or_else(|| *ret); + let args = new_args.unwrap_or_else(|| args.clone()); + Some(self.add_ty(TypeEnum::TFunc { params, ret, args })) } else { None } @@ -569,7 +540,7 @@ impl Unifier { fn subst_map(&self, map: &Mapping, mapping: &VarMap) -> Option> where - K: std::cmp::Ord + std::clone::Clone, + K: std::hash::Hash + std::cmp::Eq + std::clone::Clone, { let mut map2 = None; for (k, v) in map.iter() { @@ -582,4 +553,74 @@ impl Unifier { } map2 } + + pub fn eq(&self, a: Type, b: Type) -> bool { + if a == b { + return true; + } + let (i_a, i_b) = { + let mut table = self.unification_table.borrow_mut(); + (table.probe_value(a), table.probe_value(b)) + }; + + if i_a == i_b { + return true; + } + + let (ty_a, ty_b) = { + let arena = self.type_arena.borrow(); + ( + arena.get(i_a.0).unwrap().clone(), + arena.get(i_b.0).unwrap().clone(), + ) + }; + + let ty_a = ty_a.borrow(); + let ty_b = ty_b.borrow(); + + match (&*ty_a, &*ty_b) { + (TypeEnum::TVar { id: id1 }, TypeEnum::TVar { id: id2 }) => id1 == id2, + (TypeEnum::TSeq { map: map1 }, TypeEnum::TSeq { map: map2 }) => self.map_eq(map1, map2), + (TypeEnum::TTuple { ty: ty1 }, TypeEnum::TTuple { ty: ty2 }) => { + ty1.len() == ty2.len() + && ty1.iter().zip(ty2.iter()).all(|(t1, t2)| self.eq(*t1, *t2)) + } + (TypeEnum::TList { ty: ty1 }, TypeEnum::TList { ty: ty2 }) + | (TypeEnum::TVirtual { ty: ty1 }, TypeEnum::TVirtual { ty: ty2 }) => { + self.eq(*ty1, *ty2) + } + (TypeEnum::TRecord { fields: fields1 }, TypeEnum::TRecord { fields: fields2 }) => { + self.map_eq(fields1, fields2) + } + ( + TypeEnum::TObj { + obj_id: id1, + params: params1, + .. + }, + TypeEnum::TObj { + obj_id: id2, + params: params2, + .. 
+ }, + ) => id1 == id2 && self.map_eq(params1, params2), + // TCall and TFunc are not yet implemented + _ => false, + } + } + + fn map_eq(&self, map1: &Mapping, map2: &Mapping) -> bool + where + K: std::hash::Hash + std::cmp::Eq + std::clone::Clone, + { + if map1.len() != map2.len() { + return false; + } + for (k, v) in map1.iter() { + if !map2.get(k).map(|v1| self.eq(*v, *v1)).unwrap_or(false) { + return false; + } + } + true + } } From c2d00aa762cd08dcc4f71ab52fe41d1ef5df6a9e Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 15 Jul 2021 16:51:55 +0800 Subject: [PATCH 015/131] occur check --- nac3core/src/typecheck/test_typedef.rs | 7 +++++++ nac3core/src/typecheck/typedef.rs | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/nac3core/src/typecheck/test_typedef.rs b/nac3core/src/typecheck/test_typedef.rs index 4eeb1cd8..f411f689 100644 --- a/nac3core/src/typecheck/test_typedef.rs +++ b/nac3core/src/typecheck/test_typedef.rs @@ -242,6 +242,13 @@ mod test { (("v1", "v2"), "No such attribute b") ; "record obj merge" )] + #[test_case(2, + &[ + ("v1", "List[v2]"), + ], + (("v1", "v2"), "Recursive type is prohibited.") + ; "recursive type for lists" + )] fn test_invalid_unification( variable_count: u32, unify_pairs: &[(&'static str, &'static str)], diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 03b3ae28..645a96bc 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -216,7 +216,7 @@ impl Unifier { drop(ty_b); if let TypeEnum::TSeq { map: map2 } = &mut *ty_b_cell.as_ref().borrow_mut() { - // unify them to map1 + // unify them to map2 for (key, value) in map1.iter() { if let Some(ty) = map2.get(key) { self.unify(*ty, *value)?; @@ -377,7 +377,7 @@ impl Unifier { fn occur_check(&self, a: TypeIndex, b: Type) -> Result<(), String> { let i_b = self.unification_table.borrow_mut().probe_value(b); if a == i_b { - return Err("Recursive type detected!".to_owned()); + return Err("Recursive type is prohibited.".to_owned()); } let ty = self.type_arena.borrow().get(i_b.0).unwrap().clone(); let ty = ty.borrow(); From 62736bd4bfda7a3626778fa6296b587b9d0aff65 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 16 Jul 2021 13:55:29 +0800 Subject: [PATCH 016/131] cleanup: we don't actually need arena --- Cargo.lock | 30 ++--- nac3core/Cargo.toml | 1 - nac3core/src/lib.rs | 1 - nac3core/src/typecheck/typedef.rs | 198 ++++++++++++++++-------------- 4 files changed, 113 insertions(+), 117 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afc45460..d08e77c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,12 +75,6 @@ version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787" -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" - [[package]] name = "cfg-if" version = "1.0.0" @@ -115,7 +109,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "dirs-sys-next", ] @@ -151,22 +145,13 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ab347416e802de484e4d03c7316c48f1ecb56574dfd4a46a80f173ce1de04d" -[[package]] -name = "generational-arena" -version = "0.2.8" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e1d3b771574f62d0548cee0ad9057857e9fc25d7a3335f140c84f6acd0bf601" -dependencies = [ - "cfg-if 0.1.10", -] - [[package]] name = "getrandom" version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "wasi 0.9.0+wasi-snapshot-preview1", ] @@ -177,7 +162,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "libc", "wasi 0.10.2+wasi-snapshot-preview1", ] @@ -280,7 +265,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -386,7 +371,7 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", ] [[package]] @@ -400,7 +385,6 @@ name = "nac3core" version = "0.1.0" dependencies = [ "ena", - "generational-arena", "indoc 1.0.3", "inkwell", "itertools", @@ -499,7 +483,7 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "instant", "libc", "redox_syscall", @@ -852,7 +836,7 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b114ece25254e97bf48dd4bfc2a12bad0647adacfe4cae1247a9ca6ad302cec" dependencies = [ - "cfg-if 1.0.0", + "cfg-if", "proc-macro2", "quote", "syn", diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index d3ae7744..6b15049d 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -10,7 +10,6 @@ num-traits = "0.2" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } indoc = "1.0" -generational-arena = "0.2" ena = "0.14" [dev-dependencies] diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index 9fdca9d4..fceaaeea 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -5,7 +5,6 @@ extern crate num_bigint; extern crate inkwell; extern crate rustpython_parser; extern crate indoc; -extern crate generational_arena; extern crate ena; mod typecheck; diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 645a96bc..0338a36a 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -1,29 +1,20 @@ use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue}; -use generational_arena::{Arena, Index}; use std::cell::RefCell; use std::collections::HashMap; +use std::fmt::Debug; use std::iter::once; use std::mem::swap; +use std::ops::Deref; use std::rc::Rc; -// Order: -// TVar -// |--> TSeq -// | |--> TTuple -// | `--> TList -// |--> TRecord -// | |--> TObj -// | `--> TVirtual -// `--> TCall -// `--> TFunc - #[derive(Copy, Clone, PartialEq, Eq, Debug)] +/// Handle for a type, implementated as a key in the unification table. 
pub struct Type(u32); -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub struct TypeIndex(Index); +#[derive(Clone)] +pub struct TypeCell(Rc>); -impl UnifyValue for TypeIndex { +impl UnifyValue for TypeCell { type Error = NoError; fn unify_values(_: &Self, value2: &Self) -> Result { // WARN: depends on the implementation details of ena. @@ -31,12 +22,12 @@ impl UnifyValue for TypeIndex { // and assign the type by `union_value(key, new_value)`, which set the // value as `unify_values(key.value, new_value)`. So, we need to return // the right one. - Ok(*value2) + Ok(value2.clone()) } } impl UnifyKey for Type { - type Value = TypeIndex; + type Value = TypeCell; fn index(&self) -> u32 { self.0 } @@ -48,6 +39,14 @@ impl UnifyKey for Type { } } +impl Deref for TypeCell { + type Target = Rc>; + + fn deref(&self) -> &::Target { + &self.0 + } +} + pub type Mapping = HashMap; pub type VarMap = Mapping; @@ -104,18 +103,63 @@ pub enum TypeEnum { }, } +// Order: +// TVar +// |--> TSeq +// | |--> TTuple +// | `--> TList +// |--> TRecord +// | |--> TObj +// | `--> TVirtual +// `--> TCall +// `--> TFunc + +// We encode the types as natural numbers, and subtyping relation as divisibility. +// If a | b, b <: a. +// We assign unique prime numbers (1 to TVar, everything is a subtype of it) to each type: +// TVar = 1 +// |--> TSeq = 2 +// | |--> TTuple = 3 +// | `--> TList = 5 +// |--> TRecord = 7 +// | |--> TObj = 11 +// | `--> TVirtual = 13 +// `--> TCall = 17 +// `--> TFunc = 21 +// +// And then, based on the subtyping relation, multiply them together... +// TVar = 1 +// |--> TSeq = 2 * TVar +// | |--> TTuple = 3 * TSeq * TVar +// | `--> TList = 5 * TSeq * TVar +// |--> TRecord = 7 * TVar +// | |--> TObj = 11 * TRecord * TVar +// | `--> TVirtual = 13 * TRecord * TVar +// `--> TCall = 17 * TVar +// `--> TFunc = 21 * TCall * TVar + impl TypeEnum { fn get_int(&self) -> i32 { + const TVAR: i32 = 1; + const TSEQ: i32 = 2; + const TTUPLE: i32 = 3; + const TLIST: i32 = 5; + const TRECORD: i32 = 7; + const TOBJ: i32 = 11; + const TVIRTUAL: i32 = 13; + const TCALL: i32 = 17; + const TFUNC: i32 = 21; + match self { - TypeEnum::TVar { .. } => 1, - TypeEnum::TSeq { .. } => 5, - TypeEnum::TTuple { .. } => 10, - TypeEnum::TList { .. } => 15, - TypeEnum::TRecord { .. } => 7, - TypeEnum::TObj { .. } => 14, - TypeEnum::TVirtual { .. } => 21, - TypeEnum::TCall { .. } => 11, - TypeEnum::TFunc { .. } => 22, + TypeEnum::TVar { .. } => TVAR, + TypeEnum::TSeq { .. } => TSEQ * TVAR, + TypeEnum::TTuple { .. } => TTUPLE * TSEQ * TVAR, + TypeEnum::TList { .. } => TLIST * TSEQ * TVAR, + TypeEnum::TRecord { .. } => TRECORD * TVAR, + TypeEnum::TObj { .. } => TOBJ * TRECORD * TVAR, + TypeEnum::TVirtual { .. } => TVIRTUAL * TRECORD * TVAR, + TypeEnum::TCall { .. } => TCALL * TVAR, + TypeEnum::TFunc { .. } => TFUNC * TCALL * TVAR, } } @@ -126,7 +170,7 @@ impl TypeEnum { (a % b) == 0 } - pub fn get_kind_name(&self) -> &'static str { + pub fn get_type_name(&self) -> &'static str { // this function is for debugging only... 
// a proper to_str implementation requires the context match self { @@ -143,6 +187,12 @@ impl TypeEnum { } } +impl Debug for TypeCell { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.borrow().get_type_name()) + } +} + pub struct ObjDef { name: String, fields: Mapping, @@ -150,7 +200,6 @@ pub struct ObjDef { pub struct Unifier { unification_table: RefCell>, - type_arena: RefCell>>>, obj_def_table: Vec, } @@ -158,53 +207,43 @@ impl Unifier { pub fn new() -> Unifier { Unifier { unification_table: RefCell::new(InPlaceUnificationTable::new()), - type_arena: RefCell::new(Arena::new()), obj_def_table: Vec::new(), } } + /// Register a type to the unifier. + /// Returns a key in the unification_table. pub fn add_ty(&self, a: TypeEnum) -> Type { - let index = self.type_arena.borrow_mut().insert(Rc::new(a.into())); self.unification_table .borrow_mut() - .new_key(TypeIndex(index)) + .new_key(TypeCell(Rc::new(a.into()))) } + /// Get the TypeEnum of a type. pub fn get_ty(&self, a: Type) -> Rc> { let mut table = self.unification_table.borrow_mut(); - let arena = self.type_arena.borrow(); - arena.get(table.probe_value(a).0).unwrap().clone() + table.probe_value(a).0 } pub fn unify(&self, mut a: Type, mut b: Type) -> Result<(), String> { - let (mut i_a, mut i_b) = { - let mut table = self.unification_table.borrow_mut(); - (table.probe_value(a), table.probe_value(b)) - }; - - if i_a == i_b { - return Ok(()); - } - let (mut ty_a_cell, mut ty_b_cell) = { - let arena = self.type_arena.borrow(); - ( - arena.get(i_a.0).unwrap().clone(), - arena.get(i_b.0).unwrap().clone(), - ) + let mut table = self.unification_table.borrow_mut(); + if table.unioned(a, b) { + return Ok(()); + } + (table.probe_value(a), table.probe_value(b)) }; let (ty_a, ty_b) = { // simplify our pattern matching... if ty_a_cell.borrow().kind_le(&ty_b_cell.borrow()) { swap(&mut a, &mut b); - swap(&mut i_a, &mut i_b); swap(&mut ty_a_cell, &mut ty_b_cell); } (ty_a_cell.borrow(), ty_b_cell.borrow()) }; - self.occur_check(i_a, b)?; + self.occur_check(a, b)?; match &*ty_a { TypeEnum::TVar { .. } => { // TODO: type variables bound check... @@ -356,30 +395,26 @@ impl Unifier { } fn set_a_to_b(&self, a: Type, b: Type) { - // unify a and b together, and set the value to b's value this would - // also deallocate a's previous value in the arena to save space... + // unify a and b together, and set the value to b's value. 
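        // `union_value` below makes `b`'s TypeCell the value of the merged key,
        // regardless of which key ena happens to pick as the representative.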
let mut table = self.unification_table.borrow_mut(); - let i_a = table.probe_value(a); - let i_b = table.probe_value(b); + let ty_b = table.probe_value(b); table.union(a, b); - table.union_value(a, i_b); - self.type_arena.borrow_mut().remove(i_a.0); + table.union_value(a, ty_b); } fn report_kind_error(&self, a: &TypeEnum, b: &TypeEnum) -> Result<(), String> { Err(format!( "Cannot unify {} with {}", - a.get_kind_name(), - b.get_kind_name() + a.get_type_name(), + b.get_type_name() )) } - fn occur_check(&self, a: TypeIndex, b: Type) -> Result<(), String> { - let i_b = self.unification_table.borrow_mut().probe_value(b); - if a == i_b { + fn occur_check(&self, a: Type, b: Type) -> Result<(), String> { + if self.unification_table.borrow_mut().unioned(a, b) { return Err("Recursive type is prohibited.".to_owned()); } - let ty = self.type_arena.borrow().get(i_b.0).unwrap().clone(); + let ty = self.unification_table.borrow_mut().probe_value(b); let ty = ty.borrow(); match &*ty { @@ -433,11 +468,7 @@ impl Unifier { } pub fn subst(&self, a: Type, mapping: &VarMap) -> Option { - let index = self.unification_table.borrow_mut().probe_value(a); - let ty_cell = { - let arena = self.type_arena.borrow(); - arena.get(index.0).unwrap().clone() - }; + let ty_cell = self.unification_table.borrow_mut().probe_value(a); let ty = ty_cell.borrow(); // this function would only be called when we instantiate functions. // function type signature should ONLY contain concrete types and type @@ -445,15 +476,9 @@ impl Unifier { // should be safe to not implement the substitution for those variants. match &*ty { TypeEnum::TVar { id } => mapping.get(&id).cloned(), - TypeEnum::TSeq { map } => self.subst_map(map, mapping).map(|m| { - let index = self - .type_arena - .borrow_mut() - .insert(Rc::new(TypeEnum::TSeq { map: m }.into())); - self.unification_table - .borrow_mut() - .new_key(TypeIndex(index)) - }), + TypeEnum::TSeq { map } => self + .subst_map(map, mapping) + .map(|m| self.add_ty(TypeEnum::TSeq { map: m })), TypeEnum::TTuple { ty } => { let mut new_ty = None; for (i, t) in ty.iter().enumerate() { @@ -482,9 +507,7 @@ impl Unifier { // parameter list, we don't need to substitute the fields. // This is also used to prevent infinite substitution... 
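                // (e.g. a class whose field type refers back to the class itself
                // would otherwise be substituted into without end)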
let need_subst = params.values().any(|v| { - let index = self.unification_table.borrow_mut().probe_value(*v); - let arena = self.type_arena.borrow(); - let ty_cell = arena.get(index.0).unwrap(); + let ty_cell = self.unification_table.borrow_mut().probe_value(*v); let ty = ty_cell.borrow(); if let TypeEnum::TVar { id } = &*ty { mapping.contains_key(id) @@ -558,21 +581,12 @@ impl Unifier { if a == b { return true; } - let (i_a, i_b) = { - let mut table = self.unification_table.borrow_mut(); - (table.probe_value(a), table.probe_value(b)) - }; - - if i_a == i_b { - return true; - } - let (ty_a, ty_b) = { - let arena = self.type_arena.borrow(); - ( - arena.get(i_a.0).unwrap().clone(), - arena.get(i_b.0).unwrap().clone(), - ) + let mut table = self.unification_table.borrow_mut(); + if table.unioned(a, b) { + return true; + } + (table.probe_value(a), table.probe_value(b)) }; let ty_a = ty_a.borrow(); From 8b078dfa1b9fc3fc8bfacd605164327ae5682444 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 16 Jul 2021 13:59:08 +0800 Subject: [PATCH 017/131] naming --- nac3core/src/typecheck/test_typedef.rs | 2 +- nac3core/src/typecheck/typedef.rs | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/nac3core/src/typecheck/test_typedef.rs b/nac3core/src/typecheck/test_typedef.rs index f411f689..a8f5e68f 100644 --- a/nac3core/src/typecheck/test_typedef.rs +++ b/nac3core/src/typecheck/test_typedef.rs @@ -216,7 +216,7 @@ mod test { ("v2", "List[int]"), ], (("v1", "v2"), "Cannot unify TTuple with TList") - ; "kind mismatch" + ; "type mismatch" )] #[test_case(2, &[ diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 0338a36a..d105e947 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -164,7 +164,7 @@ impl TypeEnum { } // e.g. List <: Var - pub fn kind_le(&self, other: &TypeEnum) -> bool { + pub fn type_le(&self, other: &TypeEnum) -> bool { let a = self.get_int(); let b = other.get_int(); (a % b) == 0 @@ -236,7 +236,7 @@ impl Unifier { let (ty_a, ty_b) = { // simplify our pattern matching... 
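            // after this conditional swap the more general kind (e.g. TVar) always
            // ends up on the `ty_a` side, so each match arm below only needs to
            // handle one direction of every kind combination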
- if ty_a_cell.borrow().kind_le(&ty_b_cell.borrow()) { + if ty_a_cell.borrow().type_le(&ty_b_cell.borrow()) { swap(&mut a, &mut b); swap(&mut ty_a_cell, &mut ty_b_cell); } @@ -289,7 +289,7 @@ impl Unifier { self.set_a_to_b(a, b); } _ => { - return self.report_kind_error(&*ty_a, &*ty_b); + return self.incompatible_types(&*ty_a, &*ty_b); } } } @@ -307,7 +307,7 @@ impl Unifier { } self.set_a_to_b(a, b); } else { - return self.report_kind_error(&*ty_a, &*ty_b); + return self.incompatible_types(&*ty_a, &*ty_b); } } TypeEnum::TList { ty: ty1 } => { @@ -315,7 +315,7 @@ impl Unifier { self.unify(*ty1, ty2)?; self.set_a_to_b(a, b); } else { - return self.report_kind_error(&*ty_a, &*ty_b); + return self.incompatible_types(&*ty_a, &*ty_b); } } TypeEnum::TRecord { fields: fields1 } => { @@ -355,7 +355,7 @@ impl Unifier { self.set_a_to_b(a, b); } _ => { - return self.report_kind_error(&*ty_a, &*ty_b); + return self.incompatible_types(&*ty_a, &*ty_b); } } } @@ -378,7 +378,7 @@ impl Unifier { } self.set_a_to_b(a, b); } else { - return self.report_kind_error(&*ty_a, &*ty_b); + return self.incompatible_types(&*ty_a, &*ty_b); } } TypeEnum::TVirtual { ty: ty1 } => { @@ -386,7 +386,7 @@ impl Unifier { self.unify(*ty1, *ty2)?; self.set_a_to_b(a, b); } else { - return self.report_kind_error(&*ty_a, &*ty_b); + return self.incompatible_types(&*ty_a, &*ty_b); } } _ => unimplemented!(), @@ -402,7 +402,7 @@ impl Unifier { table.union_value(a, ty_b); } - fn report_kind_error(&self, a: &TypeEnum, b: &TypeEnum) -> Result<(), String> { + fn incompatible_types(&self, a: &TypeEnum, b: &TypeEnum) -> Result<(), String> { Err(format!( "Cannot unify {} with {}", a.get_type_name(), From f4121b570dcce12adc1ef8af609c97ef9b504ac2 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 16 Jul 2021 14:34:52 +0800 Subject: [PATCH 018/131] added documentation --- nac3core/src/typecheck/test_typedef.rs | 2 ++ nac3core/src/typecheck/typedef.rs | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/nac3core/src/typecheck/test_typedef.rs b/nac3core/src/typecheck/test_typedef.rs index a8f5e68f..b61bdb99 100644 --- a/nac3core/src/typecheck/test_typedef.rs +++ b/nac3core/src/typecheck/test_typedef.rs @@ -177,6 +177,7 @@ mod test { ] ; "record obj merge" )] + /// Test cases for valid unifications. fn test_unify( variable_count: u32, unify_pairs: &[(&'static str, &'static str)], @@ -249,6 +250,7 @@ mod test { (("v1", "v2"), "Recursive type is prohibited.") ; "recursive type for lists" )] + /// Test cases for invalid unifications. fn test_invalid_unification( variable_count: u32, unify_pairs: &[(&'static str, &'static str)], diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index d105e947..b7fd33a6 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -225,6 +225,7 @@ impl Unifier { table.probe_value(a).0 } + /// Unify two types, i.e. a = b. pub fn unify(&self, mut a: Type, mut b: Type) -> Result<(), String> { let (mut ty_a_cell, mut ty_b_cell) = { let mut table = self.unification_table.borrow_mut(); @@ -467,6 +468,10 @@ impl Unifier { Ok(()) } + /// Substitute type variables within a type into other types. + /// If this returns Some(T), T would be the substituted type. + /// If this returns None, the result type would be the original type + /// (no substitution has to be done). 
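    /// For example, substituting `{ v0 -> int }` into `List[v0]` yields a fresh
    /// `List[int]` type; the original type is left unmodified.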
pub fn subst(&self, a: Type, mapping: &VarMap) -> Option { let ty_cell = self.unification_table.borrow_mut().probe_value(a); let ty = ty_cell.borrow(); @@ -577,6 +582,7 @@ impl Unifier { map2 } + /// Check whether two types are equal. pub fn eq(&self, a: Type, b: Type) -> bool { if a == b { return true; From d67407716c041554f534ea0ea79d5a902dac8b14 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 16 Jul 2021 15:55:52 +0800 Subject: [PATCH 019/131] function unification... --- nac3core/src/typecheck/test_typedef.rs | 28 ++-- nac3core/src/typecheck/typedef.rs | 205 ++++++++++++++++++++----- 2 files changed, 177 insertions(+), 56 deletions(-) diff --git a/nac3core/src/typecheck/test_typedef.rs b/nac3core/src/typecheck/test_typedef.rs index b61bdb99..3bce23a0 100644 --- a/nac3core/src/typecheck/test_typedef.rs +++ b/nac3core/src/typecheck/test_typedef.rs @@ -8,14 +8,12 @@ mod test { struct TestEnvironment { pub unifier: Unifier, type_mapping: HashMap, - var_max_id: u32, } impl TestEnvironment { fn new() -> TestEnvironment { - let unifier = Unifier::new(); + let mut unifier = Unifier::new(); let mut type_mapping = HashMap::new(); - let mut var_max_id = 0; type_mapping.insert( "int".into(), @@ -41,38 +39,30 @@ mod test { params: HashMap::new(), }), ); - let v0 = unifier.add_ty(TypeEnum::TVar { id: 0 }); - var_max_id += 1; + let (v0, id) = unifier.get_fresh_var(); type_mapping.insert( "Foo".into(), unifier.add_ty(TypeEnum::TObj { obj_id: 3, fields: [("a".into(), v0)].iter().cloned().collect(), - params: [(0u32, v0)].iter().cloned().collect(), + params: [(id, v0)].iter().cloned().collect(), }), ); TestEnvironment { unifier, type_mapping, - var_max_id, } } - fn get_fresh_var(&mut self) -> Type { - let id = self.var_max_id + 1; - self.var_max_id += 1; - self.unifier.add_ty(TypeEnum::TVar { id }) - } - - fn parse(&self, typ: &str, mapping: &Mapping) -> Type { + fn parse(&mut self, typ: &str, mapping: &Mapping) -> Type { let result = self.internal_parse(typ, mapping); assert!(result.1.is_empty()); result.0 } fn internal_parse<'a, 'b>( - &'a self, + &'a mut self, typ: &'b str, mapping: &Mapping, ) -> (Type, &'b str) { @@ -189,8 +179,8 @@ mod test { let mut env = TestEnvironment::new(); let mut mapping = HashMap::new(); for i in 1..=variable_count { - let v = env.get_fresh_var(); - mapping.insert(format!("v{}", i), v); + let v = env.unifier.get_fresh_var(); + mapping.insert(format!("v{}", i), v.0); } // unification may have side effect when we do type resolution, so freeze the types // before doing unification. @@ -259,8 +249,8 @@ mod test { let mut env = TestEnvironment::new(); let mut mapping = HashMap::new(); for i in 1..=variable_count { - let v = env.get_fresh_var(); - mapping.insert(format!("v{}", i), v); + let v = env.unifier.get_fresh_var(); + mapping.insert(format!("v{}", i), v.0); } // unification may have side effect when we do type resolution, so freeze the types // before doing unification. diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index b7fd33a6..6ff93879 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -48,7 +48,7 @@ impl Deref for TypeCell { } pub type Mapping = HashMap; -pub type VarMap = Mapping; +type VarMap = Mapping; #[derive(Clone)] pub struct Call { @@ -65,6 +65,13 @@ pub struct FuncArg { is_optional: bool, } +#[derive(Clone)] +pub struct FunSignature { + args: Vec, + ret: Type, + params: VarMap, +} + // We use a lot of `Rc`/`RefCell`s here as we want to simplify our code. 
// We may not really need so much `Rc`s, but we would have to do complicated // stuffs otherwise. @@ -96,11 +103,7 @@ pub enum TypeEnum { TCall { calls: Vec>, }, - TFunc { - args: Vec, - ret: Type, - params: VarMap, - }, + TFunc(FunSignature), } // Order: @@ -199,40 +202,41 @@ pub struct ObjDef { } pub struct Unifier { - unification_table: RefCell>, + unification_table: InPlaceUnificationTable, obj_def_table: Vec, + var_id: u32, } impl Unifier { pub fn new() -> Unifier { Unifier { - unification_table: RefCell::new(InPlaceUnificationTable::new()), + unification_table: InPlaceUnificationTable::new(), obj_def_table: Vec::new(), + var_id: 0, } } /// Register a type to the unifier. /// Returns a key in the unification_table. - pub fn add_ty(&self, a: TypeEnum) -> Type { - self.unification_table - .borrow_mut() - .new_key(TypeCell(Rc::new(a.into()))) + pub fn add_ty(&mut self, a: TypeEnum) -> Type { + self.unification_table.new_key(TypeCell(Rc::new(a.into()))) } /// Get the TypeEnum of a type. - pub fn get_ty(&self, a: Type) -> Rc> { - let mut table = self.unification_table.borrow_mut(); - table.probe_value(a).0 + pub fn get_ty(&mut self, a: Type) -> Rc> { + self.unification_table.probe_value(a).0 } /// Unify two types, i.e. a = b. - pub fn unify(&self, mut a: Type, mut b: Type) -> Result<(), String> { + pub fn unify(&mut self, mut a: Type, mut b: Type) -> Result<(), String> { let (mut ty_a_cell, mut ty_b_cell) = { - let mut table = self.unification_table.borrow_mut(); - if table.unioned(a, b) { + if self.unification_table.unioned(a, b) { return Ok(()); } - (table.probe_value(a), table.probe_value(b)) + ( + self.unification_table.probe_value(a), + self.unification_table.probe_value(b), + ) }; let (ty_a, ty_b) = { @@ -353,7 +357,6 @@ impl Unifier { TypeEnum::TVirtual { ty } => { // not sure if this is correct... self.unify(a, *ty)?; - self.set_a_to_b(a, b); } _ => { return self.incompatible_types(&*ty_a, &*ty_b); @@ -390,14 +393,105 @@ impl Unifier { return self.incompatible_types(&*ty_a, &*ty_b); } } - _ => unimplemented!(), + TypeEnum::TCall { calls: c1 } => match &*ty_b { + TypeEnum::TCall { .. 
} => { + drop(ty_b); + if let TypeEnum::TCall { calls: c2 } = &mut *ty_b_cell.as_ref().borrow_mut() { + c2.extend(c1.iter().cloned()); + } else { + unreachable!() + } + self.set_a_to_b(a, b); + } + TypeEnum::TFunc(signature) => { + let required: Vec = signature + .args + .iter() + .filter(|v| !v.is_optional) + .map(|v| v.name.clone()) + .rev() + .collect(); + for c in c1 { + let Call { + posargs, + kwargs, + ret, + fun, + } = c.as_ref(); + let instantiated = self.instantiate_fun(b, signature); + let signature; + let r = self.get_ty(instantiated); + let r = r.as_ref().borrow(); + if let TypeEnum::TFunc(s) = &*r { + signature = s; + } else { + unreachable!(); + } + let mut required = required.clone(); + let mut all_names: Vec<_> = signature + .args + .iter() + .map(|v| (v.name.clone(), v.ty)) + .rev() + .collect(); + for (i, t) in posargs.iter().enumerate() { + if signature.args.len() <= i { + return Err(format!("Too many arguments.")); + } + if !required.is_empty() { + required.pop(); + } + self.unify(all_names.pop().unwrap().1, *t)?; + } + for (k, t) in kwargs.iter() { + if let Some(i) = required.iter().position(|v| v == k) { + required.remove(i); + } + if let Some(i) = all_names.iter().position(|v| &v.0 == k) { + self.unify(all_names.remove(i).1, *t)?; + } else { + return Err(format!("Unknown keyword argument {}", k)); + } + } + self.unify(*ret, signature.ret)?; + *fun.borrow_mut() = Some(instantiated); + } + self.set_a_to_b(a, b); + } + _ => { + return self.incompatible_types(&*ty_a, &*ty_b); + } + }, + TypeEnum::TFunc(sign1) => { + if let TypeEnum::TFunc(sign2) = &*ty_b { + if !sign1.params.is_empty() || !sign2.params.is_empty() { + return Err(format!("Polymorphic function pointer is prohibited.")); + } + if sign1.args.len() != sign2.args.len() { + return Err(format!("Functions differ in number of parameters.")); + } + for (x, y) in sign1.args.iter().zip(sign2.args.iter()) { + if x.name != y.name { + return Err(format!("Functions differ in parameter names.")); + } + if x.is_optional != y.is_optional { + return Err(format!("Functions differ in optional parameters.")); + } + self.unify(x.ty, y.ty)?; + } + self.unify(sign1.ret, sign2.ret)?; + self.set_a_to_b(a, b); + } else { + return self.incompatible_types(&*ty_a, &*ty_b); + } + } } Ok(()) } - fn set_a_to_b(&self, a: Type, b: Type) { + fn set_a_to_b(&mut self, a: Type, b: Type) { // unify a and b together, and set the value to b's value. - let mut table = self.unification_table.borrow_mut(); + let table = &mut self.unification_table; let ty_b = table.probe_value(b); table.union(a, b); table.union_value(a, ty_b); @@ -411,11 +505,11 @@ impl Unifier { )) } - fn occur_check(&self, a: Type, b: Type) -> Result<(), String> { - if self.unification_table.borrow_mut().unioned(a, b) { + fn occur_check(&mut self, a: Type, b: Type) -> Result<(), String> { + if self.unification_table.unioned(a, b) { return Err("Recursive type is prohibited.".to_owned()); } - let ty = self.unification_table.borrow_mut().probe_value(b); + let ty = self.unification_table.probe_value(b); let ty = ty.borrow(); match &*ty { @@ -454,7 +548,7 @@ impl Unifier { self.occur_check(a, *t)?; } } - TypeEnum::TFunc { args, ret, params } => { + TypeEnum::TFunc(FunSignature { args, ret, params }) => { for t in args .iter() .map(|v| &v.ty) @@ -472,8 +566,8 @@ impl Unifier { /// If this returns Some(T), T would be the substituted type. /// If this returns None, the result type would be the original type /// (no substitution has to be done). 
-    pub fn subst(&self, a: Type, mapping: &VarMap) -> Option {
-        let ty_cell = self.unification_table.borrow_mut().probe_value(a);
+    pub fn subst(&mut self, a: Type, mapping: &VarMap) -> Option {
+        let ty_cell = self.unification_table.probe_value(a);
         let ty = ty_cell.borrow();
         // this function would only be called when we instantiate functions.
         // function type signature should ONLY contain concrete types and type
@@ -512,10 +606,10 @@ impl Unifier {
                 // parameter list, we don't need to substitute the fields.
                 // This is also used to prevent infinite substitution...
                 let need_subst = params.values().any(|v| {
-                    let ty_cell = self.unification_table.borrow_mut().probe_value(*v);
+                    let ty_cell = self.unification_table.probe_value(*v);
                     let ty = ty_cell.borrow();
                     if let TypeEnum::TVar { id } = &*ty {
-                        mapping.contains_key(id)
+                        mapping.contains_key(&id)
                     } else {
                         false
                     }
@@ -537,7 +631,7 @@ impl Unifier {
                     None
                 }
             }
-            TypeEnum::TFunc { args, ret, params } => {
+            TypeEnum::TFunc(FunSignature { args, ret, params }) => {
                 let new_params = self.subst_map(params, mapping);
                 let new_ret = self.subst(*ret, mapping);
                 let mut new_args = None;
@@ -557,7 +651,7 @@ impl Unifier {
                     let params = new_params.unwrap_or_else(|| params.clone());
                     let ret = new_ret.unwrap_or_else(|| *ret);
                     let args = new_args.unwrap_or_else(|| args.clone());
-                    Some(self.add_ty(TypeEnum::TFunc { params, ret, args }))
+                    Some(self.add_ty(TypeEnum::TFunc(FunSignature { params, ret, args })))
                 } else {
                     None
                 }
@@ -566,7 +660,7 @@ impl Unifier {
         }
     }

-    fn subst_map(&self, map: &Mapping, mapping: &VarMap) -> Option>
+    fn subst_map(&mut self, map: &Mapping, mapping: &VarMap) -> Option>
     where
         K: std::hash::Hash + std::cmp::Eq + std::clone::Clone,
     {
@@ -582,13 +676,43 @@ impl Unifier {
         map2
     }

+    /// Instantiate a function if it hasn't been instantiated yet.
+    /// Returns the type with each variable in the signature's parameter list
+    /// replaced by a fresh variable, or the original type if it is already instantiated.
+    fn instantiate_fun(&mut self, ty: Type, fun: &FunSignature) -> Type {
+        let mut instantiated = false;
+        for (k, v) in fun.params.iter() {
+            if let TypeEnum::TVar { id } =
+                &*self.unification_table.probe_value(*v).as_ref().borrow()
+            {
+                if k != id {
+                    instantiated = true;
+                    break;
+                }
+            } else {
+                instantiated = true;
+                break;
+            }
+        }
+        if instantiated {
+            ty
+        } else {
+            let mapping = fun
+                .params
+                .iter()
+                .map(|(k, _)| (*k, self.get_fresh_var().0))
+                .collect();
+            self.subst(ty, &mapping).unwrap_or(ty)
+        }
+    }
+
     /// Check whether two types are equal.
-    pub fn eq(&self, a: Type, b: Type) -> bool {
+    pub fn eq(&mut self, a: Type, b: Type) -> bool {
         if a == b {
             return true;
         }
         let (ty_a, ty_b) = {
-            let mut table = self.unification_table.borrow_mut();
+            let table = &mut self.unification_table;
             if table.unioned(a, b) {
                 return true;
             }
@@ -629,7 +753,7 @@ impl Unifier {
         }
     }

-    fn map_eq(&self, map1: &Mapping, map2: &Mapping) -> bool
+    fn map_eq(&mut self, map1: &Mapping, map2: &Mapping) -> bool
     where
         K: std::hash::Hash + std::cmp::Eq + std::clone::Clone,
     {
@@ -643,4 +767,11 @@ impl Unifier {
         }
         true
     }
+
+    /// Get a fresh type variable.
+ pub fn get_fresh_var(&mut self) -> (Type, u32) { + let id = self.var_id + 1; + self.var_id += 1; + (self.add_ty(TypeEnum::TVar { id }), id) + } } From f51603f6dac8172163866b754b455453c658038c Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 19 Jul 2021 09:52:25 +0800 Subject: [PATCH 020/131] cleanup --- nac3core/src/typecheck/context.rs | 191 --------------------------- nac3core/src/typecheck/mod.rs | 8 +- nac3core/src/typecheck/primitives.rs | 168 ----------------------- nac3core/src/typecheck/typedef.rs | 12 +- 4 files changed, 9 insertions(+), 370 deletions(-) delete mode 100644 nac3core/src/typecheck/context.rs delete mode 100644 nac3core/src/typecheck/primitives.rs diff --git a/nac3core/src/typecheck/context.rs b/nac3core/src/typecheck/context.rs deleted file mode 100644 index 4c23c0d7..00000000 --- a/nac3core/src/typecheck/context.rs +++ /dev/null @@ -1,191 +0,0 @@ -use std::collections::HashMap; -use std::collections::HashSet; - -use super::primitives::get_var; -use super::symbol_resolver::*; -use super::typedef::*; -use rustpython_parser::ast::Location; - -/// Structure for storing top-level type definitions. -/// Used for collecting type signature from source code. -/// Can be converted to `InferenceContext` for type inference in functions. -#[derive(Clone)] -pub struct GlobalContext<'a> { - /// List of type definitions. - pub type_defs: Vec>, - /// List of type variable definitions. - pub var_defs: Vec>, -} - -impl<'a> GlobalContext<'a> { - pub fn new(type_defs: Vec>) -> GlobalContext { - GlobalContext { - type_defs, - var_defs: Vec::new(), - } - } - - pub fn add_type(&mut self, def: TypeDef<'a>) -> TypeId { - self.type_defs.push(def); - TypeId(self.type_defs.len() - 1) - } - - pub fn add_variable(&mut self, def: VarDef<'a>) -> VariableId { - self.var_defs.push(def); - VariableId(self.var_defs.len() - 1) - } - - pub fn get_type_def_mut(&mut self, id: TypeId) -> &mut TypeDef<'a> { - self.type_defs.get_mut(id.0).unwrap() - } - - pub fn get_type_def(&self, id: TypeId) -> &TypeDef { - self.type_defs.get(id.0).unwrap() - } - - pub fn get_var_def(&self, id: VariableId) -> &VarDef { - self.var_defs.get(id.0).unwrap() - } - - pub fn get_var_count(&self) -> usize { - self.var_defs.len() - } -} - -pub struct InferenceContext<'a> { - // a: (i, x) means that a.i = x - pub fields_assignment: HashMap>, - pub constraints: Vec<(Type, Type)>, - global: GlobalContext<'a>, - resolver: Box, - local_identifiers: HashMap<&'a str, Type>, - local_variables: Vec>, - fresh_var_id: usize, -} - -impl<'a> InferenceContext<'a> { - pub fn new( - global: GlobalContext<'a>, - resolver: Box, - ) -> InferenceContext<'a> { - let id = global.get_var_count(); - InferenceContext { - global, - fields_assignment: HashMap::new(), - constraints: Vec::new(), - resolver, - local_identifiers: HashMap::new(), - local_variables: Vec::new(), - fresh_var_id: id, - } - } - - fn get_fresh_var(&mut self) -> VariableId { - self.local_variables.push(VarDef { - name: None, - bound: Vec::new(), - }); - let id = self.fresh_var_id; - self.fresh_var_id += 1; - VariableId(id) - } - - fn get_fresh_var_with_bound(&mut self, bound: Vec) -> VariableId { - self.local_variables.push(VarDef { name: None, bound }); - let id = self.fresh_var_id; - self.fresh_var_id += 1; - VariableId(id) - } - - pub fn assign_identifier(&mut self, identifier: &'a str) -> Type { - if let Some(t) = self.local_identifiers.get(identifier) { - t.clone() - } else if let Some(SymbolType::Identifier(t)) = self.resolver.get_symbol_type(identifier) { - t - } else { - 
get_var(self.get_fresh_var()) - } - } - - pub fn get_identifier_type(&self, identifier: &'a str) -> Result { - if let Some(t) = self.local_identifiers.get(identifier) { - Ok(t.clone()) - } else if let Some(SymbolType::Identifier(t)) = self.resolver.get_symbol_type(identifier) { - Ok(t) - } else { - Err("unbounded identifier".into()) - } - } - - pub fn get_attribute_type( - &mut self, - expr: Type, - identifier: &'a str, - location: Location, - ) -> Result { - match expr.as_ref() { - TypeEnum::TypeVariable(id) => { - if !self.fields_assignment.contains_key(id) { - self.fields_assignment.insert(*id, Vec::new()); - } - let var_id = VariableId(self.fresh_var_id); - let entry = self.fields_assignment.get_mut(&id).unwrap(); - for (attr, t, _) in entry.iter() { - if *attr == identifier { - return Ok(get_var(*t)); - } - } - entry.push((identifier, var_id, location)); - self.local_variables.push(VarDef { - name: None, - bound: Vec::new(), - }); - self.fresh_var_id += 1; - Ok(get_var(var_id)) - } - TypeEnum::ClassType(id, params) => { - let type_def = self.global.get_type_def(*id); - let field = type_def - .base - .fields - .get(identifier) - .map_or_else(|| Err("no such field".to_owned()), Ok)?; - // function and tuple can have 0 type variables but with type parameters - // we require other types have the same number of type variables and type - // parameters in order to build a mapping - assert!(type_def.params.is_empty() || type_def.params.len() == params.len()); - let map = type_def - .params - .clone() - .into_iter() - .zip(params.clone().into_iter()) - .collect(); - let field = field.subst(&map); - Ok(self.get_instance(field)) - } - } - } - - fn get_instance(&mut self, t: Type) -> Type { - let mut vars = HashSet::new(); - t.get_vars(&mut vars); - - let local_min = self.global.get_var_count(); - let bounded = vars.into_iter().filter(|id| id.0 < local_min); - let map = bounded - .map(|v| { - ( - v, - get_var( - self.get_fresh_var_with_bound(self.global.get_var_def(v).bound.clone()), - ), - ) - }) - .collect(); - t.subst(&map) - } - - pub fn get_type_def(&self, id: TypeId) -> &TypeDef { - self.global.get_type_def(id) - } -} diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index 7b30426e..bb470cd0 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,8 +1,6 @@ #![allow(dead_code)] -// mod context; -// pub mod location; -// mod magic_methods; -// mod primitives; -// pub mod symbol_resolver; +pub mod location; +mod magic_methods; +pub mod symbol_resolver; mod test_typedef; pub mod typedef; diff --git a/nac3core/src/typecheck/primitives.rs b/nac3core/src/typecheck/primitives.rs deleted file mode 100644 index c383e955..00000000 --- a/nac3core/src/typecheck/primitives.rs +++ /dev/null @@ -1,168 +0,0 @@ -use super::context::*; -use super::typedef::{TypeEnum::*, *}; -use std::collections::HashMap; -use std::rc::Rc; - -pub const FUNC_TYPE: TypeId = TypeId(0); -pub const TUPLE_TYPE: TypeId = TypeId(1); -pub const LIST_TYPE: TypeId = TypeId(2); -pub const VIRTUAL_TYPE: TypeId = TypeId(3); -pub const NONE_TYPE: TypeId = TypeId(4); - -pub const BOOL_TYPE: TypeId = TypeId(5); -pub const INT32_TYPE: TypeId = TypeId(6); -pub const INT64_TYPE: TypeId = TypeId(7); -pub const FLOAT_TYPE: TypeId = TypeId(8); - -fn primitive(base: BaseDef) -> TypeDef { - TypeDef { - base, - parents: vec![], - params: vec![], - } -} - -pub fn get_fn(from: Type, to: Type) -> Type { - Rc::new(ClassType(FUNC_TYPE, vec![from, to])) -} - -pub fn get_tuple(types: &[Type]) -> Type { - 
Rc::new(ClassType(TUPLE_TYPE, types.to_vec())) -} - -pub fn get_list(t: Type) -> Type { - Rc::new(ClassType(LIST_TYPE, vec![t])) -} - -pub fn get_virtual(t: Type) -> Type { - Rc::new(ClassType(VIRTUAL_TYPE, vec![t])) -} - -pub fn get_none() -> Type { - Rc::new(ClassType(NONE_TYPE, Vec::new())) -} - -pub fn get_bool() -> Type { - Rc::new(ClassType(BOOL_TYPE, Vec::new())) -} -pub fn get_int32() -> Type { - Rc::new(ClassType(INT32_TYPE, Vec::new())) -} - -pub fn get_int64() -> Type { - Rc::new(ClassType(INT64_TYPE, Vec::new())) -} - -pub fn get_float() -> Type { - Rc::new(ClassType(FLOAT_TYPE, Vec::new())) -} - -pub fn get_var(id: VariableId) -> Type { - Rc::new(TypeVariable(id)) -} - -fn impl_math(def: &mut BaseDef, ty: &Type) { - let fun = get_fn(ty.clone(), ty.clone()); - def.fields.insert("__add__", fun.clone()); - def.fields.insert("__sub__", fun.clone()); - def.fields.insert("__mul__", fun.clone()); - def.fields.insert("__neg__", get_fn(get_none(), ty.clone())); - def.fields - .insert("__truediv__", get_fn(ty.clone(), get_float())); - def.fields.insert("__floordiv__", fun.clone()); - def.fields.insert("__mod__", fun.clone()); - def.fields.insert("__pow__", fun); -} - -fn impl_bits(def: &mut BaseDef, ty: &Type) { - let fun = get_fn(get_int32(), ty.clone()); - - def.fields.insert("__lshift__", fun.clone()); - def.fields.insert("__rshift__", fun); - def.fields.insert("__xor__", get_fn(ty.clone(), ty.clone())); -} - -fn impl_eq(def: &mut BaseDef, ty: &Type) { - let fun = get_fn(ty.clone(), get_bool()); - - def.fields.insert("__eq__", fun.clone()); - def.fields.insert("__ne__", fun); -} - -fn impl_order(def: &mut BaseDef, ty: &Type) { - let fun = get_fn(ty.clone(), get_bool()); - - def.fields.insert("__lt__", fun.clone()); - def.fields.insert("__gt__", fun.clone()); - def.fields.insert("__le__", fun.clone()); - def.fields.insert("__ge__", fun); -} - -pub fn basic_ctx() -> GlobalContext<'static> { - let mut ctx = GlobalContext::new(vec![ - primitive(BaseDef { - name: "function", - fields: HashMap::new(), - }), - primitive(BaseDef { - name: "tuple", - fields: HashMap::new(), - }), - primitive(BaseDef { - name: "list", - fields: HashMap::new(), - }), - primitive(BaseDef { - name: "virtual", - fields: HashMap::new(), - }), - primitive(BaseDef { - name: "None", - fields: HashMap::new(), - }), - primitive(BaseDef { - name: "bool", - fields: HashMap::new(), - }), - primitive(BaseDef { - name: "int32", - fields: HashMap::new(), - }), - primitive(BaseDef { - name: "int64", - fields: HashMap::new(), - }), - primitive(BaseDef { - name: "float", - fields: HashMap::new(), - }), - ]); - - let t = ctx.add_variable(VarDef { - name: Some("T"), - bound: vec![], - }); - ctx.get_type_def_mut(LIST_TYPE).params.push(t); - - let b_def = ctx.get_type_def_mut(BOOL_TYPE); - impl_eq(&mut b_def.base, &get_bool()); - let int32 = get_int32(); - let int32_def = &mut ctx.get_type_def_mut(INT32_TYPE).base; - impl_math(int32_def, &int32); - impl_bits(int32_def, &int32); - impl_order(int32_def, &int32); - impl_eq(int32_def, &int32); - let int64 = get_int64(); - let int64_def = &mut ctx.get_type_def_mut(INT64_TYPE).base; - impl_math(int64_def, &int64); - impl_bits(int64_def, &int64); - impl_order(int64_def, &int64); - impl_eq(int64_def, &int64); - let float = get_float(); - let float_def = &mut ctx.get_type_def_mut(FLOAT_TYPE).base; - impl_math(float_def, &float); - impl_order(float_def, &float); - impl_eq(float_def, &float); - - ctx -} diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 
6ff93879..e5680feb 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -436,7 +436,7 @@ impl Unifier { .collect(); for (i, t) in posargs.iter().enumerate() { if signature.args.len() <= i { - return Err(format!("Too many arguments.")); + return Err("Too many arguments.".to_string()); } if !required.is_empty() { required.pop(); @@ -465,17 +465,17 @@ impl Unifier { TypeEnum::TFunc(sign1) => { if let TypeEnum::TFunc(sign2) = &*ty_b { if !sign1.params.is_empty() || !sign2.params.is_empty() { - return Err(format!("Polymorphic function pointer is prohibited.")); + return Err("Polymorphic function pointer is prohibited.".to_string()); } if sign1.args.len() != sign2.args.len() { - return Err(format!("Functions differ in number of parameters.")); + return Err("Functions differ in number of parameters.".to_string()); } for (x, y) in sign1.args.iter().zip(sign2.args.iter()) { if x.name != y.name { - return Err(format!("Functions differ in parameter names.")); + return Err("Functions differ in parameter names.".to_string()); } if x.is_optional != y.is_optional { - return Err(format!("Functions differ in optional parameters.")); + return Err("Functions differ in optional parameters.".to_string()); } self.unify(x.ty, y.ty)?; } @@ -651,7 +651,7 @@ impl Unifier { let params = new_params.unwrap_or_else(|| params.clone()); let ret = new_ret.unwrap_or_else(|| *ret); let args = new_args.unwrap_or_else(|| args.clone()); - Some(self.add_ty(TypeEnum::TFunc(FunSignature { params, ret, args }))) + Some(self.add_ty(TypeEnum::TFunc(FunSignature { args, ret, params }))) } else { None } From c913fb28bdf4ed13be4276288081454e10627349 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 19 Jul 2021 13:34:45 +0800 Subject: [PATCH 021/131] use signed integer for TSeq --- nac3core/src/typecheck/typedef.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index e5680feb..9c20f918 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -81,7 +81,7 @@ pub enum TypeEnum { id: u32, }, TSeq { - map: VarMap, + map: Mapping, }, TTuple { ty: Vec, @@ -274,16 +274,18 @@ impl Unifier { self.set_a_to_b(a, b); } TypeEnum::TTuple { ty: types } => { - let len = types.len() as u32; + let len = types.len() as i32; for (k, v) in map1.iter() { - if *k >= len { + // handle negative index + let ind = if *k < 0 { len + *k } else { *k }; + if ind >= len || ind < 0 { return Err(format!( "Tuple index out of range. (Length: {}, Index: {})", types.len(), k )); } - self.unify(*v, types[*k as usize])?; + self.unify(*v, types[ind as usize])?; } self.set_a_to_b(a, b); } @@ -516,7 +518,7 @@ impl Unifier { TypeEnum::TVar { .. } => { // TODO: occur check for bounds... } - TypeEnum::TSeq { map } | TypeEnum::TObj { params: map, .. } => { + TypeEnum::TSeq { map } => { for t in map.values() { self.occur_check(a, *t)?; } @@ -534,6 +536,11 @@ impl Unifier { self.occur_check(a, *t)?; } } + TypeEnum::TObj { params: map, .. 
} => { + for t in map.values() { + self.occur_check(a, *t)?; + } + } TypeEnum::TCall { calls } => { for t in calls .iter() From d4b85d0bac939a0c55d519876b8f0ba99eba3332 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 19 Jul 2021 13:35:01 +0800 Subject: [PATCH 022/131] expression type inference (WIP) --- nac3core/Cargo.toml | 2 +- nac3core/src/lib.rs | 1 + nac3core/src/typecheck/mod.rs | 1 + nac3core/src/typecheck/symbol_resolver.rs | 6 +- nac3core/src/typecheck/type_inferencer.rs | 227 ++++++++++++++++++++++ nac3core/src/typecheck/typedef.rs | 20 +- 6 files changed, 243 insertions(+), 14 deletions(-) create mode 100644 nac3core/src/typecheck/type_inferencer.rs diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 6b15049d..50ba54ed 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -11,8 +11,8 @@ inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", feat rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } indoc = "1.0" ena = "0.14" +itertools = "0.10.1" [dev-dependencies] test-case = "1.2.0" -itertools = "0.10.1" diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index fceaaeea..74440eef 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -6,6 +6,7 @@ extern crate inkwell; extern crate rustpython_parser; extern crate indoc; extern crate ena; +extern crate itertools; mod typecheck; diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index bb470cd0..cfab64c1 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -4,3 +4,4 @@ mod magic_methods; pub mod symbol_resolver; mod test_typedef; pub mod typedef; +pub mod type_inferencer; diff --git a/nac3core/src/typecheck/symbol_resolver.rs b/nac3core/src/typecheck/symbol_resolver.rs index a6eff440..accb1aae 100644 --- a/nac3core/src/typecheck/symbol_resolver.rs +++ b/nac3core/src/typecheck/symbol_resolver.rs @@ -16,8 +16,8 @@ pub enum SymbolValue<'a> { } pub trait SymbolResolver { - fn get_symbol_type(&self, str: &str) -> Option; - fn get_symbol_value(&self, str: &str) -> Option; - fn get_symbol_location(&self, str: &str) -> Option; + fn get_symbol_type(&mut self, str: &str) -> Option; + fn get_symbol_value(&mut self, str: &str) -> Option; + fn get_symbol_location(&mut self, str: &str) -> Option; // handle function call etc. 
} diff --git a/nac3core/src/typecheck/type_inferencer.rs b/nac3core/src/typecheck/type_inferencer.rs new file mode 100644 index 00000000..3ccc3016 --- /dev/null +++ b/nac3core/src/typecheck/type_inferencer.rs @@ -0,0 +1,227 @@ +use std::cell::RefCell; +use std::collections::HashMap; +use std::convert::TryInto; +use std::iter::once; +use std::rc::Rc; + +use super::magic_methods::*; +use super::symbol_resolver::{SymbolResolver, SymbolType}; +use super::typedef::{Call, Type, TypeEnum, Unifier}; +use itertools::izip; +use rustpython_parser::ast::{self, fold::Fold}; + +pub struct PrimitiveStore { + int32: Type, + int64: Type, + float: Type, + bool: Type, + none: Type, +} + +pub struct Inferencer<'a> { + resolver: &'a mut Box, + unifier: &'a mut Unifier, + variable_mapping: &'a mut HashMap, + calls: &'a mut Vec>, + primitives: &'a PrimitiveStore, +} + +impl<'a> Fold<()> for Inferencer<'a> { + type TargetU = Option; + type Error = String; + + fn map_user(&mut self, _: ()) -> Result { + Ok(None) + } +} + +type InferenceResult = Result; + +impl<'a> Inferencer<'a> { + fn build_method_call( + &mut self, + method: String, + obj: Type, + params: Vec, + ret: Type, + ) -> InferenceResult { + let call = Rc::new(Call { + posargs: params, + kwargs: HashMap::new(), + ret, + fun: RefCell::new(None), + }); + self.calls.push(call.clone()); + let call = self.unifier.add_ty(TypeEnum::TCall { calls: vec![call] }); + let fields = once((method, call)).collect(); + let record = self.unifier.add_ty(TypeEnum::TRecord { fields }); + self.unifier.unify(obj, record)?; + Ok(ret) + } + + fn infer_identifier(&mut self, id: &str) -> InferenceResult { + if let Some(ty) = self.variable_mapping.get(id) { + Ok(*ty) + } else { + match self.resolver.get_symbol_type(id) { + Some(SymbolType::TypeName(_)) => { + Err("Expected expression instead of type".to_string()) + } + Some(SymbolType::Identifier(ty)) => Ok(ty), + None => { + let ty = self.unifier.get_fresh_var().0; + self.variable_mapping.insert(id.to_string(), ty); + Ok(ty) + } + } + } + } + + fn infer_constant(&mut self, constant: &ast::Constant) -> InferenceResult { + match constant { + ast::Constant::Bool(_) => Ok(self.primitives.bool), + ast::Constant::Int(val) => { + let int32: Result = val.try_into(); + // int64 would be handled separately in functions + if int32.is_ok() { + Ok(self.primitives.int64) + } else { + Err("Integer out of bound".into()) + } + } + ast::Constant::Float(_) => Ok(self.primitives.float), + ast::Constant::Tuple(vals) => { + let ty: Result, _> = vals.iter().map(|x| self.infer_constant(x)).collect(); + Ok(self.unifier.add_ty(TypeEnum::TTuple { ty: ty? 
})) + } + _ => Err("not supported".into()), + } + } + + fn infer_list(&mut self, elts: &[ast::Expr>]) -> InferenceResult { + let (ty, _) = self.unifier.get_fresh_var(); + for t in elts.iter() { + self.unifier.unify(ty, t.custom.unwrap())?; + } + Ok(ty) + } + + fn infer_tuple(&mut self, elts: &[ast::Expr>]) -> InferenceResult { + let ty = elts.iter().map(|x| x.custom.unwrap()).collect(); + Ok(self.unifier.add_ty(TypeEnum::TTuple { ty })) + } + + fn infer_attribute(&mut self, value: &ast::Expr>, attr: &str) -> InferenceResult { + let (attr_ty, _) = self.unifier.get_fresh_var(); + let fields = once((attr.to_string(), attr_ty)).collect(); + let parent = self.unifier.add_ty(TypeEnum::TRecord { fields }); + self.unifier.unify(value.custom.unwrap(), parent)?; + Ok(attr_ty) + } + + fn infer_bool_ops(&mut self, values: &[ast::Expr>]) -> InferenceResult { + let b = self.primitives.bool; + for v in values { + self.unifier.unify(v.custom.unwrap(), b)?; + } + Ok(b) + } + + fn infer_bin_ops( + &mut self, + left: &ast::Expr>, + op: &ast::Operator, + right: &ast::Expr>, + ) -> InferenceResult { + let method = binop_name(op); + let ret = self.unifier.get_fresh_var().0; + self.build_method_call( + method.to_string(), + left.custom.unwrap(), + vec![right.custom.unwrap()], + ret, + ) + } + + fn infer_unary_ops( + &mut self, + op: &ast::Unaryop, + operand: &ast::Expr>, + ) -> InferenceResult { + let method = unaryop_name(op); + let ret = self.unifier.get_fresh_var().0; + self.build_method_call(method.to_string(), operand.custom.unwrap(), vec![], ret) + } + + fn infer_compare( + &mut self, + left: &ast::Expr>, + ops: &[ast::Cmpop], + comparators: &[ast::Expr>], + ) -> InferenceResult { + let boolean = self.primitives.bool; + for (a, b, c) in izip!(once(left).chain(comparators), comparators, ops) { + let method = comparison_name(c) + .ok_or_else(|| "unsupported comparator".to_string())? + .to_string(); + self.build_method_call(method, a.custom.unwrap(), vec![b.custom.unwrap()], boolean)?; + } + Ok(boolean) + } + + fn infer_subscript( + &mut self, + value: &ast::Expr>, + slice: &ast::Expr>, + ) -> InferenceResult { + let ty = self.unifier.get_fresh_var().0; + match &slice.node { + ast::ExprKind::Slice { lower, upper, step } => { + for v in [lower.as_ref(), upper.as_ref(), step.as_ref()] + .iter() + .flatten() + { + self.unifier + .unify(self.primitives.int32, v.custom.unwrap())?; + } + let list = self.unifier.add_ty(TypeEnum::TList { ty }); + self.unifier.unify(value.custom.unwrap(), list)?; + Ok(list) + } + ast::ExprKind::Constant { + value: ast::Constant::Int(val), + .. 
+ } => { + // the index is a constant, so value can be a sequence (either list/tuple) + let ind: i32 = val + .try_into() + .map_err(|_| "Index must be int32".to_string())?; + let map = once((ind, ty)).collect(); + let seq = self.unifier.add_ty(TypeEnum::TSeq { map }); + self.unifier.unify(value.custom.unwrap(), seq)?; + Ok(ty) + } + _ => { + // the index is not a constant, so value can only be a list + self.unifier + .unify(slice.custom.unwrap(), self.primitives.int32)?; + let list = self.unifier.add_ty(TypeEnum::TList { ty }); + self.unifier.unify(value.custom.unwrap(), list)?; + Ok(ty) + } + } + } + + fn infer_if_expr( + &mut self, + test: &ast::Expr>, + body: ast::Expr>, + orelse: ast::Expr>, + ) -> InferenceResult { + self.unifier + .unify(test.custom.unwrap(), self.primitives.bool)?; + self.unifier + .unify(body.custom.unwrap(), orelse.custom.unwrap())?; + Ok(body.custom.unwrap()) + } +} diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 9c20f918..d2da32ea 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -52,24 +52,24 @@ type VarMap = Mapping; #[derive(Clone)] pub struct Call { - posargs: Vec, - kwargs: HashMap, - ret: Type, - fun: RefCell>, + pub posargs: Vec, + pub kwargs: HashMap, + pub ret: Type, + pub fun: RefCell>, } #[derive(Clone)] pub struct FuncArg { - name: String, - ty: Type, - is_optional: bool, + pub name: String, + pub ty: Type, + pub is_optional: bool, } #[derive(Clone)] pub struct FunSignature { - args: Vec, - ret: Type, - params: VarMap, + pub args: Vec, + pub ret: Type, + pub params: VarMap, } // We use a lot of `Rc`/`RefCell`s here as we want to simplify our code. From e732f7e089a12c55a5e1f94a2c94da35aec7cdb9 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 19 Jul 2021 16:51:58 +0800 Subject: [PATCH 023/131] removed integer encoding --- nac3core/src/typecheck/test_typedef.rs | 4 +- nac3core/src/typecheck/typedef.rs | 459 +++++++++++-------------- 2 files changed, 194 insertions(+), 269 deletions(-) diff --git a/nac3core/src/typecheck/test_typedef.rs b/nac3core/src/typecheck/test_typedef.rs index 3bce23a0..d85fda7a 100644 --- a/nac3core/src/typecheck/test_typedef.rs +++ b/nac3core/src/typecheck/test_typedef.rs @@ -206,7 +206,7 @@ mod test { ("v1", "Tuple[int]"), ("v2", "List[int]"), ], - (("v1", "v2"), "Cannot unify TTuple with TList") + (("v1", "v2"), "Cannot unify TList with TTuple") ; "type mismatch" )] #[test_case(2, @@ -222,7 +222,7 @@ mod test { ("v1", "Tuple[int,int]"), ("v2", "Tuple[int]"), ], - (("v1", "v2"), "Cannot unify tuples with length 1 and 2") + (("v1", "v2"), "Cannot unify tuples with length 2 and 1") ; "tuple length mismatch" )] #[test_case(3, diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index d2da32ea..4c7eaa89 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -3,7 +3,6 @@ use std::cell::RefCell; use std::collections::HashMap; use std::fmt::Debug; use std::iter::once; -use std::mem::swap; use std::ops::Deref; use std::rc::Rc; @@ -69,7 +68,7 @@ pub struct FuncArg { pub struct FunSignature { pub args: Vec, pub ret: Type, - pub params: VarMap, + pub vars: VarMap, } // We use a lot of `Rc`/`RefCell`s here as we want to simplify our code. @@ -117,62 +116,7 @@ pub enum TypeEnum { // `--> TCall // `--> TFunc -// We encode the types as natural numbers, and subtyping relation as divisibility. -// If a | b, b <: a. 
-// We assign unique prime numbers (1 to TVar, everything is a subtype of it) to each type: -// TVar = 1 -// |--> TSeq = 2 -// | |--> TTuple = 3 -// | `--> TList = 5 -// |--> TRecord = 7 -// | |--> TObj = 11 -// | `--> TVirtual = 13 -// `--> TCall = 17 -// `--> TFunc = 21 -// -// And then, based on the subtyping relation, multiply them together... -// TVar = 1 -// |--> TSeq = 2 * TVar -// | |--> TTuple = 3 * TSeq * TVar -// | `--> TList = 5 * TSeq * TVar -// |--> TRecord = 7 * TVar -// | |--> TObj = 11 * TRecord * TVar -// | `--> TVirtual = 13 * TRecord * TVar -// `--> TCall = 17 * TVar -// `--> TFunc = 21 * TCall * TVar - impl TypeEnum { - fn get_int(&self) -> i32 { - const TVAR: i32 = 1; - const TSEQ: i32 = 2; - const TTUPLE: i32 = 3; - const TLIST: i32 = 5; - const TRECORD: i32 = 7; - const TOBJ: i32 = 11; - const TVIRTUAL: i32 = 13; - const TCALL: i32 = 17; - const TFUNC: i32 = 21; - - match self { - TypeEnum::TVar { .. } => TVAR, - TypeEnum::TSeq { .. } => TSEQ * TVAR, - TypeEnum::TTuple { .. } => TTUPLE * TSEQ * TVAR, - TypeEnum::TList { .. } => TLIST * TSEQ * TVAR, - TypeEnum::TRecord { .. } => TRECORD * TVAR, - TypeEnum::TObj { .. } => TOBJ * TRECORD * TVAR, - TypeEnum::TVirtual { .. } => TVIRTUAL * TRECORD * TVAR, - TypeEnum::TCall { .. } => TCALL * TVAR, - TypeEnum::TFunc { .. } => TFUNC * TCALL * TVAR, - } - } - - // e.g. List <: Var - pub fn type_le(&self, other: &TypeEnum) -> bool { - let a = self.get_int(); - let b = other.get_int(); - (a % b) == 0 - } - pub fn get_type_name(&self) -> &'static str { // this function is for debugging only... // a proper to_str implementation requires the context @@ -227,9 +171,14 @@ impl Unifier { self.unification_table.probe_value(a).0 } + pub fn unify(&mut self, a: Type, b: Type) -> Result<(), String> { + self.unify_impl(a, b, false) + } + /// Unify two types, i.e. a = b. - pub fn unify(&mut self, mut a: Type, mut b: Type) -> Result<(), String> { - let (mut ty_a_cell, mut ty_b_cell) = { + fn unify_impl(&mut self, a: Type, b: Type, swapped: bool) -> Result<(), String> { + use TypeEnum::*; + let (ty_a_cell, ty_b_cell) = { if self.unification_table.unioned(a, b) { return Ok(()); } @@ -240,251 +189,215 @@ impl Unifier { }; let (ty_a, ty_b) = { - // simplify our pattern matching... - if ty_a_cell.borrow().type_le(&ty_b_cell.borrow()) { - swap(&mut a, &mut b); - swap(&mut ty_a_cell, &mut ty_b_cell); - } (ty_a_cell.borrow(), ty_b_cell.borrow()) }; self.occur_check(a, b)?; - match &*ty_a { - TypeEnum::TVar { .. } => { - // TODO: type variables bound check... + match (&*ty_a, &*ty_b) { + (TypeEnum::TVar { .. }, _) => { self.set_a_to_b(a, b); } - TypeEnum::TSeq { map: map1 } => { - match &*ty_b { - TypeEnum::TSeq { .. } => { - drop(ty_b); - if let TypeEnum::TSeq { map: map2 } = &mut *ty_b_cell.as_ref().borrow_mut() - { - // unify them to map2 - for (key, value) in map1.iter() { - if let Some(ty) = map2.get(key) { - self.unify(*ty, *value)?; - } else { - map2.insert(*key, *value); - } - } + (TSeq { map: map1 }, TSeq { .. 
}) => { + drop(ty_b); + if let TypeEnum::TSeq { map: map2 } = &mut *ty_b_cell.as_ref().borrow_mut() { + // unify them to map2 + for (key, value) in map1.iter() { + if let Some(ty) = map2.get(key) { + self.unify(*ty, *value)?; } else { - unreachable!() + map2.insert(*key, *value); } - self.set_a_to_b(a, b); - } - TypeEnum::TTuple { ty: types } => { - let len = types.len() as i32; - for (k, v) in map1.iter() { - // handle negative index - let ind = if *k < 0 { len + *k } else { *k }; - if ind >= len || ind < 0 { - return Err(format!( - "Tuple index out of range. (Length: {}, Index: {})", - types.len(), - k - )); - } - self.unify(*v, types[ind as usize])?; - } - self.set_a_to_b(a, b); - } - TypeEnum::TList { ty } => { - for v in map1.values() { - self.unify(*v, *ty)?; - } - self.set_a_to_b(a, b); - } - _ => { - return self.incompatible_types(&*ty_a, &*ty_b); } + } else { + unreachable!() } + self.set_a_to_b(a, b); } - TypeEnum::TTuple { ty: ty1 } => { - if let TypeEnum::TTuple { ty: ty2 } = &*ty_b { - if ty1.len() != ty2.len() { + (TSeq { map: map1 }, TTuple { ty: types }) => { + let len = types.len() as i32; + for (k, v) in map1.iter() { + // handle negative index + let ind = if *k < 0 { len + *k } else { *k }; + if ind >= len || ind < 0 { return Err(format!( - "Cannot unify tuples with length {} and {}", - ty1.len(), - ty2.len() + "Tuple index out of range. (Length: {}, Index: {})", + types.len(), + k )); } - for (x, y) in ty1.iter().zip(ty2.iter()) { - self.unify(*x, *y)?; - } - self.set_a_to_b(a, b); - } else { - return self.incompatible_types(&*ty_a, &*ty_b); + self.unify(*v, types[ind as usize])?; } + self.set_a_to_b(a, b); } - TypeEnum::TList { ty: ty1 } => { - if let TypeEnum::TList { ty: ty2 } = *ty_b { - self.unify(*ty1, ty2)?; - self.set_a_to_b(a, b); - } else { - return self.incompatible_types(&*ty_a, &*ty_b); + (TSeq { map: map1 }, TList { ty }) => { + for v in map1.values() { + self.unify(*v, *ty)?; } + self.set_a_to_b(a, b); } - TypeEnum::TRecord { fields: fields1 } => { - match &*ty_b { - TypeEnum::TRecord { .. } => { - drop(ty_b); - if let TypeEnum::TRecord { fields: fields2 } = - &mut *ty_b_cell.as_ref().borrow_mut() - { - for (key, value) in fields1.iter() { - if let Some(ty) = fields2.get(key) { - self.unify(*ty, *value)?; - } else { - fields2.insert(key.clone(), *value); - } - } + (TTuple { ty: ty1 }, TTuple { ty: ty2 }) => { + if ty1.len() != ty2.len() { + return Err(format!( + "Cannot unify tuples with length {} and {}", + ty1.len(), + ty2.len() + )); + } + for (x, y) in ty1.iter().zip(ty2.iter()) { + self.unify(*x, *y)?; + } + self.set_a_to_b(a, b); + } + (TList { ty: ty1 }, TList { ty: ty2 }) => { + self.unify(*ty1, *ty2)?; + self.set_a_to_b(a, b); + } + (TRecord { fields: fields1 }, TRecord { .. }) => { + drop(ty_b); + if let TypeEnum::TRecord { fields: fields2 } = &mut *ty_b_cell.as_ref().borrow_mut() + { + for (key, value) in fields1.iter() { + if let Some(ty) = fields2.get(key) { + self.unify(*ty, *value)?; } else { - unreachable!() + fields2.insert(key.clone(), *value); } - self.set_a_to_b(a, b); } - TypeEnum::TObj { - fields: fields2, .. - } => { - for (key, value) in fields1.iter() { - if let Some(ty) = fields2.get(key) { - self.unify(*ty, *value)?; - } else { - return Err(format!("No such attribute {}", key)); - } - } - self.set_a_to_b(a, b); - } - TypeEnum::TVirtual { ty } => { - // not sure if this is correct... 
- self.unify(a, *ty)?; - } - _ => { - return self.incompatible_types(&*ty_a, &*ty_b); + } else { + unreachable!() + } + self.set_a_to_b(a, b); + } + ( + TRecord { fields: fields1 }, + TObj { + fields: fields2, .. + }, + ) => { + for (key, value) in fields1.iter() { + if let Some(ty) = fields2.get(key) { + self.unify(*ty, *value)?; + } else { + return Err(format!("No such attribute {}", key)); } } + self.set_a_to_b(a, b); } - TypeEnum::TObj { - obj_id: id1, - params: params1, - .. - } => { - if let TypeEnum::TObj { + (TRecord { .. }, TVirtual { ty }) => { + self.unify(a, *ty)?; + } + ( + TObj { + obj_id: id1, + params: params1, + .. + }, + TObj { obj_id: id2, params: params2, .. - } = &*ty_b - { - if id1 != id2 { - return Err(format!("Cannot unify objects with ID {} and {}", id1, id2)); - } - for (x, y) in params1.values().zip(params2.values()) { - self.unify(*x, *y)?; - } - self.set_a_to_b(a, b); - } else { - return self.incompatible_types(&*ty_a, &*ty_b); + }, + ) => { + if id1 != id2 { + return Err(format!("Cannot unify objects with ID {} and {}", id1, id2)); } - } - TypeEnum::TVirtual { ty: ty1 } => { - if let TypeEnum::TVirtual { ty: ty2 } = &*ty_b { - self.unify(*ty1, *ty2)?; - self.set_a_to_b(a, b); - } else { - return self.incompatible_types(&*ty_a, &*ty_b); + for (x, y) in params1.values().zip(params2.values()) { + self.unify(*x, *y)?; } + self.set_a_to_b(a, b); } - TypeEnum::TCall { calls: c1 } => match &*ty_b { - TypeEnum::TCall { .. } => { - drop(ty_b); - if let TypeEnum::TCall { calls: c2 } = &mut *ty_b_cell.as_ref().borrow_mut() { - c2.extend(c1.iter().cloned()); + (TVirtual { ty: ty1 }, TVirtual { ty: ty2 }) => { + self.unify(*ty1, *ty2)?; + self.set_a_to_b(a, b); + } + (TCall { calls: c1 }, TCall { .. }) => { + drop(ty_b); + if let TypeEnum::TCall { calls: c2 } = &mut *ty_b_cell.as_ref().borrow_mut() { + c2.extend(c1.iter().cloned()); + } else { + unreachable!() + } + self.set_a_to_b(a, b); + } + (TCall { calls }, TFunc(signature)) => { + let required: Vec = signature + .args + .iter() + .filter(|v| !v.is_optional) + .map(|v| v.name.clone()) + .rev() + .collect(); + for c in calls { + let Call { + posargs, + kwargs, + ret, + fun, + } = c.as_ref(); + let instantiated = self.instantiate_fun(b, signature); + let signature; + let r = self.get_ty(instantiated); + let r = r.as_ref().borrow(); + if let TypeEnum::TFunc(s) = &*r { + signature = s; } else { - unreachable!() + unreachable!(); } - self.set_a_to_b(a, b); - } - TypeEnum::TFunc(signature) => { - let required: Vec = signature + let mut required = required.clone(); + let mut all_names: Vec<_> = signature .args .iter() - .filter(|v| !v.is_optional) - .map(|v| v.name.clone()) + .map(|v| (v.name.clone(), v.ty)) .rev() .collect(); - for c in c1 { - let Call { - posargs, - kwargs, - ret, - fun, - } = c.as_ref(); - let instantiated = self.instantiate_fun(b, signature); - let signature; - let r = self.get_ty(instantiated); - let r = r.as_ref().borrow(); - if let TypeEnum::TFunc(s) = &*r { - signature = s; + for (i, t) in posargs.iter().enumerate() { + if signature.args.len() <= i { + return Err("Too many arguments.".to_string()); + } + if !required.is_empty() { + required.pop(); + } + self.unify(all_names.pop().unwrap().1, *t)?; + } + for (k, t) in kwargs.iter() { + if let Some(i) = required.iter().position(|v| v == k) { + required.remove(i); + } + if let Some(i) = all_names.iter().position(|v| &v.0 == k) { + self.unify(all_names.remove(i).1, *t)?; } else { - unreachable!(); + return Err(format!("Unknown keyword argument {}", k)); 
} - let mut required = required.clone(); - let mut all_names: Vec<_> = signature - .args - .iter() - .map(|v| (v.name.clone(), v.ty)) - .rev() - .collect(); - for (i, t) in posargs.iter().enumerate() { - if signature.args.len() <= i { - return Err("Too many arguments.".to_string()); - } - if !required.is_empty() { - required.pop(); - } - self.unify(all_names.pop().unwrap().1, *t)?; - } - for (k, t) in kwargs.iter() { - if let Some(i) = required.iter().position(|v| v == k) { - required.remove(i); - } - if let Some(i) = all_names.iter().position(|v| &v.0 == k) { - self.unify(all_names.remove(i).1, *t)?; - } else { - return Err(format!("Unknown keyword argument {}", k)); - } - } - self.unify(*ret, signature.ret)?; - *fun.borrow_mut() = Some(instantiated); } - self.set_a_to_b(a, b); + self.unify(*ret, signature.ret)?; + *fun.borrow_mut() = Some(instantiated); } - _ => { + self.set_a_to_b(a, b); + } + (TFunc(sign1), TFunc(sign2)) => { + if !sign1.vars.is_empty() || !sign2.vars.is_empty() { + return Err("Polymorphic function pointer is prohibited.".to_string()); + } + if sign1.args.len() != sign2.args.len() { + return Err("Functions differ in number of parameters.".to_string()); + } + for (x, y) in sign1.args.iter().zip(sign2.args.iter()) { + if x.name != y.name { + return Err("Functions differ in parameter names.".to_string()); + } + if x.is_optional != y.is_optional { + return Err("Functions differ in optional parameters.".to_string()); + } + self.unify(x.ty, y.ty)?; + } + self.unify(sign1.ret, sign2.ret)?; + self.set_a_to_b(a, b); + } + _ => { + if swapped { return self.incompatible_types(&*ty_a, &*ty_b); - } - }, - TypeEnum::TFunc(sign1) => { - if let TypeEnum::TFunc(sign2) = &*ty_b { - if !sign1.params.is_empty() || !sign2.params.is_empty() { - return Err("Polymorphic function pointer is prohibited.".to_string()); - } - if sign1.args.len() != sign2.args.len() { - return Err("Functions differ in number of parameters.".to_string()); - } - for (x, y) in sign1.args.iter().zip(sign2.args.iter()) { - if x.name != y.name { - return Err("Functions differ in parameter names.".to_string()); - } - if x.is_optional != y.is_optional { - return Err("Functions differ in optional parameters.".to_string()); - } - self.unify(x.ty, y.ty)?; - } - self.unify(sign1.ret, sign2.ret)?; - self.set_a_to_b(a, b); } else { - return self.incompatible_types(&*ty_a, &*ty_b); + self.unify_impl(b, a, true)?; } } } @@ -555,7 +468,11 @@ impl Unifier { self.occur_check(a, *t)?; } } - TypeEnum::TFunc(FunSignature { args, ret, params }) => { + TypeEnum::TFunc(FunSignature { + args, + ret, + vars: params, + }) => { for t in args .iter() .map(|v| &v.ty) @@ -638,7 +555,11 @@ impl Unifier { None } } - TypeEnum::TFunc(FunSignature { args, ret, params }) => { + TypeEnum::TFunc(FunSignature { + args, + ret, + vars: params, + }) => { let new_params = self.subst_map(params, mapping); let new_ret = self.subst(*ret, mapping); let mut new_args = None; @@ -658,7 +579,11 @@ impl Unifier { let params = new_params.unwrap_or_else(|| params.clone()); let ret = new_ret.unwrap_or_else(|| *ret); let args = new_args.unwrap_or_else(|| args.clone()); - Some(self.add_ty(TypeEnum::TFunc(FunSignature { args, ret, params }))) + Some(self.add_ty(TypeEnum::TFunc(FunSignature { + args, + ret, + vars: params, + }))) } else { None } @@ -688,7 +613,7 @@ impl Unifier { /// Returns None if the function is already instantiated. 
fn instantiate_fun(&mut self, ty: Type, fun: &FunSignature) -> Type { let mut instantiated = false; - for (k, v) in fun.params.iter() { + for (k, v) in fun.vars.iter() { if let TypeEnum::TVar { id } = &*self.unification_table.probe_value(*v).as_ref().borrow() { @@ -705,7 +630,7 @@ impl Unifier { ty } else { let mapping = fun - .params + .vars .iter() .map(|(k, _)| (*k, self.get_fresh_var().0)) .collect(); From eb4b2bb7f67a90ea4d9ed36502f11dc470ba81ef Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 19 Jul 2021 17:05:48 +0800 Subject: [PATCH 024/131] refactored using constrain to allow easier modification later with subtyping --- nac3core/src/typecheck/type_inferencer.rs | 38 +++++++++++++---------- nac3core/src/typecheck/typedef.rs | 4 +-- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer.rs b/nac3core/src/typecheck/type_inferencer.rs index 3ccc3016..6c273ec9 100644 --- a/nac3core/src/typecheck/type_inferencer.rs +++ b/nac3core/src/typecheck/type_inferencer.rs @@ -38,6 +38,12 @@ impl<'a> Fold<()> for Inferencer<'a> { type InferenceResult = Result; impl<'a> Inferencer<'a> { + /// Constrain a <: b + /// Currently implemented as unification + fn constrain(&mut self, a: Type, b: Type) -> Result<(), String> { + self.unifier.unify(a, b) + } + fn build_method_call( &mut self, method: String, @@ -55,7 +61,7 @@ impl<'a> Inferencer<'a> { let call = self.unifier.add_ty(TypeEnum::TCall { calls: vec![call] }); let fields = once((method, call)).collect(); let record = self.unifier.add_ty(TypeEnum::TRecord { fields }); - self.unifier.unify(obj, record)?; + self.constrain(obj, record)?; Ok(ret) } @@ -114,15 +120,15 @@ impl<'a> Inferencer<'a> { fn infer_attribute(&mut self, value: &ast::Expr>, attr: &str) -> InferenceResult { let (attr_ty, _) = self.unifier.get_fresh_var(); let fields = once((attr.to_string(), attr_ty)).collect(); - let parent = self.unifier.add_ty(TypeEnum::TRecord { fields }); - self.unifier.unify(value.custom.unwrap(), parent)?; + let record = self.unifier.add_ty(TypeEnum::TRecord { fields }); + self.constrain(value.custom.unwrap(), record)?; Ok(attr_ty) } fn infer_bool_ops(&mut self, values: &[ast::Expr>]) -> InferenceResult { let b = self.primitives.bool; for v in values { - self.unifier.unify(v.custom.unwrap(), b)?; + self.constrain(v.custom.unwrap(), b)?; } Ok(b) } @@ -181,32 +187,30 @@ impl<'a> Inferencer<'a> { .iter() .flatten() { - self.unifier - .unify(self.primitives.int32, v.custom.unwrap())?; + self.constrain(v.custom.unwrap(), self.primitives.int32)?; } let list = self.unifier.add_ty(TypeEnum::TList { ty }); - self.unifier.unify(value.custom.unwrap(), list)?; + self.constrain(value.custom.unwrap(), list)?; Ok(list) } ast::ExprKind::Constant { value: ast::Constant::Int(val), .. } => { - // the index is a constant, so value can be a sequence (either list/tuple) + // the index is a constant, so value can be a sequence. 
let ind: i32 = val .try_into() .map_err(|_| "Index must be int32".to_string())?; let map = once((ind, ty)).collect(); let seq = self.unifier.add_ty(TypeEnum::TSeq { map }); - self.unifier.unify(value.custom.unwrap(), seq)?; + self.constrain(value.custom.unwrap(), seq)?; Ok(ty) } _ => { // the index is not a constant, so value can only be a list - self.unifier - .unify(slice.custom.unwrap(), self.primitives.int32)?; + self.constrain(slice.custom.unwrap(), self.primitives.int32)?; let list = self.unifier.add_ty(TypeEnum::TList { ty }); - self.unifier.unify(value.custom.unwrap(), list)?; + self.constrain(value.custom.unwrap(), list)?; Ok(ty) } } @@ -218,10 +222,10 @@ impl<'a> Inferencer<'a> { body: ast::Expr>, orelse: ast::Expr>, ) -> InferenceResult { - self.unifier - .unify(test.custom.unwrap(), self.primitives.bool)?; - self.unifier - .unify(body.custom.unwrap(), orelse.custom.unwrap())?; - Ok(body.custom.unwrap()) + self.constrain(test.custom.unwrap(), self.primitives.bool)?; + let ty = self.unifier.get_fresh_var().0; + self.constrain(body.custom.unwrap(), ty)?; + self.constrain(orelse.custom.unwrap(), ty)?; + Ok(ty) } } diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef.rs index 4c7eaa89..e9abbb16 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef.rs @@ -188,9 +188,7 @@ impl Unifier { ) }; - let (ty_a, ty_b) = { - (ty_a_cell.borrow(), ty_b_cell.borrow()) - }; + let (ty_a, ty_b) = { (ty_a_cell.borrow(), ty_b_cell.borrow()) }; self.occur_check(a, b)?; match (&*ty_a, &*ty_b) { From 016166de4635ebf6399c93a81ea13bfc9cca11c9 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 19 Jul 2021 17:26:51 +0800 Subject: [PATCH 025/131] skeleton done --- nac3core/src/typecheck/type_inferencer.rs | 60 +++++++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer.rs b/nac3core/src/typecheck/type_inferencer.rs index 6c273ec9..01289c3a 100644 --- a/nac3core/src/typecheck/type_inferencer.rs +++ b/nac3core/src/typecheck/type_inferencer.rs @@ -8,7 +8,7 @@ use super::magic_methods::*; use super::symbol_resolver::{SymbolResolver, SymbolType}; use super::typedef::{Call, Type, TypeEnum, Unifier}; use itertools::izip; -use rustpython_parser::ast::{self, fold::Fold}; +use rustpython_parser::ast::{self, fold}; pub struct PrimitiveStore { int32: Type, @@ -26,13 +26,65 @@ pub struct Inferencer<'a> { primitives: &'a PrimitiveStore, } -impl<'a> Fold<()> for Inferencer<'a> { +impl<'a> fold::Fold<()> for Inferencer<'a> { type TargetU = Option; type Error = String; fn map_user(&mut self, _: ()) -> Result { Ok(None) } + + fn fold_expr(&mut self, node: ast::Expr<()>) -> Result, Self::Error> { + let expr = match &node.node { + ast::ExprKind::Call { .. } => unimplemented!(), + ast::ExprKind::Lambda { .. } => unimplemented!(), + ast::ExprKind::ListComp { .. } => unimplemented!(), + _ => fold::fold_expr(self, node)?, + }; + let custom = match &expr.node { + ast::ExprKind::Constant { value, .. } => Some(self.infer_constant(value)?), + ast::ExprKind::Name { id, .. } => Some(self.infer_identifier(id)?), + ast::ExprKind::List { elts, .. } => Some(self.infer_list(elts)?), + ast::ExprKind::Tuple { elts, .. 
} => Some(self.infer_tuple(elts)?), + ast::ExprKind::Attribute { + value, + attr, + ctx: _, + } => Some(self.infer_attribute(value, attr)?), + ast::ExprKind::BoolOp { op: _, values } => Some(self.infer_bool_ops(values)?), + ast::ExprKind::BinOp { left, op, right } => Some(self.infer_bin_ops(left, op, right)?), + ast::ExprKind::UnaryOp { op, operand } => Some(self.infer_unary_ops(op, operand)?), + ast::ExprKind::Compare { + left, + ops, + comparators, + } => Some(self.infer_compare(left, ops, comparators)?), + ast::ExprKind::Call { + func, + args, + keywords, + } => unimplemented!(), + ast::ExprKind::Subscript { + value, + slice, + ctx: _, + } => Some(self.infer_subscript(value.as_ref(), slice.as_ref())?), + ast::ExprKind::IfExp { test, body, orelse } => { + Some(self.infer_if_expr(test, body.as_ref(), orelse.as_ref())?) + } + ast::ExprKind::ListComp { + elt: _, + generators: _, + } => expr.custom, // already computed + ast::ExprKind::Slice { .. } => None, // we don't need it for slice + _ => return Err("not supported yet".into()), + }; + Ok(ast::Expr { + custom, + location: expr.location, + node: expr.node, + }) + } } type InferenceResult = Result; @@ -219,8 +271,8 @@ impl<'a> Inferencer<'a> { fn infer_if_expr( &mut self, test: &ast::Expr>, - body: ast::Expr>, - orelse: ast::Expr>, + body: &ast::Expr>, + orelse: &ast::Expr>, ) -> InferenceResult { self.constrain(test.custom.unwrap(), self.primitives.bool)?; let ty = self.unifier.get_fresh_var().0; From 22455e43ac7a764adb9f03e97f0ccb9b7b48c3e3 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 20 Jul 2021 11:34:32 +0800 Subject: [PATCH 026/131] lambda fold --- nac3core/src/typecheck/type_inferencer.rs | 93 ++++++++++++++++++++--- 1 file changed, 81 insertions(+), 12 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer.rs b/nac3core/src/typecheck/type_inferencer.rs index 01289c3a..77cccf22 100644 --- a/nac3core/src/typecheck/type_inferencer.rs +++ b/nac3core/src/typecheck/type_inferencer.rs @@ -6,9 +6,13 @@ use std::rc::Rc; use super::magic_methods::*; use super::symbol_resolver::{SymbolResolver, SymbolType}; -use super::typedef::{Call, Type, TypeEnum, Unifier}; +use super::typedef::{Call, FunSignature, FuncArg, Type, TypeEnum, Unifier}; use itertools::izip; -use rustpython_parser::ast::{self, fold}; +use rustpython_parser::ast::{ + self, + fold::{self, Fold}, + Arguments, Expr, ExprKind, Located, Location, +}; pub struct PrimitiveStore { int32: Type, @@ -21,7 +25,7 @@ pub struct PrimitiveStore { pub struct Inferencer<'a> { resolver: &'a mut Box, unifier: &'a mut Unifier, - variable_mapping: &'a mut HashMap, + variable_mapping: HashMap, calls: &'a mut Vec>, primitives: &'a PrimitiveStore, } @@ -35,10 +39,16 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { } fn fold_expr(&mut self, node: ast::Expr<()>) -> Result, Self::Error> { - let expr = match &node.node { - ast::ExprKind::Call { .. } => unimplemented!(), - ast::ExprKind::Lambda { .. } => unimplemented!(), - ast::ExprKind::ListComp { .. } => unimplemented!(), + let expr = match node.node { + ast::ExprKind::Call { + func, + args, + keywords, + } => unimplemented!(), + ast::ExprKind::Lambda { args, body } => { + self.fold_lambda(node.location, *args, *body)? 
+ } + ast::ExprKind::ListComp { elt, generators } => unimplemented!(), _ => fold::fold_expr(self, node)?, }; let custom = match &expr.node { @@ -59,11 +69,7 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { ops, comparators, } => Some(self.infer_compare(left, ops, comparators)?), - ast::ExprKind::Call { - func, - args, - keywords, - } => unimplemented!(), + ast::ExprKind::Call { .. } => expr.custom, ast::ExprKind::Subscript { value, slice, @@ -117,6 +123,69 @@ impl<'a> Inferencer<'a> { Ok(ret) } + fn fold_lambda( + &mut self, + location: Location, + args: Arguments, + body: ast::Expr<()>, + ) -> Result>, String> { + if !args.posonlyargs.is_empty() + || args.vararg.is_some() + || !args.kwonlyargs.is_empty() + || args.kwarg.is_some() + || !args.defaults.is_empty() + { + // actually I'm not sure whether programs violating this is a valid python program. + return Err( + "We only support positional or keyword arguments without defaults for lambdas." + .to_string(), + ); + } + + let fn_args: Vec<_> = args + .args + .iter() + .map(|v| (v.node.arg.clone(), self.unifier.get_fresh_var().0)) + .collect(); + let mut variable_mapping = self.variable_mapping.clone(); + variable_mapping.extend(fn_args.iter().cloned()); + let ret = self.unifier.get_fresh_var().0; + let mut new_context = Inferencer { + resolver: self.resolver, + unifier: self.unifier, + variable_mapping, + calls: self.calls, + primitives: self.primitives, + }; + let fun = FunSignature { + args: fn_args + .iter() + .map(|(k, ty)| FuncArg { + name: k.clone(), + ty: *ty, + is_optional: false, + }) + .collect(), + ret, + vars: Default::default(), + }; + let body = new_context.fold_expr(body)?; + new_context.unifier.unify(fun.ret, body.custom.unwrap())?; + let mut args = new_context.fold_arguments(args)?; + for (arg, (name, ty)) in args.args.iter_mut().zip(fn_args.iter()) { + assert_eq!(&arg.node.arg, name); + arg.custom = Some(*ty); + } + Ok(Located { + location, + node: ExprKind::Lambda { + args: args.into(), + body: body.into(), + }, + custom: Some(self.unifier.add_ty(TypeEnum::TFunc(fun))), + }) + } + fn infer_identifier(&mut self, id: &str) -> InferenceResult { if let Some(ty) = self.variable_mapping.get(id) { Ok(*ty) From fa31e8f33683be3fe436b49a83b02f0c70fadcc6 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 20 Jul 2021 13:45:17 +0800 Subject: [PATCH 027/131] fold listcomp --- nac3core/src/typecheck/type_inferencer.rs | 79 +++++++++++++++++++---- 1 file changed, 68 insertions(+), 11 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer.rs b/nac3core/src/typecheck/type_inferencer.rs index 77cccf22..d26dae6f 100644 --- a/nac3core/src/typecheck/type_inferencer.rs +++ b/nac3core/src/typecheck/type_inferencer.rs @@ -11,7 +11,7 @@ use itertools::izip; use rustpython_parser::ast::{ self, fold::{self, Fold}, - Arguments, Expr, ExprKind, Located, Location, + Arguments, Comprehension, ExprKind, Located, Location, }; pub struct PrimitiveStore { @@ -48,7 +48,9 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { ast::ExprKind::Lambda { args, body } => { self.fold_lambda(node.location, *args, *body)? } - ast::ExprKind::ListComp { elt, generators } => unimplemented!(), + ast::ExprKind::ListComp { elt, generators } => { + self.fold_listcomp(node.location, *elt, generators)? + } _ => fold::fold_expr(self, node)?, }; let custom = match &expr.node { @@ -70,18 +72,13 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { comparators, } => Some(self.infer_compare(left, ops, comparators)?), ast::ExprKind::Call { .. 
} => expr.custom, - ast::ExprKind::Subscript { - value, - slice, - ctx: _, - } => Some(self.infer_subscript(value.as_ref(), slice.as_ref())?), + ast::ExprKind::Subscript { value, slice, .. } => { + Some(self.infer_subscript(value.as_ref(), slice.as_ref())?) + } ast::ExprKind::IfExp { test, body, orelse } => { Some(self.infer_if_expr(test, body.as_ref(), orelse.as_ref())?) } - ast::ExprKind::ListComp { - elt: _, - generators: _, - } => expr.custom, // already computed + ast::ExprKind::ListComp { .. } | ast::ExprKind::Lambda { .. } => expr.custom, // already computed ast::ExprKind::Slice { .. } => None, // we don't need it for slice _ => return Err("not supported yet".into()), }; @@ -186,6 +183,66 @@ impl<'a> Inferencer<'a> { }) } + fn fold_listcomp( + &mut self, + location: Location, + elt: ast::Expr<()>, + mut generators: Vec, + ) -> Result>, String> { + if generators.len() != 1 { + return Err( + "Only 1 generator statement for list comprehension is supported.".to_string(), + ); + } + let variable_mapping = self.variable_mapping.clone(); + let mut new_context = Inferencer { + resolver: self.resolver, + unifier: self.unifier, + variable_mapping, + calls: self.calls, + primitives: self.primitives, + }; + let elt = new_context.fold_expr(elt)?; + let generator = generators.pop().unwrap(); + if generator.is_async { + return Err("Async iterator not supported.".to_string()); + } + let target = new_context.fold_expr(*generator.target)?; + let iter = new_context.fold_expr(*generator.iter)?; + let ifs: Vec<_> = generator + .ifs + .into_iter() + .map(|v| new_context.fold_expr(v)) + .collect::>()?; + + // iter should be a list of targets... + // actually it should be an iterator of targets, but we don't have iter type for now + let list = new_context.unifier.add_ty(TypeEnum::TList { + ty: target.custom.unwrap(), + }); + new_context.unifier.unify(iter.custom.unwrap(), list)?; + // if conditions should be bool + for v in ifs.iter() { + new_context + .unifier + .unify(v.custom.unwrap(), new_context.primitives.bool)?; + } + + Ok(Located { + location, + custom: Some(list), + node: ExprKind::ListComp { + elt: Box::new(elt), + generators: vec![ast::Comprehension { + target: Box::new(target), + iter: Box::new(iter), + ifs, + is_async: false, + }], + }, + }) + } + fn infer_identifier(&mut self, id: &str) -> InferenceResult { if let Some(ty) = self.variable_mapping.get(id) { Ok(*ty) From bc9b453b3e89ceffedfbd7ce7ee7695ff06bdb71 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 20 Jul 2021 16:13:43 +0800 Subject: [PATCH 028/131] function call implementation --- nac3core/src/typecheck/symbol_resolver.rs | 4 +- nac3core/src/typecheck/type_inferencer.rs | 139 +++++++++++++++++++--- 2 files changed, 126 insertions(+), 17 deletions(-) diff --git a/nac3core/src/typecheck/symbol_resolver.rs b/nac3core/src/typecheck/symbol_resolver.rs index accb1aae..96003410 100644 --- a/nac3core/src/typecheck/symbol_resolver.rs +++ b/nac3core/src/typecheck/symbol_resolver.rs @@ -1,5 +1,6 @@ use super::typedef::Type; use super::location::Location; +use rustpython_parser::ast::Expr; pub enum SymbolType { TypeName(Type), @@ -16,7 +17,8 @@ pub enum SymbolValue<'a> { } pub trait SymbolResolver { - fn get_symbol_type(&mut self, str: &str) -> Option; + fn get_symbol_type(&mut self, str: &str) -> Option; + fn parse_type_name(&mut self, expr: &Expr<()>) -> Option; fn get_symbol_value(&mut self, str: &str) -> Option; fn get_symbol_location(&mut self, str: &str) -> Option; // handle function call etc. 
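Note on the resolver interface above: get_symbol_type now hands back a Type directly, and the
new parse_type_name hook is what turns an annotation expression such as `int32` (or the type
passed as the second argument of `virtual(...)`) into a Type. A concrete implementation only
appears later in the unit tests, so a real resolver is assumed to keep its own name-to-Type
table for both lookups.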
diff --git a/nac3core/src/typecheck/type_inferencer.rs b/nac3core/src/typecheck/type_inferencer.rs index d26dae6f..18cb87e7 100644 --- a/nac3core/src/typecheck/type_inferencer.rs +++ b/nac3core/src/typecheck/type_inferencer.rs @@ -5,7 +5,7 @@ use std::iter::once; use std::rc::Rc; use super::magic_methods::*; -use super::symbol_resolver::{SymbolResolver, SymbolType}; +use super::symbol_resolver::SymbolResolver; use super::typedef::{Call, FunSignature, FuncArg, Type, TypeEnum, Unifier}; use itertools::izip; use rustpython_parser::ast::{ @@ -44,7 +44,7 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { func, args, keywords, - } => unimplemented!(), + } => self.fold_call(node.location, *func, args, keywords)?, ast::ExprKind::Lambda { args, body } => { self.fold_lambda(node.location, *args, *body)? } @@ -71,14 +71,15 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { ops, comparators, } => Some(self.infer_compare(left, ops, comparators)?), - ast::ExprKind::Call { .. } => expr.custom, ast::ExprKind::Subscript { value, slice, .. } => { Some(self.infer_subscript(value.as_ref(), slice.as_ref())?) } ast::ExprKind::IfExp { test, body, orelse } => { Some(self.infer_if_expr(test, body.as_ref(), orelse.as_ref())?) } - ast::ExprKind::ListComp { .. } | ast::ExprKind::Lambda { .. } => expr.custom, // already computed + ast::ExprKind::ListComp { .. } + | ast::ExprKind::Lambda { .. } + | ast::ExprKind::Call { .. } => expr.custom, // already computed ast::ExprKind::Slice { .. } => None, // we don't need it for slice _ => return Err("not supported yet".into()), }; @@ -243,21 +244,127 @@ impl<'a> Inferencer<'a> { }) } + fn fold_call( + &mut self, + location: Location, + func: ast::Expr<()>, + mut args: Vec>, + keywords: Vec>, + ) -> Result>, String> { + let func = if let Located { + location: func_location, + custom, + node: ExprKind::Name { id, ctx }, + } = func + { + // handle special functions that cannot be typed in the usual way... + if id == "virtual" { + if args.is_empty() || args.len() > 2 || !keywords.is_empty() { + return Err("`virtual` can only accept 1/2 positional arguments.".to_string()); + } + let arg0 = self.fold_expr(args.remove(0))?; + let ty = if let Some(arg) = args.pop() { + self.resolver + .parse_type_name(&arg) + .ok_or_else(|| "error parsing type".to_string())? + } else { + self.unifier.get_fresh_var().0 + }; + let custom = Some(self.unifier.add_ty(TypeEnum::TVirtual { ty })); + return Ok(Located { + location, + custom, + node: ExprKind::Call { + func: Box::new(Located { + custom: None, + location: func.location, + node: ExprKind::Name { id, ctx }, + }), + args: vec![arg0], + keywords: vec![], + }, + }); + } + // int64 is special because its argument can be a constant larger than int32 + if id == "int64" && args.len() == 1 { + if let ExprKind::Constant { + value: ast::Constant::Int(val), + .. 
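// `virtual` and `int64` cannot be typed through the ordinary call path: for `virtual` the
// optional second argument is a type annotation rather than a value, so it is resolved via
// parse_type_name instead of being folded, and an `int64(...)` call whose argument is an
// integer literal is special-cased below so the literal can be range-checked against i64
// (a plain literal would otherwise be constrained to int32).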
+ } = &args[0].node + { + let int64: Result = val.try_into(); + let custom; + if int64.is_ok() { + custom = Some(self.primitives.int64); + } else { + return Err("Integer out of bound".into()); + } + return Ok(Located { + location, + custom, + node: ExprKind::Call { + func: Box::new(Located { + custom: None, + location: func.location, + node: ExprKind::Name { id, ctx }, + }), + args: vec![self.fold_expr(args.pop().unwrap())?], + keywords: vec![], + }, + }); + } + } + Located { + location: func_location, + custom, + node: ExprKind::Name { id, ctx }, + } + } else { + func + }; + let func = Box::new(self.fold_expr(func)?); + let args = args + .into_iter() + .map(|v| self.fold_expr(v)) + .collect::, _>>()?; + let keywords = keywords + .into_iter() + .map(|v| fold::fold_keyword(self, v)) + .collect::, _>>()?; + let ret = self.unifier.get_fresh_var().0; + let call = Rc::new(Call { + posargs: args.iter().map(|v| v.custom.unwrap()).collect(), + kwargs: keywords + .iter() + .map(|v| (v.node.arg.as_ref().unwrap().clone(), v.custom.unwrap())) + .collect(), + fun: RefCell::new(None), + ret, + }); + self.calls.push(call.clone()); + let call = self.unifier.add_ty(TypeEnum::TCall { calls: vec![call] }); + self.unifier.unify(func.custom.unwrap(), call)?; + + Ok(Located { + location, + custom: Some(ret), + node: ExprKind::Call { + func, + args, + keywords, + }, + }) + } + fn infer_identifier(&mut self, id: &str) -> InferenceResult { if let Some(ty) = self.variable_mapping.get(id) { Ok(*ty) } else { - match self.resolver.get_symbol_type(id) { - Some(SymbolType::TypeName(_)) => { - Err("Expected expression instead of type".to_string()) - } - Some(SymbolType::Identifier(ty)) => Ok(ty), - None => { - let ty = self.unifier.get_fresh_var().0; - self.variable_mapping.insert(id.to_string(), ty); - Ok(ty) - } - } + Ok(self.resolver.get_symbol_type(id).unwrap_or_else(|| { + let ty = self.unifier.get_fresh_var().0; + self.variable_mapping.insert(id.to_string(), ty); + ty + })) } } @@ -268,7 +375,7 @@ impl<'a> Inferencer<'a> { let int32: Result = val.try_into(); // int64 would be handled separately in functions if int32.is_ok() { - Ok(self.primitives.int64) + Ok(self.primitives.int32) } else { Err("Integer out of bound".into()) } From e95bfe1d31dcdcd7028e060482127b36c4d9e74f Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 20 Jul 2021 16:56:04 +0800 Subject: [PATCH 029/131] added statements --- nac3core/src/typecheck/type_inferencer.rs | 69 +++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/nac3core/src/typecheck/type_inferencer.rs b/nac3core/src/typecheck/type_inferencer.rs index 18cb87e7..728bb7cc 100644 --- a/nac3core/src/typecheck/type_inferencer.rs +++ b/nac3core/src/typecheck/type_inferencer.rs @@ -30,6 +30,15 @@ pub struct Inferencer<'a> { primitives: &'a PrimitiveStore, } +struct NaiveFolder(); +impl fold::Fold<()> for NaiveFolder { + type TargetU = Option; + type Error = String; + fn map_user(&mut self, _: ()) -> Result { + Ok(None) + } +} + impl<'a> fold::Fold<()> for Inferencer<'a> { type TargetU = Option; type Error = String; @@ -38,6 +47,66 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { Ok(None) } + fn fold_stmt(&mut self, node: ast::Stmt<()>) -> Result, Self::Error> { + let stmt = match node.node { + // we don't want fold over type annotation + ast::StmtKind::AnnAssign { + target, + annotation, + value, + simple, + } => { + let target = Box::new(fold::fold_expr(self, *target)?); + let value = if let Some(v) = value { + let ty = Box::new(fold::fold_expr(self, *v)?); + self.unifier + 
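// For annotated assignments the value (if any) is unified with the target, the annotation is
// converted to a Type through the resolver's parse_type_name and unified with the target as
// well, and the annotation expression itself only goes through NaiveFolder (which maps its
// user data to None), so a type name like `int32` is never folded as if it were an ordinary
// value identifier.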
.unify(target.custom.unwrap(), ty.custom.unwrap())?; + Some(ty) + } else { + None + }; + let annotation_type = self + .resolver + .parse_type_name(annotation.as_ref()) + .ok_or_else(|| "cannot parse type name".to_string())?; + self.unifier.unify(annotation_type, target.custom.unwrap())?; + let annotation = Box::new(NaiveFolder().fold_expr(*annotation)?); + Located { + location: node.location, + custom: None, + node: ast::StmtKind::AnnAssign { + target, + annotation, + value, + simple, + }, + } + } + _ => fold::fold_stmt(self, node)?, + }; + match &stmt.node { + ast::StmtKind::For { target, iter, .. } => { + let list = self.unifier.add_ty(TypeEnum::TList { + ty: target.custom.unwrap(), + }); + self.unifier.unify(list, iter.custom.unwrap())?; + } + ast::StmtKind::If { test, .. } | ast::StmtKind::While { test, .. } => { + self.unifier + .unify(test.custom.unwrap(), self.primitives.bool)?; + } + ast::StmtKind::Assign { targets, value, .. } => { + for target in targets.iter() { + self.unifier + .unify(target.custom.unwrap(), value.custom.unwrap())?; + } + } + ast::StmtKind::AnnAssign { .. } => {} + _ => return Err("Unsupported statement type".to_string()) + }; + Ok(stmt) + } + fn fold_expr(&mut self, node: ast::Expr<()>) -> Result, Self::Error> { let expr = match node.node { ast::ExprKind::Call { From 0296844d5f200b3731eeb8a8d40da7af55d84be7 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 21 Jul 2021 13:28:05 +0800 Subject: [PATCH 030/131] cleanup --- nac3core/Cargo.toml | 2 +- nac3core/src/lib.rs | 8 - nac3core/src/typecheck/mod.rs | 1 - nac3core/src/typecheck/test_typedef.rs | 272 ------------------ .../mod.rs} | 0 .../typecheck/{typedef.rs => typedef/mod.rs} | 3 + .../src/typecheck/typedef/test_typedef.rs | 269 +++++++++++++++++ 7 files changed, 273 insertions(+), 282 deletions(-) delete mode 100644 nac3core/src/typecheck/test_typedef.rs rename nac3core/src/typecheck/{type_inferencer.rs => type_inferencer/mod.rs} (100%) rename nac3core/src/typecheck/{typedef.rs => typedef/mod.rs} (99%) create mode 100644 nac3core/src/typecheck/typedef/test_typedef.rs diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 50ba54ed..402ce18b 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -9,10 +9,10 @@ num-bigint = "0.3" num-traits = "0.2" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } -indoc = "1.0" ena = "0.14" itertools = "0.10.1" [dev-dependencies] test-case = "1.2.0" +indoc = "1.0" diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index 74440eef..1cce4c19 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -1,12 +1,4 @@ #![warn(clippy::all)] -#![allow(clippy::clone_double_ref)] - -extern crate num_bigint; -extern crate inkwell; -extern crate rustpython_parser; -extern crate indoc; -extern crate ena; -extern crate itertools; mod typecheck; diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index cfab64c1..6e50fadf 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -2,6 +2,5 @@ pub mod location; mod magic_methods; pub mod symbol_resolver; -mod test_typedef; pub mod typedef; pub mod type_inferencer; diff --git a/nac3core/src/typecheck/test_typedef.rs b/nac3core/src/typecheck/test_typedef.rs deleted file mode 100644 index d85fda7a..00000000 --- a/nac3core/src/typecheck/test_typedef.rs +++ /dev/null @@ -1,272 +0,0 @@ -#[cfg(test)] -mod test { - use 
super::super::typedef::*; - use itertools::Itertools; - use std::collections::HashMap; - use test_case::test_case; - - struct TestEnvironment { - pub unifier: Unifier, - type_mapping: HashMap, - } - - impl TestEnvironment { - fn new() -> TestEnvironment { - let mut unifier = Unifier::new(); - let mut type_mapping = HashMap::new(); - - type_mapping.insert( - "int".into(), - unifier.add_ty(TypeEnum::TObj { - obj_id: 0, - fields: HashMap::new(), - params: HashMap::new(), - }), - ); - type_mapping.insert( - "float".into(), - unifier.add_ty(TypeEnum::TObj { - obj_id: 1, - fields: HashMap::new(), - params: HashMap::new(), - }), - ); - type_mapping.insert( - "bool".into(), - unifier.add_ty(TypeEnum::TObj { - obj_id: 2, - fields: HashMap::new(), - params: HashMap::new(), - }), - ); - let (v0, id) = unifier.get_fresh_var(); - type_mapping.insert( - "Foo".into(), - unifier.add_ty(TypeEnum::TObj { - obj_id: 3, - fields: [("a".into(), v0)].iter().cloned().collect(), - params: [(id, v0)].iter().cloned().collect(), - }), - ); - - TestEnvironment { - unifier, - type_mapping, - } - } - - fn parse(&mut self, typ: &str, mapping: &Mapping) -> Type { - let result = self.internal_parse(typ, mapping); - assert!(result.1.is_empty()); - result.0 - } - - fn internal_parse<'a, 'b>( - &'a mut self, - typ: &'b str, - mapping: &Mapping, - ) -> (Type, &'b str) { - // for testing only, so we can just panic when the input is malformed - let end = typ - .find(|c| ['[', ',', ']', '='].contains(&c)) - .unwrap_or_else(|| typ.len()); - match &typ[..end] { - "Tuple" => { - let mut s = &typ[end..]; - assert!(&s[0..1] == "["); - let mut ty = Vec::new(); - while &s[0..1] != "]" { - let result = self.internal_parse(&s[1..], mapping); - ty.push(result.0); - s = result.1; - } - (self.unifier.add_ty(TypeEnum::TTuple { ty }), &s[1..]) - } - "List" => { - assert!(&typ[end..end + 1] == "["); - let (ty, s) = self.internal_parse(&typ[end + 1..], mapping); - assert!(&s[0..1] == "]"); - (self.unifier.add_ty(TypeEnum::TList { ty }), &s[1..]) - } - "Record" => { - let mut s = &typ[end..]; - assert!(&s[0..1] == "["); - let mut fields = HashMap::new(); - while &s[0..1] != "]" { - let eq = s.find('=').unwrap(); - let key = s[1..eq].to_string(); - let result = self.internal_parse(&s[eq + 1..], mapping); - fields.insert(key, result.0); - s = result.1; - } - (self.unifier.add_ty(TypeEnum::TRecord { fields }), &s[1..]) - } - x => { - let mut s = &typ[end..]; - let ty = mapping.get(x).cloned().unwrap_or_else(|| { - // mapping should be type variables, type_mapping should be concrete types - // we should not resolve the type of type variables. - let mut ty = *self.type_mapping.get(x).unwrap(); - let te = self.unifier.get_ty(ty); - if let TypeEnum::TObj { params, .. 
} = &*te.as_ref().borrow() { - if !params.is_empty() { - assert!(&s[0..1] == "["); - let mut p = Vec::new(); - while &s[0..1] != "]" { - let result = self.internal_parse(&s[1..], mapping); - p.push(result.0); - s = result.1; - } - s = &s[1..]; - ty = self - .unifier - .subst(ty, ¶ms.keys().cloned().zip(p.into_iter()).collect()) - .unwrap_or(ty); - } - } - ty - }); - (ty, s) - } - } - } - } - - #[test_case(2, - &[("v1", "v2"), ("v2", "float")], - &[("v1", "float"), ("v2", "float")] - ; "simple variable" - )] - #[test_case(2, - &[("v1", "List[v2]"), ("v1", "List[float]")], - &[("v1", "List[float]"), ("v2", "float")] - ; "list element" - )] - #[test_case(3, - &[ - ("v1", "Record[a=v3,b=v3]"), - ("v2", "Record[b=float,c=v3]"), - ("v1", "v2") - ], - &[ - ("v1", "Record[a=float,b=float,c=float]"), - ("v2", "Record[a=float,b=float,c=float]"), - ("v3", "float") - ] - ; "record merge" - )] - #[test_case(3, - &[ - ("v1", "Record[a=float]"), - ("v2", "Foo[v3]"), - ("v1", "v2") - ], - &[ - ("v1", "Foo[float]"), - ("v3", "float") - ] - ; "record obj merge" - )] - /// Test cases for valid unifications. - fn test_unify( - variable_count: u32, - unify_pairs: &[(&'static str, &'static str)], - verify_pairs: &[(&'static str, &'static str)], - ) { - let unify_count = unify_pairs.len(); - // test all permutations... - for perm in unify_pairs.iter().permutations(unify_count) { - let mut env = TestEnvironment::new(); - let mut mapping = HashMap::new(); - for i in 1..=variable_count { - let v = env.unifier.get_fresh_var(); - mapping.insert(format!("v{}", i), v.0); - } - // unification may have side effect when we do type resolution, so freeze the types - // before doing unification. - let mut pairs = Vec::new(); - for (a, b) in perm.iter() { - let t1 = env.parse(a, &mapping); - let t2 = env.parse(b, &mapping); - pairs.push((t1, t2)); - } - for (t1, t2) in pairs { - env.unifier.unify(t1, t2).unwrap(); - } - for (a, b) in verify_pairs.iter() { - let t1 = env.parse(a, &mapping); - let t2 = env.parse(b, &mapping); - assert!(env.unifier.eq(t1, t2)); - } - } - } - - #[test_case(2, - &[ - ("v1", "Tuple[int]"), - ("v2", "List[int]"), - ], - (("v1", "v2"), "Cannot unify TList with TTuple") - ; "type mismatch" - )] - #[test_case(2, - &[ - ("v1", "Tuple[int]"), - ("v2", "Tuple[float]"), - ], - (("v1", "v2"), "Cannot unify objects with ID 0 and 1") - ; "tuple parameter mismatch" - )] - #[test_case(2, - &[ - ("v1", "Tuple[int,int]"), - ("v2", "Tuple[int]"), - ], - (("v1", "v2"), "Cannot unify tuples with length 2 and 1") - ; "tuple length mismatch" - )] - #[test_case(3, - &[ - ("v1", "Record[a=float,b=int]"), - ("v2", "Foo[v3]"), - ], - (("v1", "v2"), "No such attribute b") - ; "record obj merge" - )] - #[test_case(2, - &[ - ("v1", "List[v2]"), - ], - (("v1", "v2"), "Recursive type is prohibited.") - ; "recursive type for lists" - )] - /// Test cases for invalid unifications. - fn test_invalid_unification( - variable_count: u32, - unify_pairs: &[(&'static str, &'static str)], - errornous_pair: ((&'static str, &'static str), &'static str), - ) { - let mut env = TestEnvironment::new(); - let mut mapping = HashMap::new(); - for i in 1..=variable_count { - let v = env.unifier.get_fresh_var(); - mapping.insert(format!("v{}", i), v.0); - } - // unification may have side effect when we do type resolution, so freeze the types - // before doing unification. 
- let mut pairs = Vec::new(); - for (a, b) in unify_pairs.iter() { - let t1 = env.parse(a, &mapping); - let t2 = env.parse(b, &mapping); - pairs.push((t1, t2)); - } - let (t1, t2) = ( - env.parse(errornous_pair.0 .0, &mapping), - env.parse(errornous_pair.0 .1, &mapping), - ); - for (a, b) in pairs { - env.unifier.unify(a, b).unwrap(); - } - assert_eq!(env.unifier.unify(t1, t2), Err(errornous_pair.1.to_string())); - } -} diff --git a/nac3core/src/typecheck/type_inferencer.rs b/nac3core/src/typecheck/type_inferencer/mod.rs similarity index 100% rename from nac3core/src/typecheck/type_inferencer.rs rename to nac3core/src/typecheck/type_inferencer/mod.rs diff --git a/nac3core/src/typecheck/typedef.rs b/nac3core/src/typecheck/typedef/mod.rs similarity index 99% rename from nac3core/src/typecheck/typedef.rs rename to nac3core/src/typecheck/typedef/mod.rs index e9abbb16..b57ad609 100644 --- a/nac3core/src/typecheck/typedef.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -6,6 +6,9 @@ use std::iter::once; use std::ops::Deref; use std::rc::Rc; +#[cfg(test)] +mod test_typedef; + #[derive(Copy, Clone, PartialEq, Eq, Debug)] /// Handle for a type, implementated as a key in the unification table. pub struct Type(u32); diff --git a/nac3core/src/typecheck/typedef/test_typedef.rs b/nac3core/src/typecheck/typedef/test_typedef.rs new file mode 100644 index 00000000..0855b164 --- /dev/null +++ b/nac3core/src/typecheck/typedef/test_typedef.rs @@ -0,0 +1,269 @@ +use super::super::typedef::*; +use itertools::Itertools; +use std::collections::HashMap; +use test_case::test_case; + +struct TestEnvironment { + pub unifier: Unifier, + type_mapping: HashMap, +} + +impl TestEnvironment { + fn new() -> TestEnvironment { + let mut unifier = Unifier::new(); + let mut type_mapping = HashMap::new(); + + type_mapping.insert( + "int".into(), + unifier.add_ty(TypeEnum::TObj { + obj_id: 0, + fields: HashMap::new(), + params: HashMap::new(), + }), + ); + type_mapping.insert( + "float".into(), + unifier.add_ty(TypeEnum::TObj { + obj_id: 1, + fields: HashMap::new(), + params: HashMap::new(), + }), + ); + type_mapping.insert( + "bool".into(), + unifier.add_ty(TypeEnum::TObj { + obj_id: 2, + fields: HashMap::new(), + params: HashMap::new(), + }), + ); + let (v0, id) = unifier.get_fresh_var(); + type_mapping.insert( + "Foo".into(), + unifier.add_ty(TypeEnum::TObj { + obj_id: 3, + fields: [("a".into(), v0)].iter().cloned().collect(), + params: [(id, v0)].iter().cloned().collect(), + }), + ); + + TestEnvironment { + unifier, + type_mapping, + } + } + + fn parse(&mut self, typ: &str, mapping: &Mapping) -> Type { + let result = self.internal_parse(typ, mapping); + assert!(result.1.is_empty()); + result.0 + } + + fn internal_parse<'a, 'b>( + &'a mut self, + typ: &'b str, + mapping: &Mapping, + ) -> (Type, &'b str) { + // for testing only, so we can just panic when the input is malformed + let end = typ + .find(|c| ['[', ',', ']', '='].contains(&c)) + .unwrap_or_else(|| typ.len()); + match &typ[..end] { + "Tuple" => { + let mut s = &typ[end..]; + assert!(&s[0..1] == "["); + let mut ty = Vec::new(); + while &s[0..1] != "]" { + let result = self.internal_parse(&s[1..], mapping); + ty.push(result.0); + s = result.1; + } + (self.unifier.add_ty(TypeEnum::TTuple { ty }), &s[1..]) + } + "List" => { + assert!(&typ[end..end + 1] == "["); + let (ty, s) = self.internal_parse(&typ[end + 1..], mapping); + assert!(&s[0..1] == "]"); + (self.unifier.add_ty(TypeEnum::TList { ty }), &s[1..]) + } + "Record" => { + let mut s = &typ[end..]; + 
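// The tests describe types with a tiny textual DSL parsed by internal_parse: `Tuple[...]`,
// `List[...]`, `Record[key=ty,...]`, plus bare names that are either type variables from the
// test's mapping or concrete types such as `int` and the parameterised `Foo[...]`. For
// example, "Record[a=float,b=v1]" builds a TRecord whose field `a` is float and whose field
// `b` is the fresh variable v1.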
assert!(&s[0..1] == "["); + let mut fields = HashMap::new(); + while &s[0..1] != "]" { + let eq = s.find('=').unwrap(); + let key = s[1..eq].to_string(); + let result = self.internal_parse(&s[eq + 1..], mapping); + fields.insert(key, result.0); + s = result.1; + } + (self.unifier.add_ty(TypeEnum::TRecord { fields }), &s[1..]) + } + x => { + let mut s = &typ[end..]; + let ty = mapping.get(x).cloned().unwrap_or_else(|| { + // mapping should be type variables, type_mapping should be concrete types + // we should not resolve the type of type variables. + let mut ty = *self.type_mapping.get(x).unwrap(); + let te = self.unifier.get_ty(ty); + if let TypeEnum::TObj { params, .. } = &*te.as_ref().borrow() { + if !params.is_empty() { + assert!(&s[0..1] == "["); + let mut p = Vec::new(); + while &s[0..1] != "]" { + let result = self.internal_parse(&s[1..], mapping); + p.push(result.0); + s = result.1; + } + s = &s[1..]; + ty = self + .unifier + .subst(ty, ¶ms.keys().cloned().zip(p.into_iter()).collect()) + .unwrap_or(ty); + } + } + ty + }); + (ty, s) + } + } + } +} + +#[test_case(2, + &[("v1", "v2"), ("v2", "float")], + &[("v1", "float"), ("v2", "float")] + ; "simple variable" +)] +#[test_case(2, + &[("v1", "List[v2]"), ("v1", "List[float]")], + &[("v1", "List[float]"), ("v2", "float")] + ; "list element" +)] +#[test_case(3, + &[ + ("v1", "Record[a=v3,b=v3]"), + ("v2", "Record[b=float,c=v3]"), + ("v1", "v2") + ], + &[ + ("v1", "Record[a=float,b=float,c=float]"), + ("v2", "Record[a=float,b=float,c=float]"), + ("v3", "float") + ] + ; "record merge" +)] +#[test_case(3, + &[ + ("v1", "Record[a=float]"), + ("v2", "Foo[v3]"), + ("v1", "v2") + ], + &[ + ("v1", "Foo[float]"), + ("v3", "float") + ] + ; "record obj merge" +)] +/// Test cases for valid unifications. +fn test_unify( + variable_count: u32, + unify_pairs: &[(&'static str, &'static str)], + verify_pairs: &[(&'static str, &'static str)], +) { + let unify_count = unify_pairs.len(); + // test all permutations... + for perm in unify_pairs.iter().permutations(unify_count) { + let mut env = TestEnvironment::new(); + let mut mapping = HashMap::new(); + for i in 1..=variable_count { + let v = env.unifier.get_fresh_var(); + mapping.insert(format!("v{}", i), v.0); + } + // unification may have side effect when we do type resolution, so freeze the types + // before doing unification. + let mut pairs = Vec::new(); + for (a, b) in perm.iter() { + let t1 = env.parse(a, &mapping); + let t2 = env.parse(b, &mapping); + pairs.push((t1, t2)); + } + for (t1, t2) in pairs { + env.unifier.unify(t1, t2).unwrap(); + } + for (a, b) in verify_pairs.iter() { + let t1 = env.parse(a, &mapping); + let t2 = env.parse(b, &mapping); + assert!(env.unifier.eq(t1, t2)); + } + } +} + +#[test_case(2, + &[ + ("v1", "Tuple[int]"), + ("v2", "List[int]"), + ], + (("v1", "v2"), "Cannot unify TList with TTuple") + ; "type mismatch" +)] +#[test_case(2, + &[ + ("v1", "Tuple[int]"), + ("v2", "Tuple[float]"), + ], + (("v1", "v2"), "Cannot unify objects with ID 0 and 1") + ; "tuple parameter mismatch" +)] +#[test_case(2, + &[ + ("v1", "Tuple[int,int]"), + ("v2", "Tuple[int]"), + ], + (("v1", "v2"), "Cannot unify tuples with length 2 and 1") + ; "tuple length mismatch" +)] +#[test_case(3, + &[ + ("v1", "Record[a=float,b=int]"), + ("v2", "Foo[v3]"), + ], + (("v1", "v2"), "No such attribute b") + ; "record obj merge" +)] +#[test_case(2, + &[ + ("v1", "List[v2]"), + ], + (("v1", "v2"), "Recursive type is prohibited.") + ; "recursive type for lists" +)] +/// Test cases for invalid unifications. 
+fn test_invalid_unification( + variable_count: u32, + unify_pairs: &[(&'static str, &'static str)], + errornous_pair: ((&'static str, &'static str), &'static str), +) { + let mut env = TestEnvironment::new(); + let mut mapping = HashMap::new(); + for i in 1..=variable_count { + let v = env.unifier.get_fresh_var(); + mapping.insert(format!("v{}", i), v.0); + } + // unification may have side effect when we do type resolution, so freeze the types + // before doing unification. + let mut pairs = Vec::new(); + for (a, b) in unify_pairs.iter() { + let t1 = env.parse(a, &mapping); + let t2 = env.parse(b, &mapping); + pairs.push((t1, t2)); + } + let (t1, t2) = ( + env.parse(errornous_pair.0 .0, &mapping), + env.parse(errornous_pair.0 .1, &mapping), + ); + for (a, b) in pairs { + env.unifier.unify(a, b).unwrap(); + } + assert_eq!(env.unifier.unify(t1, t2), Err(errornous_pair.1.to_string())); +} From 25ff24a3202397ebf647c0ad25d708d77ef8c06f Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 21 Jul 2021 14:24:46 +0800 Subject: [PATCH 031/131] modified interface --- nac3core/src/typecheck/typedef/mod.rs | 89 ++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 17 deletions(-) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index b57ad609..ef034f1e 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -1,4 +1,5 @@ use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue}; +use itertools::Itertools; use std::cell::RefCell; use std::collections::HashMap; use std::fmt::Debug; @@ -143,22 +144,16 @@ impl Debug for TypeCell { } } -pub struct ObjDef { - name: String, - fields: Mapping, -} - pub struct Unifier { unification_table: InPlaceUnificationTable, - obj_def_table: Vec, var_id: u32, } impl Unifier { + /// Get an empty unifier pub fn new() -> Unifier { Unifier { unification_table: InPlaceUnificationTable::new(), - obj_def_table: Vec::new(), var_id: 0, } } @@ -174,11 +169,78 @@ impl Unifier { self.unification_table.probe_value(a).0 } + /// Unify two types, i.e. a = b. pub fn unify(&mut self, a: Type, b: Type) -> Result<(), String> { self.unify_impl(a, b, false) } - /// Unify two types, i.e. a = b. + /// Get a fresh type variable. 
+ pub fn get_fresh_var(&mut self) -> (Type, u32) { + let id = self.var_id + 1; + self.var_id += 1; + (self.add_ty(TypeEnum::TVar { id }), id) + } + + /// Get string representation of the type + pub fn stringify(&mut self, ty: Type, obj_to_name: &mut F, var_to_name: &mut G) -> String + where + F: FnMut(usize) -> String, + G: FnMut(u32) -> String, + { + let ty = self.unification_table.probe_value(ty).0; + let ty = ty.as_ref().borrow(); + match &*ty { + TypeEnum::TVar { id } => var_to_name(*id), + TypeEnum::TSeq { map } => { + let mut fields = map.iter().map(|(k, v)| { + format!("{}={}", k, self.stringify(*v, obj_to_name, var_to_name)) + }); + format!("seq[{}]", fields.join(", ")) + } + TypeEnum::TTuple { ty } => { + let mut fields = ty + .iter() + .map(|v| self.stringify(*v, obj_to_name, var_to_name)); + format!("tuple[{}]", fields.join(", ")) + } + TypeEnum::TList { ty } => { + format!("list[{}]", self.stringify(*ty, obj_to_name, var_to_name)) + } + TypeEnum::TVirtual { ty } => { + format!("virtual[{}]", self.stringify(*ty, obj_to_name, var_to_name)) + } + TypeEnum::TRecord { fields } => { + let mut fields = fields.iter().map(|(k, v)| { + format!("{}={}", k, self.stringify(*v, obj_to_name, var_to_name)) + }); + format!("record[{}]", fields.join(", ")) + } + TypeEnum::TObj { obj_id, params, .. } => { + let name = obj_to_name(*obj_id); + let mut params = params + .values() + .map(|v| self.stringify(*v, obj_to_name, var_to_name)); + format!("{}[{}]", name, params.join(", ")) + } + TypeEnum::TCall { .. } => "call".to_owned(), + TypeEnum::TFunc(signature) => { + let params = signature + .args + .iter() + .map(|arg| { + format!( + "{}={}", + arg.name, + self.stringify(arg.ty, obj_to_name, var_to_name) + ) + }) + .join(", "); + let ret = self.stringify(signature.ret, obj_to_name, var_to_name); + format!("fn[[{}], {}]", params, ret) + } + } + } + fn unify_impl(&mut self, a: Type, b: Type, swapped: bool) -> Result<(), String> { use TypeEnum::*; let (ty_a_cell, ty_b_cell) = { @@ -491,7 +553,7 @@ impl Unifier { /// If this returns Some(T), T would be the substituted type. /// If this returns None, the result type would be the original type /// (no substitution has to be done). - pub fn subst(&mut self, a: Type, mapping: &VarMap) -> Option { + fn subst(&mut self, a: Type, mapping: &VarMap) -> Option { let ty_cell = self.unification_table.probe_value(a); let ty = ty_cell.borrow(); // this function would only be called when we instantiate functions. @@ -640,7 +702,7 @@ impl Unifier { } /// Check whether two types are equal. - pub fn eq(&mut self, a: Type, b: Type) -> bool { + fn eq(&mut self, a: Type, b: Type) -> bool { if a == b { return true; } @@ -700,11 +762,4 @@ impl Unifier { } true } - - /// Get a fresh type variable. 
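// stringify renders types in the textual form the later inference tests compare against,
// e.g. "fn[[x=float, y=float], float]", "list[int32]" or "Foo[bool]", using the supplied
// closures to turn object ids and type-variable ids into names.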
- pub fn get_fresh_var(&mut self) -> (Type, u32) { - let id = self.var_id + 1; - self.var_id += 1; - (self.add_ty(TypeEnum::TVar { id }), id) - } } From 2f5c3b3cb7801adb983028457025ead9788225b3 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 21 Jul 2021 15:36:35 +0800 Subject: [PATCH 032/131] more cleanup and started adding tests --- nac3core/src/typecheck/symbol_resolver.rs | 5 - nac3core/src/typecheck/type_inferencer/mod.rs | 39 +++-- .../src/typecheck/type_inferencer/test.rs | 163 ++++++++++++++++++ nac3core/src/typecheck/typedef/mod.rs | 17 +- .../typedef/{test_typedef.rs => test.rs} | 0 5 files changed, 200 insertions(+), 24 deletions(-) create mode 100644 nac3core/src/typecheck/type_inferencer/test.rs rename nac3core/src/typecheck/typedef/{test_typedef.rs => test.rs} (100%) diff --git a/nac3core/src/typecheck/symbol_resolver.rs b/nac3core/src/typecheck/symbol_resolver.rs index 96003410..669f7632 100644 --- a/nac3core/src/typecheck/symbol_resolver.rs +++ b/nac3core/src/typecheck/symbol_resolver.rs @@ -2,11 +2,6 @@ use super::typedef::Type; use super::location::Location; use rustpython_parser::ast::Expr; -pub enum SymbolType { - TypeName(Type), - Identifier(Type), -} - pub enum SymbolValue<'a> { I32(i32), I64(i64), diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 728bb7cc..7cd72b41 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -14,20 +14,23 @@ use rustpython_parser::ast::{ Arguments, Comprehension, ExprKind, Located, Location, }; +#[cfg(test)] +mod test; + pub struct PrimitiveStore { - int32: Type, - int64: Type, - float: Type, - bool: Type, - none: Type, + pub int32: Type, + pub int64: Type, + pub float: Type, + pub bool: Type, + pub none: Type, } pub struct Inferencer<'a> { - resolver: &'a mut Box, - unifier: &'a mut Unifier, - variable_mapping: HashMap, - calls: &'a mut Vec>, - primitives: &'a PrimitiveStore, + pub resolver: &'a mut Box, + pub unifier: &'a mut Unifier, + pub variable_mapping: HashMap, + pub calls: &'a mut Vec>, + pub primitives: &'a PrimitiveStore, } struct NaiveFolder(); @@ -69,7 +72,8 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { .resolver .parse_type_name(annotation.as_ref()) .ok_or_else(|| "cannot parse type name".to_string())?; - self.unifier.unify(annotation_type, target.custom.unwrap())?; + self.unifier + .unify(annotation_type, target.custom.unwrap())?; let annotation = Box::new(NaiveFolder().fold_expr(*annotation)?); Located { location: node.location, @@ -102,7 +106,7 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { } } ast::StmtKind::AnnAssign { .. } => {} - _ => return Err("Unsupported statement type".to_string()) + _ => return Err("Unsupported statement type".to_string()), }; Ok(stmt) } @@ -358,7 +362,7 @@ impl<'a> Inferencer<'a> { if id == "int64" && args.len() == 1 { if let ExprKind::Constant { value: ast::Constant::Int(val), - .. 
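// The int64 branch now rebuilds the constant node (reusing `val` and `kind`) instead of
// re-folding the argument; folding it would route the literal through infer_constant, which,
// after the earlier fix, types plain integer literals as int32 and rejects values outside
// that range.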
+ kind, } = &args[0].node { let int64: Result = val.try_into(); @@ -377,7 +381,14 @@ impl<'a> Inferencer<'a> { location: func.location, node: ExprKind::Name { id, ctx }, }), - args: vec![self.fold_expr(args.pop().unwrap())?], + args: vec![Located { + location: args[0].location, + custom, + node: ExprKind::Constant { + value: ast::Constant::Int(val.clone()), + kind: kind.clone(), + }, + }], keywords: vec![], }, }); diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs new file mode 100644 index 00000000..23f99644 --- /dev/null +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -0,0 +1,163 @@ +use super::super::location::Location; +use super::super::symbol_resolver::*; +use super::super::typedef::*; +use super::*; +use indoc::indoc; +use rustpython_parser::ast; +use rustpython_parser::parser::parse_program; +use test_case::test_case; + +struct Resolver { + type_mapping: HashMap, +} + +impl SymbolResolver for Resolver { + fn get_symbol_type(&mut self, str: &str) -> Option { + self.type_mapping.get(str).cloned() + } + + fn parse_type_name(&mut self, _: &ast::Expr<()>) -> Option { + unimplemented!() + } + + fn get_symbol_value(&mut self, _: &str) -> Option { + unimplemented!() + } + + fn get_symbol_location(&mut self, _: &str) -> Option { + unimplemented!() + } +} + +struct TestEnvironment { + pub unifier: Unifier, + pub resolver: Box, + pub calls: Vec>, + pub primitives: PrimitiveStore, + pub id_to_name: HashMap, +} + +impl TestEnvironment { + fn new() -> TestEnvironment { + let mut unifier = Unifier::new(); + let mut type_mapping = HashMap::new(); + let int32 = unifier.add_ty(TypeEnum::TObj { + obj_id: 0, + fields: HashMap::new(), + params: HashMap::new(), + }); + let int64 = unifier.add_ty(TypeEnum::TObj { + obj_id: 1, + fields: HashMap::new(), + params: HashMap::new(), + }); + let float = unifier.add_ty(TypeEnum::TObj { + obj_id: 2, + fields: HashMap::new(), + params: HashMap::new(), + }); + let bool = unifier.add_ty(TypeEnum::TObj { + obj_id: 3, + fields: HashMap::new(), + params: HashMap::new(), + }); + let none = unifier.add_ty(TypeEnum::TObj { + obj_id: 4, + fields: HashMap::new(), + params: HashMap::new(), + }); + type_mapping.insert("int32".into(), int32); + type_mapping.insert("int64".into(), int64); + type_mapping.insert("float".into(), float); + type_mapping.insert("bool".into(), bool); + type_mapping.insert("none".into(), none); + + let primitives = PrimitiveStore { + int32, + int64, + float, + bool, + none, + }; + + let (v0, id) = unifier.get_fresh_var(); + type_mapping.insert( + "foo".into(), + unifier.add_ty(TypeEnum::TObj { + obj_id: 5, + fields: [("a".into(), v0)].iter().cloned().collect(), + params: [(id, v0)].iter().cloned().collect(), + }), + ); + + let id_to_name = [ + (0, "int32".to_string()), + (1, "int64".to_string()), + (2, "float".to_string()), + (3, "bool".to_string()), + (4, "none".to_string()), + (5, "Foo".to_string()), + ] + .iter() + .cloned() + .collect(); + + let resolver = Box::new(Resolver { type_mapping }) as Box; + + TestEnvironment { + unifier, + resolver, + primitives, + id_to_name, + calls: Vec::new(), + } + } + + fn get_inferencer(&mut self) -> Inferencer { + Inferencer { + resolver: &mut self.resolver, + unifier: &mut self.unifier, + variable_mapping: Default::default(), + calls: &mut self.calls, + primitives: &mut self.primitives, + } + } +} + +#[test_case(indoc! 
{" + a = 1234 + b = int64(2147483648) + c = 1.234 + d = True + "}, + [("a", "int32"), ("b", "int64"), ("c", "float"), ("d", "bool")].iter().cloned().collect() + ; "primitives test")] +#[test_case(indoc! {" + a = lambda x, y: x + b = lambda x: a(x, x) + c = 1.234 + d = b(c) + "}, + [("a", "fn[[x=float, y=float], float]"), ("b", "fn[[x=float], float]"), ("c", "float"), ("d", "float")].iter().cloned().collect() + ; "lambda test")] +fn test_basic(source: &str, mapping: HashMap<&str, &str>) { + let mut env = TestEnvironment::new(); + let id_to_name = std::mem::take(&mut env.id_to_name); + let mut inferencer = env.get_inferencer(); + let statements = parse_program(source).unwrap(); + statements + .into_iter() + .map(|v| inferencer.fold_stmt(v)) + .collect::, _>>() + .unwrap(); + for (k, v) in mapping.iter() { + let ty = inferencer.variable_mapping.get(*k).unwrap(); + let name = inferencer.unifier.stringify( + *ty, + &mut |v| id_to_name.get(&v).unwrap().clone(), + &mut |v| format!("v{}", v), + ); + assert_eq!(format!("{}: {}", k, v), format!("{}: {}", k, name)); + } +} + diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index ef034f1e..4d00c11e 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -8,7 +8,7 @@ use std::ops::Deref; use std::rc::Rc; #[cfg(test)] -mod test_typedef; +mod test; #[derive(Copy, Clone, PartialEq, Eq, Debug)] /// Handle for a type, implementated as a key in the unification table. @@ -217,10 +217,14 @@ impl Unifier { } TypeEnum::TObj { obj_id, params, .. } => { let name = obj_to_name(*obj_id); - let mut params = params - .values() - .map(|v| self.stringify(*v, obj_to_name, var_to_name)); - format!("{}[{}]", name, params.join(", ")) + if params.len() > 0 { + let mut params = params + .values() + .map(|v| self.stringify(*v, obj_to_name, var_to_name)); + format!("{}[{}]", name, params.join(", ")) + } else { + name + } } TypeEnum::TCall { .. } => "call".to_owned(), TypeEnum::TFunc(signature) => { @@ -432,6 +436,9 @@ impl Unifier { return Err(format!("Unknown keyword argument {}", k)); } } + if !required.is_empty() { + return Err("Expected more arguments".to_string()); + } self.unify(*ret, signature.ret)?; *fun.borrow_mut() = Some(instantiated); } diff --git a/nac3core/src/typecheck/typedef/test_typedef.rs b/nac3core/src/typecheck/typedef/test.rs similarity index 100% rename from nac3core/src/typecheck/typedef/test_typedef.rs rename to nac3core/src/typecheck/typedef/test.rs From 3e03398d9b5e1212dd0686893fa3a5950fa3c9b5 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 21 Jul 2021 15:59:01 +0800 Subject: [PATCH 033/131] obj test --- nac3core/src/typecheck/type_inferencer/mod.rs | 2 +- .../src/typecheck/type_inferencer/test.rs | 50 ++++++++++++++----- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 7cd72b41..23b9660e 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -105,7 +105,7 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { .unify(target.custom.unwrap(), value.custom.unwrap())?; } } - ast::StmtKind::AnnAssign { .. } => {} + ast::StmtKind::AnnAssign { .. } | ast::StmtKind::Expr { .. 
} => {} _ => return Err("Unsupported statement type".to_string()), }; Ok(stmt) diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 23f99644..c06208a3 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -66,11 +66,7 @@ impl TestEnvironment { fields: HashMap::new(), params: HashMap::new(), }); - type_mapping.insert("int32".into(), int32); - type_mapping.insert("int64".into(), int64); - type_mapping.insert("float".into(), float); - type_mapping.insert("bool".into(), bool); - type_mapping.insert("none".into(), none); + type_mapping.insert("None".into(), none); let primitives = PrimitiveStore { int32, @@ -81,13 +77,20 @@ impl TestEnvironment { }; let (v0, id) = unifier.get_fresh_var(); + + let foo_ty = unifier.add_ty(TypeEnum::TObj { + obj_id: 5, + fields: [("a".into(), v0)].iter().cloned().collect(), + params: [(id, v0)].iter().cloned().collect(), + }); + type_mapping.insert( - "foo".into(), - unifier.add_ty(TypeEnum::TObj { - obj_id: 5, - fields: [("a".into(), v0)].iter().cloned().collect(), - params: [(id, v0)].iter().cloned().collect(), - }), + "Foo".into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + args: vec![], + ret: foo_ty, + vars: [(id, v0)].iter().cloned().collect(), + })), ); let id_to_name = [ @@ -140,6 +143,22 @@ impl TestEnvironment { "}, [("a", "fn[[x=float, y=float], float]"), ("b", "fn[[x=float], float]"), ("c", "float"), ("d", "float")].iter().cloned().collect() ; "lambda test")] +#[test_case(indoc! {" + a = lambda x: x + b = lambda x: x + + foo1 = Foo() + foo2 = Foo() + c = a(foo1.a) + d = b(foo2.a) + + a(True) + b(123) + + "}, + [("a", "fn[[x=bool], bool]"), ("b", "fn[[x=int32], int32]"), ("c", "bool"), + ("d", "int32"), ("foo1", "Foo[bool]"), ("foo2", "Foo[int32]")].iter().cloned().collect() + ; "obj test")] fn test_basic(source: &str, mapping: HashMap<&str, &str>) { let mut env = TestEnvironment::new(); let id_to_name = std::mem::take(&mut env.id_to_name); @@ -150,6 +169,14 @@ fn test_basic(source: &str, mapping: HashMap<&str, &str>) { .map(|v| inferencer.fold_stmt(v)) .collect::, _>>() .unwrap(); + for (k, v) in inferencer.variable_mapping.iter() { + let name = inferencer.unifier.stringify( + *v, + &mut |v| id_to_name.get(&v).unwrap().clone(), + &mut |v| format!("v{}", v), + ); + println!("{}: {}", k, name); + } for (k, v) in mapping.iter() { let ty = inferencer.variable_mapping.get(*k).unwrap(); let name = inferencer.unifier.stringify( @@ -160,4 +187,3 @@ fn test_basic(source: &str, mapping: HashMap<&str, &str>) { assert_eq!(format!("{}: {}", k, v), format!("{}: {}", k, name)); } } - From b3d849ea7af6027ef7e9c9e2573abfe51d233f98 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 21 Jul 2021 16:06:06 +0800 Subject: [PATCH 034/131] list test --- nac3core/src/typecheck/type_inferencer/mod.rs | 6 ++++-- nac3core/src/typecheck/type_inferencer/test.rs | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 23b9660e..624182b6 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -304,7 +304,9 @@ impl<'a> Inferencer<'a> { Ok(Located { location, - custom: Some(list), + custom: Some(new_context.unifier.add_ty(TypeEnum::TList { + ty: elt.custom.unwrap(), + })), node: ExprKind::ListComp { elt: Box::new(elt), generators: vec![ast::Comprehension { @@ -474,7 +476,7 @@ impl<'a> 
Inferencer<'a> { for t in elts.iter() { self.unifier.unify(ty, t.custom.unwrap())?; } - Ok(ty) + Ok(self.unifier.add_ty(TypeEnum::TList { ty })) } fn infer_tuple(&mut self, elts: &[ast::Expr>]) -> InferenceResult { diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index c06208a3..af937623 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -159,6 +159,13 @@ impl TestEnvironment { [("a", "fn[[x=bool], bool]"), ("b", "fn[[x=int32], int32]"), ("c", "bool"), ("d", "int32"), ("foo1", "Foo[bool]"), ("foo2", "Foo[int32]")].iter().cloned().collect() ; "obj test")] +#[test_case(indoc! {" + f = lambda x: True + a = [1, 2, 3] + b = [f(x) for x in a] + "}, + [("a", "list[int32]"), ("b", "list[bool]"), ("f", "fn[[x=int32], bool]")].iter().cloned().collect() + ; "listcomp test")] fn test_basic(source: &str, mapping: HashMap<&str, &str>) { let mut env = TestEnvironment::new(); let id_to_name = std::mem::take(&mut env.id_to_name); From 4f8169012886a293ddbc72d480f2cee79bbf383f Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 21 Jul 2021 16:10:11 +0800 Subject: [PATCH 035/131] modified occur check --- nac3core/src/typecheck/type_inferencer/test.rs | 2 +- nac3core/src/typecheck/typedef/mod.rs | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index af937623..3e85c245 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -162,7 +162,7 @@ impl TestEnvironment { #[test_case(indoc! {" f = lambda x: True a = [1, 2, 3] - b = [f(x) for x in a] + b = [f(x) for x in a if f(x)] "}, [("a", "list[int32]"), ("b", "list[bool]"), ("f", "fn[[x=int32], bool]")].iter().cloned().collect() ; "listcomp test")] diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 4d00c11e..680bcc67 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -259,12 +259,13 @@ impl Unifier { let (ty_a, ty_b) = { (ty_a_cell.borrow(), ty_b_cell.borrow()) }; - self.occur_check(a, b)?; match (&*ty_a, &*ty_b) { (TypeEnum::TVar { .. }, _) => { + self.occur_check(a, b)?; self.set_a_to_b(a, b); } (TSeq { map: map1 }, TSeq { .. }) => { + self.occur_check(a, b)?; drop(ty_b); if let TypeEnum::TSeq { map: map2 } = &mut *ty_b_cell.as_ref().borrow_mut() { // unify them to map2 @@ -281,6 +282,7 @@ impl Unifier { self.set_a_to_b(a, b); } (TSeq { map: map1 }, TTuple { ty: types }) => { + self.occur_check(a, b)?; let len = types.len() as i32; for (k, v) in map1.iter() { // handle negative index @@ -297,6 +299,7 @@ impl Unifier { self.set_a_to_b(a, b); } (TSeq { map: map1 }, TList { ty }) => { + self.occur_check(a, b)?; for v in map1.values() { self.unify(*v, *ty)?; } @@ -320,6 +323,7 @@ impl Unifier { self.set_a_to_b(a, b); } (TRecord { fields: fields1 }, TRecord { .. }) => { + self.occur_check(a, b)?; drop(ty_b); if let TypeEnum::TRecord { fields: fields2 } = &mut *ty_b_cell.as_ref().borrow_mut() { @@ -341,6 +345,7 @@ impl Unifier { fields: fields2, .. }, ) => { + self.occur_check(a, b)?; for (key, value) in fields1.iter() { if let Some(ty) = fields2.get(key) { self.unify(*ty, *value)?; @@ -351,6 +356,7 @@ impl Unifier { self.set_a_to_b(a, b); } (TRecord { .. 
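// occur_check(a, b) fails whenever `a` already occurs somewhere inside `b`, which is what
// rejects genuinely recursive types such as v1 = List[v1] (see the "recursive type for lists"
// test case); this patch moves the check from a single call before the match into the
// individual arms, so it now runs only for the combinations listed here.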
}, TVirtual { ty }) => { + self.occur_check(a, b)?; self.unify(a, *ty)?; } ( @@ -387,6 +393,7 @@ impl Unifier { self.set_a_to_b(a, b); } (TCall { calls }, TFunc(signature)) => { + self.occur_check(a, b)?; let required: Vec = signature .args .iter() From 09c92188525b693ff7fa427c1f0d6030fe63b6fb Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 22 Jul 2021 11:37:29 +0800 Subject: [PATCH 036/131] use custom unification table implementation as the ena implementation did not expose the underlying vector store, we cannot map over it to get a table without Rc> so that we can send it around... --- Cargo.lock | 1 - nac3core/Cargo.toml | 1 - nac3core/src/typecheck/mod.rs | 1 + nac3core/src/typecheck/typedef/mod.rs | 74 ++++---------- nac3core/src/typecheck/unification_table.rs | 104 ++++++++++++++++++++ 5 files changed, 122 insertions(+), 59 deletions(-) create mode 100644 nac3core/src/typecheck/unification_table.rs diff --git a/Cargo.lock b/Cargo.lock index d08e77c0..d4561e97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -384,7 +384,6 @@ checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" name = "nac3core" version = "0.1.0" dependencies = [ - "ena", "indoc 1.0.3", "inkwell", "itertools", diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 402ce18b..5d3753f3 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -9,7 +9,6 @@ num-bigint = "0.3" num-traits = "0.2" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } -ena = "0.14" itertools = "0.10.1" [dev-dependencies] diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index 6e50fadf..6b55ddba 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -4,3 +4,4 @@ mod magic_methods; pub mod symbol_resolver; pub mod typedef; pub mod type_inferencer; +mod unification_table; diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 680bcc67..9a355c33 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -1,55 +1,20 @@ -use ena::unify::{InPlaceUnificationTable, NoError, UnifyKey, UnifyValue}; use itertools::Itertools; use std::cell::RefCell; use std::collections::HashMap; -use std::fmt::Debug; use std::iter::once; -use std::ops::Deref; use std::rc::Rc; +use super::unification_table::{UnificationKey, UnificationTable}; + #[cfg(test)] mod test; -#[derive(Copy, Clone, PartialEq, Eq, Debug)] /// Handle for a type, implementated as a key in the unification table. -pub struct Type(u32); +pub type Type = UnificationKey; #[derive(Clone)] pub struct TypeCell(Rc>); -impl UnifyValue for TypeCell { - type Error = NoError; - fn unify_values(_: &Self, value2: &Self) -> Result { - // WARN: depends on the implementation details of ena. - // We do not use this to do unification, instead we perform unification - // and assign the type by `union_value(key, new_value)`, which set the - // value as `unify_values(key.value, new_value)`. So, we need to return - // the right one. 
- Ok(value2.clone()) - } -} - -impl UnifyKey for Type { - type Value = TypeCell; - fn index(&self) -> u32 { - self.0 - } - fn from_index(u: u32) -> Self { - Type(u) - } - fn tag() -> &'static str { - "TypeID" - } -} - -impl Deref for TypeCell { - type Target = Rc>; - - fn deref(&self) -> &::Target { - &self.0 - } -} - pub type Mapping = HashMap; type VarMap = Mapping; @@ -78,6 +43,7 @@ pub struct FunSignature { // We use a lot of `Rc`/`RefCell`s here as we want to simplify our code. // We may not really need so much `Rc`s, but we would have to do complicated // stuffs otherwise. +#[derive(Clone)] pub enum TypeEnum { TVar { // TODO: upper/lower bound @@ -138,14 +104,8 @@ impl TypeEnum { } } -impl Debug for TypeCell { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(&self.borrow().get_type_name()) - } -} - pub struct Unifier { - unification_table: InPlaceUnificationTable, + unification_table: UnificationTable>>, var_id: u32, } @@ -153,7 +113,7 @@ impl Unifier { /// Get an empty unifier pub fn new() -> Unifier { Unifier { - unification_table: InPlaceUnificationTable::new(), + unification_table: UnificationTable::new(), var_id: 0, } } @@ -161,12 +121,12 @@ impl Unifier { /// Register a type to the unifier. /// Returns a key in the unification_table. pub fn add_ty(&mut self, a: TypeEnum) -> Type { - self.unification_table.new_key(TypeCell(Rc::new(a.into()))) + self.unification_table.new_key(Rc::new(a.into())) } /// Get the TypeEnum of a type. pub fn get_ty(&mut self, a: Type) -> Rc> { - self.unification_table.probe_value(a).0 + self.unification_table.probe_value(a).clone() } /// Unify two types, i.e. a = b. @@ -187,7 +147,7 @@ impl Unifier { F: FnMut(usize) -> String, G: FnMut(u32) -> String, { - let ty = self.unification_table.probe_value(ty).0; + let ty = self.unification_table.probe_value(ty).clone(); let ty = ty.as_ref().borrow(); match &*ty { TypeEnum::TVar { id } => var_to_name(*id), @@ -252,8 +212,8 @@ impl Unifier { return Ok(()); } ( - self.unification_table.probe_value(a), - self.unification_table.probe_value(b), + self.unification_table.probe_value(a).clone(), + self.unification_table.probe_value(b).clone(), ) }; @@ -484,9 +444,9 @@ impl Unifier { fn set_a_to_b(&mut self, a: Type, b: Type) { // unify a and b together, and set the value to b's value. let table = &mut self.unification_table; - let ty_b = table.probe_value(b); - table.union(a, b); - table.union_value(a, ty_b); + let ty_b = table.probe_value(b).clone(); + table.unify(a, b); + table.set_value(a, ty_b) } fn incompatible_types(&self, a: &TypeEnum, b: &TypeEnum) -> Result<(), String> { @@ -501,7 +461,7 @@ impl Unifier { if self.unification_table.unioned(a, b) { return Err("Recursive type is prohibited.".to_owned()); } - let ty = self.unification_table.probe_value(b); + let ty = self.unification_table.probe_value(b).clone(); let ty = ty.borrow(); match &*ty { @@ -568,7 +528,7 @@ impl Unifier { /// If this returns None, the result type would be the original type /// (no substitution has to be done). fn subst(&mut self, a: Type, mapping: &VarMap) -> Option { - let ty_cell = self.unification_table.probe_value(a); + let ty_cell = self.unification_table.probe_value(a).clone(); let ty = ty_cell.borrow(); // this function would only be called when we instantiate functions. 
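// The hand-rolled UnificationTable that replaces ena here is a plain union-find (union by
// rank with path compression) whose backing vectors stay accessible, so into_send/from_send
// can clone the stored values out of their Rc<RefCell<...>> wrappers into a table that can be
// sent across threads, and back. Rough usage, with plain i32 values just for illustration:
//     let mut table: UnificationTable<i32> = UnificationTable::new();
//     let a = table.new_key(1);
//     let b = table.new_key(2);
//     table.unify(a, b);
//     table.set_value(a, 5);
//     assert!(table.unioned(a, b) && *table.probe_value(b) == 5);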
// function type signature should ONLY contain concrete types and type @@ -725,7 +685,7 @@ impl Unifier { if table.unioned(a, b) { return true; } - (table.probe_value(a), table.probe_value(b)) + (table.probe_value(a).clone(), table.probe_value(b).clone()) }; let ty_a = ty_a.borrow(); diff --git a/nac3core/src/typecheck/unification_table.rs b/nac3core/src/typecheck/unification_table.rs new file mode 100644 index 00000000..60ec8086 --- /dev/null +++ b/nac3core/src/typecheck/unification_table.rs @@ -0,0 +1,104 @@ +use std::cell::RefCell; +use std::rc::Rc; + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct UnificationKey(usize); + +pub struct UnificationTable { + parents: Vec, + ranks: Vec, + values: Vec, +} + +impl UnificationTable { + pub fn new() -> UnificationTable { + UnificationTable { + parents: Vec::new(), + ranks: Vec::new(), + values: Vec::new(), + } + } + + pub fn new_key(&mut self, v: V) -> UnificationKey { + let index = self.parents.len(); + self.parents.push(index); + self.ranks.push(0); + self.values.push(v); + UnificationKey(index) + } + + pub fn unify(&mut self, a: UnificationKey, b: UnificationKey) { + let mut a = self.find(a); + let mut b = self.find(b); + if a == b { + return; + } + if self.ranks[a] < self.ranks[b] { + std::mem::swap(&mut a, &mut b); + } + self.parents[b] = a; + if self.ranks[a] == self.ranks[b] { + self.ranks[a] += 1; + } + } + + pub fn probe_value(&mut self, a: UnificationKey) -> &V { + let index = self.find(a); + &self.values[index] + } + + pub fn set_value(&mut self, a: UnificationKey, v: V) { + let index = self.find(a); + self.values[index] = v; + } + + pub fn unioned(&mut self, a: UnificationKey, b: UnificationKey) -> bool { + self.find(a) == self.find(b) + } + + fn find(&mut self, key: UnificationKey) -> usize { + let mut root = key.0; + let mut parent = self.parents[root]; + while root != parent { + // a = parent.parent + let a = self.parents[parent]; + // root.parent = parent.parent + self.parents[root] = a; + root = parent; + // parent = root.parent + parent = a; + } + parent + } +} + +impl UnificationTable>> +where + V: Clone, +{ + pub fn into_send(self) -> UnificationTable { + let values = self + .values + .iter() + .map(|v| v.as_ref().borrow().clone()) + .collect(); + UnificationTable { + parents: self.parents, + ranks: self.ranks, + values, + } + } + + pub fn from_send(table: UnificationTable) -> UnificationTable>> { + let values = table + .values + .into_iter() + .map(|v| Rc::new(RefCell::new(v))) + .collect(); + UnificationTable { + parents: table.parents, + ranks: table.ranks, + values, + } + } +} From d484fa1e5cf5b97ba1317a619eac48c272a3c44e Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 22 Jul 2021 11:49:00 +0800 Subject: [PATCH 037/131] added return type check --- nac3core/src/typecheck/type_inferencer/mod.rs | 17 +++++++++++++++++ nac3core/src/typecheck/type_inferencer/test.rs | 1 + nac3core/src/typecheck/typedef/mod.rs | 2 +- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 624182b6..45ef892d 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -31,6 +31,7 @@ pub struct Inferencer<'a> { pub variable_mapping: HashMap, pub calls: &'a mut Vec>, pub primitives: &'a PrimitiveStore, + pub return_type: Option } struct NaiveFolder(); @@ -106,6 +107,20 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { } } ast::StmtKind::AnnAssign { .. 
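// With the new return_type field, a `return` statement unifies the returned expression's type
// with the enclosing function's return type; returning a value where none is expected (or
// omitting one where it is expected) is reported as an error.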
} | ast::StmtKind::Expr { .. } => {} + ast::StmtKind::Return { value } => { + match (value, self.return_type) { + (Some(v), Some(v1)) => { + self.unifier.unify(v.custom.unwrap(), v1)?; + } + (Some(_), None) => { + return Err("Unexpected return value".to_string()); + } + (None, Some(_)) => { + return Err("Expected return value".to_string()); + } + (None, None) => {} + } + } _ => return Err("Unsupported statement type".to_string()), }; Ok(stmt) @@ -227,6 +242,7 @@ impl<'a> Inferencer<'a> { variable_mapping, calls: self.calls, primitives: self.primitives, + return_type: self.return_type }; let fun = FunSignature { args: fn_args @@ -275,6 +291,7 @@ impl<'a> Inferencer<'a> { variable_mapping, calls: self.calls, primitives: self.primitives, + return_type: self.return_type }; let elt = new_context.fold_expr(elt)?; let generator = generators.pop().unwrap(); diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 3e85c245..c85408f7 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -123,6 +123,7 @@ impl TestEnvironment { variable_mapping: Default::default(), calls: &mut self.calls, primitives: &mut self.primitives, + return_type: None } } } diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 9a355c33..421f58ce 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -177,7 +177,7 @@ impl Unifier { } TypeEnum::TObj { obj_id, params, .. } => { let name = obj_to_name(*obj_id); - if params.len() > 0 { + if !params.is_empty() { let mut params = params .values() .map(|v| self.stringify(*v, obj_to_name, var_to_name)); From c315227a28aa80b0c4b83b80c5206322f0e0c0fe Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 22 Jul 2021 15:36:37 +0800 Subject: [PATCH 038/131] init function check --- nac3core/src/typecheck/function_check.rs | 76 +++++++++++++++++++ nac3core/src/typecheck/mod.rs | 1 + nac3core/src/typecheck/type_inferencer/mod.rs | 52 +++++-------- nac3core/src/typecheck/typedef/mod.rs | 12 ++- 4 files changed, 107 insertions(+), 34 deletions(-) create mode 100644 nac3core/src/typecheck/function_check.rs diff --git a/nac3core/src/typecheck/function_check.rs b/nac3core/src/typecheck/function_check.rs new file mode 100644 index 00000000..1e8a2de2 --- /dev/null +++ b/nac3core/src/typecheck/function_check.rs @@ -0,0 +1,76 @@ +use super::type_inferencer::Inferencer; +use super::typedef::Type; +use rustpython_parser::ast::{self, Expr, ExprKind, StmtKind}; +use std::iter::once; + +impl<'a> Inferencer<'a> { + fn check_expr( + &mut self, + expr: &Expr>, + defined_identifiers: &[String], + ) -> Result<(), String> { + if let Some(ty) = &expr.custom { + let ty = self.unifier.get_ty(*ty); + let ty = ty.as_ref().borrow(); + if ty.is_concrete() { + return Err(format!( + "expected concrete type at {:?} but got {}", + expr.location, + ty.get_type_name() + )); + } + } + match &expr.node { + ExprKind::Name { id, .. } => { + if !defined_identifiers.contains(id) { + return Err(format!("unknown identifier {} (use before def?)", id)); + } + } + ExprKind::List { elts, .. } + | ExprKind::Tuple { elts, .. } + | ExprKind::BoolOp { values: elts, .. } => { + for elt in elts.iter() { + self.check_expr(elt, defined_identifiers)?; + } + } + ExprKind::Attribute { value, .. } => { + self.check_expr(value.as_ref(), defined_identifiers)?; + } + ExprKind::BinOp { left, right, .. 
} => { + self.check_expr(left.as_ref(), defined_identifiers)?; + self.check_expr(right.as_ref(), defined_identifiers)?; + } + ExprKind::UnaryOp { operand, .. } => { + self.check_expr(operand.as_ref(), defined_identifiers)?; + } + ExprKind::Compare { + left, comparators, .. + } => { + for elt in once(left.as_ref()).chain(comparators.iter()) { + self.check_expr(elt, defined_identifiers)?; + } + } + ExprKind::Subscript { value, slice, .. } => { + self.check_expr(value.as_ref(), defined_identifiers)?; + self.check_expr(slice.as_ref(), defined_identifiers)?; + } + ExprKind::IfExp { test, body, orelse } => { + self.check_expr(test.as_ref(), defined_identifiers)?; + self.check_expr(body.as_ref(), defined_identifiers)?; + self.check_expr(orelse.as_ref(), defined_identifiers)?; + } + ExprKind::Slice { lower, upper, step } => { + for elt in [lower.as_ref(), upper.as_ref(), step.as_ref()] + .iter() + .flatten() + { + self.check_expr(elt, defined_identifiers)?; + } + } + ExprKind::ListComp { .. } => unimplemented!(), + ExprKind::Lambda { .. } => unimplemented!(), + _ => {} + } + Ok(()) + } +} diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index 6b55ddba..fbee8ebe 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -5,3 +5,4 @@ pub mod symbol_resolver; pub mod typedef; pub mod type_inferencer; mod unification_table; +mod function_check; diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 45ef892d..b7d296cc 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -31,7 +31,7 @@ pub struct Inferencer<'a> { pub variable_mapping: HashMap, pub calls: &'a mut Vec>, pub primitives: &'a PrimitiveStore, - pub return_type: Option + pub return_type: Option, } struct NaiveFolder(); @@ -107,20 +107,18 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { } } ast::StmtKind::AnnAssign { .. } | ast::StmtKind::Expr { .. } => {} - ast::StmtKind::Return { value } => { - match (value, self.return_type) { - (Some(v), Some(v1)) => { - self.unifier.unify(v.custom.unwrap(), v1)?; - } - (Some(_), None) => { - return Err("Unexpected return value".to_string()); - } - (None, Some(_)) => { - return Err("Expected return value".to_string()); - } - (None, None) => {} + ast::StmtKind::Return { value } => match (value, self.return_type) { + (Some(v), Some(v1)) => { + self.unifier.unify(v.custom.unwrap(), v1)?; } - } + (Some(_), None) => { + return Err("Unexpected return value".to_string()); + } + (None, Some(_)) => { + return Err("Expected return value".to_string()); + } + (None, None) => {} + }, _ => return Err("Unsupported statement type".to_string()), }; Ok(stmt) @@ -151,7 +149,7 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { attr, ctx: _, } => Some(self.infer_attribute(value, attr)?), - ast::ExprKind::BoolOp { op: _, values } => Some(self.infer_bool_ops(values)?), + ast::ExprKind::BoolOp { values, .. 
} => Some(self.infer_bool_ops(values)?), ast::ExprKind::BinOp { left, op, right } => Some(self.infer_bin_ops(left, op, right)?), ast::ExprKind::UnaryOp { op, operand } => Some(self.infer_unary_ops(op, operand)?), ast::ExprKind::Compare { @@ -242,7 +240,7 @@ impl<'a> Inferencer<'a> { variable_mapping, calls: self.calls, primitives: self.primitives, - return_type: self.return_type + return_type: self.return_type, }; let fun = FunSignature { args: fn_args @@ -291,7 +289,7 @@ impl<'a> Inferencer<'a> { variable_mapping, calls: self.calls, primitives: self.primitives, - return_type: self.return_type + return_type: self.return_type, }; let elt = new_context.fold_expr(elt)?; let generator = generators.pop().unwrap(); @@ -392,23 +390,11 @@ impl<'a> Inferencer<'a> { return Err("Integer out of bound".into()); } return Ok(Located { - location, + location: args[0].location, custom, - node: ExprKind::Call { - func: Box::new(Located { - custom: None, - location: func.location, - node: ExprKind::Name { id, ctx }, - }), - args: vec![Located { - location: args[0].location, - custom, - node: ExprKind::Constant { - value: ast::Constant::Int(val.clone()), - kind: kind.clone(), - }, - }], - keywords: vec![], + node: ExprKind::Constant { + value: ast::Constant::Int(val.clone()), + kind: kind.clone(), }, }); } diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 421f58ce..6274c7c2 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -46,7 +46,6 @@ pub struct FunSignature { #[derive(Clone)] pub enum TypeEnum { TVar { - // TODO: upper/lower bound id: u32, }, TSeq { @@ -102,6 +101,17 @@ impl TypeEnum { TypeEnum::TFunc { .. } => "TFunc", } } + + pub fn is_concrete(&self) -> bool { + matches!( + self, + TypeEnum::TTuple { .. } + | TypeEnum::TList { .. } + | TypeEnum::TObj { .. } + | TypeEnum::TVirtual { .. } + | TypeEnum::TFunc { .. } + ) + } } pub struct Unifier { From 88c45172b27ecce80f83f47f3a76fabc07e41d54 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 22 Jul 2021 17:07:49 +0800 Subject: [PATCH 039/131] basic check for use-before-def --- nac3core/src/typecheck/function_check.rs | 175 ++++++++++++++++-- nac3core/src/typecheck/type_inferencer/mod.rs | 9 +- .../src/typecheck/type_inferencer/test.rs | 21 ++- 3 files changed, 179 insertions(+), 26 deletions(-) diff --git a/nac3core/src/typecheck/function_check.rs b/nac3core/src/typecheck/function_check.rs index 1e8a2de2..537656a2 100644 --- a/nac3core/src/typecheck/function_check.rs +++ b/nac3core/src/typecheck/function_check.rs @@ -1,20 +1,41 @@ use super::type_inferencer::Inferencer; use super::typedef::Type; -use rustpython_parser::ast::{self, Expr, ExprKind, StmtKind}; +use rustpython_parser::ast::{self, Expr, ExprKind, Stmt, StmtKind}; use std::iter::once; impl<'a> Inferencer<'a> { + fn check_pattern( + &mut self, + pattern: &Expr>, + defined_identifiers: &mut Vec, + ) { + match &pattern.node { + ExprKind::Name { id, .. } => { + if !defined_identifiers.contains(id) { + defined_identifiers.push(id.clone()); + } + } + ExprKind::Tuple { elts, .. 
} => { + for elt in elts.iter() { + self.check_pattern(elt, defined_identifiers); + } + } + _ => unimplemented!(), + } + } + fn check_expr( &mut self, expr: &Expr>, defined_identifiers: &[String], ) -> Result<(), String> { + // there are some cases where the custom field is None if let Some(ty) = &expr.custom { let ty = self.unifier.get_ty(*ty); let ty = ty.as_ref().borrow(); - if ty.is_concrete() { + if !ty.is_concrete() { return Err(format!( - "expected concrete type at {:?} but got {}", + "expected concrete type at {} but got {}", expr.location, ty.get_type_name() )); @@ -23,7 +44,7 @@ impl<'a> Inferencer<'a> { match &expr.node { ExprKind::Name { id, .. } => { if !defined_identifiers.contains(id) { - return Err(format!("unknown identifier {} (use before def?)", id)); + return Err(format!("unknown identifier {} (use before def?) at {}", id, expr.location)); } } ExprKind::List { elts, .. } @@ -34,14 +55,14 @@ impl<'a> Inferencer<'a> { } } ExprKind::Attribute { value, .. } => { - self.check_expr(value.as_ref(), defined_identifiers)?; + self.check_expr(value, defined_identifiers)?; } ExprKind::BinOp { left, right, .. } => { - self.check_expr(left.as_ref(), defined_identifiers)?; - self.check_expr(right.as_ref(), defined_identifiers)?; + self.check_expr(left, defined_identifiers)?; + self.check_expr(right, defined_identifiers)?; } ExprKind::UnaryOp { operand, .. } => { - self.check_expr(operand.as_ref(), defined_identifiers)?; + self.check_expr(operand, defined_identifiers)?; } ExprKind::Compare { left, comparators, .. @@ -51,13 +72,13 @@ impl<'a> Inferencer<'a> { } } ExprKind::Subscript { value, slice, .. } => { - self.check_expr(value.as_ref(), defined_identifiers)?; - self.check_expr(slice.as_ref(), defined_identifiers)?; + self.check_expr(value, defined_identifiers)?; + self.check_expr(slice, defined_identifiers)?; } ExprKind::IfExp { test, body, orelse } => { - self.check_expr(test.as_ref(), defined_identifiers)?; - self.check_expr(body.as_ref(), defined_identifiers)?; - self.check_expr(orelse.as_ref(), defined_identifiers)?; + self.check_expr(test, defined_identifiers)?; + self.check_expr(body, defined_identifiers)?; + self.check_expr(orelse, defined_identifiers)?; } ExprKind::Slice { lower, upper, step } => { for elt in [lower.as_ref(), upper.as_ref(), step.as_ref()] @@ -67,10 +88,132 @@ impl<'a> Inferencer<'a> { self.check_expr(elt, defined_identifiers)?; } } - ExprKind::ListComp { .. } => unimplemented!(), - ExprKind::Lambda { .. } => unimplemented!(), - _ => {} + ExprKind::Lambda { args, body } => { + let mut defined_identifiers = defined_identifiers.to_vec(); + for arg in args.args.iter() { + if !defined_identifiers.contains(&arg.node.arg) { + defined_identifiers.push(arg.node.arg.clone()); + } + } + self.check_expr(body, &defined_identifiers)?; + } + ExprKind::ListComp { + elt, generators, .. + } => { + // in our type inference stage, we already make sure that there is only 1 generator + let ast::Comprehension { + target, iter, ifs, .. + } = &generators[0]; + self.check_expr(iter, defined_identifiers)?; + let mut defined_identifiers = defined_identifiers.to_vec(); + self.check_pattern(target, &mut defined_identifiers); + for term in once(elt.as_ref()).chain(ifs.iter()) { + self.check_expr(term, &defined_identifiers)?; + } + } + ExprKind::Call { + func, + args, + keywords, + } => { + for expr in once(func.as_ref()) + .chain(args.iter()) + .chain(keywords.iter().map(|v| v.node.value.as_ref())) + { + self.check_expr(expr, defined_identifiers)?; + } + } + ExprKind::Constant { .. 
} => {} + _ => { + println!("{:?}", expr.node); + unimplemented!() + } } Ok(()) } + + fn check_stmt( + &mut self, + stmt: &Stmt>, + defined_identifiers: &mut Vec, + ) -> Result { + match &stmt.node { + StmtKind::For { + target, + iter, + body, + orelse, + .. + } => { + self.check_expr(iter, defined_identifiers)?; + for stmt in orelse.iter() { + self.check_stmt(stmt, defined_identifiers)?; + } + let mut defined_identifiers = defined_identifiers.clone(); + self.check_pattern(target, &mut defined_identifiers); + for stmt in body.iter() { + self.check_stmt(stmt, &mut defined_identifiers)?; + } + Ok(false) + } + StmtKind::If { test, body, orelse } => { + self.check_expr(test, defined_identifiers)?; + let mut body_identifiers = defined_identifiers.clone(); + let mut orelse_identifiers = defined_identifiers.clone(); + let body_returned = self.check_block(body, &mut body_identifiers)?; + let orelse_returned = self.check_block(orelse, &mut orelse_identifiers)?; + + for ident in body_identifiers.iter() { + if !defined_identifiers.contains(ident) && orelse_identifiers.contains(ident) { + defined_identifiers.push(ident.clone()) + } + } + Ok(body_returned && orelse_returned) + } + StmtKind::While { test, body, orelse } => { + self.check_expr(test, defined_identifiers)?; + let mut defined_identifiers = defined_identifiers.clone(); + self.check_block(body, &mut defined_identifiers)?; + self.check_block(orelse, &mut defined_identifiers)?; + Ok(false) + } + StmtKind::Expr { value } => { + self.check_expr(value, defined_identifiers)?; + Ok(false) + } + StmtKind::Assign { targets, value, .. } => { + self.check_expr(value, defined_identifiers)?; + for target in targets { + self.check_pattern(target, defined_identifiers); + } + Ok(false) + } + StmtKind::AnnAssign { target, value, .. } => { + if let Some(value) = value { + self.check_expr(value, defined_identifiers)?; + self.check_pattern(target, defined_identifiers); + } + Ok(false) + } + // break, return, raise, etc. + _ => Ok(false), + } + } + + pub fn check_block( + &mut self, + block: &[Stmt>], + defined_identifiers: &mut Vec, + ) -> Result { + let mut ret = false; + for stmt in block { + if ret { + return Err(format!("dead code at {:?}", stmt.location)); + } + if self.check_stmt(stmt, defined_identifiers)? { + ret = true; + } + } + Ok(ret) + } } diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index b7d296cc..1f27daba 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -107,6 +107,7 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { } } ast::StmtKind::AnnAssign { .. } | ast::StmtKind::Expr { .. } => {} + ast::StmtKind::Break | ast::StmtKind::Continue => {} ast::StmtKind::Return { value } => match (value, self.return_type) { (Some(v), Some(v1)) => { self.unifier.unify(v.custom.unwrap(), v1)?; @@ -130,12 +131,14 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { func, args, keywords, - } => self.fold_call(node.location, *func, args, keywords)?, + } => { + return self.fold_call(node.location, *func, args, keywords); + } ast::ExprKind::Lambda { args, body } => { - self.fold_lambda(node.location, *args, *body)? + return self.fold_lambda(node.location, *args, *body); } ast::ExprKind::ListComp { elt, generators } => { - self.fold_listcomp(node.location, *elt, generators)? 
+ return self.fold_listcomp(node.location, *elt, generators); } _ => fold::fold_expr(self, node)?, }; diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index c85408f7..be9b1506 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -8,12 +8,12 @@ use rustpython_parser::parser::parse_program; use test_case::test_case; struct Resolver { - type_mapping: HashMap, + identifier_mapping: HashMap, } impl SymbolResolver for Resolver { fn get_symbol_type(&mut self, str: &str) -> Option { - self.type_mapping.get(str).cloned() + self.identifier_mapping.get(str).cloned() } fn parse_type_name(&mut self, _: &ast::Expr<()>) -> Option { @@ -35,12 +35,13 @@ struct TestEnvironment { pub calls: Vec>, pub primitives: PrimitiveStore, pub id_to_name: HashMap, + pub identifier_mapping: HashMap, } impl TestEnvironment { fn new() -> TestEnvironment { let mut unifier = Unifier::new(); - let mut type_mapping = HashMap::new(); + let mut identifier_mapping = HashMap::new(); let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: 0, fields: HashMap::new(), @@ -66,7 +67,7 @@ impl TestEnvironment { fields: HashMap::new(), params: HashMap::new(), }); - type_mapping.insert("None".into(), none); + identifier_mapping.insert("None".into(), none); let primitives = PrimitiveStore { int32, @@ -84,7 +85,7 @@ impl TestEnvironment { params: [(id, v0)].iter().cloned().collect(), }); - type_mapping.insert( + identifier_mapping.insert( "Foo".into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { args: vec![], @@ -105,13 +106,14 @@ impl TestEnvironment { .cloned() .collect(); - let resolver = Box::new(Resolver { type_mapping }) as Box; + let resolver = Box::new(Resolver { identifier_mapping: identifier_mapping.clone() }) as Box; TestEnvironment { unifier, resolver, primitives, id_to_name, + identifier_mapping, calls: Vec::new(), } } @@ -168,15 +170,20 @@ impl TestEnvironment { [("a", "list[int32]"), ("b", "list[bool]"), ("f", "fn[[x=int32], bool]")].iter().cloned().collect() ; "listcomp test")] fn test_basic(source: &str, mapping: HashMap<&str, &str>) { + println!("source:\n{}", source); let mut env = TestEnvironment::new(); let id_to_name = std::mem::take(&mut env.id_to_name); + let mut defined_identifiers = env.identifier_mapping.keys().cloned().collect(); let mut inferencer = env.get_inferencer(); let statements = parse_program(source).unwrap(); - statements + let statements = statements .into_iter() .map(|v| inferencer.fold_stmt(v)) .collect::, _>>() .unwrap(); + + inferencer.check_block(&statements, &mut defined_identifiers).unwrap(); + for (k, v) in inferencer.variable_mapping.iter() { let name = inferencer.unifier.stringify( *v, From ddcf4b7e3912834b14a02c1261fc58fa792ece19 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 23 Jul 2021 15:25:44 +0800 Subject: [PATCH 040/131] refactored typedef --- nac3core/rustfmt.toml | 1 + nac3core/src/typecheck/function_check.rs | 2 +- nac3core/src/typecheck/type_inferencer/mod.rs | 249 +++--- nac3core/src/typecheck/typedef/mod.rs | 749 ++++++++---------- nac3core/src/typecheck/typedef/test.rs | 83 +- 5 files changed, 498 insertions(+), 586 deletions(-) create mode 100644 nac3core/rustfmt.toml diff --git a/nac3core/rustfmt.toml b/nac3core/rustfmt.toml new file mode 100644 index 00000000..cfaa54ae --- /dev/null +++ b/nac3core/rustfmt.toml @@ -0,0 +1 @@ + use_small_heuristics = "Max" diff --git a/nac3core/src/typecheck/function_check.rs b/nac3core/src/typecheck/function_check.rs 
index 537656a2..7d3a3729 100644 --- a/nac3core/src/typecheck/function_check.rs +++ b/nac3core/src/typecheck/function_check.rs @@ -32,7 +32,7 @@ impl<'a> Inferencer<'a> { // there are some cases where the custom field is None if let Some(ty) = &expr.custom { let ty = self.unifier.get_ty(*ty); - let ty = ty.as_ref().borrow(); + let ty = ty.as_ref(); if !ty.is_concrete() { return Err(format!( "expected concrete type at {} but got {}", diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 1f27daba..8e5aec2a 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -54,17 +54,11 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { fn fold_stmt(&mut self, node: ast::Stmt<()>) -> Result, Self::Error> { let stmt = match node.node { // we don't want fold over type annotation - ast::StmtKind::AnnAssign { - target, - annotation, - value, - simple, - } => { + ast::StmtKind::AnnAssign { target, annotation, value, simple } => { let target = Box::new(fold::fold_expr(self, *target)?); let value = if let Some(v) = value { let ty = Box::new(fold::fold_expr(self, *v)?); - self.unifier - .unify(target.custom.unwrap(), ty.custom.unwrap())?; + self.unifier.unify(target.custom.unwrap(), ty.custom.unwrap())?; Some(ty) } else { None @@ -73,37 +67,27 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { .resolver .parse_type_name(annotation.as_ref()) .ok_or_else(|| "cannot parse type name".to_string())?; - self.unifier - .unify(annotation_type, target.custom.unwrap())?; + self.unifier.unify(annotation_type, target.custom.unwrap())?; let annotation = Box::new(NaiveFolder().fold_expr(*annotation)?); Located { location: node.location, custom: None, - node: ast::StmtKind::AnnAssign { - target, - annotation, - value, - simple, - }, + node: ast::StmtKind::AnnAssign { target, annotation, value, simple }, } } _ => fold::fold_stmt(self, node)?, }; match &stmt.node { ast::StmtKind::For { target, iter, .. } => { - let list = self.unifier.add_ty(TypeEnum::TList { - ty: target.custom.unwrap(), - }); + let list = self.unifier.add_ty(TypeEnum::TList { ty: target.custom.unwrap() }); self.unifier.unify(list, iter.custom.unwrap())?; } ast::StmtKind::If { test, .. } | ast::StmtKind::While { test, .. } => { - self.unifier - .unify(test.custom.unwrap(), self.primitives.bool)?; + self.unifier.unify(test.custom.unwrap(), self.primitives.bool)?; } ast::StmtKind::Assign { targets, value, .. } => { for target in targets.iter() { - self.unifier - .unify(target.custom.unwrap(), value.custom.unwrap())?; + self.unifier.unify(target.custom.unwrap(), value.custom.unwrap())?; } } ast::StmtKind::AnnAssign { .. } | ast::StmtKind::Expr { .. } => {} @@ -127,11 +111,7 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { fn fold_expr(&mut self, node: ast::Expr<()>) -> Result, Self::Error> { let expr = match node.node { - ast::ExprKind::Call { - func, - args, - keywords, - } => { + ast::ExprKind::Call { func, args, keywords } => { return self.fold_call(node.location, *func, args, keywords); } ast::ExprKind::Lambda { args, body } => { @@ -147,19 +127,15 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { ast::ExprKind::Name { id, .. } => Some(self.infer_identifier(id)?), ast::ExprKind::List { elts, .. } => Some(self.infer_list(elts)?), ast::ExprKind::Tuple { elts, .. 
} => Some(self.infer_tuple(elts)?), - ast::ExprKind::Attribute { - value, - attr, - ctx: _, - } => Some(self.infer_attribute(value, attr)?), + ast::ExprKind::Attribute { value, attr, ctx: _ } => { + Some(self.infer_attribute(value, attr)?) + } ast::ExprKind::BoolOp { values, .. } => Some(self.infer_bool_ops(values)?), ast::ExprKind::BinOp { left, op, right } => Some(self.infer_bin_ops(left, op, right)?), ast::ExprKind::UnaryOp { op, operand } => Some(self.infer_unary_ops(op, operand)?), - ast::ExprKind::Compare { - left, - ops, - comparators, - } => Some(self.infer_compare(left, ops, comparators)?), + ast::ExprKind::Compare { left, ops, comparators } => { + Some(self.infer_compare(left, ops, comparators)?) + } ast::ExprKind::Subscript { value, slice, .. } => { Some(self.infer_subscript(value.as_ref(), slice.as_ref())?) } @@ -172,11 +148,7 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { ast::ExprKind::Slice { .. } => None, // we don't need it for slice _ => return Err("not supported yet".into()), }; - Ok(ast::Expr { - custom, - location: expr.location, - node: expr.node, - }) + Ok(ast::Expr { custom, location: expr.location, node: expr.node }) } } @@ -196,16 +168,12 @@ impl<'a> Inferencer<'a> { params: Vec, ret: Type, ) -> InferenceResult { - let call = Rc::new(Call { - posargs: params, - kwargs: HashMap::new(), - ret, - fun: RefCell::new(None), - }); + let call = + Rc::new(Call { posargs: params, kwargs: HashMap::new(), ret, fun: RefCell::new(None) }); self.calls.push(call.clone()); - let call = self.unifier.add_ty(TypeEnum::TCall { calls: vec![call] }); + let call = self.unifier.add_ty(TypeEnum::TCall(vec![call].into())); let fields = once((method, call)).collect(); - let record = self.unifier.add_ty(TypeEnum::TRecord { fields }); + let record = self.unifier.add_record(fields); self.constrain(obj, record)?; Ok(ret) } @@ -248,11 +216,7 @@ impl<'a> Inferencer<'a> { let fun = FunSignature { args: fn_args .iter() - .map(|(k, ty)| FuncArg { - name: k.clone(), - ty: *ty, - is_optional: false, - }) + .map(|(k, ty)| FuncArg { name: k.clone(), ty: *ty, is_optional: false }) .collect(), ret, vars: Default::default(), @@ -266,10 +230,7 @@ impl<'a> Inferencer<'a> { } Ok(Located { location, - node: ExprKind::Lambda { - args: args.into(), - body: body.into(), - }, + node: ExprKind::Lambda { args: args.into(), body: body.into() }, custom: Some(self.unifier.add_ty(TypeEnum::TFunc(fun))), }) } @@ -282,7 +243,7 @@ impl<'a> Inferencer<'a> { ) -> Result>, String> { if generators.len() != 1 { return Err( - "Only 1 generator statement for list comprehension is supported.".to_string(), + "Only 1 generator statement for list comprehension is supported.".to_string() ); } let variable_mapping = self.variable_mapping.clone(); @@ -309,22 +270,16 @@ impl<'a> Inferencer<'a> { // iter should be a list of targets... 
// actually it should be an iterator of targets, but we don't have iter type for now - let list = new_context.unifier.add_ty(TypeEnum::TList { - ty: target.custom.unwrap(), - }); + let list = new_context.unifier.add_ty(TypeEnum::TList { ty: target.custom.unwrap() }); new_context.unifier.unify(iter.custom.unwrap(), list)?; // if conditions should be bool for v in ifs.iter() { - new_context - .unifier - .unify(v.custom.unwrap(), new_context.primitives.bool)?; + new_context.unifier.unify(v.custom.unwrap(), new_context.primitives.bool)?; } Ok(Located { location, - custom: Some(new_context.unifier.add_ty(TypeEnum::TList { - ty: elt.custom.unwrap(), - })), + custom: Some(new_context.unifier.add_ty(TypeEnum::TList { ty: elt.custom.unwrap() })), node: ExprKind::ListComp { elt: Box::new(elt), generators: vec![ast::Comprehension { @@ -344,77 +299,68 @@ impl<'a> Inferencer<'a> { mut args: Vec>, keywords: Vec>, ) -> Result>, String> { - let func = if let Located { - location: func_location, - custom, - node: ExprKind::Name { id, ctx }, - } = func - { - // handle special functions that cannot be typed in the usual way... - if id == "virtual" { - if args.is_empty() || args.len() > 2 || !keywords.is_empty() { - return Err("`virtual` can only accept 1/2 positional arguments.".to_string()); - } - let arg0 = self.fold_expr(args.remove(0))?; - let ty = if let Some(arg) = args.pop() { - self.resolver - .parse_type_name(&arg) - .ok_or_else(|| "error parsing type".to_string())? - } else { - self.unifier.get_fresh_var().0 - }; - let custom = Some(self.unifier.add_ty(TypeEnum::TVirtual { ty })); - return Ok(Located { - location, - custom, - node: ExprKind::Call { - func: Box::new(Located { - custom: None, - location: func.location, - node: ExprKind::Name { id, ctx }, - }), - args: vec![arg0], - keywords: vec![], - }, - }); - } - // int64 is special because its argument can be a constant larger than int32 - if id == "int64" && args.len() == 1 { - if let ExprKind::Constant { - value: ast::Constant::Int(val), - kind, - } = &args[0].node - { - let int64: Result = val.try_into(); - let custom; - if int64.is_ok() { - custom = Some(self.primitives.int64); - } else { - return Err("Integer out of bound".into()); + let func = + if let Located { location: func_location, custom, node: ExprKind::Name { id, ctx } } = + func + { + // handle special functions that cannot be typed in the usual way... + if id == "virtual" { + if args.is_empty() || args.len() > 2 || !keywords.is_empty() { + return Err( + "`virtual` can only accept 1/2 positional arguments.".to_string() + ); } + let arg0 = self.fold_expr(args.remove(0))?; + let ty = if let Some(arg) = args.pop() { + self.resolver + .parse_type_name(&arg) + .ok_or_else(|| "error parsing type".to_string())? 
+ } else { + self.unifier.get_fresh_var().0 + }; + let custom = Some(self.unifier.add_ty(TypeEnum::TVirtual { ty })); return Ok(Located { - location: args[0].location, + location, custom, - node: ExprKind::Constant { - value: ast::Constant::Int(val.clone()), - kind: kind.clone(), + node: ExprKind::Call { + func: Box::new(Located { + custom: None, + location: func.location, + node: ExprKind::Name { id, ctx }, + }), + args: vec![arg0], + keywords: vec![], }, }); } - } - Located { - location: func_location, - custom, - node: ExprKind::Name { id, ctx }, - } - } else { - func - }; + // int64 is special because its argument can be a constant larger than int32 + if id == "int64" && args.len() == 1 { + if let ExprKind::Constant { value: ast::Constant::Int(val), kind } = + &args[0].node + { + let int64: Result = val.try_into(); + let custom; + if int64.is_ok() { + custom = Some(self.primitives.int64); + } else { + return Err("Integer out of bound".into()); + } + return Ok(Located { + location: args[0].location, + custom, + node: ExprKind::Constant { + value: ast::Constant::Int(val.clone()), + kind: kind.clone(), + }, + }); + } + } + Located { location: func_location, custom, node: ExprKind::Name { id, ctx } } + } else { + func + }; let func = Box::new(self.fold_expr(func)?); - let args = args - .into_iter() - .map(|v| self.fold_expr(v)) - .collect::, _>>()?; + let args = args.into_iter().map(|v| self.fold_expr(v)).collect::, _>>()?; let keywords = keywords .into_iter() .map(|v| fold::fold_keyword(self, v)) @@ -430,18 +376,10 @@ impl<'a> Inferencer<'a> { ret, }); self.calls.push(call.clone()); - let call = self.unifier.add_ty(TypeEnum::TCall { calls: vec![call] }); + let call = self.unifier.add_ty(TypeEnum::TCall(vec![call].into())); self.unifier.unify(func.custom.unwrap(), call)?; - Ok(Located { - location, - custom: Some(ret), - node: ExprKind::Call { - func, - args, - keywords, - }, - }) + Ok(Located { location, custom: Some(ret), node: ExprKind::Call { func, args, keywords } }) } fn infer_identifier(&mut self, id: &str) -> InferenceResult { @@ -493,7 +431,7 @@ impl<'a> Inferencer<'a> { fn infer_attribute(&mut self, value: &ast::Expr>, attr: &str) -> InferenceResult { let (attr_ty, _) = self.unifier.get_fresh_var(); let fields = once((attr.to_string(), attr_ty)).collect(); - let record = self.unifier.add_ty(TypeEnum::TRecord { fields }); + let record = self.unifier.add_record(fields); self.constrain(value.custom.unwrap(), record)?; Ok(attr_ty) } @@ -540,9 +478,8 @@ impl<'a> Inferencer<'a> { ) -> InferenceResult { let boolean = self.primitives.bool; for (a, b, c) in izip!(once(left).chain(comparators), comparators, ops) { - let method = comparison_name(c) - .ok_or_else(|| "unsupported comparator".to_string())? - .to_string(); + let method = + comparison_name(c).ok_or_else(|| "unsupported comparator".to_string())?.to_string(); self.build_method_call(method, a.custom.unwrap(), vec![b.custom.unwrap()], boolean)?; } Ok(boolean) @@ -556,26 +493,18 @@ impl<'a> Inferencer<'a> { let ty = self.unifier.get_fresh_var().0; match &slice.node { ast::ExprKind::Slice { lower, upper, step } => { - for v in [lower.as_ref(), upper.as_ref(), step.as_ref()] - .iter() - .flatten() - { + for v in [lower.as_ref(), upper.as_ref(), step.as_ref()].iter().flatten() { self.constrain(v.custom.unwrap(), self.primitives.int32)?; } let list = self.unifier.add_ty(TypeEnum::TList { ty }); self.constrain(value.custom.unwrap(), list)?; Ok(list) } - ast::ExprKind::Constant { - value: ast::Constant::Int(val), - .. 
- } => { + ast::ExprKind::Constant { value: ast::Constant::Int(val), .. } => { // the index is a constant, so value can be a sequence. - let ind: i32 = val - .try_into() - .map_err(|_| "Index must be int32".to_string())?; + let ind: i32 = val.try_into().map_err(|_| "Index must be int32".to_string())?; let map = once((ind, ty)).collect(); - let seq = self.unifier.add_ty(TypeEnum::TSeq { map }); + let seq = self.unifier.add_sequence(map); self.constrain(value.custom.unwrap(), seq)?; Ok(ty) } diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 6274c7c2..52b26df2 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -1,4 +1,5 @@ -use itertools::Itertools; +use itertools::{chain, zip, Itertools}; +use std::borrow::Cow; use std::cell::RefCell; use std::collections::HashMap; use std::iter::once; @@ -12,9 +13,6 @@ mod test; /// Handle for a type, implementated as a key in the unification table. pub type Type = UnificationKey; -#[derive(Clone)] -pub struct TypeCell(Rc>); - pub type Mapping = HashMap; type VarMap = Mapping; @@ -40,16 +38,20 @@ pub struct FunSignature { pub vars: VarMap, } -// We use a lot of `Rc`/`RefCell`s here as we want to simplify our code. -// We may not really need so much `Rc`s, but we would have to do complicated -// stuffs otherwise. +#[derive(Clone)] +pub enum TypeVarMeta { + Generic, + Sequence(RefCell>), + Record(RefCell>), +} + #[derive(Clone)] pub enum TypeEnum { TVar { id: u32, - }, - TSeq { - map: Mapping, + meta: TypeVarMeta, + // empty indicates no restriction + range: RefCell>, }, TTuple { ty: Vec, @@ -57,9 +59,6 @@ pub enum TypeEnum { TList { ty: Type, }, - TRecord { - fields: Mapping, - }, TObj { obj_id: usize, fields: Mapping, @@ -68,33 +67,16 @@ pub enum TypeEnum { TVirtual { ty: Type, }, - TCall { - calls: Vec>, - }, + TCall(RefCell>>), TFunc(FunSignature), } -// Order: -// TVar -// |--> TSeq -// | |--> TTuple -// | `--> TList -// |--> TRecord -// | |--> TObj -// | `--> TVirtual -// `--> TCall -// `--> TFunc - impl TypeEnum { pub fn get_type_name(&self) -> &'static str { - // this function is for debugging only... - // a proper to_str implementation requires the context match self { TypeEnum::TVar { .. } => "TVar", - TypeEnum::TSeq { .. } => "TSeq", TypeEnum::TTuple { .. } => "TTuple", TypeEnum::TList { .. } => "TList", - TypeEnum::TRecord { .. } => "TRecord", TypeEnum::TObj { .. } => "TObj", TypeEnum::TVirtual { .. } => "TVirtual", TypeEnum::TCall { .. } => "TCall", @@ -103,176 +85,168 @@ impl TypeEnum { } pub fn is_concrete(&self) -> bool { - matches!( - self, - TypeEnum::TTuple { .. } - | TypeEnum::TList { .. } - | TypeEnum::TObj { .. } - | TypeEnum::TVirtual { .. } - | TypeEnum::TFunc { .. } - ) + !matches!(self, TypeEnum::TVar { .. }) } } pub struct Unifier { - unification_table: UnificationTable>>, + unification_table: UnificationTable>, var_id: u32, } impl Unifier { /// Get an empty unifier pub fn new() -> Unifier { - Unifier { - unification_table: UnificationTable::new(), - var_id: 0, - } + Unifier { unification_table: UnificationTable::new(), var_id: 0 } } /// Register a type to the unifier. /// Returns a key in the unification_table. 
pub fn add_ty(&mut self, a: TypeEnum) -> Type { - self.unification_table.new_key(Rc::new(a.into())) + self.unification_table.new_key(Rc::new(a)) + } + + pub fn add_record(&mut self, fields: Mapping) -> Type { + let id = self.var_id + 1; + self.var_id += 1; + self.add_ty(TypeEnum::TVar { + id, + range: vec![].into(), + meta: TypeVarMeta::Record(fields.into()), + }) + } + + pub fn add_sequence(&mut self, sequence: Mapping) -> Type { + let id = self.var_id + 1; + self.var_id += 1; + self.add_ty(TypeEnum::TVar { + id, + range: vec![].into(), + meta: TypeVarMeta::Sequence(sequence.into()), + }) } /// Get the TypeEnum of a type. - pub fn get_ty(&mut self, a: Type) -> Rc> { + pub fn get_ty(&mut self, a: Type) -> Rc { self.unification_table.probe_value(a).clone() } - /// Unify two types, i.e. a = b. - pub fn unify(&mut self, a: Type, b: Type) -> Result<(), String> { - self.unify_impl(a, b, false) + pub fn get_fresh_var(&mut self) -> (Type, u32) { + self.get_fresh_var_with_range(&[]) } /// Get a fresh type variable. - pub fn get_fresh_var(&mut self) -> (Type, u32) { + pub fn get_fresh_var_with_range(&mut self, range: &[Type]) -> (Type, u32) { let id = self.var_id + 1; self.var_id += 1; - (self.add_ty(TypeEnum::TVar { id }), id) + let range = range.to_vec().into(); + (self.add_ty(TypeEnum::TVar { id, range, meta: TypeVarMeta::Generic }), id) } - /// Get string representation of the type - pub fn stringify(&mut self, ty: Type, obj_to_name: &mut F, var_to_name: &mut G) -> String - where - F: FnMut(usize) -> String, - G: FnMut(u32) -> String, - { - let ty = self.unification_table.probe_value(ty).clone(); - let ty = ty.as_ref().borrow(); - match &*ty { - TypeEnum::TVar { id } => var_to_name(*id), - TypeEnum::TSeq { map } => { - let mut fields = map.iter().map(|(k, v)| { - format!("{}={}", k, self.stringify(*v, obj_to_name, var_to_name)) - }); - format!("seq[{}]", fields.join(", ")) - } - TypeEnum::TTuple { ty } => { - let mut fields = ty - .iter() - .map(|v| self.stringify(*v, obj_to_name, var_to_name)); - format!("tuple[{}]", fields.join(", ")) - } - TypeEnum::TList { ty } => { - format!("list[{}]", self.stringify(*ty, obj_to_name, var_to_name)) - } - TypeEnum::TVirtual { ty } => { - format!("virtual[{}]", self.stringify(*ty, obj_to_name, var_to_name)) - } - TypeEnum::TRecord { fields } => { - let mut fields = fields.iter().map(|(k, v)| { - format!("{}={}", k, self.stringify(*v, obj_to_name, var_to_name)) - }); - format!("record[{}]", fields.join(", ")) - } - TypeEnum::TObj { obj_id, params, .. } => { - let name = obj_to_name(*obj_id); - if !params.is_empty() { - let mut params = params - .values() - .map(|v| self.stringify(*v, obj_to_name, var_to_name)); - format!("{}[{}]", name, params.join(", ")) - } else { - name - } - } - TypeEnum::TCall { .. 
} => "call".to_owned(), - TypeEnum::TFunc(signature) => { - let params = signature - .args - .iter() - .map(|arg| { - format!( - "{}={}", - arg.name, - self.stringify(arg.ty, obj_to_name, var_to_name) - ) - }) - .join(", "); - let ret = self.stringify(signature.ret, obj_to_name, var_to_name); - format!("fn[[{}], {}]", params, ret) - } + pub fn unify(&mut self, a: Type, b: Type) -> Result<(), String> { + if self.unification_table.unioned(a, b) { + Ok(()) + } else { + self.unify_impl(a, b, false) } } fn unify_impl(&mut self, a: Type, b: Type, swapped: bool) -> Result<(), String> { use TypeEnum::*; - let (ty_a_cell, ty_b_cell) = { - if self.unification_table.unioned(a, b) { - return Ok(()); - } + use TypeVarMeta::*; + let (ty_a, ty_b) = { ( self.unification_table.probe_value(a).clone(), self.unification_table.probe_value(b).clone(), ) }; - - let (ty_a, ty_b) = { (ty_a_cell.borrow(), ty_b_cell.borrow()) }; - match (&*ty_a, &*ty_b) { - (TypeEnum::TVar { .. }, _) => { + (TVar { meta: meta1, range: range1, .. }, TVar { meta: meta2, range: range2, .. }) => { self.occur_check(a, b)?; - self.set_a_to_b(a, b); - } - (TSeq { map: map1 }, TSeq { .. }) => { - self.occur_check(a, b)?; - drop(ty_b); - if let TypeEnum::TSeq { map: map2 } = &mut *ty_b_cell.as_ref().borrow_mut() { - // unify them to map2 - for (key, value) in map1.iter() { - if let Some(ty) = map2.get(key) { - self.unify(*ty, *value)?; - } else { - map2.insert(*key, *value); + self.occur_check(b, a)?; + match (meta1, meta2) { + (Generic, _) => {} + (_, Generic) => { + return self.unify_impl(b, a, true); + } + (Record(fields1), Record(fields2)) => { + let mut fields2 = fields2.borrow_mut(); + for (key, value) in fields1.borrow().iter() { + if let Some(ty) = fields2.get(key) { + self.unify(*ty, *value)?; + } else { + fields2.insert(key.clone(), *value); + } } } - } else { - unreachable!() + (Sequence(map1), Sequence(map2)) => { + let mut map2 = map2.borrow_mut(); + for (key, value) in map1.borrow().iter() { + if let Some(ty) = map2.get(key) { + self.unify(*ty, *value)?; + } else { + map2.insert(*key, *value); + } + } + } + _ => { + return Err("Incompatible".to_string()); + } + } + let range1 = range1.borrow(); + // new range is the intersection of them + // empty range indicates no constraint + if !range1.is_empty() { + let old_range2 = range2.take(); + let mut range2 = range2.borrow_mut(); + if old_range2.is_empty() { + range2.extend_from_slice(&range1); + } + for v1 in old_range2.iter() { + for v2 in range1.iter() { + if !self.shape_match(*v1, *v2) { + continue; + } + self.unify(*v1, *v2)?; + range2.push(*v2); + } + } + if range2.is_empty() { + return Err( + "cannot unify type variables with incompatible value range".to_string() + ); + } } self.set_a_to_b(a, b); } - (TSeq { map: map1 }, TTuple { ty: types }) => { + (TVar { meta: Generic, id, range, .. }, _) => { self.occur_check(a, b)?; - let len = types.len() as i32; - for (k, v) in map1.iter() { + self.check_var_range(*id, b, &range.borrow())?; + self.set_a_to_b(a, b); + } + (TVar { meta: Sequence(map), id, range, .. }, TTuple { ty }) => { + self.occur_check(a, b)?; + let len = ty.len() as i32; + for (k, v) in map.borrow().iter() { // handle negative index let ind = if *k < 0 { len + *k } else { *k }; if ind >= len || ind < 0 { return Err(format!( "Tuple index out of range. 
(Length: {}, Index: {})", - types.len(), - k + len, k )); } - self.unify(*v, types[ind as usize])?; + self.unify(*v, ty[ind as usize])?; } + self.check_var_range(*id, b, &range.borrow())?; self.set_a_to_b(a, b); } - (TSeq { map: map1 }, TList { ty }) => { + (TVar { meta: Sequence(map), id, range, .. }, TList { ty }) => { self.occur_check(a, b)?; - for v in map1.values() { + for v in map.borrow().values() { self.unify(*v, *ty)?; } + self.check_var_range(*id, b, &range.borrow())?; self.set_a_to_b(a, b); } (TTuple { ty: ty1 }, TTuple { ty: ty2 }) => { @@ -292,59 +266,32 @@ impl Unifier { self.unify(*ty1, *ty2)?; self.set_a_to_b(a, b); } - (TRecord { fields: fields1 }, TRecord { .. }) => { + (TVar { meta: Record(map), id, range, .. }, TObj { fields, .. }) => { self.occur_check(a, b)?; - drop(ty_b); - if let TypeEnum::TRecord { fields: fields2 } = &mut *ty_b_cell.as_ref().borrow_mut() - { - for (key, value) in fields1.iter() { - if let Some(ty) = fields2.get(key) { - self.unify(*ty, *value)?; - } else { - fields2.insert(key.clone(), *value); - } - } - } else { - unreachable!() - } - self.set_a_to_b(a, b); - } - ( - TRecord { fields: fields1 }, - TObj { - fields: fields2, .. - }, - ) => { - self.occur_check(a, b)?; - for (key, value) in fields1.iter() { - if let Some(ty) = fields2.get(key) { - self.unify(*ty, *value)?; + for (k, v) in map.borrow().iter() { + if let Some(ty) = fields.get(k) { + self.unify(*ty, *v)?; } else { - return Err(format!("No such attribute {}", key)); + return Err(format!("No such attribute {}", k)); } } + self.check_var_range(*id, b, &range.borrow())?; self.set_a_to_b(a, b); } - (TRecord { .. }, TVirtual { ty }) => { + (TVar { meta: Record(_), id, range, .. }, TVirtual { ty }) => { + // TODO: look at this rule self.occur_check(a, b)?; + self.check_var_range(*id, b, &range.borrow())?; self.unify(a, *ty)?; } ( - TObj { - obj_id: id1, - params: params1, - .. - }, - TObj { - obj_id: id2, - params: params2, - .. - }, + TObj { obj_id: id1, params: params1, .. }, + TObj { obj_id: id2, params: params2, .. }, ) => { if id1 != id2 { return Err(format!("Cannot unify objects with ID {} and {}", id1, id2)); } - for (x, y) in params1.values().zip(params2.values()) { + for (x, y) in zip(params1.values(), params2.values()) { self.unify(*x, *y)?; } self.set_a_to_b(a, b); @@ -353,16 +300,10 @@ impl Unifier { self.unify(*ty1, *ty2)?; self.set_a_to_b(a, b); } - (TCall { calls: c1 }, TCall { .. 
}) => { - drop(ty_b); - if let TypeEnum::TCall { calls: c2 } = &mut *ty_b_cell.as_ref().borrow_mut() { - c2.extend(c1.iter().cloned()); - } else { - unreachable!() - } - self.set_a_to_b(a, b); + (TCall(calls1), TCall(calls2)) => { + calls2.borrow_mut().extend_from_slice(&calls1.borrow()); } - (TCall { calls }, TFunc(signature)) => { + (TCall(calls), TFunc(signature)) => { self.occur_check(a, b)?; let required: Vec = signature .args @@ -371,29 +312,20 @@ impl Unifier { .map(|v| v.name.clone()) .rev() .collect(); - for c in calls { - let Call { - posargs, - kwargs, - ret, - fun, - } = c.as_ref(); + for c in calls.borrow().iter() { + let Call { posargs, kwargs, ret, fun } = c.as_ref(); let instantiated = self.instantiate_fun(b, signature); let signature; let r = self.get_ty(instantiated); - let r = r.as_ref().borrow(); + let r = r.as_ref(); if let TypeEnum::TFunc(s) = &*r { signature = s; } else { unreachable!(); } let mut required = required.clone(); - let mut all_names: Vec<_> = signature - .args - .iter() - .map(|v| (v.name.clone(), v.ty)) - .rev() - .collect(); + let mut all_names: Vec<_> = + signature.args.iter().map(|v| (v.name.clone(), v.ty)).rev().collect(); for (i, t) in posargs.iter().enumerate() { if signature.args.len() <= i { return Err("Too many arguments.".to_string()); @@ -451,6 +383,88 @@ impl Unifier { Ok(()) } + /// Get string representation of the type + pub fn stringify(&mut self, ty: Type, obj_to_name: &mut F, var_to_name: &mut G) -> String + where + F: FnMut(usize) -> String, + G: FnMut(u32) -> String, + { + use TypeVarMeta::*; + let ty = self.unification_table.probe_value(ty).clone(); + match ty.as_ref() { + TypeEnum::TVar { id, meta: Generic, .. } => var_to_name(*id), + TypeEnum::TVar { meta: Sequence(map), .. } => { + let fields = map.borrow().iter().map(|(k, v)| { + format!("{}={}", k, self.stringify(*v, obj_to_name, var_to_name)) + }).join(", "); + format!("seq[{}]", fields) + } + TypeEnum::TVar { meta: Record(fields), .. } => { + let fields = fields.borrow().iter().map(|(k, v)| { + format!("{}={}", k, self.stringify(*v, obj_to_name, var_to_name)) + }).join(", "); + format!("record[{}]", fields) + } + TypeEnum::TTuple { ty } => { + let mut fields = ty + .iter() + .map(|v| self.stringify(*v, obj_to_name, var_to_name)); + format!("tuple[{}]", fields.join(", ")) + } + TypeEnum::TList { ty } => { + format!("list[{}]", self.stringify(*ty, obj_to_name, var_to_name)) + } + TypeEnum::TVirtual { ty } => { + format!("virtual[{}]", self.stringify(*ty, obj_to_name, var_to_name)) + } + TypeEnum::TObj { obj_id, params, .. } => { + let name = obj_to_name(*obj_id); + if !params.is_empty() { + let mut params = params + .values() + .map(|v| self.stringify(*v, obj_to_name, var_to_name)); + format!("{}[{}]", name, params.join(", ")) + } else { + name + } + } + TypeEnum::TCall { .. 
} => "call".to_owned(), + TypeEnum::TFunc(signature) => { + let params = signature + .args + .iter() + .map(|arg| { + format!( + "{}={}", + arg.name, + self.stringify(arg.ty, obj_to_name, var_to_name) + ) + }) + .join(", "); + let ret = self.stringify(signature.ret, obj_to_name, var_to_name); + format!("fn[[{}], {}]", params, ret) + } + } + } + + fn check_var_range(&mut self, id: u32, b: Type, range: &[Type]) -> Result<(), String> { + let mut in_range = range.is_empty(); + for t in range.iter() { + if self.shape_match(*t, b) { + self.unify(*t, b)?; + in_range = true; + } + } + if !in_range { + return Err(format!( + "Cannot unify {} with {} due to incompatible value range", + id, + self.get_ty(b).get_type_name() + )); + } + Ok(()) + } + fn set_a_to_b(&mut self, a: Type, b: Type) { // unify a and b together, and set the value to b's value. let table = &mut self.unification_table; @@ -460,77 +474,40 @@ impl Unifier { } fn incompatible_types(&self, a: &TypeEnum, b: &TypeEnum) -> Result<(), String> { - Err(format!( - "Cannot unify {} with {}", - a.get_type_name(), - b.get_type_name() - )) + Err(format!("Cannot unify {} with {}", a.get_type_name(), b.get_type_name())) } - fn occur_check(&mut self, a: Type, b: Type) -> Result<(), String> { - if self.unification_table.unioned(a, b) { - return Err("Recursive type is prohibited.".to_owned()); + /// Instantiate a function if it hasn't been instntiated. + /// Returns Some(T) where T is the instantiated type. + /// Returns None if the function is already instantiated. + fn instantiate_fun(&mut self, ty: Type, fun: &FunSignature) -> Type { + let mut instantiated = false; + let mut vars = Vec::new(); + for (k, v) in fun.vars.iter() { + if let TypeEnum::TVar { id, range, .. } = + self.unification_table.probe_value(*v).as_ref() + { + if k != id { + instantiated = true; + break; + } + // actually, if the first check succeeded, the function should be uninstatiated. + // The cloned values must be used and would not be wasted. + vars.push((*k, range.clone())); + } else { + instantiated = true; + break; + } + } + if instantiated { + ty + } else { + let mapping = vars + .into_iter() + .map(|(k, range)| (k, self.get_fresh_var_with_range(range.borrow().as_ref()).0)) + .collect(); + self.subst(ty, &mapping).unwrap_or(ty) } - let ty = self.unification_table.probe_value(b).clone(); - let ty = ty.borrow(); - - match &*ty { - TypeEnum::TVar { .. } => { - // TODO: occur check for bounds... - } - TypeEnum::TSeq { map } => { - for t in map.values() { - self.occur_check(a, *t)?; - } - } - TypeEnum::TTuple { ty } => { - for t in ty.iter() { - self.occur_check(a, *t)?; - } - } - TypeEnum::TList { ty } | TypeEnum::TVirtual { ty } => { - self.occur_check(a, *ty)?; - } - TypeEnum::TRecord { fields } => { - for t in fields.values() { - self.occur_check(a, *t)?; - } - } - TypeEnum::TObj { params: map, .. } => { - for t in map.values() { - self.occur_check(a, *t)?; - } - } - TypeEnum::TCall { calls } => { - for t in calls - .iter() - .map(|call| { - call.posargs - .iter() - .chain(call.kwargs.values()) - .chain(once(&call.ret)) - }) - .flatten() - { - self.occur_check(a, *t)?; - } - } - TypeEnum::TFunc(FunSignature { - args, - ret, - vars: params, - }) => { - for t in args - .iter() - .map(|v| &v.ty) - .chain(params.values()) - .chain(once(ret)) - { - self.occur_check(a, *t)?; - } - } - }; - Ok(()) } /// Substitute type variables within a type into other types. 
@@ -538,48 +515,41 @@ impl Unifier { /// If this returns None, the result type would be the original type /// (no substitution has to be done). fn subst(&mut self, a: Type, mapping: &VarMap) -> Option { - let ty_cell = self.unification_table.probe_value(a).clone(); - let ty = ty_cell.borrow(); + use TypeVarMeta::*; + let ty = self.unification_table.probe_value(a).clone(); // this function would only be called when we instantiate functions. // function type signature should ONLY contain concrete types and type // variables, i.e. things like TRecord, TCall should not occur, and we // should be safe to not implement the substitution for those variants. match &*ty { - TypeEnum::TVar { id } => mapping.get(&id).cloned(), - TypeEnum::TSeq { map } => self - .subst_map(map, mapping) - .map(|m| self.add_ty(TypeEnum::TSeq { map: m })), + TypeEnum::TVar { id, meta: Generic, .. } => mapping.get(&id).cloned(), TypeEnum::TTuple { ty } => { - let mut new_ty = None; + let mut new_ty = Cow::from(ty); for (i, t) in ty.iter().enumerate() { if let Some(t1) = self.subst(*t, mapping) { - if new_ty.is_none() { - new_ty = Some(ty.clone()); - } - new_ty.as_mut().unwrap()[i] = t1; + new_ty.to_mut()[i] = t1; } } - new_ty.map(|t| self.add_ty(TypeEnum::TTuple { ty: t })) + if matches!(new_ty, Cow::Owned(_)) { + Some(self.add_ty(TypeEnum::TTuple { ty: new_ty.into_owned() })) + } else { + None + } } - TypeEnum::TList { ty } => self - .subst(*ty, mapping) - .map(|t| self.add_ty(TypeEnum::TList { ty: t })), - TypeEnum::TVirtual { ty } => self - .subst(*ty, mapping) - .map(|t| self.add_ty(TypeEnum::TVirtual { ty: t })), - TypeEnum::TObj { - obj_id, - fields, - params, - } => { + TypeEnum::TList { ty } => { + self.subst(*ty, mapping).map(|t| self.add_ty(TypeEnum::TList { ty: t })) + } + TypeEnum::TVirtual { ty } => { + self.subst(*ty, mapping).map(|t| self.add_ty(TypeEnum::TVirtual { ty: t })) + } + TypeEnum::TObj { obj_id, fields, params } => { // Type variables in field types must be present in the type parameter. // If the mapping does not contain any type variables in the // parameter list, we don't need to substitute the fields. // This is also used to prevent infinite substitution... let need_subst = params.values().any(|v| { - let ty_cell = self.unification_table.probe_value(*v); - let ty = ty_cell.borrow(); - if let TypeEnum::TVar { id } = &*ty { + let ty = self.unification_table.probe_value(*v); + if let TypeEnum::TVar { id, .. 
} = ty.as_ref() { mapping.contains_key(&id) } else { false @@ -587,50 +557,29 @@ impl Unifier { }); if need_subst { let obj_id = *obj_id; - let params = self - .subst_map(¶ms, mapping) - .unwrap_or_else(|| params.clone()); - let fields = self - .subst_map(&fields, mapping) - .unwrap_or_else(|| fields.clone()); - Some(self.add_ty(TypeEnum::TObj { - obj_id, - params, - fields, - })) + let params = self.subst_map(¶ms, mapping).unwrap_or_else(|| params.clone()); + let fields = self.subst_map(&fields, mapping).unwrap_or_else(|| fields.clone()); + Some(self.add_ty(TypeEnum::TObj { obj_id, params, fields })) } else { None } } - TypeEnum::TFunc(FunSignature { - args, - ret, - vars: params, - }) => { + TypeEnum::TFunc(FunSignature { args, ret, vars: params }) => { let new_params = self.subst_map(params, mapping); let new_ret = self.subst(*ret, mapping); - let mut new_args = None; + let mut new_args = Cow::from(args); for (i, t) in args.iter().enumerate() { if let Some(t1) = self.subst(t.ty, mapping) { - if new_args.is_none() { - new_args = Some(args.clone()); - } - new_args.as_mut().unwrap()[i] = FuncArg { - name: t.name.clone(), - ty: t1, - is_optional: t.is_optional, - }; + let mut t = t.clone(); + t.ty = t1; + new_args.to_mut()[i] = t; } } - if new_params.is_some() || new_ret.is_some() || new_args.is_some() { + if new_params.is_some() || new_ret.is_some() || matches!(new_args, Cow::Owned(..)) { let params = new_params.unwrap_or_else(|| params.clone()); let ret = new_ret.unwrap_or_else(|| *ret); - let args = new_args.unwrap_or_else(|| args.clone()); - Some(self.add_ty(TypeEnum::TFunc(FunSignature { - args, - ret, - vars: params, - }))) + let args = new_args.into_owned(); + Some(self.add_ty(TypeEnum::TFunc(FunSignature { args, ret, vars: params }))) } else { None } @@ -655,95 +604,73 @@ impl Unifier { map2 } - /// Instantiate a function if it hasn't been instntiated. - /// Returns Some(T) where T is the instantiated type. - /// Returns None if the function is already instantiated. - fn instantiate_fun(&mut self, ty: Type, fun: &FunSignature) -> Type { - let mut instantiated = false; - for (k, v) in fun.vars.iter() { - if let TypeEnum::TVar { id } = - &*self.unification_table.probe_value(*v).as_ref().borrow() - { - if k != id { - instantiated = true; - break; + fn occur_check(&mut self, a: Type, b: Type) -> Result<(), String> { + use TypeVarMeta::*; + if self.unification_table.unioned(a, b) { + return Err("Recursive type is prohibited.".to_owned()); + } + let ty = self.unification_table.probe_value(b).clone(); + + match ty.as_ref() { + TypeEnum::TVar { meta: Generic, .. } => {} + TypeEnum::TVar { meta: Sequence(map), .. } => { + for t in map.borrow().values() { + self.occur_check(a, *t)?; + } + } + TypeEnum::TVar { meta: Record(map), .. } => { + for t in map.borrow().values() { + self.occur_check(a, *t)?; + } + } + TypeEnum::TCall(calls) => { + for t in calls + .borrow() + .iter() + .map(|call| chain!(call.posargs.iter(), call.kwargs.values(), once(&call.ret))) + .flatten() + { + self.occur_check(a, *t)?; + } + } + TypeEnum::TTuple { ty } => { + for t in ty.iter() { + self.occur_check(a, *t)?; + } + } + TypeEnum::TList { ty } | TypeEnum::TVirtual { ty } => { + self.occur_check(a, *ty)?; + } + TypeEnum::TObj { params: map, .. 
} => { + for t in map.values() { + self.occur_check(a, *t)?; + } + } + TypeEnum::TFunc(FunSignature { args, ret, vars: params }) => { + for t in chain!(args.iter().map(|v| &v.ty), params.values(), once(ret)) { + self.occur_check(a, *t)?; } - } else { - instantiated = true; - break; } } - if instantiated { - ty - } else { - let mapping = fun - .vars - .iter() - .map(|(k, _)| (*k, self.get_fresh_var().0)) - .collect(); - self.subst(ty, &mapping).unwrap_or(ty) - } + Ok(()) } - /// Check whether two types are equal. - fn eq(&mut self, a: Type, b: Type) -> bool { - if a == b { - return true; - } - let (ty_a, ty_b) = { - let table = &mut self.unification_table; - if table.unioned(a, b) { - return true; - } - (table.probe_value(a).clone(), table.probe_value(b).clone()) - }; - - let ty_a = ty_a.borrow(); - let ty_b = ty_b.borrow(); - - match (&*ty_a, &*ty_b) { - (TypeEnum::TVar { id: id1 }, TypeEnum::TVar { id: id2 }) => id1 == id2, - (TypeEnum::TSeq { map: map1 }, TypeEnum::TSeq { map: map2 }) => self.map_eq(map1, map2), - (TypeEnum::TTuple { ty: ty1 }, TypeEnum::TTuple { ty: ty2 }) => { + pub fn shape_match(&mut self, a: Type, b: Type) -> bool { + use TypeEnum::*; + let a = self.get_ty(a); + let b = self.get_ty(b); + match (a.as_ref(), b.as_ref()) { + (TVar { .. }, _) => true, + (_, TVar { .. }) => true, + (TTuple { ty: ty1 }, TTuple { ty: ty2 }) => { ty1.len() == ty2.len() - && ty1.iter().zip(ty2.iter()).all(|(t1, t2)| self.eq(*t1, *t2)) + && zip(ty1.iter(), ty2.iter()).all(|(a, b)| self.shape_match(*a, *b)) } - (TypeEnum::TList { ty: ty1 }, TypeEnum::TList { ty: ty2 }) - | (TypeEnum::TVirtual { ty: ty1 }, TypeEnum::TVirtual { ty: ty2 }) => { - self.eq(*ty1, *ty2) - } - (TypeEnum::TRecord { fields: fields1 }, TypeEnum::TRecord { fields: fields2 }) => { - self.map_eq(fields1, fields2) - } - ( - TypeEnum::TObj { - obj_id: id1, - params: params1, - .. - }, - TypeEnum::TObj { - obj_id: id2, - params: params2, - .. - }, - ) => id1 == id2 && self.map_eq(params1, params2), - // TCall and TFunc are not yet implemented + (TList { ty: ty1 }, TList { ty: ty2 }) + | (TVirtual { ty: ty1 }, TVirtual { ty: ty2 }) => self.shape_match(*ty1, *ty2), + (TObj { obj_id: id1, .. }, TObj { obj_id: id2, .. }) => id1 == id2, + // don't deal with function shape for now _ => false, } } - - fn map_eq(&mut self, map1: &Mapping, map2: &Mapping) -> bool - where - K: std::hash::Hash + std::cmp::Eq + std::clone::Clone, - { - if map1.len() != map2.len() { - return false; - } - for (k, v) in map1.iter() { - if !map2.get(k).map(|v1| self.eq(*v, *v1)).unwrap_or(false) { - return false; - } - } - true - } } diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index 0855b164..8732b1d4 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -1,8 +1,69 @@ -use super::super::typedef::*; +use super::*; use itertools::Itertools; use std::collections::HashMap; use test_case::test_case; +impl Unifier { + /// Check whether two types are equal. + fn eq(&mut self, a: Type, b: Type) -> bool { + use TypeVarMeta::*; + if a == b { + return true; + } + let (ty_a, ty_b) = { + let table = &mut self.unification_table; + if table.unioned(a, b) { + return true; + } + (table.probe_value(a).clone(), table.probe_value(b).clone()) + }; + + match (&*ty_a, &*ty_b) { + ( + TypeEnum::TVar { meta: Generic, id: id1, .. }, + TypeEnum::TVar { meta: Generic, id: id2, .. }, + ) => id1 == id2, + ( + TypeEnum::TVar { meta: Sequence(map1), .. 
}, + TypeEnum::TVar { meta: Sequence(map2), .. }, + ) => self.map_eq(&map1.borrow(), &map2.borrow()), + (TypeEnum::TTuple { ty: ty1 }, TypeEnum::TTuple { ty: ty2 }) => { + ty1.len() == ty2.len() + && ty1.iter().zip(ty2.iter()).all(|(t1, t2)| self.eq(*t1, *t2)) + } + (TypeEnum::TList { ty: ty1 }, TypeEnum::TList { ty: ty2 }) + | (TypeEnum::TVirtual { ty: ty1 }, TypeEnum::TVirtual { ty: ty2 }) => { + self.eq(*ty1, *ty2) + } + ( + TypeEnum::TVar { meta: Record(fields1), .. }, + TypeEnum::TVar { meta: Record(fields2), .. }, + ) => self.map_eq(&fields1.borrow(), &fields2.borrow()), + ( + TypeEnum::TObj { obj_id: id1, params: params1, .. }, + TypeEnum::TObj { obj_id: id2, params: params2, .. }, + ) => id1 == id2 && self.map_eq(params1, params2), + // TCall and TFunc are not yet implemented + _ => false, + } + } + + fn map_eq(&mut self, map1: &Mapping, map2: &Mapping) -> bool + where + K: std::hash::Hash + std::cmp::Eq + std::clone::Clone, + { + if map1.len() != map2.len() { + return false; + } + for (k, v) in map1.iter() { + if !map2.get(k).map(|v1| self.eq(*v, *v1)).unwrap_or(false) { + return false; + } + } + true + } +} + struct TestEnvironment { pub unifier: Unifier, type_mapping: HashMap, @@ -47,10 +108,7 @@ impl TestEnvironment { }), ); - TestEnvironment { - unifier, - type_mapping, - } + TestEnvironment { unifier, type_mapping } } fn parse(&mut self, typ: &str, mapping: &Mapping) -> Type { @@ -65,9 +123,7 @@ impl TestEnvironment { mapping: &Mapping, ) -> (Type, &'b str) { // for testing only, so we can just panic when the input is malformed - let end = typ - .find(|c| ['[', ',', ']', '='].contains(&c)) - .unwrap_or_else(|| typ.len()); + let end = typ.find(|c| ['[', ',', ']', '='].contains(&c)).unwrap_or_else(|| typ.len()); match &typ[..end] { "Tuple" => { let mut s = &typ[end..]; @@ -97,7 +153,7 @@ impl TestEnvironment { fields.insert(key, result.0); s = result.1; } - (self.unifier.add_ty(TypeEnum::TRecord { fields }), &s[1..]) + (self.unifier.add_record(fields), &s[1..]) } x => { let mut s = &typ[end..]; @@ -106,7 +162,7 @@ impl TestEnvironment { // we should not resolve the type of type variables. let mut ty = *self.type_mapping.get(x).unwrap(); let te = self.unifier.get_ty(ty); - if let TypeEnum::TObj { params, .. } = &*te.as_ref().borrow() { + if let TypeEnum::TObj { params, .. 
} = &*te.as_ref() { if !params.is_empty() { assert!(&s[0..1] == "["); let mut p = Vec::new(); @@ -192,6 +248,7 @@ fn test_unify( env.unifier.unify(t1, t2).unwrap(); } for (a, b) in verify_pairs.iter() { + println!("{} = {}", a, b); let t1 = env.parse(a, &mapping); let t2 = env.parse(b, &mapping); assert!(env.unifier.eq(t1, t2)); @@ -258,10 +315,8 @@ fn test_invalid_unification( let t2 = env.parse(b, &mapping); pairs.push((t1, t2)); } - let (t1, t2) = ( - env.parse(errornous_pair.0 .0, &mapping), - env.parse(errornous_pair.0 .1, &mapping), - ); + let (t1, t2) = + (env.parse(errornous_pair.0 .0, &mapping), env.parse(errornous_pair.0 .1, &mapping)); for (a, b) in pairs { env.unifier.unify(a, b).unwrap(); } From d140164a381717a3cc2dca1f4ca7518de206c45e Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 23 Jul 2021 16:19:00 +0800 Subject: [PATCH 041/131] fixed virtual unification --- nac3core/src/typecheck/typedef/mod.rs | 50 ++++++++++++++++----------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 52b26df2..ac1db799 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -278,11 +278,24 @@ impl Unifier { self.check_var_range(*id, b, &range.borrow())?; self.set_a_to_b(a, b); } - (TVar { meta: Record(_), id, range, .. }, TVirtual { ty }) => { - // TODO: look at this rule + (TVar { meta: Record(map), id, range, .. }, TVirtual { ty }) => { self.occur_check(a, b)?; + let ty = self.get_ty(*ty); + if let TObj { fields, .. } = ty.as_ref() { + for (k, v) in map.borrow().iter() { + if let Some(ty) = fields.get(k) { + if !matches!(self.get_ty(*ty).as_ref(), TFunc { .. }) { + return Err(format!("Cannot access field {} for virtual type", k)); + } + self.unify(*v, *ty)?; + } + } + } else { + // require annotation... + return Err("Requires type annotation for virtual".to_string()); + } self.check_var_range(*id, b, &range.borrow())?; - self.unify(a, *ty)?; + self.unify(a, b)?; } ( TObj { obj_id: id1, params: params1, .. }, @@ -394,21 +407,23 @@ impl Unifier { match ty.as_ref() { TypeEnum::TVar { id, meta: Generic, .. } => var_to_name(*id), TypeEnum::TVar { meta: Sequence(map), .. } => { - let fields = map.borrow().iter().map(|(k, v)| { - format!("{}={}", k, self.stringify(*v, obj_to_name, var_to_name)) - }).join(", "); + let fields = map + .borrow() + .iter() + .map(|(k, v)| format!("{}={}", k, self.stringify(*v, obj_to_name, var_to_name))) + .join(", "); format!("seq[{}]", fields) } TypeEnum::TVar { meta: Record(fields), .. } => { - let fields = fields.borrow().iter().map(|(k, v)| { - format!("{}={}", k, self.stringify(*v, obj_to_name, var_to_name)) - }).join(", "); + let fields = fields + .borrow() + .iter() + .map(|(k, v)| format!("{}={}", k, self.stringify(*v, obj_to_name, var_to_name))) + .join(", "); format!("record[{}]", fields) } TypeEnum::TTuple { ty } => { - let mut fields = ty - .iter() - .map(|v| self.stringify(*v, obj_to_name, var_to_name)); + let mut fields = ty.iter().map(|v| self.stringify(*v, obj_to_name, var_to_name)); format!("tuple[{}]", fields.join(", ")) } TypeEnum::TList { ty } => { @@ -420,9 +435,8 @@ impl Unifier { TypeEnum::TObj { obj_id, params, .. 
} => { let name = obj_to_name(*obj_id); if !params.is_empty() { - let mut params = params - .values() - .map(|v| self.stringify(*v, obj_to_name, var_to_name)); + let mut params = + params.values().map(|v| self.stringify(*v, obj_to_name, var_to_name)); format!("{}[{}]", name, params.join(", ")) } else { name @@ -434,11 +448,7 @@ impl Unifier { .args .iter() .map(|arg| { - format!( - "{}={}", - arg.name, - self.stringify(arg.ty, obj_to_name, var_to_name) - ) + format!("{}={}", arg.name, self.stringify(arg.ty, obj_to_name, var_to_name)) }) .join(", "); let ret = self.stringify(signature.ret, obj_to_name, var_to_name); From d7df93bef1a8bda865f3e63830d3be50faaaa051 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 23 Jul 2021 17:22:05 +0800 Subject: [PATCH 042/131] fixed range check --- nac3core/src/typecheck/typedef/mod.rs | 62 +++++++++---------- nac3core/src/typecheck/typedef/test.rs | 82 ++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 31 deletions(-) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index ac1db799..0164641f 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -207,7 +207,6 @@ impl Unifier { if !self.shape_match(*v1, *v2) { continue; } - self.unify(*v1, *v2)?; range2.push(*v2); } } @@ -221,7 +220,7 @@ impl Unifier { } (TVar { meta: Generic, id, range, .. }, _) => { self.occur_check(a, b)?; - self.check_var_range(*id, b, &range.borrow())?; + self.check_var_compatible(*id, b, &range.borrow())?; self.set_a_to_b(a, b); } (TVar { meta: Sequence(map), id, range, .. }, TTuple { ty }) => { @@ -238,7 +237,7 @@ impl Unifier { } self.unify(*v, ty[ind as usize])?; } - self.check_var_range(*id, b, &range.borrow())?; + self.check_var_compatible(*id, b, &range.borrow())?; self.set_a_to_b(a, b); } (TVar { meta: Sequence(map), id, range, .. }, TList { ty }) => { @@ -246,7 +245,7 @@ impl Unifier { for v in map.borrow().values() { self.unify(*v, *ty)?; } - self.check_var_range(*id, b, &range.borrow())?; + self.check_var_compatible(*id, b, &range.borrow())?; self.set_a_to_b(a, b); } (TTuple { ty: ty1 }, TTuple { ty: ty2 }) => { @@ -275,7 +274,7 @@ impl Unifier { return Err(format!("No such attribute {}", k)); } } - self.check_var_range(*id, b, &range.borrow())?; + self.check_var_compatible(*id, b, &range.borrow())?; self.set_a_to_b(a, b); } (TVar { meta: Record(map), id, range, .. }, TVirtual { ty }) => { @@ -288,14 +287,16 @@ impl Unifier { return Err(format!("Cannot access field {} for virtual type", k)); } self.unify(*v, *ty)?; + } else { + return Err(format!("No such attribute {}", k)); } } } else { // require annotation... return Err("Requires type annotation for virtual".to_string()); } - self.check_var_range(*id, b, &range.borrow())?; - self.unify(a, b)?; + self.check_var_compatible(*id, b, &range.borrow())?; + self.set_a_to_b(a, b); } ( TObj { obj_id: id1, params: params1, .. }, @@ -457,24 +458,6 @@ impl Unifier { } } - fn check_var_range(&mut self, id: u32, b: Type, range: &[Type]) -> Result<(), String> { - let mut in_range = range.is_empty(); - for t in range.iter() { - if self.shape_match(*t, b) { - self.unify(*t, b)?; - in_range = true; - } - } - if !in_range { - return Err(format!( - "Cannot unify {} with {} due to incompatible value range", - id, - self.get_ty(b).get_type_name() - )); - } - Ok(()) - } - fn set_a_to_b(&mut self, a: Type, b: Type) { // unify a and b together, and set the value to b's value. 
let table = &mut self.unification_table; @@ -665,13 +648,13 @@ impl Unifier { Ok(()) } - pub fn shape_match(&mut self, a: Type, b: Type) -> bool { + fn shape_match(&mut self, a: Type, b: Type) -> bool { use TypeEnum::*; - let a = self.get_ty(a); - let b = self.get_ty(b); - match (a.as_ref(), b.as_ref()) { - (TVar { .. }, _) => true, - (_, TVar { .. }) => true, + let x = self.get_ty(a); + let y = self.get_ty(b); + match (x.as_ref(), y.as_ref()) { + (TVar { id, range, .. }, _) => self.check_var_compatible(*id, b, &range.borrow()).is_ok(), + (_, TVar { id, range, .. }) => self.check_var_compatible(*id, a, &range.borrow()).is_ok(), (TTuple { ty: ty1 }, TTuple { ty: ty2 }) => { ty1.len() == ty2.len() && zip(ty1.iter(), ty2.iter()).all(|(a, b)| self.shape_match(*a, *b)) @@ -683,4 +666,21 @@ impl Unifier { _ => false, } } + + fn check_var_compatible(&mut self, id: u32, b: Type, range: &[Type]) -> Result<(), String> { + let mut in_range = range.is_empty(); + for t in range.iter() { + if self.shape_match(*t, b) { + in_range = true; + } + } + if !in_range { + return Err(format!( + "Cannot unify type variable {} with {} due to incompatible value range", + id, + self.get_ty(b).get_type_name() + )); + } + Ok(()) + } } diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index 8732b1d4..f7e09084 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -322,3 +322,85 @@ fn test_invalid_unification( } assert_eq!(env.unifier.unify(t1, t2), Err(errornous_pair.1.to_string())); } + +#[test] +fn test_virtual() { + let mut env = TestEnvironment::new(); + let int = env.parse("int", &HashMap::new()); + let fun = env.unifier.add_ty(TypeEnum::TFunc(FunSignature { + args: vec![], + ret: int, + vars: HashMap::new(), + })); + let bar = env.unifier.add_ty(TypeEnum::TObj { + obj_id: 5, + fields: [("f".to_string(), fun), ("a".to_string(), int)].iter().cloned().collect(), + params: HashMap::new(), + }); + let v0 = env.unifier.get_fresh_var().0; + let v1 = env.unifier.get_fresh_var().0; + + let a = env.unifier.add_ty(TypeEnum::TVirtual { ty: bar }); + let b = env.unifier.add_ty(TypeEnum::TVirtual { ty: v0 }); + let c = env.unifier.add_record([("f".to_string(), v1)].iter().cloned().collect()); + env.unifier.unify(a, b).unwrap(); + env.unifier.unify(b, c).unwrap(); + assert!(env.unifier.eq(v1, fun)); + + let d = env.unifier.add_record([("a".to_string(), v1)].iter().cloned().collect()); + assert_eq!(env.unifier.unify(b, d), Err("Cannot access field a for virtual type".to_string())); + + let d = env.unifier.add_record([("b".to_string(), v1)].iter().cloned().collect()); + assert_eq!(env.unifier.unify(b, d), Err("No such attribute b".to_string())); +} + +#[test] +fn test_typevar_range() { + let mut env = TestEnvironment::new(); + let int = env.parse("int", &HashMap::new()); + let boolean = env.parse("bool", &HashMap::new()); + let float = env.parse("float", &HashMap::new()); + let int_list = env.parse("List[int]", &HashMap::new()); + let float_list = env.parse("List[float]", &HashMap::new()); + + // unification between v and int + // where v in (int, bool) + let v = env.unifier.get_fresh_var_with_range(&[int, boolean]).0; + env.unifier.unify(int, v).unwrap(); + + // unification between v and List[int] + // where v in (int, bool) + let v = env.unifier.get_fresh_var_with_range(&[int, boolean]).0; + assert_eq!( + env.unifier.unify(int_list, v), + Err("Cannot unify type variable 3 with TList due to incompatible value range".to_string()) + ); + + // 
unification between v and float + // where v in (int, bool) + let v = env.unifier.get_fresh_var_with_range(&[int, boolean]).0; + assert_eq!( + env.unifier.unify(float, v), + Err("Cannot unify type variable 4 with TObj due to incompatible value range".to_string()) + ); + + let v1 = env.unifier.get_fresh_var_with_range(&[int, boolean]).0; + let v1_list = env.unifier.add_ty(TypeEnum::TList { ty: v1 }); + let v = env.unifier.get_fresh_var_with_range(&[int, v1_list]).0; + // unification between v and int + // where v in (int, List[v1]), v1 in (int, bool) + env.unifier.unify(int, v).unwrap(); + + let v = env.unifier.get_fresh_var_with_range(&[int, v1_list]).0; + // unification between v and List[int] + // where v in (int, List[v1]), v1 in (int, bool) + env.unifier.unify(int_list, v).unwrap(); + + let v = env.unifier.get_fresh_var_with_range(&[int, v1_list]).0; + // unification between v and List[float] + // where v in (int, List[v1]), v1 in (int, bool) + assert_eq!( + env.unifier.unify(float_list, v), + Err("Cannot unify type variable 8 with TList due to incompatible value range".to_string()) + ); +} From 53ebe8d8b238b68f4bb4ceb3bf5ed4803d7e17af Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Sat, 24 Jul 2021 09:31:24 +0800 Subject: [PATCH 043/131] shell.nix: set LLVM_SYS_100_PREFIX. Closes #4 --- shell.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/shell.nix b/shell.nix index 858e68b9..c091f175 100644 --- a/shell.nix +++ b/shell.nix @@ -6,4 +6,5 @@ in buildInputs = with pkgs; [ llvm_10 clang_10 cargo rustc libffi libxml2 clippy ]; + LLVM_SYS_100_PREFIX="${pkgs.llvm_10}"; } From 8618837816ce0b6e9ef7ef80e29f2ff5927d7e0c Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 26 Jul 2021 11:55:37 +0800 Subject: [PATCH 044/131] fixed range unification --- nac3core/src/typecheck/typedef/mod.rs | 126 ++++++++++++++++++++----- nac3core/src/typecheck/typedef/test.rs | 36 +++++++ 2 files changed, 137 insertions(+), 25 deletions(-) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 0164641f..33cda9db 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -204,10 +204,9 @@ impl Unifier { } for v1 in old_range2.iter() { for v2 in range1.iter() { - if !self.shape_match(*v1, *v2) { - continue; + if let Ok(result) = self.shape_match(*v1, *v2){ + range2.push(result.unwrap_or(*v2)); } - range2.push(*v2); } } if range2.is_empty() { @@ -648,39 +647,116 @@ impl Unifier { Ok(()) } - fn shape_match(&mut self, a: Type, b: Type) -> bool { + fn shape_match(&mut self, a: Type, b: Type) -> Result, ()> { use TypeEnum::*; let x = self.get_ty(a); let y = self.get_ty(b); match (x.as_ref(), y.as_ref()) { - (TVar { id, range, .. }, _) => self.check_var_compatible(*id, b, &range.borrow()).is_ok(), - (_, TVar { id, range, .. }) => self.check_var_compatible(*id, a, &range.borrow()).is_ok(), - (TTuple { ty: ty1 }, TTuple { ty: ty2 }) => { - ty1.len() == ty2.len() - && zip(ty1.iter(), ty2.iter()).all(|(a, b)| self.shape_match(*a, *b)) + (TVar { range: range1, .. }, TVar { meta, range: range2, .. 
}) => { + // we should restrict range2 + let range1 = range1.borrow(); + // new range is the intersection of them + // empty range indicates no constraint + if !range1.is_empty() { + let range2 = range2.borrow(); + let mut range = Vec::new(); + if range2.is_empty() { + range.extend_from_slice(&range1); + } + for v1 in range2.iter() { + for v2 in range1.iter() { + let result = self.shape_match(*v1, *v2); + if let Ok(result) = result { + range.push(result.unwrap_or(*v2)); + } + } + } + if range.is_empty() { + Err(()) + } else { + let id = self.var_id + 1; + self.var_id += 1; + let ty = TVar { id, meta: meta.clone(), range: range.into() }; + Ok(Some(self.unification_table.new_key(ty.into()))) + } + } else { + Ok(Some(b)) + } + } + (_, TVar { range, .. }) => { + // range should be restricted to the left hand side + let range = range.borrow(); + if range.is_empty() { + Ok(Some(a)) + } else { + for v in range.iter() { + let result = self.shape_match(a, *v); + if let Ok(result) = result { + return Ok(result.or(Some(a))); + } + } + Err(()) + } + } + (TVar { id, range, .. }, _) => { + self.check_var_compatible(*id, b, &range.borrow()).or(Err(())) + } + (TTuple { ty: ty1 }, TTuple { ty: ty2 }) => { + if ty1.len() != ty2.len() { + return Err(()); + } + let mut need_new = false; + let mut ty = ty1.clone(); + for (a, b) in zip(ty1.iter(), ty2.iter()) { + let result = self.shape_match(*a, *b)?; + ty.push(result.unwrap_or(*a)); + if result.is_some() { + need_new = true; + } + } + if need_new { + Ok(Some(self.add_ty(TTuple { ty }))) + } else { + Ok(None) + } + } + (TList { ty: ty1 }, TList { ty: ty2 }) => { + Ok(self.shape_match(*ty1, *ty2)?.map(|ty| self.add_ty(TList { ty }))) + } + (TVirtual { ty: ty1 }, TVirtual { ty: ty2 }) => { + Ok(self.shape_match(*ty1, *ty2)?.map(|ty| self.add_ty(TVirtual { ty }))) + } + (TObj { obj_id: id1, .. }, TObj { obj_id: id2, .. }) => { + if id1 == id2 { + Ok(None) + } else { + Err(()) + } } - (TList { ty: ty1 }, TList { ty: ty2 }) - | (TVirtual { ty: ty1 }, TVirtual { ty: ty2 }) => self.shape_match(*ty1, *ty2), - (TObj { obj_id: id1, .. }, TObj { obj_id: id2, .. 
}) => id1 == id2, // don't deal with function shape for now - _ => false, + _ => Err(()), } } - fn check_var_compatible(&mut self, id: u32, b: Type, range: &[Type]) -> Result<(), String> { - let mut in_range = range.is_empty(); + fn check_var_compatible( + &mut self, + id: u32, + b: Type, + range: &[Type], + ) -> Result, String> { + if range.is_empty() { + return Ok(None); + } for t in range.iter() { - if self.shape_match(*t, b) { - in_range = true; + let result = self.shape_match(*t, b); + if let Ok(result) = result { + return Ok(result); } } - if !in_range { - return Err(format!( - "Cannot unify type variable {} with {} due to incompatible value range", - id, - self.get_ty(b).get_type_name() - )); - } - Ok(()) + return Err(format!( + "Cannot unify type variable {} with {} due to incompatible value range", + id, + self.get_ty(b).get_type_name() + )); } } diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index f7e09084..993e7333 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -403,4 +403,40 @@ fn test_typevar_range() { env.unifier.unify(float_list, v), Err("Cannot unify type variable 8 with TList due to incompatible value range".to_string()) ); + + let a = env.unifier.get_fresh_var_with_range(&[int, float]).0; + let b = env.unifier.get_fresh_var_with_range(&[boolean, float]).0; + env.unifier.unify(a, b).unwrap(); + env.unifier.unify(a, float).unwrap(); + + let a = env.unifier.get_fresh_var_with_range(&[int, float]).0; + let b = env.unifier.get_fresh_var_with_range(&[boolean, float]).0; + env.unifier.unify(a, b).unwrap(); + assert_eq!( + env.unifier.unify(a, int), + Err("Cannot unify type variable 12 with TObj due to incompatible value range".into()) + ); + + let a = env.unifier.get_fresh_var_with_range(&[int, float]).0; + let b = env.unifier.get_fresh_var_with_range(&[boolean, float]).0; + let a_list = env.unifier.add_ty(TypeEnum::TList { ty: a}); + let a_list = env.unifier.get_fresh_var_with_range(&[a_list]).0; + let b_list = env.unifier.add_ty(TypeEnum::TList { ty: b}); + let b_list = env.unifier.get_fresh_var_with_range(&[b_list]).0; + env.unifier.unify(a_list, b_list).unwrap(); + let float_list = env.unifier.add_ty(TypeEnum::TList { ty: float}); + env.unifier.unify(a_list, float_list).unwrap(); + // previous unifications should not affect a and b + env.unifier.unify(a, int).unwrap(); + + let a = env.unifier.get_fresh_var_with_range(&[int, float]).0; + let b = env.unifier.get_fresh_var_with_range(&[boolean, float]).0; + let a_list = env.unifier.add_ty(TypeEnum::TList { ty: a}); + let b_list = env.unifier.add_ty(TypeEnum::TList { ty: b}); + env.unifier.unify(a_list, b_list).unwrap(); + let int_list = env.unifier.add_ty(TypeEnum::TList { ty: int}); + assert_eq!( + env.unifier.unify(a_list, int_list), + Err("Cannot unify type variable 19 with TObj due to incompatible value range".into()) + ); } From 0941de9ee1faceb73c4291b3f5a020997cb0a470 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdeauducq Date: Mon, 26 Jul 2021 12:29:16 +0800 Subject: [PATCH 045/131] Revert "shell.nix: set LLVM_SYS_100_PREFIX. Closes #4" This reverts commit 53ebe8d8b238b68f4bb4ceb3bf5ed4803d7e17af. 
--- shell.nix | 1 - 1 file changed, 1 deletion(-) diff --git a/shell.nix b/shell.nix index c091f175..858e68b9 100644 --- a/shell.nix +++ b/shell.nix @@ -6,5 +6,4 @@ in buildInputs = with pkgs; [ llvm_10 clang_10 cargo rustc libffi libxml2 clippy ]; - LLVM_SYS_100_PREFIX="${pkgs.llvm_10}"; } From bf31c48bbae997507adb0604378619d3975cabb0 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 26 Jul 2021 14:20:09 +0800 Subject: [PATCH 046/131] fixed missing unification --- nac3core/src/typecheck/typedef/mod.rs | 45 ++++++++++++++------------ nac3core/src/typecheck/typedef/test.rs | 11 +++++++ 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 33cda9db..25a2251a 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -204,7 +204,7 @@ impl Unifier { } for v1 in old_range2.iter() { for v2 in range1.iter() { - if let Ok(result) = self.shape_match(*v1, *v2){ + if let Ok(result) = self.get_intersection(*v1, *v2){ range2.push(result.unwrap_or(*v2)); } } @@ -219,8 +219,9 @@ impl Unifier { } (TVar { meta: Generic, id, range, .. }, _) => { self.occur_check(a, b)?; - self.check_var_compatible(*id, b, &range.borrow())?; - self.set_a_to_b(a, b); + let x = self.check_var_compatibility(*id, b, &range.borrow())?.unwrap_or(b); + self.unify(x, b)?; + self.set_a_to_b(a, x); } (TVar { meta: Sequence(map), id, range, .. }, TTuple { ty }) => { self.occur_check(a, b)?; @@ -236,16 +237,18 @@ impl Unifier { } self.unify(*v, ty[ind as usize])?; } - self.check_var_compatible(*id, b, &range.borrow())?; - self.set_a_to_b(a, b); + let x = self.check_var_compatibility(*id, b, &range.borrow())?.unwrap_or(b); + self.unify(x, b)?; + self.set_a_to_b(a, x); } (TVar { meta: Sequence(map), id, range, .. }, TList { ty }) => { self.occur_check(a, b)?; for v in map.borrow().values() { self.unify(*v, *ty)?; } - self.check_var_compatible(*id, b, &range.borrow())?; - self.set_a_to_b(a, b); + let x = self.check_var_compatibility(*id, b, &range.borrow())?.unwrap_or(b); + self.unify(x, b)?; + self.set_a_to_b(a, x); } (TTuple { ty: ty1 }, TTuple { ty: ty2 }) => { if ty1.len() != ty2.len() { @@ -273,8 +276,9 @@ impl Unifier { return Err(format!("No such attribute {}", k)); } } - self.check_var_compatible(*id, b, &range.borrow())?; - self.set_a_to_b(a, b); + let x = self.check_var_compatibility(*id, b, &range.borrow())?.unwrap_or(b); + self.unify(x, b)?; + self.set_a_to_b(a, x); } (TVar { meta: Record(map), id, range, .. }, TVirtual { ty }) => { self.occur_check(a, b)?; @@ -294,8 +298,9 @@ impl Unifier { // require annotation... return Err("Requires type annotation for virtual".to_string()); } - self.check_var_compatible(*id, b, &range.borrow())?; - self.set_a_to_b(a, b); + let x = self.check_var_compatibility(*id, b, &range.borrow())?.unwrap_or(b); + self.unify(x, b)?; + self.set_a_to_b(a, x); } ( TObj { obj_id: id1, params: params1, .. 
}, @@ -647,7 +652,7 @@ impl Unifier { Ok(()) } - fn shape_match(&mut self, a: Type, b: Type) -> Result, ()> { + fn get_intersection(&mut self, a: Type, b: Type) -> Result, ()> { use TypeEnum::*; let x = self.get_ty(a); let y = self.get_ty(b); @@ -665,7 +670,7 @@ impl Unifier { } for v1 in range2.iter() { for v2 in range1.iter() { - let result = self.shape_match(*v1, *v2); + let result = self.get_intersection(*v1, *v2); if let Ok(result) = result { range.push(result.unwrap_or(*v2)); } @@ -690,7 +695,7 @@ impl Unifier { Ok(Some(a)) } else { for v in range.iter() { - let result = self.shape_match(a, *v); + let result = self.get_intersection(a, *v); if let Ok(result) = result { return Ok(result.or(Some(a))); } @@ -699,7 +704,7 @@ impl Unifier { } } (TVar { id, range, .. }, _) => { - self.check_var_compatible(*id, b, &range.borrow()).or(Err(())) + self.check_var_compatibility(*id, b, &range.borrow()).or(Err(())) } (TTuple { ty: ty1 }, TTuple { ty: ty2 }) => { if ty1.len() != ty2.len() { @@ -708,7 +713,7 @@ impl Unifier { let mut need_new = false; let mut ty = ty1.clone(); for (a, b) in zip(ty1.iter(), ty2.iter()) { - let result = self.shape_match(*a, *b)?; + let result = self.get_intersection(*a, *b)?; ty.push(result.unwrap_or(*a)); if result.is_some() { need_new = true; @@ -721,10 +726,10 @@ impl Unifier { } } (TList { ty: ty1 }, TList { ty: ty2 }) => { - Ok(self.shape_match(*ty1, *ty2)?.map(|ty| self.add_ty(TList { ty }))) + Ok(self.get_intersection(*ty1, *ty2)?.map(|ty| self.add_ty(TList { ty }))) } (TVirtual { ty: ty1 }, TVirtual { ty: ty2 }) => { - Ok(self.shape_match(*ty1, *ty2)?.map(|ty| self.add_ty(TVirtual { ty }))) + Ok(self.get_intersection(*ty1, *ty2)?.map(|ty| self.add_ty(TVirtual { ty }))) } (TObj { obj_id: id1, .. }, TObj { obj_id: id2, .. 
}) => { if id1 == id2 { @@ -738,7 +743,7 @@ impl Unifier { } } - fn check_var_compatible( + fn check_var_compatibility( &mut self, id: u32, b: Type, @@ -748,7 +753,7 @@ impl Unifier { return Ok(None); } for t in range.iter() { - let result = self.shape_match(*t, b); + let result = self.get_intersection(*t, b); if let Ok(result) = result { return Ok(result); } diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index 993e7333..be7401f0 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -439,4 +439,15 @@ fn test_typevar_range() { env.unifier.unify(a_list, int_list), Err("Cannot unify type variable 19 with TObj due to incompatible value range".into()) ); + + let a = env.unifier.get_fresh_var_with_range(&[int, float]).0; + let b = env.unifier.get_fresh_var().0; + let a_list = env.unifier.add_ty(TypeEnum::TList { ty: a}); + let a_list = env.unifier.get_fresh_var_with_range(&[a_list]).0; + let b_list = env.unifier.add_ty(TypeEnum::TList { ty: b}); + env.unifier.unify(a_list, b_list).unwrap(); + assert_eq!( + env.unifier.unify(b, boolean), + Err("Cannot unify type variable 21 with TObj due to incompatible value range".into()) + ); } From 8d0856a58df266dfd8411192574e92c4617b42f2 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 26 Jul 2021 14:38:18 +0800 Subject: [PATCH 047/131] added documentation --- nac3core/src/typecheck/typedef/mod.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 25a2251a..54b8aeb2 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -219,6 +219,13 @@ impl Unifier { } (TVar { meta: Generic, id, range, .. }, _) => { self.occur_check(a, b)?; + // We check for the range of the type variable to see if unification is allowed. + // Note that although b may be compatible with a, we may have to constrain type + // variables in b to make sure that instantiations of b would always be compatible + // with a. + // The return value x of check_var_compatibility would be a new type that is + // guaranteed to be compatible with a under all possible instantiations. So we + // unify x with b to recursively apply the constrains, and then set a to x. let x = self.check_var_compatibility(*id, b, &range.borrow())?.unwrap_or(b); self.unify(x, b)?; self.set_a_to_b(a, x); @@ -319,6 +326,8 @@ impl Unifier { self.set_a_to_b(a, b); } (TCall(calls1), TCall(calls2)) => { + // we do not unify individual calls, instead we defer until the unification wtih a + // function definition. calls2.borrow_mut().extend_from_slice(&calls1.borrow()); } (TCall(calls), TFunc(signature)) => { @@ -330,6 +339,7 @@ impl Unifier { .map(|v| v.name.clone()) .rev() .collect(); + // we unify every calls to the function signature. for c in calls.borrow().iter() { let Call { posargs, kwargs, ret, fun } = c.as_ref(); let instantiated = self.instantiate_fun(b, signature); @@ -341,6 +351,8 @@ impl Unifier { } else { unreachable!(); } + // we check to make sure that all required arguments (those without default + // arguments) are provided, and do not provide the same argument twice. 
let mut required = required.clone(); let mut all_names: Vec<_> = signature.args.iter().map(|v| (v.name.clone(), v.ty)).rev().collect(); From 1d13b16f945c98a6f54909d443e95acf44060169 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 26 Jul 2021 16:00:29 +0800 Subject: [PATCH 048/131] updated function check --- nac3core/src/typecheck/function_check.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/nac3core/src/typecheck/function_check.rs b/nac3core/src/typecheck/function_check.rs index 7d3a3729..e0db2afd 100644 --- a/nac3core/src/typecheck/function_check.rs +++ b/nac3core/src/typecheck/function_check.rs @@ -132,6 +132,7 @@ impl<'a> Inferencer<'a> { Ok(()) } + // check statements for proper identifier def-use and return on all paths fn check_stmt( &mut self, stmt: &Stmt>, @@ -195,7 +196,19 @@ impl<'a> Inferencer<'a> { } Ok(false) } - // break, return, raise, etc. + StmtKind::Return { value } => { + if let Some(value) = value { + self.check_expr(value, defined_identifiers)?; + } + Ok(true) + } + StmtKind::Raise { exc, .. } => { + if let Some(value) = exc { + self.check_expr(value, defined_identifiers)?; + } + Ok(true) + } + // break, raise, etc. _ => Ok(false), } } From 5f0490cd84e9c86a6b82fc42ff72e9ddb8f7c157 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 27 Jul 2021 11:58:35 +0800 Subject: [PATCH 049/131] added virtual test --- nac3core/src/typecheck/type_inferencer/mod.rs | 4 + .../src/typecheck/type_inferencer/test.rs | 104 +++++++++++++++--- 2 files changed, 91 insertions(+), 17 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 8e5aec2a..5eecb182 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -28,6 +28,7 @@ pub struct PrimitiveStore { pub struct Inferencer<'a> { pub resolver: &'a mut Box, pub unifier: &'a mut Unifier, + pub virtual_checks: &'a mut Vec<(Type, Type)>, pub variable_mapping: HashMap, pub calls: &'a mut Vec>, pub primitives: &'a PrimitiveStore, @@ -208,6 +209,7 @@ impl<'a> Inferencer<'a> { let mut new_context = Inferencer { resolver: self.resolver, unifier: self.unifier, + virtual_checks: self.virtual_checks, variable_mapping, calls: self.calls, primitives: self.primitives, @@ -250,6 +252,7 @@ impl<'a> Inferencer<'a> { let mut new_context = Inferencer { resolver: self.resolver, unifier: self.unifier, + virtual_checks: self.virtual_checks, variable_mapping, calls: self.calls, primitives: self.primitives, @@ -318,6 +321,7 @@ impl<'a> Inferencer<'a> { } else { self.unifier.get_fresh_var().0 }; + self.virtual_checks.push((arg0.custom.unwrap(), ty)); let custom = Some(self.unifier.add_ty(TypeEnum::TVirtual { ty })); return Ok(Located { location, diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index be9b1506..598aedf9 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -3,12 +3,14 @@ use super::super::symbol_resolver::*; use super::super::typedef::*; use super::*; use indoc::indoc; +use itertools::zip; use rustpython_parser::ast; use rustpython_parser::parser::parse_program; use test_case::test_case; struct Resolver { identifier_mapping: HashMap, + class_names: HashMap, } impl SymbolResolver for Resolver { @@ -16,8 +18,12 @@ impl SymbolResolver for Resolver { self.identifier_mapping.get(str).cloned() } - fn parse_type_name(&mut self, _: &ast::Expr<()>) -> Option { - 
unimplemented!() + fn parse_type_name(&mut self, ty: &ast::Expr<()>) -> Option { + if let ExprKind::Name { id, .. } = &ty.node { + self.class_names.get(id).cloned() + } else { + unimplemented!() + } } fn get_symbol_value(&mut self, _: &str) -> Option { @@ -36,6 +42,7 @@ struct TestEnvironment { pub primitives: PrimitiveStore, pub id_to_name: HashMap, pub identifier_mapping: HashMap, + pub virtual_checks: Vec<(Type, Type)>, } impl TestEnvironment { @@ -69,13 +76,7 @@ impl TestEnvironment { }); identifier_mapping.insert("None".into(), none); - let primitives = PrimitiveStore { - int32, - int64, - float, - bool, - none, - }; + let primitives = PrimitiveStore { int32, int64, float, bool, none }; let (v0, id) = unifier.get_fresh_var(); @@ -94,6 +95,40 @@ impl TestEnvironment { })), ); + let fun = unifier.add_ty(TypeEnum::TFunc(FunSignature { + args: vec![], + ret: int32, + vars: Default::default(), + })); + let bar = unifier.add_ty(TypeEnum::TObj { + obj_id: 6, + fields: [("a".into(), int32), ("b".into(), fun)].iter().cloned().collect(), + params: Default::default(), + }); + identifier_mapping.insert( + "Bar".into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + args: vec![], + ret: bar, + vars: Default::default(), + })), + ); + + let bar2 = unifier.add_ty(TypeEnum::TObj { + obj_id: 7, + fields: [("a".into(), bool), ("b".into(), fun)].iter().cloned().collect(), + params: Default::default(), + }); + identifier_mapping.insert( + "Bar2".into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + args: vec![], + ret: bar2, + vars: Default::default(), + })), + ); + let class_names = [("Bar".into(), bar), ("Bar2".into(), bar2)].iter().cloned().collect(); + let id_to_name = [ (0, "int32".to_string()), (1, "int64".to_string()), @@ -101,12 +136,16 @@ impl TestEnvironment { (3, "bool".to_string()), (4, "none".to_string()), (5, "Foo".to_string()), + (6, "Bar".to_string()), + (7, "Bar2".to_string()), ] .iter() .cloned() .collect(); - let resolver = Box::new(Resolver { identifier_mapping: identifier_mapping.clone() }) as Box; + let resolver = + Box::new(Resolver { identifier_mapping: identifier_mapping.clone(), class_names }) + as Box; TestEnvironment { unifier, @@ -115,6 +154,7 @@ impl TestEnvironment { id_to_name, identifier_mapping, calls: Vec::new(), + virtual_checks: Vec::new(), } } @@ -125,7 +165,8 @@ impl TestEnvironment { variable_mapping: Default::default(), calls: &mut self.calls, primitives: &mut self.primitives, - return_type: None + virtual_checks: &mut self.virtual_checks, + return_type: None, } } } @@ -136,7 +177,8 @@ impl TestEnvironment { c = 1.234 d = True "}, - [("a", "int32"), ("b", "int64"), ("c", "float"), ("d", "bool")].iter().cloned().collect() + [("a", "int32"), ("b", "int64"), ("c", "float"), ("d", "bool")].iter().cloned().collect(), + &[] ; "primitives test")] #[test_case(indoc! {" a = lambda x, y: x @@ -144,7 +186,8 @@ impl TestEnvironment { c = 1.234 d = b(c) "}, - [("a", "fn[[x=float, y=float], float]"), ("b", "fn[[x=float], float]"), ("c", "float"), ("d", "float")].iter().cloned().collect() + [("a", "fn[[x=float, y=float], float]"), ("b", "fn[[x=float], float]"), ("c", "float"), ("d", "float")].iter().cloned().collect(), + &[] ; "lambda test")] #[test_case(indoc! 
{" a = lambda x: x @@ -160,20 +203,31 @@ impl TestEnvironment { "}, [("a", "fn[[x=bool], bool]"), ("b", "fn[[x=int32], int32]"), ("c", "bool"), - ("d", "int32"), ("foo1", "Foo[bool]"), ("foo2", "Foo[int32]")].iter().cloned().collect() + ("d", "int32"), ("foo1", "Foo[bool]"), ("foo2", "Foo[int32]")].iter().cloned().collect(), + &[] ; "obj test")] #[test_case(indoc! {" f = lambda x: True a = [1, 2, 3] b = [f(x) for x in a if f(x)] "}, - [("a", "list[int32]"), ("b", "list[bool]"), ("f", "fn[[x=int32], bool]")].iter().cloned().collect() + [("a", "list[int32]"), ("b", "list[bool]"), ("f", "fn[[x=int32], bool]")].iter().cloned().collect(), + &[] ; "listcomp test")] -fn test_basic(source: &str, mapping: HashMap<&str, &str>) { +#[test_case(indoc! {" + a = virtual(Bar(), Bar) + b = a.b() + a = virtual(Bar2()) + "}, + [("a", "virtual[Bar]"), ("b", "int32")].iter().cloned().collect(), + &[("Bar", "Bar"), ("Bar2", "Bar")] + ; "virtual test")] +fn test_basic(source: &str, mapping: HashMap<&str, &str>, virtuals: &[(&str, &str)]) { println!("source:\n{}", source); let mut env = TestEnvironment::new(); let id_to_name = std::mem::take(&mut env.id_to_name); - let mut defined_identifiers = env.identifier_mapping.keys().cloned().collect(); + let mut defined_identifiers: Vec<_> = env.identifier_mapping.keys().cloned().collect(); + defined_identifiers.push("virtual".to_string()); let mut inferencer = env.get_inferencer(); let statements = parse_program(source).unwrap(); let statements = statements @@ -201,4 +255,20 @@ fn test_basic(source: &str, mapping: HashMap<&str, &str>) { ); assert_eq!(format!("{}: {}", k, v), format!("{}: {}", k, name)); } + assert_eq!(inferencer.virtual_checks.len(), virtuals.len()); + for ((a, b), (x, y)) in zip(inferencer.virtual_checks.iter(), virtuals) { + let a = inferencer.unifier.stringify( + *a, + &mut |v| id_to_name.get(&v).unwrap().clone(), + &mut |v| format!("v{}", v), + ); + let b = inferencer.unifier.stringify( + *b, + &mut |v| id_to_name.get(&v).unwrap().clone(), + &mut |v| format!("v{}", v), + ); + + assert_eq!(&a, x); + assert_eq!(&b, y); + } } From e15473d2c939bca28bd1f567945b6a2794c63278 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 27 Jul 2021 14:39:53 +0800 Subject: [PATCH 050/131] fixed pattern matching --- nac3core/src/typecheck/function_check.rs | 18 +++++++++++------- nac3core/src/typecheck/type_inferencer/test.rs | 7 +++++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/nac3core/src/typecheck/function_check.rs b/nac3core/src/typecheck/function_check.rs index e0db2afd..29868941 100644 --- a/nac3core/src/typecheck/function_check.rs +++ b/nac3core/src/typecheck/function_check.rs @@ -8,19 +8,23 @@ impl<'a> Inferencer<'a> { &mut self, pattern: &Expr>, defined_identifiers: &mut Vec, - ) { + ) -> Result<(), String> { match &pattern.node { ExprKind::Name { id, .. } => { if !defined_identifiers.contains(id) { defined_identifiers.push(id.clone()); } + Ok(()) } ExprKind::Tuple { elts, .. 
} => { for elt in elts.iter() { - self.check_pattern(elt, defined_identifiers); + self.check_pattern(elt, defined_identifiers)?; } + Ok(()) + } + _ => { + self.check_expr(pattern, defined_identifiers) } - _ => unimplemented!(), } } @@ -106,7 +110,7 @@ impl<'a> Inferencer<'a> { } = &generators[0]; self.check_expr(iter, defined_identifiers)?; let mut defined_identifiers = defined_identifiers.to_vec(); - self.check_pattern(target, &mut defined_identifiers); + self.check_pattern(target, &mut defined_identifiers)?; for term in once(elt.as_ref()).chain(ifs.iter()) { self.check_expr(term, &defined_identifiers)?; } @@ -151,7 +155,7 @@ impl<'a> Inferencer<'a> { self.check_stmt(stmt, defined_identifiers)?; } let mut defined_identifiers = defined_identifiers.clone(); - self.check_pattern(target, &mut defined_identifiers); + self.check_pattern(target, &mut defined_identifiers)?; for stmt in body.iter() { self.check_stmt(stmt, &mut defined_identifiers)?; } @@ -185,14 +189,14 @@ impl<'a> Inferencer<'a> { StmtKind::Assign { targets, value, .. } => { self.check_expr(value, defined_identifiers)?; for target in targets { - self.check_pattern(target, defined_identifiers); + self.check_pattern(target, defined_identifiers)?; } Ok(false) } StmtKind::AnnAssign { target, value, .. } => { if let Some(value) = value { self.check_expr(value, defined_identifiers)?; - self.check_pattern(target, defined_identifiers); + self.check_pattern(target, defined_identifiers)?; } Ok(false) } diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 598aedf9..bae64f28 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -222,6 +222,13 @@ impl TestEnvironment { [("a", "virtual[Bar]"), ("b", "int32")].iter().cloned().collect(), &[("Bar", "Bar"), ("Bar2", "Bar")] ; "virtual test")] +#[test_case(indoc! 
{" + a = [virtual(Bar(), Bar), virtual(Bar2())] + b = [x.b() for x in a] + "}, + [("a", "list[virtual[Bar]]"), ("b", "list[int32]")].iter().cloned().collect(), + &[("Bar", "Bar"), ("Bar2", "Bar")] + ; "virtual list test")] fn test_basic(source: &str, mapping: HashMap<&str, &str>, virtuals: &[(&str, &str)]) { println!("source:\n{}", source); let mut env = TestEnvironment::new(); From f665ea358b08a7c3df864bb0f495c69dba35bcd8 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 28 Jul 2021 10:44:58 +0800 Subject: [PATCH 051/131] fixed fold --- nac3core/src/typecheck/type_inferencer/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 5eecb182..754ecd97 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -56,9 +56,9 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { let stmt = match node.node { // we don't want fold over type annotation ast::StmtKind::AnnAssign { target, annotation, value, simple } => { - let target = Box::new(fold::fold_expr(self, *target)?); + let target = Box::new(self.fold_expr(*target)?); let value = if let Some(v) = value { - let ty = Box::new(fold::fold_expr(self, *v)?); + let ty = Box::new(self.fold_expr(*v)?); self.unifier.unify(target.custom.unwrap(), ty.custom.unwrap())?; Some(ty) } else { From 832513e210f77de786746cd2bcc07e2b65db5e56 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 28 Jul 2021 17:25:19 +0800 Subject: [PATCH 052/131] new is_concrete type check --- nac3core/src/typecheck/function_check.rs | 6 ++-- nac3core/src/typecheck/type_inferencer/mod.rs | 31 +++++++++---------- .../src/typecheck/type_inferencer/test.rs | 14 ++++----- nac3core/src/typecheck/typedef/mod.rs | 25 +++++++++++---- 4 files changed, 43 insertions(+), 33 deletions(-) diff --git a/nac3core/src/typecheck/function_check.rs b/nac3core/src/typecheck/function_check.rs index 29868941..c114f67f 100644 --- a/nac3core/src/typecheck/function_check.rs +++ b/nac3core/src/typecheck/function_check.rs @@ -35,13 +35,11 @@ impl<'a> Inferencer<'a> { ) -> Result<(), String> { // there are some cases where the custom field is None if let Some(ty) = &expr.custom { - let ty = self.unifier.get_ty(*ty); - let ty = ty.as_ref(); - if !ty.is_concrete() { + if !self.unifier.is_concrete(*ty, &self.function_data.bound_variables) { return Err(format!( "expected concrete type at {} but got {}", expr.location, - ty.get_type_name() + self.unifier.get_ty(*ty).get_type_name() )); } } diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 754ecd97..f852fab9 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -25,14 +25,18 @@ pub struct PrimitiveStore { pub none: Type, } +pub struct FunctionData { + pub resolver: Box, + pub return_type: Option, + pub bound_variables: Vec, +} + pub struct Inferencer<'a> { - pub resolver: &'a mut Box, + pub function_data: &'a mut FunctionData, pub unifier: &'a mut Unifier, + pub primitives: &'a PrimitiveStore, pub virtual_checks: &'a mut Vec<(Type, Type)>, pub variable_mapping: HashMap, - pub calls: &'a mut Vec>, - pub primitives: &'a PrimitiveStore, - pub return_type: Option, } struct NaiveFolder(); @@ -65,6 +69,7 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { None }; let annotation_type = self + .function_data .resolver .parse_type_name(annotation.as_ref()) .ok_or_else(|| "cannot parse type 
name".to_string())?; @@ -93,7 +98,7 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { } ast::StmtKind::AnnAssign { .. } | ast::StmtKind::Expr { .. } => {} ast::StmtKind::Break | ast::StmtKind::Continue => {} - ast::StmtKind::Return { value } => match (value, self.return_type) { + ast::StmtKind::Return { value } => match (value, self.function_data.return_type) { (Some(v), Some(v1)) => { self.unifier.unify(v.custom.unwrap(), v1)?; } @@ -171,7 +176,6 @@ impl<'a> Inferencer<'a> { ) -> InferenceResult { let call = Rc::new(Call { posargs: params, kwargs: HashMap::new(), ret, fun: RefCell::new(None) }); - self.calls.push(call.clone()); let call = self.unifier.add_ty(TypeEnum::TCall(vec![call].into())); let fields = once((method, call)).collect(); let record = self.unifier.add_record(fields); @@ -207,13 +211,11 @@ impl<'a> Inferencer<'a> { variable_mapping.extend(fn_args.iter().cloned()); let ret = self.unifier.get_fresh_var().0; let mut new_context = Inferencer { - resolver: self.resolver, + function_data: self.function_data, unifier: self.unifier, + primitives: self.primitives, virtual_checks: self.virtual_checks, variable_mapping, - calls: self.calls, - primitives: self.primitives, - return_type: self.return_type, }; let fun = FunSignature { args: fn_args @@ -250,13 +252,11 @@ impl<'a> Inferencer<'a> { } let variable_mapping = self.variable_mapping.clone(); let mut new_context = Inferencer { - resolver: self.resolver, + function_data: self.function_data, unifier: self.unifier, virtual_checks: self.virtual_checks, variable_mapping, - calls: self.calls, primitives: self.primitives, - return_type: self.return_type, }; let elt = new_context.fold_expr(elt)?; let generator = generators.pop().unwrap(); @@ -315,7 +315,7 @@ impl<'a> Inferencer<'a> { } let arg0 = self.fold_expr(args.remove(0))?; let ty = if let Some(arg) = args.pop() { - self.resolver + self.function_data.resolver .parse_type_name(&arg) .ok_or_else(|| "error parsing type".to_string())? 
} else { @@ -379,7 +379,6 @@ impl<'a> Inferencer<'a> { fun: RefCell::new(None), ret, }); - self.calls.push(call.clone()); let call = self.unifier.add_ty(TypeEnum::TCall(vec![call].into())); self.unifier.unify(func.custom.unwrap(), call)?; @@ -390,7 +389,7 @@ impl<'a> Inferencer<'a> { if let Some(ty) = self.variable_mapping.get(id) { Ok(*ty) } else { - Ok(self.resolver.get_symbol_type(id).unwrap_or_else(|| { + Ok(self.function_data.resolver.get_symbol_type(id).unwrap_or_else(|| { let ty = self.unifier.get_fresh_var().0; self.variable_mapping.insert(id.to_string(), ty); ty diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index bae64f28..16a5ffff 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -37,8 +37,7 @@ impl SymbolResolver for Resolver { struct TestEnvironment { pub unifier: Unifier, - pub resolver: Box, - pub calls: Vec>, + pub function_data: FunctionData, pub primitives: PrimitiveStore, pub id_to_name: HashMap, pub identifier_mapping: HashMap, @@ -149,24 +148,25 @@ impl TestEnvironment { TestEnvironment { unifier, - resolver, + function_data: FunctionData { + resolver, + bound_variables: Vec::new(), + return_type: None + }, primitives, id_to_name, identifier_mapping, - calls: Vec::new(), virtual_checks: Vec::new(), } } fn get_inferencer(&mut self) -> Inferencer { Inferencer { - resolver: &mut self.resolver, + function_data: &mut self.function_data, unifier: &mut self.unifier, variable_mapping: Default::default(), - calls: &mut self.calls, primitives: &mut self.primitives, virtual_checks: &mut self.virtual_checks, - return_type: None, } } } diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 54b8aeb2..c97926da 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -83,10 +83,6 @@ impl TypeEnum { TypeEnum::TFunc { .. } => "TFunc", } } - - pub fn is_concrete(&self) -> bool { - !matches!(self, TypeEnum::TVar { .. }) - } } pub struct Unifier { @@ -143,6 +139,23 @@ impl Unifier { (self.add_ty(TypeEnum::TVar { id, range, meta: TypeVarMeta::Generic }), id) } + pub fn is_concrete(&mut self, a: Type, allowed_typevars: &[Type]) -> bool { + use TypeEnum::*; + match &*self.get_ty(a) { + TVar { .. } => allowed_typevars.iter().any(|b| self.unification_table.unioned(a, *b)), + TCall { .. } => false, + TList { ty } => self.is_concrete(*ty, allowed_typevars), + TTuple { ty } => ty.iter().all(|ty| self.is_concrete(*ty, allowed_typevars)), + TObj { params: vars, .. } => { + vars.values().all(|ty| self.is_concrete(*ty, allowed_typevars)) + } + // functions are instantiated for each call sites, so the function type can contain + // type variables. + TFunc { .. } => true, + TVirtual { ty } => self.is_concrete(*ty, allowed_typevars), + } + } + pub fn unify(&mut self, a: Type, b: Type) -> Result<(), String> { if self.unification_table.unioned(a, b) { Ok(()) @@ -204,7 +217,7 @@ impl Unifier { } for v1 in old_range2.iter() { for v2 in range1.iter() { - if let Ok(result) = self.get_intersection(*v1, *v2){ + if let Ok(result) = self.get_intersection(*v1, *v2) { range2.push(result.unwrap_or(*v2)); } } @@ -486,7 +499,7 @@ impl Unifier { Err(format!("Cannot unify {} with {}", a.get_type_name(), b.get_type_name())) } - /// Instantiate a function if it hasn't been instntiated. + /// Instantiate a function if it hasn't been instantiated. /// Returns Some(T) where T is the instantiated type. 
/// Returns None if the function is already instantiated. fn instantiate_fun(&mut self, ty: Type, fun: &FunSignature) -> Type { From 09e76efcf723813d41b7d4f0216821db9977eba3 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Thu, 29 Jul 2021 15:36:19 +0800 Subject: [PATCH 053/131] start adding primitive magic methods --- .../src/typecheck/type_inferencer/test.rs | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 16a5ffff..fdbf7f25 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -45,6 +45,49 @@ struct TestEnvironment { } impl TestEnvironment { + pub fn basic_test_env() -> Option { + use rustpython_parser::ast::Operator::*; + let mut unifier = Unifier::new(); + // let mut identifier_mapping = HashMap::new(); + + let int32 = unifier.add_ty(TypeEnum::TObj { + obj_id: 0, + fields: HashMap::new(), + params: HashMap::new(), + }); + let int64 = unifier.add_ty(TypeEnum::TObj { + obj_id: 1, + fields: HashMap::new(), + params: HashMap::new(), + }); + let float = unifier.add_ty(TypeEnum::TObj { + obj_id: 2, + fields: HashMap::new(), + params: HashMap::new(), + }); + let bool = unifier.add_ty(TypeEnum::TObj { + obj_id: 3, + fields: HashMap::new(), + params: HashMap::new(), + }); + let none = unifier.add_ty(TypeEnum::TObj { + obj_id: 4, + fields: HashMap::new(), + params: HashMap::new(), + }); + // identifier_mapping.insert("None".into(), none); + let primitives = PrimitiveStore { int32, int64, float, bool, none }; + + // if let TypeEnum::TObj {ref fields, ref params, .. } = *unifier.get_ty(int32) { + // for op in [Add, Sub, Mult, MatMult, Div, Mod, Pow, LShift, RShift, BitOr, BitXor, BitAnd, FloorDiv].into_iter() { + // let call = Rc::new(Call {posargs: vec![int32], kwargs: HashMap::new(), ret: int32, fun: RefCell::new(None)}); + // }; + // None + // } else { + // None + // } + } + fn new() -> TestEnvironment { let mut unifier = Unifier::new(); let mut identifier_mapping = HashMap::new(); From f2c5a9b352887ff3c25fe4400b0d0337c21d7012 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 30 Jul 2021 11:01:11 +0800 Subject: [PATCH 054/131] added location -> call mapping This allows code generation module to get function instantiation parameter directly. 
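A rough usage sketch of the new mapping (illustrative only, not part of this patch; the
helper name and the exact field access are assumptions based on the Call definition and
the surrounding module's imports):

    // hypothetical code-generation helper: the inferencer now records an
    // Rc<Call> under the CodeLocation of every call expression it folds
    fn call_signature_at(calls: &HashMap<CodeLocation, Rc<Call>>, loc: Location) -> Option<Type> {
        // `fun` starts out as RefCell::new(None) and is expected to hold the
        // instantiated function type once unification with the callee succeeds
        calls.get(&loc.into()).and_then(|call| *call.fun.borrow())
    }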
--- nac3core/src/typecheck/type_inferencer/mod.rs | 21 ++++++++++++++++++- .../src/typecheck/type_inferencer/test.rs | 5 ++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index f852fab9..f002eff9 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -1,6 +1,6 @@ use std::cell::RefCell; use std::collections::HashMap; -use std::convert::TryInto; +use std::convert::{TryInto, From}; use std::iter::once; use std::rc::Rc; @@ -17,6 +17,21 @@ use rustpython_parser::ast::{ #[cfg(test)] mod test; +#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)] +pub struct CodeLocation { + row: usize, + col: usize, +} + +impl From for CodeLocation { + fn from(loc: Location) -> CodeLocation { + CodeLocation { + row: loc.row(), + col: loc.column() + } + } +} + pub struct PrimitiveStore { pub int32: Type, pub int64: Type, @@ -37,6 +52,7 @@ pub struct Inferencer<'a> { pub primitives: &'a PrimitiveStore, pub virtual_checks: &'a mut Vec<(Type, Type)>, pub variable_mapping: HashMap, + pub calls: &'a mut HashMap>, } struct NaiveFolder(); @@ -215,6 +231,7 @@ impl<'a> Inferencer<'a> { unifier: self.unifier, primitives: self.primitives, virtual_checks: self.virtual_checks, + calls: self.calls, variable_mapping, }; let fun = FunSignature { @@ -257,6 +274,7 @@ impl<'a> Inferencer<'a> { virtual_checks: self.virtual_checks, variable_mapping, primitives: self.primitives, + calls: self.calls, }; let elt = new_context.fold_expr(elt)?; let generator = generators.pop().unwrap(); @@ -379,6 +397,7 @@ impl<'a> Inferencer<'a> { fun: RefCell::new(None), ret, }); + self.calls.insert(location.into(), call.clone()); let call = self.unifier.add_ty(TypeEnum::TCall(vec![call].into())); self.unifier.unify(func.custom.unwrap(), call)?; diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 16a5ffff..c1bceed5 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -42,6 +42,7 @@ struct TestEnvironment { pub id_to_name: HashMap, pub identifier_mapping: HashMap, pub virtual_checks: Vec<(Type, Type)>, + pub calls: HashMap>, } impl TestEnvironment { @@ -151,12 +152,13 @@ impl TestEnvironment { function_data: FunctionData { resolver, bound_variables: Vec::new(), - return_type: None + return_type: None, }, primitives, id_to_name, identifier_mapping, virtual_checks: Vec::new(), + calls: HashMap::new(), } } @@ -167,6 +169,7 @@ impl TestEnvironment { variable_mapping: Default::default(), primitives: &mut self.primitives, virtual_checks: &mut self.virtual_checks, + calls: &mut self.calls, } } } From 743a9384a30b0d3474ec65840044cb99bf5aec71 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 30 Jul 2021 11:28:27 +0800 Subject: [PATCH 055/131] added rigid type variable --- nac3core/src/typecheck/typedef/mod.rs | 22 ++++++++++++++- nac3core/src/typecheck/typedef/test.rs | 38 ++++++++++++++++++++------ 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index c97926da..6980f00c 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -47,6 +47,9 @@ pub enum TypeVarMeta { #[derive(Clone)] pub enum TypeEnum { + TRigidVar { + id: u32, + }, TVar { id: u32, meta: TypeVarMeta, @@ -74,6 +77,7 @@ pub enum TypeEnum { impl TypeEnum { pub fn 
get_type_name(&self) -> &'static str { match self { + TypeEnum::TRigidVar { .. } => "TRigidVar", TypeEnum::TVar { .. } => "TVar", TypeEnum::TTuple { .. } => "TTuple", TypeEnum::TList { .. } => "TList", @@ -127,6 +131,12 @@ impl Unifier { self.unification_table.probe_value(a).clone() } + pub fn get_fresh_rigid_var(&mut self) -> (Type, u32) { + let id = self.var_id + 1; + self.var_id += 1; + (self.add_ty(TypeEnum::TRigidVar { id }), id) + } + pub fn get_fresh_var(&mut self) -> (Type, u32) { self.get_fresh_var_with_range(&[]) } @@ -139,9 +149,17 @@ impl Unifier { (self.add_ty(TypeEnum::TVar { id, range, meta: TypeVarMeta::Generic }), id) } + /// Unification would not unify rigid variables with other types, but we want to do this for + /// function instantiations, so we make it explicit. + pub fn replace_rigid_var(&mut self, rigid: Type, b: Type) { + assert!(matches!(&*self.get_ty(rigid), TypeEnum::TRigidVar { .. })); + self.set_a_to_b(rigid, b); + } + pub fn is_concrete(&mut self, a: Type, allowed_typevars: &[Type]) -> bool { use TypeEnum::*; match &*self.get_ty(a) { + TRigidVar { .. } => true, TVar { .. } => allowed_typevars.iter().any(|b| self.unification_table.unioned(a, *b)), TCall { .. } => false, TList { ty } => self.is_concrete(*ty, allowed_typevars), @@ -435,6 +453,7 @@ impl Unifier { use TypeVarMeta::*; let ty = self.unification_table.probe_value(ty).clone(); match ty.as_ref() { + TypeEnum::TRigidVar { id } => var_to_name(*id), TypeEnum::TVar { id, meta: Generic, .. } => var_to_name(*id), TypeEnum::TVar { meta: Sequence(map), .. } => { let fields = map @@ -544,6 +563,7 @@ impl Unifier { // variables, i.e. things like TRecord, TCall should not occur, and we // should be safe to not implement the substitution for those variants. match &*ty { + TypeEnum::TRigidVar { .. } => None, TypeEnum::TVar { id, meta: Generic, .. } => mapping.get(&id).cloned(), TypeEnum::TTuple { ty } => { let mut new_ty = Cow::from(ty); @@ -634,7 +654,7 @@ impl Unifier { let ty = self.unification_table.probe_value(b).clone(); match ty.as_ref() { - TypeEnum::TVar { meta: Generic, .. } => {} + TypeEnum::TRigidVar { .. } | TypeEnum::TVar { meta: Generic, .. } => {} TypeEnum::TVar { meta: Sequence(map), .. 
} => { for t in map.borrow().values() { self.occur_check(a, *t)?; diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index be7401f0..cf0cc9c0 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -419,22 +419,22 @@ fn test_typevar_range() { let a = env.unifier.get_fresh_var_with_range(&[int, float]).0; let b = env.unifier.get_fresh_var_with_range(&[boolean, float]).0; - let a_list = env.unifier.add_ty(TypeEnum::TList { ty: a}); + let a_list = env.unifier.add_ty(TypeEnum::TList { ty: a }); let a_list = env.unifier.get_fresh_var_with_range(&[a_list]).0; - let b_list = env.unifier.add_ty(TypeEnum::TList { ty: b}); + let b_list = env.unifier.add_ty(TypeEnum::TList { ty: b }); let b_list = env.unifier.get_fresh_var_with_range(&[b_list]).0; env.unifier.unify(a_list, b_list).unwrap(); - let float_list = env.unifier.add_ty(TypeEnum::TList { ty: float}); + let float_list = env.unifier.add_ty(TypeEnum::TList { ty: float }); env.unifier.unify(a_list, float_list).unwrap(); // previous unifications should not affect a and b env.unifier.unify(a, int).unwrap(); let a = env.unifier.get_fresh_var_with_range(&[int, float]).0; let b = env.unifier.get_fresh_var_with_range(&[boolean, float]).0; - let a_list = env.unifier.add_ty(TypeEnum::TList { ty: a}); - let b_list = env.unifier.add_ty(TypeEnum::TList { ty: b}); + let a_list = env.unifier.add_ty(TypeEnum::TList { ty: a }); + let b_list = env.unifier.add_ty(TypeEnum::TList { ty: b }); env.unifier.unify(a_list, b_list).unwrap(); - let int_list = env.unifier.add_ty(TypeEnum::TList { ty: int}); + let int_list = env.unifier.add_ty(TypeEnum::TList { ty: int }); assert_eq!( env.unifier.unify(a_list, int_list), Err("Cannot unify type variable 19 with TObj due to incompatible value range".into()) @@ -442,12 +442,34 @@ fn test_typevar_range() { let a = env.unifier.get_fresh_var_with_range(&[int, float]).0; let b = env.unifier.get_fresh_var().0; - let a_list = env.unifier.add_ty(TypeEnum::TList { ty: a}); + let a_list = env.unifier.add_ty(TypeEnum::TList { ty: a }); let a_list = env.unifier.get_fresh_var_with_range(&[a_list]).0; - let b_list = env.unifier.add_ty(TypeEnum::TList { ty: b}); + let b_list = env.unifier.add_ty(TypeEnum::TList { ty: b }); env.unifier.unify(a_list, b_list).unwrap(); assert_eq!( env.unifier.unify(b, boolean), Err("Cannot unify type variable 21 with TObj due to incompatible value range".into()) ); } + +#[test] +fn test_rigid_var() { + let mut env = TestEnvironment::new(); + let a = env.unifier.get_fresh_rigid_var().0; + let b = env.unifier.get_fresh_rigid_var().0; + let x = env.unifier.get_fresh_var().0; + let list_a = env.unifier.add_ty(TypeEnum::TList { ty: a }); + let list_x = env.unifier.add_ty(TypeEnum::TList { ty: x }); + let int = env.parse("int", &HashMap::new()); + let list_int = env.parse("List[int]", &HashMap::new()); + + assert_eq!(env.unifier.unify(a, b), Err("Cannot unify TRigidVar with TRigidVar".to_string())); + env.unifier.unify(list_a, list_x).unwrap(); + assert_eq!( + env.unifier.unify(list_x, list_int), + Err("Cannot unify TObj with TRigidVar".to_string()) + ); + + env.unifier.replace_rigid_var(a, int); + env.unifier.unify(list_x, list_int).unwrap(); +} From 7ad8e2d81d53f92e8d197a4a53bfb6d501315c41 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 30 Jul 2021 13:50:46 +0800 Subject: [PATCH 056/131] cleanup some error reporting code --- nac3core/src/typecheck/typedef/mod.rs | 28 +++++++++++---------------- 1 file changed, 11 
insertions(+), 17 deletions(-) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 6980f00c..36809e59 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -308,11 +308,8 @@ impl Unifier { (TVar { meta: Record(map), id, range, .. }, TObj { fields, .. }) => { self.occur_check(a, b)?; for (k, v) in map.borrow().iter() { - if let Some(ty) = fields.get(k) { - self.unify(*ty, *v)?; - } else { - return Err(format!("No such attribute {}", k)); - } + let ty = fields.get(k).ok_or_else(|| format!("No such attribute {}", k))?; + self.unify(*ty, *v)?; } let x = self.check_var_compatibility(*id, b, &range.borrow())?.unwrap_or(b); self.unify(x, b)?; @@ -323,14 +320,11 @@ impl Unifier { let ty = self.get_ty(*ty); if let TObj { fields, .. } = ty.as_ref() { for (k, v) in map.borrow().iter() { - if let Some(ty) = fields.get(k) { - if !matches!(self.get_ty(*ty).as_ref(), TFunc { .. }) { - return Err(format!("Cannot access field {} for virtual type", k)); - } - self.unify(*v, *ty)?; - } else { - return Err(format!("No such attribute {}", k)); + let ty = fields.get(k).ok_or_else(|| format!("No such attribute {}", k))?; + if !matches!(self.get_ty(*ty).as_ref(), TFunc { .. }) { + return Err(format!("Cannot access field {} for virtual type", k)); } + self.unify(*v, *ty)?; } } else { // require annotation... @@ -400,11 +394,11 @@ impl Unifier { if let Some(i) = required.iter().position(|v| v == k) { required.remove(i); } - if let Some(i) = all_names.iter().position(|v| &v.0 == k) { - self.unify(all_names.remove(i).1, *t)?; - } else { - return Err(format!("Unknown keyword argument {}", k)); - } + let i = all_names + .iter() + .position(|v| &v.0 == k) + .ok_or_else(|| format!("Unknown keyword argument {}", k))?; + self.unify(all_names.remove(i).1, *t)?; } if !required.is_empty() { return Err("Expected more arguments".to_string()); From 9983aa62e6ae80bb5b8da37fa1a813c50217c7cd Mon Sep 17 00:00:00 2001 From: CrescentonC Date: Fri, 30 Jul 2021 15:40:14 +0800 Subject: [PATCH 057/131] add primitive magic methods --- nac3core/src/typecheck/magic_methods.rs | 364 ++++++++++++++++++ .../src/typecheck/type_inferencer/test.rs | 46 ++- 2 files changed, 399 insertions(+), 11 deletions(-) diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index 7e2955f5..999a634e 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -1,3 +1,4 @@ +use std::{collections::HashMap, rc::Rc}; use rustpython_parser::ast::{Cmpop, Operator, Unaryop}; pub fn binop_name(op: &Operator) -> &'static str { @@ -56,3 +57,366 @@ pub fn comparison_name(op: &Cmpop) -> Option<&'static str> { _ => None, } } + +use crate::typecheck::{type_inferencer::*, typedef::{FunSignature, FuncArg, TypeEnum, Unifier}}; +use rustpython_parser::ast; +pub fn set_primirives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifier) { + // int32 -------- + if let Some(TypeEnum::TObj {fields, .. 
}) = Rc::get_mut(&mut unifier.get_ty(store.int32)) { + for op in &[ + ast::Operator::Add, + ast::Operator::Sub, + ast::Operator::Mult, + ast::Operator::Mod, + ast::Operator::Pow, + ast::Operator::LShift, + ast::Operator::RShift, + ast::Operator::BitOr, + ast::Operator::BitXor, + ast::Operator::BitAnd, + ast::Operator::FloorDiv + ] { + fields.insert( + binop_name(op).to_string(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.int32, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.int32, + is_optional: false, + name: "other".into() // the name does not matter here + }], + })) + ); + + fields.insert( + binop_assign_name(op).to_string(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.none, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.int32, + is_optional: false, + name: "other".into() + }] + })) + ); + }; + // int div int gets float + fields.insert( + binop_assign_name(&ast::Operator::Div).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.float, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.int32, + is_optional: false, + name: "other".into() + }] + })) + ); + + for op in &[ + ast::Cmpop::Eq, + ast::Cmpop::NotEq, + ast::Cmpop::Lt, + ast::Cmpop::LtE, + ast::Cmpop::Gt, + ast::Cmpop::GtE, + ] { + fields.insert( + comparison_name(op).unwrap().to_string(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.bool, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.int32, + is_optional: false, + name: "other".into() + }], + })) + ); + } + + for op in &[ + ast::Unaryop::UAdd, + ast::Unaryop::USub, + ast::Unaryop::Not, + ast::Unaryop::Invert, + ] { + fields.insert( + unaryop_name(op).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.int32, + vars: HashMap::new(), + args: vec![] + })) + ); + } + } else { unreachable!() } + // int32 -------- + // int64 -------- + if let Some(TypeEnum::TObj {fields, .. 
}) = Rc::get_mut(&mut unifier.get_ty(store.int64)) { + for op in &[ + ast::Operator::Add, + ast::Operator::Sub, + ast::Operator::Mult, + ast::Operator::Mod, + ast::Operator::Pow, + ast::Operator::LShift, + ast::Operator::RShift, + ast::Operator::BitOr, + ast::Operator::BitXor, + ast::Operator::BitAnd, + ast::Operator::FloorDiv + ] { + fields.insert( + binop_name(op).to_string(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.int64, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.int64, + is_optional: false, + name: "other".into() // the name does not matter here + }], + })) + ); + + fields.insert( + binop_assign_name(op).to_string(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.none, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.int64, + is_optional: false, + name: "other".into() + }] + })) + ); + }; + fields.insert( + binop_assign_name(&ast::Operator::Div).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.float, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.int64, + is_optional: false, + name: "other".into() + }] + })) + ); + + for op in &[ + ast::Cmpop::Eq, + ast::Cmpop::NotEq, + ast::Cmpop::Lt, + ast::Cmpop::LtE, + ast::Cmpop::Gt, + ast::Cmpop::GtE, + ] { + fields.insert( + comparison_name(op).unwrap().to_string(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.bool, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.int64, + is_optional: false, + name: "other".into() + }], + })) + ); + } + + for op in &[ + ast::Unaryop::UAdd, + ast::Unaryop::USub, + ast::Unaryop::Not, + ast::Unaryop::Invert, + ] { + fields.insert( + unaryop_name(op).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.int64, + vars: HashMap::new(), + args: vec![] + })) + ); + } + } else { unreachable!() } + // int64 -------- + // float -------- + if let Some(TypeEnum::TObj {fields, .. }) = Rc::get_mut(&mut unifier.get_ty(store.float)) { + for op in &[ + ast::Operator::Add, + ast::Operator::Sub, + ast::Operator::Mult, + ast::Operator::Div, + ast::Operator::Mod, + ast::Operator::Pow, + ast::Operator::FloorDiv, + ] { + fields.insert( + binop_name(op).to_string(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.float, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.float, + is_optional: false, + name: "other".into() // the name does not matter here + }], + })) + ); + + fields.insert( + binop_assign_name(op).to_string(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.none, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.float, + is_optional: false, + name: "other".into() + }] + })) + ); + }; + + for op in &[ + ast::Cmpop::Eq, + ast::Cmpop::NotEq, + ast::Cmpop::Lt, + ast::Cmpop::LtE, + ast::Cmpop::Gt, + ast::Cmpop::GtE, + ] { + fields.insert( + comparison_name(op).unwrap().to_string(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.bool, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.float, + is_optional: false, + name: "other".into() + }], + })) + ); + } + + for op in &[ + ast::Unaryop::UAdd, + ast::Unaryop::USub, + ast::Unaryop::Not, + ] { + fields.insert( + unaryop_name(op).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.int64, + vars: HashMap::new(), + args: vec![] + })) + ); + } + } else { unreachable!() } + // float -------- + // bool --------- + if let Some(TypeEnum::TObj {fields, .. 
}) = Rc::get_mut(&mut unifier.get_ty(store.bool)) { + for op in &[ + ast::Operator::Add, + ast::Operator::Sub, + ast::Operator::Mult, + ast::Operator::Mod, + ast::Operator::Pow, + ast::Operator::LShift, + ast::Operator::RShift, + ast::Operator::FloorDiv + ] { + fields.insert( + binop_name(op).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.int32, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.bool, + is_optional: false, + name: "other".into() + }] + })) + ); + + fields.insert( + binop_name(op).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.int32, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.int32, + is_optional: false, + name: "other".into() + }] + })) + ); + + // binop_assignment will change type? + /* fields.insert( + binop_assignment_name(op).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.none, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.bool, + is_optional: false, + name: "other".into() + }] + })) + ); */ + }; + + for op in &[ + ast::Operator::BitOr, + ast::Operator::BitXor, + ast::Operator::BitAnd + ] { + fields.insert( + binop_name(op).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.bool, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.int32, + is_optional: false, + name: "other".into() + }] + })) + ); + }; + + for op in &[ + ast::Cmpop::Eq, + ast::Cmpop::NotEq, + ast::Cmpop::Lt, + ast::Cmpop::LtE, + ast::Cmpop::Gt, + ast::Cmpop::GtE, + ] { + fields.insert( + comparison_name(op).unwrap().into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.int32, + vars: HashMap::new(), + args: vec![FuncArg { + ty: store.bool, + is_optional: false, + name: "other".into() + }] + })) + ); + } + } + // bool -------- +} \ No newline at end of file diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index fdbf7f25..d80b0521 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -45,10 +45,8 @@ struct TestEnvironment { } impl TestEnvironment { - pub fn basic_test_env() -> Option { - use rustpython_parser::ast::Operator::*; + pub fn basic_test_env() -> TestEnvironment { let mut unifier = Unifier::new(); - // let mut identifier_mapping = HashMap::new(); let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: 0, @@ -77,15 +75,41 @@ impl TestEnvironment { }); // identifier_mapping.insert("None".into(), none); let primitives = PrimitiveStore { int32, int64, float, bool, none }; + set_primirives_magic_methods(&primitives, &mut unifier); + + let id_to_name = [ + (0, "int32".to_string()), + (1, "int64".to_string()), + (2, "float".to_string()), + (3, "bool".to_string()), + (4, "none".to_string()), + (5, "Foo".to_string()), + (6, "Bar".to_string()), + (7, "Bar2".to_string()), + ] + .iter() + .cloned() + .collect(); - // if let TypeEnum::TObj {ref fields, ref params, .. 
} = *unifier.get_ty(int32) { - // for op in [Add, Sub, Mult, MatMult, Div, Mod, Pow, LShift, RShift, BitOr, BitXor, BitAnd, FloorDiv].into_iter() { - // let call = Rc::new(Call {posargs: vec![int32], kwargs: HashMap::new(), ret: int32, fun: RefCell::new(None)}); - // }; - // None - // } else { - // None - // } + let mut identifier_mapping = HashMap::new(); + identifier_mapping.insert("None".into(), none); + + let resolver = + Box::new(Resolver { identifier_mapping: identifier_mapping.clone(), class_names: Default::default() }) + as Box; + + TestEnvironment { + unifier, + function_data: FunctionData { + resolver, + bound_variables: Vec::new(), + return_type: None + }, + primitives, + id_to_name, + identifier_mapping, + virtual_checks: Vec::new(), + } } fn new() -> TestEnvironment { From b87c627c4159d3af6ed5c58432b18110d0c55b58 Mon Sep 17 00:00:00 2001 From: CrescentonC Date: Fri, 30 Jul 2021 15:46:57 +0800 Subject: [PATCH 058/131] updated with field in the test environment --- nac3core/src/typecheck/type_inferencer/test.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 3912c453..eec99f2a 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -110,6 +110,7 @@ impl TestEnvironment { id_to_name, identifier_mapping, virtual_checks: Vec::new(), + calls: HashMap::new(), } } From eba92ed8bd2963cf7b0f14eabc57aa63a58df8e2 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 30 Jul 2021 16:32:50 +0800 Subject: [PATCH 059/131] added method to get all instantiations --- nac3core/src/typecheck/typedef/mod.rs | 65 ++++++++++++++++++++++++++ nac3core/src/typecheck/typedef/test.rs | 48 +++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 36809e59..e3dc8337 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -156,6 +156,71 @@ impl Unifier { self.set_a_to_b(rigid, b); } + pub fn get_instantiations(&mut self, ty: Type) -> Option> { + match &*self.get_ty(ty) { + TypeEnum::TVar { range, .. } => { + let range = range.borrow(); + if range.is_empty() { + None + } else { + Some( + range + .iter() + .map(|ty| self.get_instantiations(*ty).unwrap_or_else(|| vec![*ty])) + .flatten() + .collect_vec(), + ) + } + } + TypeEnum::TList { ty } => self + .get_instantiations(*ty) + .map(|ty| ty.iter().map(|&ty| self.add_ty(TypeEnum::TList { ty })).collect_vec()), + TypeEnum::TVirtual { ty } => self.get_instantiations(*ty).map(|ty| { + ty.iter().map(|&ty| self.add_ty(TypeEnum::TVirtual { ty })).collect_vec() + }), + TypeEnum::TTuple { ty } => { + let tuples = ty + .iter() + .map(|ty| self.get_instantiations(*ty).unwrap_or_else(|| vec![*ty])) + .multi_cartesian_product() + .collect_vec(); + if tuples.len() == 1 { + None + } else { + Some( + tuples.into_iter().map(|ty| self.add_ty(TypeEnum::TTuple { ty })).collect(), + ) + } + } + TypeEnum::TObj { params, .. 
} => { + let (keys, params): (Vec<&u32>, Vec<&Type>) = params.iter().unzip(); + let params = params + .into_iter() + .map(|ty| self.get_instantiations(*ty).unwrap_or_else(|| vec![*ty])) + .multi_cartesian_product() + .collect_vec(); + if params.len() <= 1 { + None + } else { + Some( + params + .into_iter() + .map(|params| { + self.subst( + ty, + &zip(keys.iter().cloned().cloned(), params.iter().cloned()) + .collect(), + ) + .unwrap_or(ty) + }) + .collect(), + ) + } + } + _ => None, + } + } + pub fn is_concrete(&mut self, a: Type, allowed_typevars: &[Type]) -> bool { use TypeEnum::*; match &*self.get_ty(a) { diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index cf0cc9c0..0f9128ee 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -2,6 +2,7 @@ use super::*; use itertools::Itertools; use std::collections::HashMap; use test_case::test_case; +use indoc::indoc; impl Unifier { /// Check whether two types are equal. @@ -473,3 +474,50 @@ fn test_rigid_var() { env.unifier.replace_rigid_var(a, int); env.unifier.unify(list_x, list_int).unwrap(); } + +#[test] +fn test_instantiation() { + let mut env = TestEnvironment::new(); + let int = env.parse("int", &HashMap::new()); + let boolean = env.parse("bool", &HashMap::new()); + let float = env.parse("float", &HashMap::new()); + let list_int = env.parse("List[int]", &HashMap::new()); + + let obj_map: HashMap<_, _> = + [(0usize, "int"), (1, "float"), (2, "bool")].iter().cloned().collect(); + + let v = env.unifier.get_fresh_var_with_range(&[int, boolean]).0; + let list_v = env.unifier.add_ty(TypeEnum::TList { ty: v }); + let v1 = env.unifier.get_fresh_var_with_range(&[list_v, int]).0; + let v2 = env.unifier.get_fresh_var_with_range(&[list_int, float]).0; + let t = env.unifier.get_fresh_rigid_var().0; + let tuple = env.unifier.add_ty(TypeEnum::TTuple { ty: vec![v, v1, v2] }); + let v3 = env.unifier.get_fresh_var_with_range(&[tuple, t]).0; + + let types = env.unifier.get_instantiations(v3).unwrap(); + let expected_types = indoc! 
{" + tuple[bool, int, float] + tuple[bool, int, list[int]] + tuple[bool, list[bool], float] + tuple[bool, list[bool], list[int]] + tuple[bool, list[int], float] + tuple[bool, list[int], list[int]] + tuple[int, int, float] + tuple[int, int, list[int]] + tuple[int, list[bool], float] + tuple[int, list[bool], list[int]] + tuple[int, list[int], float] + tuple[int, list[int], list[int]] + v5" + }.split('\n').collect_vec(); + let types = types + .iter() + .map(|ty| { + env.unifier.stringify(*ty, &mut |i| obj_map.get(&i).unwrap().to_string(), &mut |i| { + format!("v{}", i) + }) + }) + .sorted() + .collect_vec(); + assert_eq!(expected_types, types); +} From 197a72c658fe25617c3a495797c2c79e4ed458fd Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 30 Jul 2021 16:43:25 +0800 Subject: [PATCH 060/131] added comment --- nac3core/src/typecheck/typedef/test.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index 0f9128ee..78cb77c5 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -493,6 +493,12 @@ fn test_instantiation() { let t = env.unifier.get_fresh_rigid_var().0; let tuple = env.unifier.add_ty(TypeEnum::TTuple { ty: vec![v, v1, v2] }); let v3 = env.unifier.get_fresh_var_with_range(&[tuple, t]).0; + // t = TypeVar('t') + // v = TypeVar('v', int, bool) + // v1 = TypeVar('v1', 'list[v]', int) + // v2 = TypeVar('v2', 'list[int]', float) + // v3 = TypeVar('v3', tuple[v, v1, v2], t) + // what values can v3 take? let types = env.unifier.get_instantiations(v3).unwrap(); let expected_types = indoc! {" From 7e0d55443a7b643eea02760e9c417a95590b05c7 Mon Sep 17 00:00:00 2001 From: CrescentonC Date: Mon, 2 Aug 2021 11:28:05 +0800 Subject: [PATCH 061/131] better structured primitive magic methods impl --- nac3core/src/typecheck/magic_methods.rs | 576 +++++++++--------- nac3core/src/typecheck/type_inferencer/mod.rs | 1 + 2 files changed, 279 insertions(+), 298 deletions(-) diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index 999a634e..8bf545c7 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -1,4 +1,5 @@ -use std::{collections::HashMap, rc::Rc}; +use std::collections::HashMap; +use std::rc::Rc; use rustpython_parser::ast::{Cmpop, Operator, Unaryop}; pub fn binop_name(op: &Operator) -> &'static str { @@ -58,347 +59,230 @@ pub fn comparison_name(op: &Cmpop) -> Option<&'static str> { } } -use crate::typecheck::{type_inferencer::*, typedef::{FunSignature, FuncArg, TypeEnum, Unifier}}; +use crate::typecheck::{type_inferencer::*, typedef::{FunSignature, FuncArg, TypeEnum, Unifier, Type}}; use rustpython_parser::ast; -pub fn set_primirives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifier) { - // int32 -------- - if let Some(TypeEnum::TObj {fields, .. }) = Rc::get_mut(&mut unifier.get_ty(store.int32)) { + +/// Add, Sub, Mult, Pow +pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { + if let Some(TypeEnum::TObj {fields, .. 
}) = Rc::get_mut(&mut unifier.get_ty(ty)) { for op in &[ - ast::Operator::Add, - ast::Operator::Sub, - ast::Operator::Mult, - ast::Operator::Mod, - ast::Operator::Pow, - ast::Operator::LShift, - ast::Operator::RShift, - ast::Operator::BitOr, - ast::Operator::BitXor, - ast::Operator::BitAnd, - ast::Operator::FloorDiv + ast::Operator::Add, + ast::Operator::Sub, + ast::Operator::Mult, + ast::Operator::Pow, ] { fields.insert( - binop_name(op).to_string(), + binop_name(op).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.int32, + ret: ret_ty, vars: HashMap::new(), args: vec![FuncArg { - ty: store.int32, - is_optional: false, - name: "other".into() // the name does not matter here - }], - })) - ); - - fields.insert( - binop_assign_name(op).to_string(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.none, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.int32, + ty: other_ty, is_optional: false, name: "other".into() }] })) ); - }; - // int div int gets float + + fields.insert( + binop_assign_name(op).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.none, + vars: HashMap::new(), + args: vec![FuncArg { + ty: other_ty, + is_optional: false, + name: "other".into() + }] + })) + ); + } + } else { unreachable!() } +} + +/// LShift, RShift, BitOr, BitXor, BitAnd +pub fn impl_bitwise_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { + if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { + for op in &[ + ast::Operator::LShift, + ast::Operator::RShift, + ast::Operator::BitOr, + ast::Operator::BitXor, + ast::Operator::BitAnd, + ] { + fields.insert( + binop_name(op).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: ty, + vars: HashMap::new(), + args: vec![FuncArg { + ty, + is_optional: false, + name: "other".into() + }] + })) + ); + + fields.insert( + binop_assign_name(op).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.none, + vars: HashMap::new(), + args: vec![FuncArg { + ty, + is_optional: false, + name: "other".into() + }] + })) + ); + } + } +} + +/// Div +pub fn impl_div(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type) { + if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { fields.insert( - binop_assign_name(&ast::Operator::Div).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { + binop_name(&ast::Operator::Div).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature{ ret: store.float, vars: HashMap::new(), args: vec![FuncArg { - ty: store.int32, + ty: other_ty, is_optional: false, name: "other".into() }] })) ); - for op in &[ - ast::Cmpop::Eq, - ast::Cmpop::NotEq, - ast::Cmpop::Lt, - ast::Cmpop::LtE, - ast::Cmpop::Gt, - ast::Cmpop::GtE, - ] { - fields.insert( - comparison_name(op).unwrap().to_string(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.bool, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.int32, - is_optional: false, - name: "other".into() - }], - })) - ); - } - - for op in &[ - ast::Unaryop::UAdd, - ast::Unaryop::USub, - ast::Unaryop::Not, - ast::Unaryop::Invert, - ] { - fields.insert( - unaryop_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.int32, - vars: HashMap::new(), - args: vec![] - })) - ); - } - } else { unreachable!() } - // int32 -------- - // int64 -------- - if let Some(TypeEnum::TObj {fields, .. 
}) = Rc::get_mut(&mut unifier.get_ty(store.int64)) { - for op in &[ - ast::Operator::Add, - ast::Operator::Sub, - ast::Operator::Mult, - ast::Operator::Mod, - ast::Operator::Pow, - ast::Operator::LShift, - ast::Operator::RShift, - ast::Operator::BitOr, - ast::Operator::BitXor, - ast::Operator::BitAnd, - ast::Operator::FloorDiv - ] { - fields.insert( - binop_name(op).to_string(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.int64, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.int64, - is_optional: false, - name: "other".into() // the name does not matter here - }], - })) - ); - - fields.insert( - binop_assign_name(op).to_string(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.none, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.int64, - is_optional: false, - name: "other".into() - }] - })) - ); - }; fields.insert( - binop_assign_name(&ast::Operator::Div).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.float, + binop_assign_name(&ast::Operator::Div).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature{ + ret: store.none, vars: HashMap::new(), args: vec![FuncArg { - ty: store.int64, + ty: other_ty, + is_optional: false, + name: "other".into() + }] + })) + ); + } else { unreachable!() } +} + +/// FloorDiv +pub fn impl_floordiv(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { + if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { + fields.insert( + binop_name(&ast::Operator::FloorDiv).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature{ + ret: ret_ty, + vars: HashMap::new(), + args: vec![FuncArg { + ty: other_ty, is_optional: false, name: "other".into() }] })) ); - for op in &[ - ast::Cmpop::Eq, - ast::Cmpop::NotEq, - ast::Cmpop::Lt, - ast::Cmpop::LtE, - ast::Cmpop::Gt, - ast::Cmpop::GtE, - ] { - fields.insert( - comparison_name(op).unwrap().to_string(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.bool, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.int64, - is_optional: false, - name: "other".into() - }], - })) - ); - } + fields.insert( + binop_assign_name(&ast::Operator::FloorDiv).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature{ + ret: store.none, + vars: HashMap::new(), + args: vec![FuncArg { + ty: other_ty, + is_optional: false, + name: "other".into() + }] + })) + ); + } else { unreachable!() } +} +/// Mod +pub fn impl_mod(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { + if let Some(TypeEnum::TObj {fields, .. 
}) = Rc::get_mut(&mut unifier.get_ty(ty)) { + fields.insert( + binop_name(&ast::Operator::Mod).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: ret_ty, + vars: HashMap::new(), + args: vec![FuncArg { + ty: other_ty, + is_optional: false, + name: "other".into() + }] + })) + ); + + fields.insert( + binop_assign_name(&ast::Operator::Mod).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.none, + vars: HashMap::new(), + args: vec![FuncArg { + ty: other_ty, + is_optional: false, + name: "other".into() + }] + })) + ); + } else { unreachable!() } +} + +/// UAdd, USub +pub fn impl_unary_op(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type) { + if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { for op in &[ ast::Unaryop::UAdd, - ast::Unaryop::USub, - ast::Unaryop::Not, - ast::Unaryop::Invert, + ast::Unaryop::USub ] { fields.insert( unaryop_name(op).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.int64, + ret: ty, vars: HashMap::new(), args: vec![] })) ); } } else { unreachable!() } - // int64 -------- - // float -------- - if let Some(TypeEnum::TObj {fields, .. }) = Rc::get_mut(&mut unifier.get_ty(store.float)) { - for op in &[ - ast::Operator::Add, - ast::Operator::Sub, - ast::Operator::Mult, - ast::Operator::Div, - ast::Operator::Mod, - ast::Operator::Pow, - ast::Operator::FloorDiv, - ] { - fields.insert( - binop_name(op).to_string(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.float, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.float, - is_optional: false, - name: "other".into() // the name does not matter here - }], - })) - ); +} - fields.insert( - binop_assign_name(op).to_string(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.none, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.float, - is_optional: false, - name: "other".into() - }] - })) - ); - }; +/// Invert +pub fn impl_invert(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type) { + if let Some(TypeEnum::TObj {fields, .. }) = Rc::get_mut(&mut unifier.get_ty(ty)) { + fields.insert( + unaryop_name(&ast::Unaryop::Invert).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: ty, + vars: HashMap::new(), + args: vec![] + })) + ); + } +} - for op in &[ - ast::Cmpop::Eq, - ast::Cmpop::NotEq, - ast::Cmpop::Lt, - ast::Cmpop::LtE, - ast::Cmpop::Gt, - ast::Cmpop::GtE, - ] { - fields.insert( - comparison_name(op).unwrap().to_string(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.bool, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.float, - is_optional: false, - name: "other".into() - }], - })) - ); - } - - for op in &[ - ast::Unaryop::UAdd, - ast::Unaryop::USub, - ast::Unaryop::Not, - ] { - fields.insert( - unaryop_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.int64, - vars: HashMap::new(), - args: vec![] - })) - ); - } +/// Not +pub fn impl_not(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { + if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { + fields.insert( + unaryop_name(&ast::Unaryop::Not).into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.bool, + vars: HashMap::new(), + args: vec![] + })) + ); } else { unreachable!() } - // float -------- - // bool --------- - if let Some(TypeEnum::TObj {fields, .. 
}) = Rc::get_mut(&mut unifier.get_ty(store.bool)) { - for op in &[ - ast::Operator::Add, - ast::Operator::Sub, - ast::Operator::Mult, - ast::Operator::Mod, - ast::Operator::Pow, - ast::Operator::LShift, - ast::Operator::RShift, - ast::Operator::FloorDiv - ] { - fields.insert( - binop_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.int32, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.bool, - is_optional: false, - name: "other".into() - }] - })) - ); +} - fields.insert( - binop_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.int32, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.int32, - is_optional: false, - name: "other".into() - }] - })) - ); - - // binop_assignment will change type? - /* fields.insert( - binop_assignment_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.none, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.bool, - is_optional: false, - name: "other".into() - }] - })) - ); */ - }; - - for op in &[ - ast::Operator::BitOr, - ast::Operator::BitXor, - ast::Operator::BitAnd - ] { - fields.insert( - binop_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.bool, - vars: HashMap::new(), - args: vec![FuncArg { - ty: store.int32, - is_optional: false, - name: "other".into() - }] - })) - ); - }; - - for op in &[ - ast::Cmpop::Eq, - ast::Cmpop::NotEq, +/// Lt, LtE, Gt, GtE +pub fn impl_comparison(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type) { + if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { + for op in &[ ast::Cmpop::Lt, ast::Cmpop::LtE, ast::Cmpop::Gt, @@ -407,16 +291,112 @@ pub fn set_primirives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie fields.insert( comparison_name(op).unwrap().into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.int32, + ret: store.bool, vars: HashMap::new(), args: vec![FuncArg { - ty: store.bool, + ty: other_ty, is_optional: false, name: "other".into() }] })) ); } - } - // bool -------- + } else { unreachable!() } +} + +/// Eq, NotEq +pub fn impl_eq(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { + if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { + for op in &[ + ast::Cmpop::Eq, + ast::Cmpop::NotEq, + ] { + fields.insert( + comparison_name(op).unwrap().into(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: store.bool, + vars: HashMap::new(), + args: vec![FuncArg { + ty, + is_optional: false, + name: "other".into() + }] + })) + ); + } + } else { unreachable!() } +} + +pub fn set_primirives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifier) { + let PrimitiveStore { + int32: int32_t, + int64: int64_t, + float: float_t, + bool: bool_t, + none: _none_t + } = *store; + // int32 -------- + impl_basic_arithmetic(unifier, store, int32_t, int32_t, int32_t); + impl_basic_arithmetic(unifier, store, int32_t, int64_t, int64_t); + impl_basic_arithmetic(unifier, store, int32_t, float_t, float_t); + impl_bitwise_arithmetic(unifier, store, int32_t); + impl_div(unifier, store, int32_t, int32_t); + impl_div(unifier, store, int32_t, int64_t); + impl_div(unifier, store, int32_t, float_t); + impl_floordiv(unifier, store, int32_t, int32_t, int32_t); + impl_floordiv(unifier, store, int32_t, int64_t, int32_t); + impl_floordiv(unifier, store, int32_t, float_t, float_t); + impl_mod(unifier, store, int32_t, int32_t, int32_t); + impl_mod(unifier, store, int32_t, int64_t, int32_t); + 
impl_mod(unifier, store, int32_t, float_t, float_t); + impl_unary_op(unifier, store, int32_t); + impl_invert(unifier, store, int32_t); + impl_not(unifier, store, int32_t); + impl_comparison(unifier, store, int32_t, int32_t); + impl_comparison(unifier, store, int32_t, int64_t); + impl_comparison(unifier, store, int32_t, float_t); + impl_eq(unifier, store, int32_t); + // int64 -------- + impl_basic_arithmetic(unifier, store, int64_t, int32_t, int64_t); + impl_basic_arithmetic(unifier, store, int64_t, int64_t, int64_t); + impl_basic_arithmetic(unifier, store, int64_t, float_t, float_t); + impl_bitwise_arithmetic(unifier, store, int64_t); + impl_div(unifier, store, int64_t, int32_t); + impl_div(unifier, store, int64_t, int64_t); + impl_div(unifier, store, int64_t, float_t); + impl_floordiv(unifier, store, int64_t, int32_t, int64_t); + impl_floordiv(unifier, store, int64_t, int64_t, int64_t); + impl_floordiv(unifier, store, int64_t, float_t, float_t); + impl_mod(unifier, store, int64_t, int32_t, int64_t); + impl_mod(unifier, store, int64_t, int64_t, int64_t); + impl_mod(unifier, store, int64_t, float_t, float_t); + impl_unary_op(unifier, store, int64_t); + impl_invert(unifier, store, int64_t); + impl_not(unifier, store, int64_t); + impl_comparison(unifier, store, int64_t, int32_t); + impl_comparison(unifier, store, int64_t, int64_t); + impl_comparison(unifier, store, int64_t, float_t); + impl_eq(unifier, store, int64_t); + // float -------- + impl_basic_arithmetic(unifier, store, float_t, int32_t, float_t); + impl_basic_arithmetic(unifier, store, float_t, int64_t, float_t); + impl_basic_arithmetic(unifier, store, float_t, float_t, float_t); + impl_div(unifier, store, float_t, int32_t); + impl_div(unifier, store, float_t, int64_t); + impl_div(unifier, store, float_t, float_t); + impl_floordiv(unifier, store, float_t, int32_t, float_t); + impl_floordiv(unifier, store, float_t, int64_t, float_t); + impl_floordiv(unifier, store, float_t, float_t, float_t); + impl_mod(unifier, store, float_t, int32_t, float_t); + impl_mod(unifier, store, float_t, int64_t, float_t); + impl_mod(unifier, store, float_t, float_t, float_t); + impl_unary_op(unifier, store, float_t); + impl_not(unifier, store, float_t); + impl_comparison(unifier, store, float_t, int32_t); + impl_comparison(unifier, store, float_t, int64_t); + impl_comparison(unifier, store, float_t, float_t); + impl_eq(unifier, store, float_t); + // bool --------- + impl_not(unifier, store, bool_t); + impl_eq(unifier, store, bool_t); } \ No newline at end of file diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index f002eff9..8b9c098e 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -32,6 +32,7 @@ impl From for CodeLocation { } } +#[derive(Clone, Copy)] pub struct PrimitiveStore { pub int32: Type, pub int64: Type, From a7e3eeea0dd73a63a0d74a3d23afaa7f77f613b2 Mon Sep 17 00:00:00 2001 From: CrescentonC Date: Mon, 2 Aug 2021 17:36:37 +0800 Subject: [PATCH 062/131] add primitive magic method support; change from TypeEnum::TObj { fields: Mapping, ..} to TypeEnum::TObj {fields: RefCell>, .. 
} for interior mutability --- nac3core/src/typecheck/magic_methods.rs | 123 ++++++++------- .../src/typecheck/type_inferencer/test.rs | 146 ++++++++++++++++-- nac3core/src/typecheck/typedef/mod.rs | 12 +- nac3core/src/typecheck/typedef/test.rs | 10 +- 4 files changed, 208 insertions(+), 83 deletions(-) diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index 8bf545c7..eacf8180 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -1,5 +1,5 @@ +use std::borrow::Borrow; use std::collections::HashMap; -use std::rc::Rc; use rustpython_parser::ast::{Cmpop, Operator, Unaryop}; pub fn binop_name(op: &Operator) -> &'static str { @@ -64,14 +64,14 @@ use rustpython_parser::ast; /// Add, Sub, Mult, Pow pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { - if let Some(TypeEnum::TObj {fields, .. }) = Rc::get_mut(&mut unifier.get_ty(ty)) { + if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { for op in &[ ast::Operator::Add, ast::Operator::Sub, ast::Operator::Mult, ast::Operator::Pow, ] { - fields.insert( + fields.borrow_mut().insert( binop_name(op).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: ret_ty, @@ -84,7 +84,7 @@ pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: })) ); - fields.insert( + fields.borrow_mut().insert( binop_assign_name(op).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: store.none, @@ -102,7 +102,7 @@ pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: /// LShift, RShift, BitOr, BitXor, BitAnd pub fn impl_bitwise_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { + if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { for op in &[ ast::Operator::LShift, ast::Operator::RShift, @@ -110,7 +110,7 @@ pub fn impl_bitwise_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty ast::Operator::BitXor, ast::Operator::BitAnd, ] { - fields.insert( + fields.borrow_mut().insert( binop_name(op).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: ty, @@ -123,7 +123,7 @@ pub fn impl_bitwise_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty })) ); - fields.insert( + fields.borrow_mut().insert( binop_assign_name(op).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: store.none, @@ -141,8 +141,8 @@ pub fn impl_bitwise_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty /// Div pub fn impl_div(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type) { - if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { - fields.insert( + if let TypeEnum::TObj {fields, .. 
} = unifier.get_ty(ty).borrow() { + fields.borrow_mut().insert( binop_name(&ast::Operator::Div).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature{ ret: store.float, @@ -155,7 +155,7 @@ pub fn impl_div(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_t })) ); - fields.insert( + fields.borrow_mut().insert( binop_assign_name(&ast::Operator::Div).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature{ ret: store.none, @@ -172,8 +172,8 @@ pub fn impl_div(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_t /// FloorDiv pub fn impl_floordiv(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { - if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { - fields.insert( + if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { + fields.borrow_mut().insert( binop_name(&ast::Operator::FloorDiv).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature{ ret: ret_ty, @@ -186,7 +186,7 @@ pub fn impl_floordiv(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, ot })) ); - fields.insert( + fields.borrow_mut().insert( binop_assign_name(&ast::Operator::FloorDiv).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature{ ret: store.none, @@ -203,8 +203,8 @@ pub fn impl_floordiv(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, ot /// Mod pub fn impl_mod(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { - if let Some(TypeEnum::TObj {fields, .. }) = Rc::get_mut(&mut unifier.get_ty(ty)) { - fields.insert( + if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { + fields.borrow_mut().insert( binop_name(&ast::Operator::Mod).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: ret_ty, @@ -217,7 +217,7 @@ pub fn impl_mod(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_t })) ); - fields.insert( + fields.borrow_mut().insert( binop_assign_name(&ast::Operator::Mod).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: store.none, @@ -234,12 +234,12 @@ pub fn impl_mod(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_t /// UAdd, USub pub fn impl_unary_op(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type) { - if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { + if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { for op in &[ ast::Unaryop::UAdd, ast::Unaryop::USub ] { - fields.insert( + fields.borrow_mut().insert( unaryop_name(op).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: ty, @@ -253,8 +253,8 @@ pub fn impl_unary_op(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type) { /// Invert pub fn impl_invert(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type) { - if let Some(TypeEnum::TObj {fields, .. }) = Rc::get_mut(&mut unifier.get_ty(ty)) { - fields.insert( + if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { + fields.borrow_mut().insert( unaryop_name(&ast::Unaryop::Invert).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: ty, @@ -267,8 +267,8 @@ pub fn impl_invert(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type) { /// Not pub fn impl_not(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { - fields.insert( + if let TypeEnum::TObj {fields, .. 
} = unifier.get_ty(ty).borrow() { + fields.borrow_mut().insert( unaryop_name(&ast::Unaryop::Not).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: store.bool, @@ -281,14 +281,14 @@ pub fn impl_not(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { /// Lt, LtE, Gt, GtE pub fn impl_comparison(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type) { - if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { + if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { for op in &[ ast::Cmpop::Lt, ast::Cmpop::LtE, ast::Cmpop::Gt, ast::Cmpop::GtE, ] { - fields.insert( + fields.borrow_mut().insert( comparison_name(op).unwrap().into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: store.bool, @@ -306,12 +306,12 @@ pub fn impl_comparison(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, /// Eq, NotEq pub fn impl_eq(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - if let Some(TypeEnum::TObj {fields, ..}) = Rc::get_mut(&mut unifier.get_ty(ty)) { + if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { for op in &[ ast::Cmpop::Eq, ast::Cmpop::NotEq, ] { - fields.insert( + fields.borrow_mut().insert( comparison_name(op).unwrap().into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: store.bool, @@ -335,68 +335,71 @@ pub fn set_primirives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie bool: bool_t, none: _none_t } = *store; - // int32 -------- + /* int32 ======== */ impl_basic_arithmetic(unifier, store, int32_t, int32_t, int32_t); - impl_basic_arithmetic(unifier, store, int32_t, int64_t, int64_t); - impl_basic_arithmetic(unifier, store, int32_t, float_t, float_t); + // impl_basic_arithmetic(unifier, store, int32_t, int64_t, int64_t); + // impl_basic_arithmetic(unifier, store, int32_t, float_t, float_t); impl_bitwise_arithmetic(unifier, store, int32_t); + // impl_div(unifier, store, int32_t, int32_t); + // impl_div(unifier, store, int32_t, int64_t); impl_div(unifier, store, int32_t, int32_t); - impl_div(unifier, store, int32_t, int64_t); - impl_div(unifier, store, int32_t, float_t); impl_floordiv(unifier, store, int32_t, int32_t, int32_t); - impl_floordiv(unifier, store, int32_t, int64_t, int32_t); - impl_floordiv(unifier, store, int32_t, float_t, float_t); + // impl_floordiv(unifier, store, int32_t, int64_t, int32_t); + // impl_floordiv(unifier, store, int32_t, float_t, float_t); impl_mod(unifier, store, int32_t, int32_t, int32_t); - impl_mod(unifier, store, int32_t, int64_t, int32_t); - impl_mod(unifier, store, int32_t, float_t, float_t); + // impl_mod(unifier, store, int32_t, int64_t, int32_t); + // impl_mod(unifier, store, int32_t, float_t, float_t); impl_unary_op(unifier, store, int32_t); impl_invert(unifier, store, int32_t); impl_not(unifier, store, int32_t); impl_comparison(unifier, store, int32_t, int32_t); - impl_comparison(unifier, store, int32_t, int64_t); - impl_comparison(unifier, store, int32_t, float_t); + // impl_comparison(unifier, store, int32_t, int64_t); + // impl_comparison(unifier, store, int32_t, float_t); impl_eq(unifier, store, int32_t); - // int64 -------- - impl_basic_arithmetic(unifier, store, int64_t, int32_t, int64_t); + + /* int64 ======== */ + // impl_basic_arithmetic(unifier, store, int64_t, int32_t, int64_t); impl_basic_arithmetic(unifier, store, int64_t, int64_t, int64_t); - impl_basic_arithmetic(unifier, store, int64_t, float_t, float_t); + // impl_basic_arithmetic(unifier, store, int64_t, float_t, float_t); impl_bitwise_arithmetic(unifier, store, 
int64_t); - impl_div(unifier, store, int64_t, int32_t); + // impl_div(unifier, store, int64_t, int32_t); impl_div(unifier, store, int64_t, int64_t); - impl_div(unifier, store, int64_t, float_t); - impl_floordiv(unifier, store, int64_t, int32_t, int64_t); + // impl_div(unifier, store, int64_t, float_t); + // impl_floordiv(unifier, store, int64_t, int32_t, int64_t); impl_floordiv(unifier, store, int64_t, int64_t, int64_t); - impl_floordiv(unifier, store, int64_t, float_t, float_t); - impl_mod(unifier, store, int64_t, int32_t, int64_t); + // impl_floordiv(unifier, store, int64_t, float_t, float_t); + // impl_mod(unifier, store, int64_t, int32_t, int64_t); impl_mod(unifier, store, int64_t, int64_t, int64_t); - impl_mod(unifier, store, int64_t, float_t, float_t); + // impl_mod(unifier, store, int64_t, float_t, float_t); impl_unary_op(unifier, store, int64_t); impl_invert(unifier, store, int64_t); impl_not(unifier, store, int64_t); - impl_comparison(unifier, store, int64_t, int32_t); + // impl_comparison(unifier, store, int64_t, int32_t); impl_comparison(unifier, store, int64_t, int64_t); - impl_comparison(unifier, store, int64_t, float_t); + // impl_comparison(unifier, store, int64_t, float_t); impl_eq(unifier, store, int64_t); - // float -------- - impl_basic_arithmetic(unifier, store, float_t, int32_t, float_t); - impl_basic_arithmetic(unifier, store, float_t, int64_t, float_t); + + /* float ======== */ + // impl_basic_arithmetic(unifier, store, float_t, int32_t, float_t); + // impl_basic_arithmetic(unifier, store, float_t, int64_t, float_t); impl_basic_arithmetic(unifier, store, float_t, float_t, float_t); - impl_div(unifier, store, float_t, int32_t); - impl_div(unifier, store, float_t, int64_t); + // impl_div(unifier, store, float_t, int32_t); + // impl_div(unifier, store, float_t, int64_t); impl_div(unifier, store, float_t, float_t); - impl_floordiv(unifier, store, float_t, int32_t, float_t); - impl_floordiv(unifier, store, float_t, int64_t, float_t); + // impl_floordiv(unifier, store, float_t, int32_t, float_t); + // impl_floordiv(unifier, store, float_t, int64_t, float_t); impl_floordiv(unifier, store, float_t, float_t, float_t); - impl_mod(unifier, store, float_t, int32_t, float_t); - impl_mod(unifier, store, float_t, int64_t, float_t); + // impl_mod(unifier, store, float_t, int32_t, float_t); + // impl_mod(unifier, store, float_t, int64_t, float_t); impl_mod(unifier, store, float_t, float_t, float_t); impl_unary_op(unifier, store, float_t); impl_not(unifier, store, float_t); - impl_comparison(unifier, store, float_t, int32_t); - impl_comparison(unifier, store, float_t, int64_t); + // impl_comparison(unifier, store, float_t, int32_t); + // impl_comparison(unifier, store, float_t, int64_t); impl_comparison(unifier, store, float_t, float_t); impl_eq(unifier, store, float_t); - // bool --------- + + /* bool ======== */ impl_not(unifier, store, bool_t); impl_eq(unifier, store, bool_t); } \ No newline at end of file diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index eec99f2a..16200dc9 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -51,31 +51,32 @@ impl TestEnvironment { let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: 0, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }); let int64 = unifier.add_ty(TypeEnum::TObj { obj_id: 1, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }); let float = 
unifier.add_ty(TypeEnum::TObj { obj_id: 2, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }); let bool = unifier.add_ty(TypeEnum::TObj { obj_id: 3, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }); let none = unifier.add_ty(TypeEnum::TObj { obj_id: 4, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }); // identifier_mapping.insert("None".into(), none); let primitives = PrimitiveStore { int32, int64, float, bool, none }; + set_primirives_magic_methods(&primitives, &mut unifier); let id_to_name = [ @@ -119,27 +120,27 @@ impl TestEnvironment { let mut identifier_mapping = HashMap::new(); let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: 0, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }); let int64 = unifier.add_ty(TypeEnum::TObj { obj_id: 1, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }); let float = unifier.add_ty(TypeEnum::TObj { obj_id: 2, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }); let bool = unifier.add_ty(TypeEnum::TObj { obj_id: 3, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }); let none = unifier.add_ty(TypeEnum::TObj { obj_id: 4, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }); identifier_mapping.insert("None".into(), none); @@ -150,7 +151,7 @@ impl TestEnvironment { let foo_ty = unifier.add_ty(TypeEnum::TObj { obj_id: 5, - fields: [("a".into(), v0)].iter().cloned().collect(), + fields: [("a".into(), v0)].iter().cloned().collect::>().into(), params: [(id, v0)].iter().cloned().collect(), }); @@ -170,7 +171,7 @@ impl TestEnvironment { })); let bar = unifier.add_ty(TypeEnum::TObj { obj_id: 6, - fields: [("a".into(), int32), ("b".into(), fun)].iter().cloned().collect(), + fields: [("a".into(), int32), ("b".into(), fun)].iter().cloned().collect::>().into(), params: Default::default(), }); identifier_mapping.insert( @@ -184,7 +185,7 @@ impl TestEnvironment { let bar2 = unifier.add_ty(TypeEnum::TObj { obj_id: 7, - fields: [("a".into(), bool), ("b".into(), fun)].iter().cloned().collect(), + fields: [("a".into(), bool), ("b".into(), fun)].iter().cloned().collect::>().into(), params: Default::default(), }); identifier_mapping.insert( @@ -350,3 +351,122 @@ fn test_basic(source: &str, mapping: HashMap<&str, &str>, virtuals: &[(&str, &st assert_eq!(&b, y); } } + +#[test_case(indoc! {" + a = 2 + b = 2 + c = a + b + d = a - b + e = a * b + f = a / b + g = a // b + h = a % b + "}, + [("a", "int32"), + ("b", "int32"), + ("c", "int32"), + ("d", "int32"), + ("e", "int32"), + ("f", "float"), + ("g", "int32"), + ("h", "int32")].iter().cloned().collect() + ; "int32")] +#[test_case( + indoc! {" + a = 2.4 + b = 3.6 + c = a + b + d = a - b + e = a * b + f = a / b + g = a // b + h = a % b + "}, + [("a", "float"), + ("b", "float"), + ("c", "float"), + ("d", "float"), + ("e", "float"), + ("f", "float"), + ("g", "float"), + ("h", "float")].iter().cloned().collect() + ; "float" +)] +#[test_case( + indoc! 
{" + a = int64(12312312312) + b = int64(24242424424) + c = a + b + d = a - b + e = a * b + f = a / b + g = a // b + h = a % b + i = a == b + j = a > b + k = a < b + l = a != b + "}, + [("a", "int64"), + ("b", "int64"), + ("c", "int64"), + ("d", "int64"), + ("e", "int64"), + ("f", "float"), + ("g", "int64"), + ("h", "int64"), + ("i", "bool"), + ("j", "bool"), + ("k", "bool"), + ("l", "bool")].iter().cloned().collect() + ; "int64" +)] +#[test_case( + indoc! {" + a = True + b = False + c = a == b + d = not a + e = a != b + "}, + [("a", "bool"), + ("b", "bool"), + ("c", "bool"), + ("d", "bool"), + ("e", "bool")].iter().cloned().collect() + ; "boolean" +)] +fn test_primitive_magic_methods(source: &str, mapping: HashMap<&str, &str>) { + println!("source:\n{}", source); + let mut env = TestEnvironment::basic_test_env(); + let id_to_name = std::mem::take(&mut env.id_to_name); + let mut defined_identifiers: Vec<_> = env.identifier_mapping.keys().cloned().collect(); + defined_identifiers.push("virtual".to_string()); + let mut inferencer = env.get_inferencer(); + let statements = parse_program(source).unwrap(); + let statements = statements + .into_iter() + .map(|v| inferencer.fold_stmt(v)) + .collect::, _>>() + .unwrap(); + + inferencer.check_block(&statements, &mut defined_identifiers).unwrap(); + + for (k, v) in inferencer.variable_mapping.iter() { + let name = inferencer.unifier.stringify( + *v, + &mut |v| id_to_name.get(&v).unwrap().clone(), + &mut |v| format!("v{}", v), + ); + println!("{}: {}", k, name); + } + for (k, v) in mapping.iter() { + let ty = inferencer.variable_mapping.get(*k).unwrap(); + let name = inferencer.unifier.stringify( + *ty, + &mut |v| id_to_name.get(&v).unwrap().clone(), + &mut |v| format!("v{}", v), + ); + assert_eq!(format!("{}: {}", k, v), format!("{}: {}", k, name)); + } +} \ No newline at end of file diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index e3dc8337..dbe739d1 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -64,7 +64,7 @@ pub enum TypeEnum { }, TObj { obj_id: usize, - fields: Mapping, + fields: RefCell>, params: VarMap, }, TVirtual { @@ -373,7 +373,8 @@ impl Unifier { (TVar { meta: Record(map), id, range, .. }, TObj { fields, .. }) => { self.occur_check(a, b)?; for (k, v) in map.borrow().iter() { - let ty = fields.get(k).ok_or_else(|| format!("No such attribute {}", k))?; + let temp = fields.borrow(); + let ty = temp.get(k).ok_or_else(|| format!("No such attribute {}", k))?; self.unify(*ty, *v)?; } let x = self.check_var_compatibility(*id, b, &range.borrow())?.unwrap_or(b); @@ -385,7 +386,8 @@ impl Unifier { let ty = self.get_ty(*ty); if let TObj { fields, .. } = ty.as_ref() { for (k, v) in map.borrow().iter() { - let ty = fields.get(k).ok_or_else(|| format!("No such attribute {}", k))?; + let temp = fields.borrow(); + let ty = temp.get(k).ok_or_else(|| format!("No such attribute {}", k))?; if !matches!(self.get_ty(*ty).as_ref(), TFunc { .. 
}) { return Err(format!("Cannot access field {} for virtual type", k)); } @@ -659,8 +661,8 @@ impl Unifier { if need_subst { let obj_id = *obj_id; let params = self.subst_map(¶ms, mapping).unwrap_or_else(|| params.clone()); - let fields = self.subst_map(&fields, mapping).unwrap_or_else(|| fields.clone()); - Some(self.add_ty(TypeEnum::TObj { obj_id, params, fields })) + let fields = self.subst_map(&fields.borrow(), mapping).unwrap_or_else(|| fields.borrow().clone()); + Some(self.add_ty(TypeEnum::TObj { obj_id, params, fields: fields.into() })) } else { None } diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index 78cb77c5..f05816db 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -79,7 +79,7 @@ impl TestEnvironment { "int".into(), unifier.add_ty(TypeEnum::TObj { obj_id: 0, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }), ); @@ -87,7 +87,7 @@ impl TestEnvironment { "float".into(), unifier.add_ty(TypeEnum::TObj { obj_id: 1, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }), ); @@ -95,7 +95,7 @@ impl TestEnvironment { "bool".into(), unifier.add_ty(TypeEnum::TObj { obj_id: 2, - fields: HashMap::new(), + fields: HashMap::new().into(), params: HashMap::new(), }), ); @@ -104,7 +104,7 @@ impl TestEnvironment { "Foo".into(), unifier.add_ty(TypeEnum::TObj { obj_id: 3, - fields: [("a".into(), v0)].iter().cloned().collect(), + fields: [("a".into(), v0)].iter().cloned().collect::>().into(), params: [(id, v0)].iter().cloned().collect(), }), ); @@ -335,7 +335,7 @@ fn test_virtual() { })); let bar = env.unifier.add_ty(TypeEnum::TObj { obj_id: 5, - fields: [("f".to_string(), fun), ("a".to_string(), int)].iter().cloned().collect(), + fields: [("f".to_string(), fun), ("a".to_string(), int)].iter().cloned().collect::>().into(), params: HashMap::new(), }); let v0 = env.unifier.get_fresh_var().0; From d4721db4a3ce73984b9c09263ee2813fbf63026f Mon Sep 17 00:00:00 2001 From: CrescentonC Date: Tue, 3 Aug 2021 09:45:39 +0800 Subject: [PATCH 063/131] not creating temp for borrow, more concise code --- nac3core/src/typecheck/typedef/mod.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index dbe739d1..37adb67f 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -373,9 +373,8 @@ impl Unifier { (TVar { meta: Record(map), id, range, .. }, TObj { fields, .. }) => { self.occur_check(a, b)?; for (k, v) in map.borrow().iter() { - let temp = fields.borrow(); - let ty = temp.get(k).ok_or_else(|| format!("No such attribute {}", k))?; - self.unify(*ty, *v)?; + let ty = fields.borrow().get(k).copied().ok_or_else(|| format!("No such attribute {}", k))?; + self.unify(ty, *v)?; } let x = self.check_var_compatibility(*id, b, &range.borrow())?.unwrap_or(b); self.unify(x, b)?; @@ -386,12 +385,11 @@ impl Unifier { let ty = self.get_ty(*ty); if let TObj { fields, .. } = ty.as_ref() { for (k, v) in map.borrow().iter() { - let temp = fields.borrow(); - let ty = temp.get(k).ok_or_else(|| format!("No such attribute {}", k))?; - if !matches!(self.get_ty(*ty).as_ref(), TFunc { .. }) { + let ty = fields.borrow().get(k).copied().ok_or_else(|| format!("No such attribute {}", k))?; + if !matches!(self.get_ty(ty).as_ref(), TFunc { .. 
}) { return Err(format!("Cannot access field {} for virtual type", k)); } - self.unify(*v, *ty)?; + self.unify(*v, ty)?; } } else { // require annotation... From d4807293b0db9c069fa18adc92133cafd7211693 Mon Sep 17 00:00:00 2001 From: CrescentonC Date: Tue, 3 Aug 2021 10:41:52 +0800 Subject: [PATCH 064/131] clean up unused variabls and comments --- nac3core/src/typecheck/magic_methods.rs | 32 +------------------------ 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index eacf8180..967b135a 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -333,69 +333,39 @@ pub fn set_primirives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie int64: int64_t, float: float_t, bool: bool_t, - none: _none_t + .. } = *store; /* int32 ======== */ impl_basic_arithmetic(unifier, store, int32_t, int32_t, int32_t); - // impl_basic_arithmetic(unifier, store, int32_t, int64_t, int64_t); - // impl_basic_arithmetic(unifier, store, int32_t, float_t, float_t); impl_bitwise_arithmetic(unifier, store, int32_t); - // impl_div(unifier, store, int32_t, int32_t); - // impl_div(unifier, store, int32_t, int64_t); impl_div(unifier, store, int32_t, int32_t); impl_floordiv(unifier, store, int32_t, int32_t, int32_t); - // impl_floordiv(unifier, store, int32_t, int64_t, int32_t); - // impl_floordiv(unifier, store, int32_t, float_t, float_t); impl_mod(unifier, store, int32_t, int32_t, int32_t); - // impl_mod(unifier, store, int32_t, int64_t, int32_t); - // impl_mod(unifier, store, int32_t, float_t, float_t); impl_unary_op(unifier, store, int32_t); impl_invert(unifier, store, int32_t); impl_not(unifier, store, int32_t); impl_comparison(unifier, store, int32_t, int32_t); - // impl_comparison(unifier, store, int32_t, int64_t); - // impl_comparison(unifier, store, int32_t, float_t); impl_eq(unifier, store, int32_t); /* int64 ======== */ - // impl_basic_arithmetic(unifier, store, int64_t, int32_t, int64_t); impl_basic_arithmetic(unifier, store, int64_t, int64_t, int64_t); - // impl_basic_arithmetic(unifier, store, int64_t, float_t, float_t); impl_bitwise_arithmetic(unifier, store, int64_t); - // impl_div(unifier, store, int64_t, int32_t); impl_div(unifier, store, int64_t, int64_t); - // impl_div(unifier, store, int64_t, float_t); - // impl_floordiv(unifier, store, int64_t, int32_t, int64_t); impl_floordiv(unifier, store, int64_t, int64_t, int64_t); - // impl_floordiv(unifier, store, int64_t, float_t, float_t); - // impl_mod(unifier, store, int64_t, int32_t, int64_t); impl_mod(unifier, store, int64_t, int64_t, int64_t); - // impl_mod(unifier, store, int64_t, float_t, float_t); impl_unary_op(unifier, store, int64_t); impl_invert(unifier, store, int64_t); impl_not(unifier, store, int64_t); - // impl_comparison(unifier, store, int64_t, int32_t); impl_comparison(unifier, store, int64_t, int64_t); - // impl_comparison(unifier, store, int64_t, float_t); impl_eq(unifier, store, int64_t); /* float ======== */ - // impl_basic_arithmetic(unifier, store, float_t, int32_t, float_t); - // impl_basic_arithmetic(unifier, store, float_t, int64_t, float_t); impl_basic_arithmetic(unifier, store, float_t, float_t, float_t); - // impl_div(unifier, store, float_t, int32_t); - // impl_div(unifier, store, float_t, int64_t); impl_div(unifier, store, float_t, float_t); - // impl_floordiv(unifier, store, float_t, int32_t, float_t); - // impl_floordiv(unifier, store, float_t, int64_t, float_t); 
impl_floordiv(unifier, store, float_t, float_t, float_t); - // impl_mod(unifier, store, float_t, int32_t, float_t); - // impl_mod(unifier, store, float_t, int64_t, float_t); impl_mod(unifier, store, float_t, float_t, float_t); impl_unary_op(unifier, store, float_t); impl_not(unifier, store, float_t); - // impl_comparison(unifier, store, float_t, int32_t); - // impl_comparison(unifier, store, float_t, int64_t); impl_comparison(unifier, store, float_t, float_t); impl_eq(unifier, store, float_t); From 52dc112410d2e047663e6ceb52d61aff860b18ca Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 3 Aug 2021 12:35:58 +0800 Subject: [PATCH 065/131] unification table: modified conversion impl from UnificationTable> <==> UnificationTable to UnificationTable> <==> UnificationTable --- nac3core/src/typecheck/unification_table.rs | 35 +++++---------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/nac3core/src/typecheck/unification_table.rs b/nac3core/src/typecheck/unification_table.rs index 60ec8086..588cf4bb 100644 --- a/nac3core/src/typecheck/unification_table.rs +++ b/nac3core/src/typecheck/unification_table.rs @@ -1,4 +1,3 @@ -use std::cell::RefCell; use std::rc::Rc; #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -12,11 +11,7 @@ pub struct UnificationTable { impl UnificationTable { pub fn new() -> UnificationTable { - UnificationTable { - parents: Vec::new(), - ranks: Vec::new(), - values: Vec::new(), - } + UnificationTable { parents: Vec::new(), ranks: Vec::new(), values: Vec::new() } } pub fn new_key(&mut self, v: V) -> UnificationKey { @@ -72,33 +67,17 @@ impl UnificationTable { } } -impl UnificationTable>> +impl UnificationTable> where V: Clone, { pub fn into_send(self) -> UnificationTable { - let values = self - .values - .iter() - .map(|v| v.as_ref().borrow().clone()) - .collect(); - UnificationTable { - parents: self.parents, - ranks: self.ranks, - values, - } + let values = self.values.iter().map(|v| v.as_ref().clone()).collect(); + UnificationTable { parents: self.parents, ranks: self.ranks, values } } - pub fn from_send(table: UnificationTable) -> UnificationTable>> { - let values = table - .values - .into_iter() - .map(|v| Rc::new(RefCell::new(v))) - .collect(); - UnificationTable { - parents: table.parents, - ranks: table.ranks, - values, - } + pub fn from_send(table: UnificationTable) -> UnificationTable> { + let values = table.values.into_iter().map(Rc::new).collect(); + UnificationTable { parents: table.parents, ranks: table.ranks, values } } } From a3acf09bdacce6926ee45fdbcaec524df745b385 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 3 Aug 2021 12:38:12 +0800 Subject: [PATCH 066/131] typedef: make it send Rc in calls is not send, so we use Arc instead. 
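A minimal sketch (not taken from the nac3 codebase, names are illustrative) of the Send constraint behind this change: Rc<T> is never Send, so a value stored behind Rc cannot be handed to another thread, while Arc<T> is Send whenever T is Send + Sync.

    use std::sync::Arc;
    use std::thread;

    fn main() {
        let shared = Arc::new(vec![1, 2, 3]);
        let clone = Arc::clone(&shared);
        // Compiles: Arc<Vec<i32>> is Send, so it may cross the thread boundary.
        let handle = thread::spawn(move || clone.len());
        assert_eq!(handle.join().unwrap(), 3);

        // The Rc version is rejected by the compiler:
        // let shared = std::rc::Rc::new(vec![1, 2, 3]);
        // thread::spawn(move || shared.len());
        // error[E0277]: `Rc<Vec<i32>>` cannot be sent between threads safely
    }

This is why the Call values referenced from TCall are switched to Arc here: the surrounding unification data is meant to be shareable across worker threads.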
--- nac3core/src/typecheck/type_inferencer/mod.rs | 9 ++++----- nac3core/src/typecheck/type_inferencer/test.rs | 2 +- nac3core/src/typecheck/typedef/mod.rs | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 8b9c098e..f474a66f 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -1,8 +1,7 @@ -use std::cell::RefCell; +use std::{cell::RefCell, sync::Arc}; use std::collections::HashMap; use std::convert::{TryInto, From}; use std::iter::once; -use std::rc::Rc; use super::magic_methods::*; use super::symbol_resolver::SymbolResolver; @@ -53,7 +52,7 @@ pub struct Inferencer<'a> { pub primitives: &'a PrimitiveStore, pub virtual_checks: &'a mut Vec<(Type, Type)>, pub variable_mapping: HashMap, - pub calls: &'a mut HashMap>, + pub calls: &'a mut HashMap>, } struct NaiveFolder(); @@ -192,7 +191,7 @@ impl<'a> Inferencer<'a> { ret: Type, ) -> InferenceResult { let call = - Rc::new(Call { posargs: params, kwargs: HashMap::new(), ret, fun: RefCell::new(None) }); + Arc::new(Call { posargs: params, kwargs: HashMap::new(), ret, fun: RefCell::new(None) }); let call = self.unifier.add_ty(TypeEnum::TCall(vec![call].into())); let fields = once((method, call)).collect(); let record = self.unifier.add_record(fields); @@ -389,7 +388,7 @@ impl<'a> Inferencer<'a> { .map(|v| fold::fold_keyword(self, v)) .collect::, _>>()?; let ret = self.unifier.get_fresh_var().0; - let call = Rc::new(Call { + let call = Arc::new(Call { posargs: args.iter().map(|v| v.custom.unwrap()).collect(), kwargs: keywords .iter() diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 16200dc9..c6b2c394 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -42,7 +42,7 @@ struct TestEnvironment { pub id_to_name: HashMap, pub identifier_mapping: HashMap, pub virtual_checks: Vec<(Type, Type)>, - pub calls: HashMap>, + pub calls: HashMap>, } impl TestEnvironment { diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 37adb67f..20736dca 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -1,5 +1,5 @@ use itertools::{chain, zip, Itertools}; -use std::borrow::Cow; +use std::{borrow::Cow, sync::Arc}; use std::cell::RefCell; use std::collections::HashMap; use std::iter::once; @@ -70,7 +70,7 @@ pub enum TypeEnum { TVirtual { ty: Type, }, - TCall(RefCell>>), + TCall(RefCell>>), TFunc(FunSignature), } From d4d12a9d1d46834b0b33f30558f58a48add9bb65 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 3 Aug 2021 12:38:55 +0800 Subject: [PATCH 067/131] added crossbeam dependency --- Cargo.lock | 78 +++++++++++++++++++++++++++++++++++++++++++++ nac3core/Cargo.toml | 1 + 2 files changed, 79 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index d4561e97..ffdf0a46 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -81,6 +81,74 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "crossbeam" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae5588f6b3c3cb05239e90bd110f257254aecd01e4635400391aeae07497845" +dependencies = [ + "cfg-if", + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + 
"crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b10ddc024425c88c2ad148c1b0fd53f4c6d38db9697c9f1588381212fa657c9" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" +dependencies = [ + "cfg-if", + "lazy_static", +] + [[package]] name = "crunchy" version = "0.2.2" @@ -380,10 +448,20 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" +[[package]] +name = "memoffset" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9" +dependencies = [ + "autocfg", +] + [[package]] name = "nac3core" version = "0.1.0" dependencies = [ + "crossbeam", "indoc 1.0.3", "inkwell", "itertools", diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 5d3753f3..09ebf724 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -10,6 +10,7 @@ num-traits = "0.2" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } itertools = "0.10.1" +crossbeam = "0.8.1" [dev-dependencies] test-case = "1.2.0" From f00c1813e3b3074562556eb3b9dd6d1d1d0b5ec7 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 3 Aug 2021 13:38:27 +0800 Subject: [PATCH 068/131] top-level related changes --- nac3core/src/typecheck/mod.rs | 5 +- nac3core/src/typecheck/symbol_resolver.rs | 4 +- nac3core/src/typecheck/top_level.rs | 51 +++++++++++ .../src/typecheck/type_inferencer/test.rs | 87 +++++++++++-------- nac3core/src/typecheck/typedef/mod.rs | 30 ++++++- nac3core/src/typecheck/unification_table.rs | 10 +-- 6 files changed, 139 insertions(+), 48 deletions(-) create mode 100644 nac3core/src/typecheck/top_level.rs diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index fbee8ebe..fe686245 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,8 +1,9 @@ #![allow(dead_code)] +mod function_check; pub mod location; mod magic_methods; pub mod symbol_resolver; -pub mod typedef; +mod top_level; pub mod type_inferencer; +pub mod typedef; mod unification_table; -mod function_check; diff --git a/nac3core/src/typecheck/symbol_resolver.rs b/nac3core/src/typecheck/symbol_resolver.rs 
index 669f7632..33d56201 100644 --- a/nac3core/src/typecheck/symbol_resolver.rs +++ b/nac3core/src/typecheck/symbol_resolver.rs @@ -1,5 +1,6 @@ -use super::typedef::Type; use super::location::Location; +use super::top_level::DefinitionId; +use super::typedef::Type; use rustpython_parser::ast::Expr; pub enum SymbolValue<'a> { @@ -14,6 +15,7 @@ pub enum SymbolValue<'a> { pub trait SymbolResolver { fn get_symbol_type(&mut self, str: &str) -> Option; fn parse_type_name(&mut self, expr: &Expr<()>) -> Option; + fn get_function_def(&mut self, str: &str) -> DefinitionId; fn get_symbol_value(&mut self, str: &str) -> Option; fn get_symbol_location(&mut self, str: &str) -> Option; // handle function call etc. diff --git a/nac3core/src/typecheck/top_level.rs b/nac3core/src/typecheck/top_level.rs new file mode 100644 index 00000000..5d63a920 --- /dev/null +++ b/nac3core/src/typecheck/top_level.rs @@ -0,0 +1,51 @@ +use std::collections::HashMap; + +use super::typedef::{SharedUnifier, Type}; +use crossbeam::queue::SegQueue; +use crossbeam::sync::ShardedLock; +use rustpython_parser::ast::Stmt; + +pub struct DefinitionId(usize); + +pub enum TopLevelDef { + Class { + // object ID used for TypeEnum + object_id: usize, + // type variables bounded to the class. + type_vars: Vec, + // class fields and method signature. + fields: Vec<(String, Type)>, + // class methods, pointing to the corresponding function definition. + methods: Vec<(String, DefinitionId)>, + // ancestor classes, including itself. + ancestors: Vec, + }, + Function { + signature: Type, + /// Function instance to symbol mapping + /// Key: string representation of type variable values, sorted by variable ID in ascending + /// order, including type variables associated with the class. + /// Value: function symbol name. + instance_to_symbol: HashMap, + /// Function instances to annotated AST mapping + /// Key: string representation of type variable values, sorted by variable ID in ascending + /// order, including type variables associated with the class. Excluding rigid type + /// variables. + /// Value: AST annotated with types together with a unification table index. Could contain + /// rigid type variables that would be substituted when the function is instantiated. 
+ instance_to_stmt: HashMap, usize)>, + }, +} + +pub struct CodeGenTask { + pub subst: HashMap, + pub symbol_name: String, + pub body: Stmt, + pub unifier: SharedUnifier, +} + +pub struct TopLevelContext { + pub definitions: Vec>, + pub unifiers: Vec, + pub codegen_queue: SegQueue, +} diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index c6b2c394..d7729cc7 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -1,5 +1,6 @@ use super::super::location::Location; use super::super::symbol_resolver::*; +use super::super::top_level::DefinitionId; use super::super::typedef::*; use super::*; use indoc::indoc; @@ -33,6 +34,10 @@ impl SymbolResolver for Resolver { fn get_symbol_location(&mut self, _: &str) -> Option { unimplemented!() } + + fn get_function_def(&mut self, _: &str) -> DefinitionId { + unimplemented!() + } } struct TestEnvironment { @@ -48,7 +53,7 @@ struct TestEnvironment { impl TestEnvironment { pub fn basic_test_env() -> TestEnvironment { let mut unifier = Unifier::new(); - + let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: 0, fields: HashMap::new().into(), @@ -76,9 +81,9 @@ impl TestEnvironment { }); // identifier_mapping.insert("None".into(), none); let primitives = PrimitiveStore { int32, int64, float, bool, none }; - + set_primirives_magic_methods(&primitives, &mut unifier); - + let id_to_name = [ (0, "int32".to_string()), (1, "int64".to_string()), @@ -95,17 +100,18 @@ impl TestEnvironment { let mut identifier_mapping = HashMap::new(); identifier_mapping.insert("None".into(), none); - - let resolver = - Box::new(Resolver { identifier_mapping: identifier_mapping.clone(), class_names: Default::default() }) - as Box; + + let resolver = Box::new(Resolver { + identifier_mapping: identifier_mapping.clone(), + class_names: Default::default(), + }) as Box; TestEnvironment { unifier, function_data: FunctionData { resolver, bound_variables: Vec::new(), - return_type: None + return_type: None, }, primitives, id_to_name, @@ -171,7 +177,11 @@ impl TestEnvironment { })); let bar = unifier.add_ty(TypeEnum::TObj { obj_id: 6, - fields: [("a".into(), int32), ("b".into(), fun)].iter().cloned().collect::>().into(), + fields: [("a".into(), int32), ("b".into(), fun)] + .iter() + .cloned() + .collect::>() + .into(), params: Default::default(), }); identifier_mapping.insert( @@ -185,7 +195,11 @@ impl TestEnvironment { let bar2 = unifier.add_ty(TypeEnum::TObj { obj_id: 7, - fields: [("a".into(), bool), ("b".into(), fun)].iter().cloned().collect::>().into(), + fields: [("a".into(), bool), ("b".into(), fun)] + .iter() + .cloned() + .collect::>() + .into(), params: Default::default(), }); identifier_mapping.insert( @@ -362,13 +376,13 @@ fn test_basic(source: &str, mapping: HashMap<&str, &str>, virtuals: &[(&str, &st g = a // b h = a % b "}, - [("a", "int32"), - ("b", "int32"), - ("c", "int32"), - ("d", "int32"), - ("e", "int32"), - ("f", "float"), - ("g", "int32"), + [("a", "int32"), + ("b", "int32"), + ("c", "int32"), + ("d", "int32"), + ("e", "int32"), + ("f", "float"), + ("g", "int32"), ("h", "int32")].iter().cloned().collect() ; "int32")] #[test_case( @@ -382,13 +396,13 @@ fn test_basic(source: &str, mapping: HashMap<&str, &str>, virtuals: &[(&str, &st g = a // b h = a % b "}, - [("a", "float"), - ("b", "float"), - ("c", "float"), - ("d", "float"), - ("e", "float"), - ("f", "float"), - ("g", "float"), + [("a", "float"), + ("b", "float"), + ("c", "float"), + ("d", "float"), + 
("e", "float"), + ("f", "float"), + ("g", "float"), ("h", "float")].iter().cloned().collect() ; "float" )] @@ -407,13 +421,13 @@ fn test_basic(source: &str, mapping: HashMap<&str, &str>, virtuals: &[(&str, &st k = a < b l = a != b "}, - [("a", "int64"), - ("b", "int64"), - ("c", "int64"), - ("d", "int64"), - ("e", "int64"), - ("f", "float"), - ("g", "int64"), + [("a", "int64"), + ("b", "int64"), + ("c", "int64"), + ("d", "int64"), + ("e", "int64"), + ("f", "float"), + ("g", "int64"), ("h", "int64"), ("i", "bool"), ("j", "bool"), @@ -429,10 +443,10 @@ fn test_basic(source: &str, mapping: HashMap<&str, &str>, virtuals: &[(&str, &st d = not a e = a != b "}, - [("a", "bool"), - ("b", "bool"), - ("c", "bool"), - ("d", "bool"), + [("a", "bool"), + ("b", "bool"), + ("c", "bool"), + ("d", "bool"), ("e", "bool")].iter().cloned().collect() ; "boolean" )] @@ -469,4 +483,5 @@ fn test_primitive_magic_methods(source: &str, mapping: HashMap<&str, &str>) { ); assert_eq!(format!("{}: {}", k, v), format!("{}: {}", k, name)); } -} \ No newline at end of file +} + diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 20736dca..8f2567a5 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -1,9 +1,10 @@ use itertools::{chain, zip, Itertools}; -use std::{borrow::Cow, sync::Arc}; +use std::borrow::Cow; use std::cell::RefCell; use std::collections::HashMap; use std::iter::once; use std::rc::Rc; +use std::sync::{Arc, Mutex}; use super::unification_table::{UnificationKey, UnificationTable}; @@ -89,6 +90,8 @@ impl TypeEnum { } } +pub type SharedUnifier = Arc, u32)>>; + pub struct Unifier { unification_table: UnificationTable>, var_id: u32, @@ -100,6 +103,15 @@ impl Unifier { Unifier { unification_table: UnificationTable::new(), var_id: 0 } } + pub fn from_shared_unifier(unifier: &SharedUnifier) -> Unifier { + let lock = unifier.lock().unwrap(); + Unifier { unification_table: UnificationTable::from_send(&lock.0), var_id: lock.1 } + } + + pub fn get_shared_unifier(&self) -> SharedUnifier { + Arc::new(Mutex::new((self.unification_table.get_send(), self.var_id))) + } + /// Register a type to the unifier. /// Returns a key in the unification_table. pub fn add_ty(&mut self, a: TypeEnum) -> Type { @@ -373,7 +385,11 @@ impl Unifier { (TVar { meta: Record(map), id, range, .. }, TObj { fields, .. }) => { self.occur_check(a, b)?; for (k, v) in map.borrow().iter() { - let ty = fields.borrow().get(k).copied().ok_or_else(|| format!("No such attribute {}", k))?; + let ty = fields + .borrow() + .get(k) + .copied() + .ok_or_else(|| format!("No such attribute {}", k))?; self.unify(ty, *v)?; } let x = self.check_var_compatibility(*id, b, &range.borrow())?.unwrap_or(b); @@ -385,7 +401,11 @@ impl Unifier { let ty = self.get_ty(*ty); if let TObj { fields, .. } = ty.as_ref() { for (k, v) in map.borrow().iter() { - let ty = fields.borrow().get(k).copied().ok_or_else(|| format!("No such attribute {}", k))?; + let ty = fields + .borrow() + .get(k) + .copied() + .ok_or_else(|| format!("No such attribute {}", k))?; if !matches!(self.get_ty(ty).as_ref(), TFunc { .. 
}) { return Err(format!("Cannot access field {} for virtual type", k)); } @@ -659,7 +679,9 @@ impl Unifier { if need_subst { let obj_id = *obj_id; let params = self.subst_map(¶ms, mapping).unwrap_or_else(|| params.clone()); - let fields = self.subst_map(&fields.borrow(), mapping).unwrap_or_else(|| fields.borrow().clone()); + let fields = self + .subst_map(&fields.borrow(), mapping) + .unwrap_or_else(|| fields.borrow().clone()); Some(self.add_ty(TypeEnum::TObj { obj_id, params, fields: fields.into() })) } else { None diff --git a/nac3core/src/typecheck/unification_table.rs b/nac3core/src/typecheck/unification_table.rs index 588cf4bb..8c50b5f9 100644 --- a/nac3core/src/typecheck/unification_table.rs +++ b/nac3core/src/typecheck/unification_table.rs @@ -71,13 +71,13 @@ impl UnificationTable> where V: Clone, { - pub fn into_send(self) -> UnificationTable { + pub fn get_send(&self) -> UnificationTable { let values = self.values.iter().map(|v| v.as_ref().clone()).collect(); - UnificationTable { parents: self.parents, ranks: self.ranks, values } + UnificationTable { parents: self.parents.clone(), ranks: self.ranks.clone(), values } } - pub fn from_send(table: UnificationTable) -> UnificationTable> { - let values = table.values.into_iter().map(Rc::new).collect(); - UnificationTable { parents: table.parents, ranks: table.ranks, values } + pub fn from_send(table: &UnificationTable) -> UnificationTable> { + let values = table.values.iter().cloned().map(Rc::new).collect(); + UnificationTable { parents: table.parents.clone(), ranks: table.ranks.clone(), values } } } From 8452579c6748a673bdd88a6a8e0f1b7695707fb7 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 3 Aug 2021 14:11:41 +0800 Subject: [PATCH 069/131] use parking_lot RwLock The std::sync::RwLock is platform dependent, and is unfair on Linux (may starve writer) --- Cargo.lock | 1 + nac3core/Cargo.toml | 1 + nac3core/src/typecheck/top_level.rs | 13 +++++++++---- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ffdf0a46..5dc26c9f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -467,6 +467,7 @@ dependencies = [ "itertools", "num-bigint 0.3.2", "num-traits", + "parking_lot", "rustpython-parser", "test-case", ] diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index 09ebf724..e7081bf5 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -11,6 +11,7 @@ inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", feat rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } itertools = "0.10.1" crossbeam = "0.8.1" +parking_lot = "0.11.1" [dev-dependencies] test-case = "1.2.0" diff --git a/nac3core/src/typecheck/top_level.rs b/nac3core/src/typecheck/top_level.rs index 5d63a920..776e7cb8 100644 --- a/nac3core/src/typecheck/top_level.rs +++ b/nac3core/src/typecheck/top_level.rs @@ -1,8 +1,8 @@ -use std::collections::HashMap; +use std::{collections::HashMap, sync::Arc}; -use super::typedef::{SharedUnifier, Type}; +use super::typedef::{SharedUnifier, Type, Unifier}; use crossbeam::queue::SegQueue; -use crossbeam::sync::ShardedLock; +use parking_lot::RwLock; use rustpython_parser::ast::Stmt; pub struct DefinitionId(usize); @@ -45,7 +45,12 @@ pub struct CodeGenTask { } pub struct TopLevelContext { - pub definitions: Vec>, + pub definitions: Vec>, pub unifiers: Vec, pub codegen_queue: SegQueue, } + +pub struct WorkerContext { + pub unifier: Unifier, + pub top_level_ctx: Arc>, +} From d052f007fb1aed2121fc1d93cb6d75ac9470acc7 Mon Sep 17 00:00:00 2001 From: 
CrescentonC Date: Wed, 4 Aug 2021 12:03:56 +0800 Subject: [PATCH 070/131] fix typo of primitives method --- nac3core/src/typecheck/magic_methods.rs | 2 +- nac3core/src/typecheck/type_inferencer/test.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index 967b135a..6f32f86b 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -327,7 +327,7 @@ pub fn impl_eq(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { } else { unreachable!() } } -pub fn set_primirives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifier) { +pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifier) { let PrimitiveStore { int32: int32_t, int64: int64_t, diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index d7729cc7..d4449b01 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -82,7 +82,7 @@ impl TestEnvironment { // identifier_mapping.insert("None".into(), none); let primitives = PrimitiveStore { int32, int64, float, bool, none }; - set_primirives_magic_methods(&primitives, &mut unifier); + set_primitives_magic_methods(&primitives, &mut unifier); let id_to_name = [ (0, "int32".to_string()), From 99c71687a654048f6cf7345c203f17f16cbb1e49 Mon Sep 17 00:00:00 2001 From: CrescentonC Date: Wed, 4 Aug 2021 16:46:16 +0800 Subject: [PATCH 071/131] fixed: bitwise shift rhs can only be int32; better structured code --- nac3core/src/typecheck/magic_methods.rs | 318 ++++++++---------------- 1 file changed, 104 insertions(+), 214 deletions(-) diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index 6f32f86b..7005f164 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -1,6 +1,8 @@ use std::borrow::Borrow; use std::collections::HashMap; use rustpython_parser::ast::{Cmpop, Operator, Unaryop}; +use crate::typecheck::{type_inferencer::*, typedef::{FunSignature, FuncArg, TypeEnum, Unifier, Type}}; +use rustpython_parser::ast; pub fn binop_name(op: &Operator) -> &'static str { match op { @@ -59,18 +61,9 @@ pub fn comparison_name(op: &Cmpop) -> Option<&'static str> { } } -use crate::typecheck::{type_inferencer::*, typedef::{FunSignature, FuncArg, TypeEnum, Unifier, Type}}; -use rustpython_parser::ast; - -/// Add, Sub, Mult, Pow -pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { - if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { - for op in &[ - ast::Operator::Add, - ast::Operator::Sub, - ast::Operator::Mult, - ast::Operator::Pow, - ] { +pub fn impl_binop(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type, ops: &[ast::Operator]) { + if let TypeEnum::TObj {fields, ..} = unifier.get_ty(ty).borrow() { + for op in ops { fields.borrow_mut().insert( binop_name(op).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { @@ -97,152 +90,16 @@ pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: })) ); } - } else { unreachable!() } + } else { unreachable!("") } } -/// LShift, RShift, BitOr, BitXor, BitAnd -pub fn impl_bitwise_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - if let TypeEnum::TObj {fields, .. 
} = unifier.get_ty(ty).borrow() { - for op in &[ - ast::Operator::LShift, - ast::Operator::RShift, - ast::Operator::BitOr, - ast::Operator::BitXor, - ast::Operator::BitAnd, - ] { +pub fn impl_unaryop(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type, ret_ty: Type, ops: &[ast::Unaryop]) { + if let TypeEnum::TObj {fields, ..} = unifier.get_ty(ty).borrow() { + for op in ops { fields.borrow_mut().insert( - binop_name(op).into(), + unaryop_name(op).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ty, - vars: HashMap::new(), - args: vec![FuncArg { - ty, - is_optional: false, - name: "other".into() - }] - })) - ); - - fields.borrow_mut().insert( - binop_assign_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.none, - vars: HashMap::new(), - args: vec![FuncArg { - ty, - is_optional: false, - name: "other".into() - }] - })) - ); - } - } -} - -/// Div -pub fn impl_div(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type) { - if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { - fields.borrow_mut().insert( - binop_name(&ast::Operator::Div).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature{ - ret: store.float, - vars: HashMap::new(), - args: vec![FuncArg { - ty: other_ty, - is_optional: false, - name: "other".into() - }] - })) - ); - - fields.borrow_mut().insert( - binop_assign_name(&ast::Operator::Div).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature{ - ret: store.none, - vars: HashMap::new(), - args: vec![FuncArg { - ty: other_ty, - is_optional: false, - name: "other".into() - }] - })) - ); - } else { unreachable!() } -} - -/// FloorDiv -pub fn impl_floordiv(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { - if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { - fields.borrow_mut().insert( - binop_name(&ast::Operator::FloorDiv).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature{ - ret: ret_ty, - vars: HashMap::new(), - args: vec![FuncArg { - ty: other_ty, - is_optional: false, - name: "other".into() - }] - })) - ); - - fields.borrow_mut().insert( - binop_assign_name(&ast::Operator::FloorDiv).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature{ - ret: store.none, - vars: HashMap::new(), - args: vec![FuncArg { - ty: other_ty, - is_optional: false, - name: "other".into() - }] - })) - ); - } else { unreachable!() } -} - -/// Mod -pub fn impl_mod(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { - if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { - fields.borrow_mut().insert( - binop_name(&ast::Operator::Mod).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ret_ty, - vars: HashMap::new(), - args: vec![FuncArg { - ty: other_ty, - is_optional: false, - name: "other".into() - }] - })) - ); - - fields.borrow_mut().insert( - binop_assign_name(&ast::Operator::Mod).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.none, - vars: HashMap::new(), - args: vec![FuncArg { - ty: other_ty, - is_optional: false, - name: "other".into() - }] - })) - ); - } else { unreachable!() } -} - -/// UAdd, USub -pub fn impl_unary_op(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type) { - if let TypeEnum::TObj {fields, .. 
} = unifier.get_ty(ty).borrow() { - for op in &[ - ast::Unaryop::UAdd, - ast::Unaryop::USub - ] { - fields.borrow_mut().insert( - unaryop_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ty, + ret: ret_ty, vars: HashMap::new(), args: vec![] })) @@ -251,45 +108,11 @@ pub fn impl_unary_op(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type) { } else { unreachable!() } } -/// Invert -pub fn impl_invert(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type) { - if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { - fields.borrow_mut().insert( - unaryop_name(&ast::Unaryop::Invert).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ty, - vars: HashMap::new(), - args: vec![] - })) - ); - } -} - -/// Not -pub fn impl_not(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { - fields.borrow_mut().insert( - unaryop_name(&ast::Unaryop::Not).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.bool, - vars: HashMap::new(), - args: vec![] - })) - ); - } else { unreachable!() } -} - -/// Lt, LtE, Gt, GtE -pub fn impl_comparison(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type) { - if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { - for op in &[ - ast::Cmpop::Lt, - ast::Cmpop::LtE, - ast::Cmpop::Gt, - ast::Cmpop::GtE, - ] { +pub fn impl_cmpop(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ops: &[ast::Cmpop]) { + if let TypeEnum::TObj {fields, ..} = unifier.get_ty(ty).borrow() { + for op in ops { fields.borrow_mut().insert( - comparison_name(op).unwrap().into(), + comparison_name(op).unwrap().into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: store.bool, vars: HashMap::new(), @@ -304,27 +127,92 @@ pub fn impl_comparison(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, } else { unreachable!() } } +/// Add, Sub, Mult, Pow +pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { + impl_binop(unifier, store, ty, other_ty, ret_ty, &[ + ast::Operator::Add, + ast::Operator::Sub, + ast::Operator::Mult, + ast::Operator::Pow, + ]) +} + +/// BitOr, BitXor, BitAnd +pub fn impl_bitwise_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { + impl_binop(unifier, store, ty, ty, ty, &[ + ast::Operator::BitAnd, + ast::Operator::BitOr, + ast::Operator::BitXor, + ]) +} + +/// LShift, RShift +pub fn impl_bitwise_shift(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { + impl_binop(unifier, store, ty, store.int32, ty, &[ + ast::Operator::LShift, + ast::Operator::RShift, + ]) +} + +/// Div +pub fn impl_div(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type) { + impl_binop(unifier, store, ty, other_ty, store.float, &[ + ast::Operator::Div, + ]) +} + +/// FloorDiv +pub fn impl_floordiv(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { + impl_binop(unifier, store, ty, other_ty, ret_ty, &[ + ast::Operator::FloorDiv, + ]) +} + +/// Mod +pub fn impl_mod(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { + impl_binop(unifier, store, ty, other_ty, ret_ty, &[ + ast::Operator::Mod, + ]) +} + +/// UAdd, USub +pub fn impl_sign(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { + impl_unaryop(unifier, store, ty, ty, &[ + ast::Unaryop::UAdd, + ast::Unaryop::USub, + ]) +} + +/// Invert +pub fn impl_invert(unifier: &mut 
Unifier, store: &PrimitiveStore, ty: Type) { + impl_unaryop(unifier, store, ty, ty, &[ + ast::Unaryop::Invert, + ]) +} + +/// Not +pub fn impl_not(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { + impl_unaryop(unifier, store, ty, store.bool, &[ + ast::Unaryop::Not, + ]) +} + +/// Lt, LtE, Gt, GtE +pub fn impl_comparison(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type) { + impl_cmpop(unifier, store, ty, other_ty, &[ + ast::Cmpop::Lt, + ast::Cmpop::Gt, + ast::Cmpop::LtE, + ast::Cmpop::GtE, + ]) +} + /// Eq, NotEq pub fn impl_eq(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - if let TypeEnum::TObj {fields, .. } = unifier.get_ty(ty).borrow() { - for op in &[ - ast::Cmpop::Eq, - ast::Cmpop::NotEq, - ] { - fields.borrow_mut().insert( - comparison_name(op).unwrap().into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.bool, - vars: HashMap::new(), - args: vec![FuncArg { - ty, - is_optional: false, - name: "other".into() - }] - })) - ); - } - } else { unreachable!() } + impl_cmpop(unifier, store, ty, ty, &[ + ast::Cmpop::Eq, + ast::Cmpop::NotEq, + ]) } pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifier) { @@ -338,10 +226,11 @@ pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie /* int32 ======== */ impl_basic_arithmetic(unifier, store, int32_t, int32_t, int32_t); impl_bitwise_arithmetic(unifier, store, int32_t); + impl_bitwise_shift(unifier, store, int32_t); impl_div(unifier, store, int32_t, int32_t); impl_floordiv(unifier, store, int32_t, int32_t, int32_t); impl_mod(unifier, store, int32_t, int32_t, int32_t); - impl_unary_op(unifier, store, int32_t); + impl_sign(unifier, store, int32_t); impl_invert(unifier, store, int32_t); impl_not(unifier, store, int32_t); impl_comparison(unifier, store, int32_t, int32_t); @@ -350,10 +239,11 @@ pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie /* int64 ======== */ impl_basic_arithmetic(unifier, store, int64_t, int64_t, int64_t); impl_bitwise_arithmetic(unifier, store, int64_t); + impl_bitwise_shift(unifier, store, int64_t); impl_div(unifier, store, int64_t, int64_t); impl_floordiv(unifier, store, int64_t, int64_t, int64_t); impl_mod(unifier, store, int64_t, int64_t, int64_t); - impl_unary_op(unifier, store, int64_t); + impl_sign(unifier, store, int64_t); impl_invert(unifier, store, int64_t); impl_not(unifier, store, int64_t); impl_comparison(unifier, store, int64_t, int64_t); @@ -364,7 +254,7 @@ pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie impl_div(unifier, store, float_t, float_t); impl_floordiv(unifier, store, float_t, float_t, float_t); impl_mod(unifier, store, float_t, float_t, float_t); - impl_unary_op(unifier, store, float_t); + impl_sign(unifier, store, float_t); impl_not(unifier, store, float_t); impl_comparison(unifier, store, float_t, float_t); impl_eq(unifier, store, float_t); From c0227210dffb3bd4229b2170d21c11eca0a3c3a5 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Thu, 5 Aug 2021 11:55:46 +0800 Subject: [PATCH 072/131] bit shift lhs rhs same type; float ** int and float ** float both supported --- nac3core/src/typecheck/magic_methods.rs | 96 ++++++++++++------- .../src/typecheck/type_inferencer/test.rs | 8 +- 2 files changed, 66 insertions(+), 38 deletions(-) diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index 7005f164..4c648699 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ 
b/nac3core/src/typecheck/magic_methods.rs @@ -61,33 +61,47 @@ pub fn comparison_name(op: &Cmpop) -> Option<&'static str> { } } -pub fn impl_binop(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type, ops: &[ast::Operator]) { +pub fn impl_binop(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type, ops: &[ast::Operator]) { if let TypeEnum::TObj {fields, ..} = unifier.get_ty(ty).borrow() { for op in ops { fields.borrow_mut().insert( binop_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ret_ty, - vars: HashMap::new(), - args: vec![FuncArg { - ty: other_ty, - is_optional: false, - name: "other".into() - }] - })) + { + let other = if other_ty.len() == 1 { + other_ty[0] + } else { + unifier.get_fresh_var_with_range(other_ty).0 + }; + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: ret_ty, + vars: HashMap::new(), + args: vec![FuncArg { + ty: other, + is_optional: false, + name: "other".into() + }] + })) + } ); fields.borrow_mut().insert( binop_assign_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.none, - vars: HashMap::new(), - args: vec![FuncArg { - ty: other_ty, - is_optional: false, - name: "other".into() - }] - })) + { + let other = if other_ty.len() == 1 { + other_ty[0] + } else { + unifier.get_fresh_var_with_range(other_ty).0 + }; + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: ret_ty, + vars: HashMap::new(), + args: vec![FuncArg { + ty: other, + is_optional: false, + name: "other".into() + }] + })) + } ); } } else { unreachable!("") } @@ -128,18 +142,23 @@ pub fn impl_cmpop(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other } /// Add, Sub, Mult, Pow -pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { +pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type) { impl_binop(unifier, store, ty, other_ty, ret_ty, &[ ast::Operator::Add, ast::Operator::Sub, ast::Operator::Mult, + ]) +} + +pub fn impl_pow(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type) { + impl_binop(unifier, store, ty, other_ty, ret_ty, &[ ast::Operator::Pow, ]) } /// BitOr, BitXor, BitAnd pub fn impl_bitwise_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - impl_binop(unifier, store, ty, ty, ty, &[ + impl_binop(unifier, store, ty, &[ty], ty, &[ ast::Operator::BitAnd, ast::Operator::BitOr, ast::Operator::BitXor, @@ -148,28 +167,28 @@ pub fn impl_bitwise_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty /// LShift, RShift pub fn impl_bitwise_shift(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - impl_binop(unifier, store, ty, store.int32, ty, &[ + impl_binop(unifier, store, ty, &[ty], ty, &[ ast::Operator::LShift, ast::Operator::RShift, ]) } /// Div -pub fn impl_div(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type) { +pub fn impl_div(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type]) { impl_binop(unifier, store, ty, other_ty, store.float, &[ ast::Operator::Div, ]) } /// FloorDiv -pub fn impl_floordiv(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { +pub fn impl_floordiv(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type) { impl_binop(unifier, store, ty, other_ty, ret_ty, &[ ast::Operator::FloorDiv, ]) } /// Mod -pub fn impl_mod(unifier: &mut Unifier, 
store: &PrimitiveStore, ty: Type, other_ty: Type, ret_ty: Type) { +pub fn impl_mod(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type) { impl_binop(unifier, store, ty, other_ty, ret_ty, &[ ast::Operator::Mod, ]) @@ -224,12 +243,13 @@ pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie .. } = *store; /* int32 ======== */ - impl_basic_arithmetic(unifier, store, int32_t, int32_t, int32_t); + impl_basic_arithmetic(unifier, store, int32_t, &[int32_t], int32_t); + impl_pow(unifier, store, int32_t, &[int32_t], int32_t); impl_bitwise_arithmetic(unifier, store, int32_t); impl_bitwise_shift(unifier, store, int32_t); - impl_div(unifier, store, int32_t, int32_t); - impl_floordiv(unifier, store, int32_t, int32_t, int32_t); - impl_mod(unifier, store, int32_t, int32_t, int32_t); + impl_div(unifier, store, int32_t, &[int32_t]); + impl_floordiv(unifier, store, int32_t, &[int32_t], int32_t); + impl_mod(unifier, store, int32_t, &[int32_t], int32_t); impl_sign(unifier, store, int32_t); impl_invert(unifier, store, int32_t); impl_not(unifier, store, int32_t); @@ -237,12 +257,13 @@ pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie impl_eq(unifier, store, int32_t); /* int64 ======== */ - impl_basic_arithmetic(unifier, store, int64_t, int64_t, int64_t); + impl_basic_arithmetic(unifier, store, int64_t, &[int64_t], int64_t); + impl_pow(unifier, store, int64_t, &[int64_t], int64_t); impl_bitwise_arithmetic(unifier, store, int64_t); impl_bitwise_shift(unifier, store, int64_t); - impl_div(unifier, store, int64_t, int64_t); - impl_floordiv(unifier, store, int64_t, int64_t, int64_t); - impl_mod(unifier, store, int64_t, int64_t, int64_t); + impl_div(unifier, store, int64_t, &[int64_t]); + impl_floordiv(unifier, store, int64_t, &[int64_t], int64_t); + impl_mod(unifier, store, int64_t, &[int64_t], int64_t); impl_sign(unifier, store, int64_t); impl_invert(unifier, store, int64_t); impl_not(unifier, store, int64_t); @@ -250,10 +271,11 @@ pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie impl_eq(unifier, store, int64_t); /* float ======== */ - impl_basic_arithmetic(unifier, store, float_t, float_t, float_t); - impl_div(unifier, store, float_t, float_t); - impl_floordiv(unifier, store, float_t, float_t, float_t); - impl_mod(unifier, store, float_t, float_t, float_t); + impl_basic_arithmetic(unifier, store, float_t, &[float_t], float_t); + impl_pow(unifier, store, float_t, &[int32_t, float_t], float_t); + impl_div(unifier, store, float_t, &[float_t]); + impl_floordiv(unifier, store, float_t, &[float_t], float_t); + impl_mod(unifier, store, float_t, &[float_t], float_t); impl_sign(unifier, store, float_t); impl_not(unifier, store, float_t); impl_comparison(unifier, store, float_t, float_t); diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index d4449b01..203b2c98 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -395,6 +395,9 @@ fn test_basic(source: &str, mapping: HashMap<&str, &str>, virtuals: &[(&str, &st f = a / b g = a // b h = a % b + i = a ** b + ii = 3 + j = a ** b "}, [("a", "float"), ("b", "float"), @@ -403,7 +406,10 @@ fn test_basic(source: &str, mapping: HashMap<&str, &str>, virtuals: &[(&str, &st ("e", "float"), ("f", "float"), ("g", "float"), - ("h", "float")].iter().cloned().collect() + ("h", "float"), + ("i", "float"), + ("ii", "int32"), + ("j", 
"float")].iter().cloned().collect() ; "float" )] #[test_case( From 3dcd8463023fb32ade0bc517b46836b9604b557a Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 5 Aug 2021 14:53:47 +0800 Subject: [PATCH 073/131] added rayon dependency --- Cargo.lock | 36 ++++++++++++++++++++++++++++++++++++ nac3core/Cargo.toml | 1 + 2 files changed, 37 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 5dc26c9f..ff33e7cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -468,6 +468,7 @@ dependencies = [ "num-bigint 0.3.2", "num-traits", "parking_lot", + "rayon", "rustpython-parser", "test-case", ] @@ -538,6 +539,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.8.0" @@ -773,6 +784,31 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rayon" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + [[package]] name = "redox_syscall" version = "0.2.9" diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index e7081bf5..c0581cc1 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -12,6 +12,7 @@ rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = itertools = "0.10.1" crossbeam = "0.8.1" parking_lot = "0.11.1" +rayon = "1.5.1" [dev-dependencies] test-case = "1.2.0" From b01d0f6fbb9892aacefdf0e6b96732fe56f4addc Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 5 Aug 2021 14:55:09 +0800 Subject: [PATCH 074/131] formatting --- nac3core/src/typecheck/function_check.rs | 40 +++++-------------- nac3core/src/typecheck/location.rs | 2 +- nac3core/src/typecheck/symbol_resolver.rs | 2 +- nac3core/src/typecheck/type_inferencer/mod.rs | 20 +++++----- 4 files changed, 24 insertions(+), 40 deletions(-) diff --git a/nac3core/src/typecheck/function_check.rs b/nac3core/src/typecheck/function_check.rs index c114f67f..92509ccc 100644 --- a/nac3core/src/typecheck/function_check.rs +++ b/nac3core/src/typecheck/function_check.rs @@ -22,9 +22,7 @@ impl<'a> Inferencer<'a> { } Ok(()) } - _ => { - self.check_expr(pattern, defined_identifiers) - } + _ => self.check_expr(pattern, defined_identifiers), } } @@ -46,7 +44,10 @@ impl<'a> Inferencer<'a> { match &expr.node { ExprKind::Name { id, .. } => { if !defined_identifiers.contains(id) { - return Err(format!("unknown identifier {} (use before def?) at {}", id, expr.location)); + return Err(format!( + "unknown identifier {} (use before def?) at {}", + id, expr.location + )); } } ExprKind::List { elts, .. } @@ -66,9 +67,7 @@ impl<'a> Inferencer<'a> { ExprKind::UnaryOp { operand, .. } => { self.check_expr(operand, defined_identifiers)?; } - ExprKind::Compare { - left, comparators, .. - } => { + ExprKind::Compare { left, comparators, .. 
} => { for elt in once(left.as_ref()).chain(comparators.iter()) { self.check_expr(elt, defined_identifiers)?; } @@ -83,10 +82,7 @@ impl<'a> Inferencer<'a> { self.check_expr(orelse, defined_identifiers)?; } ExprKind::Slice { lower, upper, step } => { - for elt in [lower.as_ref(), upper.as_ref(), step.as_ref()] - .iter() - .flatten() - { + for elt in [lower.as_ref(), upper.as_ref(), step.as_ref()].iter().flatten() { self.check_expr(elt, defined_identifiers)?; } } @@ -99,13 +95,9 @@ impl<'a> Inferencer<'a> { } self.check_expr(body, &defined_identifiers)?; } - ExprKind::ListComp { - elt, generators, .. - } => { + ExprKind::ListComp { elt, generators, .. } => { // in our type inference stage, we already make sure that there is only 1 generator - let ast::Comprehension { - target, iter, ifs, .. - } = &generators[0]; + let ast::Comprehension { target, iter, ifs, .. } = &generators[0]; self.check_expr(iter, defined_identifiers)?; let mut defined_identifiers = defined_identifiers.to_vec(); self.check_pattern(target, &mut defined_identifiers)?; @@ -113,11 +105,7 @@ impl<'a> Inferencer<'a> { self.check_expr(term, &defined_identifiers)?; } } - ExprKind::Call { - func, - args, - keywords, - } => { + ExprKind::Call { func, args, keywords } => { for expr in once(func.as_ref()) .chain(args.iter()) .chain(keywords.iter().map(|v| v.node.value.as_ref())) @@ -141,13 +129,7 @@ impl<'a> Inferencer<'a> { defined_identifiers: &mut Vec, ) -> Result { match &stmt.node { - StmtKind::For { - target, - iter, - body, - orelse, - .. - } => { + StmtKind::For { target, iter, body, orelse, .. } => { self.check_expr(iter, defined_identifiers)?; for stmt in orelse.iter() { self.check_stmt(stmt, defined_identifiers)?; diff --git a/nac3core/src/typecheck/location.rs b/nac3core/src/typecheck/location.rs index 0165ef0a..424336f2 100644 --- a/nac3core/src/typecheck/location.rs +++ b/nac3core/src/typecheck/location.rs @@ -7,7 +7,7 @@ pub struct FileID(u32); #[derive(Clone, Copy, PartialEq)] pub enum Location { CodeRange(FileID, ast::Location), - Builtin + Builtin, } pub struct FileRegistry { diff --git a/nac3core/src/typecheck/symbol_resolver.rs b/nac3core/src/typecheck/symbol_resolver.rs index 33d56201..fd2ad3f4 100644 --- a/nac3core/src/typecheck/symbol_resolver.rs +++ b/nac3core/src/typecheck/symbol_resolver.rs @@ -1,6 +1,6 @@ use super::location::Location; -use super::top_level::DefinitionId; use super::typedef::Type; +use crate::top_level::DefinitionId; use rustpython_parser::ast::Expr; pub enum SymbolValue<'a> { diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index f474a66f..95ea2027 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -1,7 +1,7 @@ -use std::{cell::RefCell, sync::Arc}; use std::collections::HashMap; -use std::convert::{TryInto, From}; +use std::convert::{From, TryInto}; use std::iter::once; +use std::{cell::RefCell, sync::Arc}; use super::magic_methods::*; use super::symbol_resolver::SymbolResolver; @@ -24,10 +24,7 @@ pub struct CodeLocation { impl From for CodeLocation { fn from(loc: Location) -> CodeLocation { - CodeLocation { - row: loc.row(), - col: loc.column() - } + CodeLocation { row: loc.row(), col: loc.column() } } } @@ -190,8 +187,12 @@ impl<'a> Inferencer<'a> { params: Vec, ret: Type, ) -> InferenceResult { - let call = - Arc::new(Call { posargs: params, kwargs: HashMap::new(), ret, fun: RefCell::new(None) }); + let call = Arc::new(Call { + posargs: params, + kwargs: 
HashMap::new(), + ret, + fun: RefCell::new(None), + }); let call = self.unifier.add_ty(TypeEnum::TCall(vec![call].into())); let fields = once((method, call)).collect(); let record = self.unifier.add_record(fields); @@ -333,7 +334,8 @@ impl<'a> Inferencer<'a> { } let arg0 = self.fold_expr(args.remove(0))?; let ty = if let Some(arg) = args.pop() { - self.function_data.resolver + self.function_data + .resolver .parse_type_name(&arg) .ok_or_else(|| "error parsing type".to_string())? } else { From 29286210b5c822d3394698b232104714194cbae2 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 5 Aug 2021 14:55:23 +0800 Subject: [PATCH 075/131] implementing codegen --- nac3core/src/codegen/expr.rs | 389 ++++++++++++++++++ nac3core/src/codegen/helper.rs | 1 + nac3core/src/codegen/mod.rs | 2 + nac3core/src/lib.rs | 4 +- nac3core/src/{typecheck => }/top_level.rs | 25 +- nac3core/src/typecheck/mod.rs | 2 - .../src/typecheck/type_inferencer/test.rs | 3 +- nac3core/src/typecheck/typedef/mod.rs | 9 + nac3core/src/typecheck/typedef/test.rs | 12 +- nac3core/src/typecheck/unification_table.rs | 4 + 10 files changed, 434 insertions(+), 17 deletions(-) create mode 100644 nac3core/src/codegen/expr.rs create mode 100644 nac3core/src/codegen/helper.rs create mode 100644 nac3core/src/codegen/mod.rs rename nac3core/src/{typecheck => }/top_level.rs (67%) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs new file mode 100644 index 00000000..77b80980 --- /dev/null +++ b/nac3core/src/codegen/expr.rs @@ -0,0 +1,389 @@ +use std::{convert::TryInto, iter::once}; + +use crate::top_level::{CodeGenContext, TopLevelDef}; +use crate::typecheck::typedef::{Type, TypeEnum}; +use inkwell::{types::BasicType, values::BasicValueEnum}; +use itertools::{chain, izip, zip, Itertools}; +use rustpython_parser::ast::{self, Boolop, Constant, Expr, ExprKind, Operator}; + +impl<'ctx> CodeGenContext<'ctx> { + fn get_attr_index(&mut self, ty: Type, attr: &str) -> usize { + let obj_id = match &*self.unifier.get_ty(ty) { + TypeEnum::TObj { obj_id, .. } => *obj_id, + // we cannot have other types, virtual type should be handled by function calls + _ => unreachable!(), + }; + let def = &self.top_level.definitions.read()[obj_id]; + let index = if let TopLevelDef::Class { fields, .. 
} = &*def.read() { + fields.iter().find_position(|x| x.0 == attr).unwrap().0 + } else { + unreachable!() + }; + index + } + + fn gen_const(&mut self, value: &Constant, ty: Type) -> BasicValueEnum<'ctx> { + match value { + Constant::Bool(v) => { + assert!(self.unifier.unioned(ty, self.top_level.primitives.bool)); + let ty = self.ctx.bool_type(); + ty.const_int(if *v { 1 } else { 0 }, false).into() + } + Constant::Int(v) => { + let ty = if self.unifier.unioned(ty, self.top_level.primitives.int32) { + self.ctx.i32_type() + } else if self.unifier.unioned(ty, self.top_level.primitives.int64) { + self.ctx.i64_type() + } else { + unreachable!(); + }; + ty.const_int(v.try_into().unwrap(), false).into() + } + Constant::Float(v) => { + assert!(self.unifier.unioned(ty, self.top_level.primitives.float)); + let ty = self.ctx.f64_type(); + ty.const_float(*v).into() + } + Constant::Tuple(v) => { + let ty = self.unifier.get_ty(ty); + let types = + if let TypeEnum::TTuple { ty } = &*ty { ty.clone() } else { unreachable!() }; + let values = zip(types.into_iter(), v.iter()) + .map(|(ty, v)| self.gen_const(v, ty)) + .collect_vec(); + let types = values.iter().map(BasicValueEnum::get_type).collect_vec(); + let ty = self.ctx.struct_type(&types, false); + ty.const_named_struct(&values).into() + } + _ => unimplemented!(), + } + } + + fn gen_int_ops( + &mut self, + op: &Operator, + lhs: BasicValueEnum<'ctx>, + rhs: BasicValueEnum<'ctx>, + ) -> BasicValueEnum<'ctx> { + let (lhs, rhs) = + if let (BasicValueEnum::IntValue(lhs), BasicValueEnum::IntValue(rhs)) = (lhs, rhs) { + (lhs, rhs) + } else { + unreachable!() + }; + match op { + Operator::Add => self.builder.build_int_add(lhs, rhs, "add").into(), + Operator::Sub => self.builder.build_int_sub(lhs, rhs, "sub").into(), + Operator::Mult => self.builder.build_int_mul(lhs, rhs, "mul").into(), + Operator::Div => { + let float = self.ctx.f64_type(); + let left = self.builder.build_signed_int_to_float(lhs, float, "i2f"); + let right = self.builder.build_signed_int_to_float(rhs, float, "i2f"); + self.builder.build_float_div(left, right, "fdiv").into() + } + Operator::Mod => self.builder.build_int_signed_rem(lhs, rhs, "mod").into(), + Operator::BitOr => self.builder.build_or(lhs, rhs, "or").into(), + Operator::BitXor => self.builder.build_xor(lhs, rhs, "xor").into(), + Operator::BitAnd => self.builder.build_and(lhs, rhs, "and").into(), + Operator::LShift => self.builder.build_left_shift(lhs, rhs, "lshift").into(), + Operator::RShift => self.builder.build_right_shift(lhs, rhs, true, "rshift").into(), + Operator::FloorDiv => self.builder.build_int_signed_div(lhs, rhs, "floordiv").into(), + // special implementation? 
+ Operator::Pow => unimplemented!(), + Operator::MatMult => unreachable!(), + } + } + + fn gen_float_ops( + &mut self, + op: &Operator, + lhs: BasicValueEnum<'ctx>, + rhs: BasicValueEnum<'ctx>, + ) -> BasicValueEnum<'ctx> { + let (lhs, rhs) = if let (BasicValueEnum::FloatValue(lhs), BasicValueEnum::FloatValue(rhs)) = + (lhs, rhs) + { + (lhs, rhs) + } else { + unreachable!() + }; + match op { + Operator::Add => self.builder.build_float_add(lhs, rhs, "fadd").into(), + Operator::Sub => self.builder.build_float_sub(lhs, rhs, "fsub").into(), + Operator::Mult => self.builder.build_float_mul(lhs, rhs, "fmul").into(), + Operator::Div => self.builder.build_float_div(lhs, rhs, "fdiv").into(), + Operator::Mod => self.builder.build_float_rem(lhs, rhs, "fmod").into(), + Operator::FloorDiv => { + let div = self.builder.build_float_div(lhs, rhs, "fdiv"); + let floor_intrinsic = + self.module.get_function("llvm.floor.f64").unwrap_or_else(|| { + let float = self.ctx.f64_type(); + let fn_type = float.fn_type(&[float.into()], false); + self.module.add_function("llvm.floor.f64", fn_type, None) + }); + self.builder + .build_call(floor_intrinsic, &[div.into()], "floor") + .try_as_basic_value() + .left() + .unwrap() + } + // special implementation? + _ => unimplemented!(), + } + } + + pub fn gen_expr(&mut self, expr: &Expr>) -> BasicValueEnum<'ctx> { + let zero = self.ctx.i32_type().const_int(0, false); + let primitives = &self.top_level.primitives; + match &expr.node { + ExprKind::Constant { value, .. } => { + let ty = expr.custom.clone().unwrap(); + self.gen_const(value, ty) + } + ExprKind::Name { id, .. } => { + let ptr = self.var_assignment.get(id).unwrap(); + self.builder.build_load(*ptr, "load") + } + ExprKind::List { elts, .. } => { + // this shall be optimized later for constant primitive lists... + let elements = elts.iter().map(|x| self.gen_expr(x)).collect_vec(); + let ty = if elements.is_empty() { + self.ctx.i32_type().into() + } else { + elements[0].get_type() + }; + // this length includes the leading length element + let arr_ty = self.ctx.struct_type( + &[self.ctx.i32_type().into(), ty.array_type(elements.len() as u32).into()], + false, + ); + let arr_ptr = self.builder.build_alloca(arr_ty, "tmparr"); + unsafe { + let len_ptr = arr_ptr + .const_in_bounds_gep(&[zero, self.ctx.i32_type().const_int(0u64, false)]); + self.builder.build_store( + len_ptr, + self.ctx.i32_type().const_int(elements.len() as u64, false), + ); + let arr_offset = self.ctx.i32_type().const_int(1, false); + for (i, v) in elements.iter().enumerate() { + let ptr = self.builder.build_in_bounds_gep( + arr_ptr, + &[zero, arr_offset, self.ctx.i32_type().const_int(i as u64, false)], + "arr_element", + ); + self.builder.build_store(ptr, *v); + } + } + arr_ptr.into() + } + ExprKind::Tuple { elts, .. } => { + let element_val = elts.iter().map(|x| self.gen_expr(x)).collect_vec(); + let element_ty = element_val.iter().map(BasicValueEnum::get_type).collect_vec(); + let tuple_ty = self.ctx.struct_type(&element_ty, false); + let tuple_ptr = self.builder.build_alloca(tuple_ty, "tuple"); + for (i, v) in element_val.into_iter().enumerate() { + unsafe { + let ptr = tuple_ptr.const_in_bounds_gep(&[ + zero, + self.ctx.i32_type().const_int(i as u64, false), + ]); + self.builder.build_store(ptr, v); + } + } + tuple_ptr.into() + } + ExprKind::Attribute { value, attr, .. 
} => { + // note that we would handle class methods directly in calls + let index = self.get_attr_index(value.custom.unwrap(), attr); + let val = self.gen_expr(value); + let ptr = if let BasicValueEnum::PointerValue(v) = val { + v + } else { + unreachable!(); + }; + unsafe { + let ptr = ptr.const_in_bounds_gep(&[ + zero, + self.ctx.i32_type().const_int(index as u64, false), + ]); + self.builder.build_load(ptr, "field") + } + } + ExprKind::BoolOp { op, values } => { + // requires conditional branches for short-circuiting... + let left = if let BasicValueEnum::IntValue(left) = self.gen_expr(&values[0]) { + left + } else { + unreachable!() + }; + let current = self.builder.get_insert_block().unwrap().get_parent().unwrap(); + let a_bb = self.ctx.append_basic_block(current, "a"); + let b_bb = self.ctx.append_basic_block(current, "b"); + let cont_bb = self.ctx.append_basic_block(current, "cont"); + self.builder.build_conditional_branch(left, a_bb, b_bb); + let (a, b) = match op { + Boolop::Or => { + self.builder.position_at_end(a_bb); + let a = self.ctx.bool_type().const_int(1, false); + self.builder.build_unconditional_branch(cont_bb); + self.builder.position_at_end(b_bb); + let b = if let BasicValueEnum::IntValue(b) = self.gen_expr(&values[1]) { + b + } else { + unreachable!() + }; + self.builder.build_unconditional_branch(cont_bb); + (a, b) + } + Boolop::And => { + self.builder.position_at_end(a_bb); + let a = if let BasicValueEnum::IntValue(a) = self.gen_expr(&values[1]) { + a + } else { + unreachable!() + }; + self.builder.build_unconditional_branch(cont_bb); + self.builder.position_at_end(b_bb); + let b = self.ctx.bool_type().const_int(0, false); + self.builder.build_unconditional_branch(cont_bb); + (a, b) + } + }; + self.builder.position_at_end(cont_bb); + let phi = self.builder.build_phi(self.ctx.bool_type(), "phi"); + phi.add_incoming(&[(&a, a_bb), (&b, b_bb)]); + phi.as_basic_value() + } + ExprKind::BinOp { op, left, right } => { + let ty1 = self.unifier.get_representative(left.custom.unwrap()); + let ty2 = self.unifier.get_representative(left.custom.unwrap()); + let left = self.gen_expr(left); + let right = self.gen_expr(right); + + // we can directly compare the types, because we've got their representatives + // which would be unchanged until further unification, which we would never do + // when doing code generation for function instances + if ty1 != ty2 { + unimplemented!() + } else if [primitives.int32, primitives.int64].contains(&ty1) { + self.gen_int_ops(op, left, right) + } else if primitives.float == ty1 { + self.gen_float_ops(op, left, right) + } else { + unimplemented!() + } + } + ExprKind::UnaryOp { op, operand } => { + let ty = self.unifier.get_representative(operand.custom.unwrap()); + let val = self.gen_expr(operand); + if ty == primitives.bool { + let val = + if let BasicValueEnum::IntValue(val) = val { val } else { unreachable!() }; + match op { + ast::Unaryop::Invert | ast::Unaryop::Not => { + self.builder.build_not(val, "not").into() + } + _ => val.into(), + } + } else if [primitives.int32, primitives.int64].contains(&ty) { + let val = + if let BasicValueEnum::IntValue(val) = val { val } else { unreachable!() }; + match op { + ast::Unaryop::USub => self.builder.build_int_neg(val, "neg").into(), + ast::Unaryop::Invert => self.builder.build_not(val, "not").into(), + ast::Unaryop::Not => self + .builder + .build_int_compare( + inkwell::IntPredicate::EQ, + val, + val.get_type().const_zero(), + "not", + ) + .into(), + _ => val.into(), + } + } else if ty == 
primitives.float { + let val = if let BasicValueEnum::FloatValue(val) = val { + val + } else { + unreachable!() + }; + match op { + ast::Unaryop::USub => self.builder.build_float_neg(val, "neg").into(), + ast::Unaryop::Not => self + .builder + .build_float_compare( + inkwell::FloatPredicate::OEQ, + val, + val.get_type().const_zero(), + "not", + ) + .into(), + _ => val.into(), + } + } else { + unimplemented!() + } + } + ExprKind::Compare { left, ops, comparators } => { + izip!( + chain(once(left.as_ref()), comparators.iter()), + comparators.iter(), + ops.iter(), + ) + .fold(None, |prev, (lhs, rhs, op)| { + let ty = lhs.custom.unwrap(); + let current = if [primitives.int32, primitives.int64, primitives.bool] + .contains(&ty) + { + let (lhs, rhs) = + if let (BasicValueEnum::IntValue(lhs), BasicValueEnum::IntValue(rhs)) = + (self.gen_expr(lhs), self.gen_expr(rhs)) + { + (lhs, rhs) + } else { + unreachable!() + }; + let op = match op { + ast::Cmpop::Eq | ast::Cmpop::Is => inkwell::IntPredicate::EQ, + ast::Cmpop::NotEq => inkwell::IntPredicate::NE, + ast::Cmpop::Lt => inkwell::IntPredicate::SLT, + ast::Cmpop::LtE => inkwell::IntPredicate::SLE, + ast::Cmpop::Gt => inkwell::IntPredicate::SGT, + ast::Cmpop::GtE => inkwell::IntPredicate::SGE, + _ => unreachable!(), + }; + self.builder.build_int_compare(op, lhs, rhs, "cmp") + } else if ty == primitives.float { + let (lhs, rhs) = if let ( + BasicValueEnum::FloatValue(lhs), + BasicValueEnum::FloatValue(rhs), + ) = (self.gen_expr(lhs), self.gen_expr(rhs)) + { + (lhs, rhs) + } else { + unreachable!() + }; + let op = match op { + ast::Cmpop::Eq | ast::Cmpop::Is => inkwell::FloatPredicate::OEQ, + ast::Cmpop::NotEq => inkwell::FloatPredicate::ONE, + ast::Cmpop::Lt => inkwell::FloatPredicate::OLT, + ast::Cmpop::LtE => inkwell::FloatPredicate::OLE, + ast::Cmpop::Gt => inkwell::FloatPredicate::OGT, + ast::Cmpop::GtE => inkwell::FloatPredicate::OGE, + _ => unreachable!(), + }; + self.builder.build_float_compare(op, lhs, rhs, "cmp") + } else { + unimplemented!() + }; + prev.map(|v| self.builder.build_and(v, current, "cmp")).or(Some(current)) + }) + .unwrap() + .into() // as there should be at least 1 element, it should never be none + } + _ => unimplemented!(), + } + } +} diff --git a/nac3core/src/codegen/helper.rs b/nac3core/src/codegen/helper.rs new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/nac3core/src/codegen/helper.rs @@ -0,0 +1 @@ + diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs new file mode 100644 index 00000000..89a10d8c --- /dev/null +++ b/nac3core/src/codegen/mod.rs @@ -0,0 +1,2 @@ +mod expr; +mod helper; diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index 1cce4c19..8d6db713 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -1,4 +1,6 @@ #![warn(clippy::all)] +#![allow(dead_code)] +mod codegen; +mod top_level; mod typecheck; - diff --git a/nac3core/src/typecheck/top_level.rs b/nac3core/src/top_level.rs similarity index 67% rename from nac3core/src/typecheck/top_level.rs rename to nac3core/src/top_level.rs index 776e7cb8..87d82363 100644 --- a/nac3core/src/typecheck/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,7 +1,9 @@ use std::{collections::HashMap, sync::Arc}; -use super::typedef::{SharedUnifier, Type, Unifier}; -use crossbeam::queue::SegQueue; +use super::typecheck::symbol_resolver::SymbolResolver; +use super::typecheck::type_inferencer::PrimitiveStore; +use super::typecheck::typedef::{SharedUnifier, Type, Unifier}; +use inkwell::{builder::Builder, context::Context, 
module::Module, values::PointerValue}; use parking_lot::RwLock; use rustpython_parser::ast::Stmt; @@ -33,24 +35,29 @@ pub enum TopLevelDef { /// variables. /// Value: AST annotated with types together with a unification table index. Could contain /// rigid type variables that would be substituted when the function is instantiated. - instance_to_stmt: HashMap, usize)>, + instance_to_stmt: HashMap>, usize)>, }, } pub struct CodeGenTask { pub subst: HashMap, pub symbol_name: String, - pub body: Stmt, + pub body: Stmt>, pub unifier: SharedUnifier, } pub struct TopLevelContext { - pub definitions: Vec>, - pub unifiers: Vec, - pub codegen_queue: SegQueue, + pub primitives: PrimitiveStore, + pub definitions: Arc>>>, + pub unifiers: Arc>>, } -pub struct WorkerContext { +pub struct CodeGenContext<'ctx> { + pub ctx: &'ctx Context, + pub builder: Builder<'ctx>, + pub module: Module<'ctx>, + pub top_level: &'ctx TopLevelContext, pub unifier: Unifier, - pub top_level_ctx: Arc>, + pub resolver: Box, + pub var_assignment: HashMap>, } diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index fe686245..e79cb65a 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,9 +1,7 @@ -#![allow(dead_code)] mod function_check; pub mod location; mod magic_methods; pub mod symbol_resolver; -mod top_level; pub mod type_inferencer; pub mod typedef; mod unification_table; diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 203b2c98..3eec4b02 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -1,8 +1,8 @@ use super::super::location::Location; use super::super::symbol_resolver::*; -use super::super::top_level::DefinitionId; use super::super::typedef::*; use super::*; +use crate::top_level::DefinitionId; use indoc::indoc; use itertools::zip; use rustpython_parser::ast; @@ -490,4 +490,3 @@ fn test_primitive_magic_methods(source: &str, mapping: HashMap<&str, &str>) { assert_eq!(format!("{}: {}", k, v), format!("{}: {}", k, name)); } } - diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 8f2567a5..93d90bf0 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -103,6 +103,11 @@ impl Unifier { Unifier { unification_table: UnificationTable::new(), var_id: 0 } } + /// Determine if the two types are the same + pub fn unioned(&mut self, a: Type, b: Type) -> bool { + self.unification_table.unioned(a, b) + } + pub fn from_shared_unifier(unifier: &SharedUnifier) -> Unifier { let lock = unifier.lock().unwrap(); Unifier { unification_table: UnificationTable::from_send(&lock.0), var_id: lock.1 } @@ -128,6 +133,10 @@ impl Unifier { }) } + pub fn get_representative(&mut self, ty: Type) -> Type { + self.unification_table.get_representative(ty) + } + pub fn add_sequence(&mut self, sequence: Mapping) -> Type { let id = self.var_id + 1; self.var_id += 1; diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index f05816db..f200dc18 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -1,8 +1,8 @@ use super::*; +use indoc::indoc; use itertools::Itertools; use std::collections::HashMap; use test_case::test_case; -use indoc::indoc; impl Unifier { /// Check whether two types are equal. 
@@ -335,7 +335,11 @@ fn test_virtual() { })); let bar = env.unifier.add_ty(TypeEnum::TObj { obj_id: 5, - fields: [("f".to_string(), fun), ("a".to_string(), int)].iter().cloned().collect::>().into(), + fields: [("f".to_string(), fun), ("a".to_string(), int)] + .iter() + .cloned() + .collect::>() + .into(), params: HashMap::new(), }); let v0 = env.unifier.get_fresh_var().0; @@ -515,7 +519,9 @@ fn test_instantiation() { tuple[int, list[int], float] tuple[int, list[int], list[int]] v5" - }.split('\n').collect_vec(); + } + .split('\n') + .collect_vec(); let types = types .iter() .map(|ty| { diff --git a/nac3core/src/typecheck/unification_table.rs b/nac3core/src/typecheck/unification_table.rs index 8c50b5f9..7a95a2a2 100644 --- a/nac3core/src/typecheck/unification_table.rs +++ b/nac3core/src/typecheck/unification_table.rs @@ -51,6 +51,10 @@ impl UnificationTable { self.find(a) == self.find(b) } + pub fn get_representative(&mut self, key: UnificationKey) -> UnificationKey { + UnificationKey(self.find(key)) + } + fn find(&mut self, key: UnificationKey) -> usize { let mut root = key.0; let mut parent = self.parents[root]; From 095f28468b7c3f12ed782dc66f43f2cd45e52b66 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 5 Aug 2021 16:52:41 +0800 Subject: [PATCH 076/131] added if expr --- nac3core/src/codegen/expr.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index 77b80980..e0f72ba8 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -383,6 +383,29 @@ impl<'ctx> CodeGenContext<'ctx> { .unwrap() .into() // as there should be at least 1 element, it should never be none } + ExprKind::IfExp { test, body, orelse } => { + let test = if let BasicValueEnum::IntValue(test) = self.gen_expr(test) { + test + } else { + unreachable!() + }; + + let current = self.builder.get_insert_block().unwrap().get_parent().unwrap(); + let then_bb = self.ctx.append_basic_block(current, "then"); + let else_bb = self.ctx.append_basic_block(current, "else"); + let cont_bb = self.ctx.append_basic_block(current, "cont"); + self.builder.build_conditional_branch(test, then_bb, else_bb); + self.builder.position_at_end(then_bb); + let a = self.gen_expr(body); + self.builder.build_unconditional_branch(cont_bb); + self.builder.position_at_end(else_bb); + let b = self.gen_expr(orelse); + self.builder.build_unconditional_branch(cont_bb); + self.builder.position_at_end(cont_bb); + let phi = self.builder.build_phi(a.get_type(), "ifexpr"); + phi.add_incoming(&[(&a, then_bb), (&b, else_bb)]); + phi.as_basic_value() + } _ => unimplemented!(), } } From fe260703642b014a8beeb9a09a3a1f13c1b9ec94 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Thu, 5 Aug 2021 15:09:28 +0800 Subject: [PATCH 077/131] cleanup basic_test_env --- nac3core/src/typecheck/type_inferencer/test.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 3eec4b02..4592ec6e 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -79,9 +79,7 @@ impl TestEnvironment { fields: HashMap::new().into(), params: HashMap::new(), }); - // identifier_mapping.insert("None".into(), none); let primitives = PrimitiveStore { int32, int64, float, bool, none }; - set_primitives_magic_methods(&primitives, &mut unifier); let id_to_name = [ @@ -90,9 +88,6 @@ impl TestEnvironment { (2, "float".to_string()), (3, 
"bool".to_string()), (4, "none".to_string()), - (5, "Foo".to_string()), - (6, "Bar".to_string()), - (7, "Bar2".to_string()), ] .iter() .cloned() From 18db2ddd531e8cffd1a9424392fc1f2545368fe9 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Fri, 6 Aug 2021 10:30:57 +0800 Subject: [PATCH 078/131] change the type TypeEnum::TObj {object_id} to DefinitionId as with top_level change TopLevelDef::Class {object_id} to DefinitionId --- nac3core/src/codegen/expr.rs | 4 +-- nac3core/src/top_level.rs | 5 ++-- nac3core/src/typecheck/magic_methods.rs | 2 +- .../src/typecheck/type_inferencer/test.rs | 26 +++++++++---------- nac3core/src/typecheck/typedef/mod.rs | 7 ++--- nac3core/src/typecheck/typedef/test.rs | 10 +++---- 6 files changed, 28 insertions(+), 26 deletions(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index e0f72ba8..8e629f9b 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -13,7 +13,7 @@ impl<'ctx> CodeGenContext<'ctx> { // we cannot have other types, virtual type should be handled by function calls _ => unreachable!(), }; - let def = &self.top_level.definitions.read()[obj_id]; + let def = &self.top_level.definitions.read()[obj_id.0]; let index = if let TopLevelDef::Class { fields, .. } = &*def.read() { fields.iter().find_position(|x| x.0 == attr).unwrap().0 } else { @@ -137,7 +137,7 @@ impl<'ctx> CodeGenContext<'ctx> { let primitives = &self.top_level.primitives; match &expr.node { ExprKind::Constant { value, .. } => { - let ty = expr.custom.clone().unwrap(); + let ty = expr.custom.unwrap(); self.gen_const(value, ty) } ExprKind::Name { id, .. } => { diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 87d82363..a861e791 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -7,12 +7,13 @@ use inkwell::{builder::Builder, context::Context, module::Module, values::Pointe use parking_lot::RwLock; use rustpython_parser::ast::Stmt; -pub struct DefinitionId(usize); +#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +pub struct DefinitionId(pub usize); pub enum TopLevelDef { Class { // object ID used for TypeEnum - object_id: usize, + object_id: DefinitionId, // type variables bounded to the class. type_vars: Vec, // class fields and method signature. 
diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index 4c648699..ff28da5e 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -61,7 +61,7 @@ pub fn comparison_name(op: &Cmpop) -> Option<&'static str> { } } -pub fn impl_binop(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type, ops: &[ast::Operator]) { +pub fn impl_binop(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type, ops: &[ast::Operator]) { if let TypeEnum::TObj {fields, ..} = unifier.get_ty(ty).borrow() { for op in ops { fields.borrow_mut().insert( diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 4592ec6e..d88fe6d4 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -55,27 +55,27 @@ impl TestEnvironment { let mut unifier = Unifier::new(); let int32 = unifier.add_ty(TypeEnum::TObj { - obj_id: 0, + obj_id: DefinitionId(0), fields: HashMap::new().into(), params: HashMap::new(), }); let int64 = unifier.add_ty(TypeEnum::TObj { - obj_id: 1, + obj_id: DefinitionId(1), fields: HashMap::new().into(), params: HashMap::new(), }); let float = unifier.add_ty(TypeEnum::TObj { - obj_id: 2, + obj_id: DefinitionId(2), fields: HashMap::new().into(), params: HashMap::new(), }); let bool = unifier.add_ty(TypeEnum::TObj { - obj_id: 3, + obj_id: DefinitionId(3), fields: HashMap::new().into(), params: HashMap::new(), }); let none = unifier.add_ty(TypeEnum::TObj { - obj_id: 4, + obj_id: DefinitionId(4), fields: HashMap::new().into(), params: HashMap::new(), }); @@ -120,27 +120,27 @@ impl TestEnvironment { let mut unifier = Unifier::new(); let mut identifier_mapping = HashMap::new(); let int32 = unifier.add_ty(TypeEnum::TObj { - obj_id: 0, + obj_id: DefinitionId(0), fields: HashMap::new().into(), params: HashMap::new(), }); let int64 = unifier.add_ty(TypeEnum::TObj { - obj_id: 1, + obj_id: DefinitionId(1), fields: HashMap::new().into(), params: HashMap::new(), }); let float = unifier.add_ty(TypeEnum::TObj { - obj_id: 2, + obj_id: DefinitionId(2), fields: HashMap::new().into(), params: HashMap::new(), }); let bool = unifier.add_ty(TypeEnum::TObj { - obj_id: 3, + obj_id: DefinitionId(3), fields: HashMap::new().into(), params: HashMap::new(), }); let none = unifier.add_ty(TypeEnum::TObj { - obj_id: 4, + obj_id: DefinitionId(4), fields: HashMap::new().into(), params: HashMap::new(), }); @@ -151,7 +151,7 @@ impl TestEnvironment { let (v0, id) = unifier.get_fresh_var(); let foo_ty = unifier.add_ty(TypeEnum::TObj { - obj_id: 5, + obj_id: DefinitionId(5), fields: [("a".into(), v0)].iter().cloned().collect::>().into(), params: [(id, v0)].iter().cloned().collect(), }); @@ -171,7 +171,7 @@ impl TestEnvironment { vars: Default::default(), })); let bar = unifier.add_ty(TypeEnum::TObj { - obj_id: 6, + obj_id: DefinitionId(6), fields: [("a".into(), int32), ("b".into(), fun)] .iter() .cloned() @@ -189,7 +189,7 @@ impl TestEnvironment { ); let bar2 = unifier.add_ty(TypeEnum::TObj { - obj_id: 7, + obj_id: DefinitionId(7), fields: [("a".into(), bool), ("b".into(), fun)] .iter() .cloned() diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 93d90bf0..daf7fcfe 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -6,6 +6,7 @@ use std::iter::once; use std::rc::Rc; use std::sync::{Arc, Mutex}; +use 
crate::top_level::DefinitionId; use super::unification_table::{UnificationKey, UnificationTable}; #[cfg(test)] @@ -64,7 +65,7 @@ pub enum TypeEnum { ty: Type, }, TObj { - obj_id: usize, + obj_id: DefinitionId, fields: RefCell>, params: VarMap, }, @@ -433,7 +434,7 @@ impl Unifier { TObj { obj_id: id2, params: params2, .. }, ) => { if id1 != id2 { - return Err(format!("Cannot unify objects with ID {} and {}", id1, id2)); + return Err(format!("Cannot unify objects with ID {} and {}", id1.0, id2.0)); } for (x, y) in zip(params1.values(), params2.values()) { self.unify(*x, *y)?; @@ -570,7 +571,7 @@ impl Unifier { format!("virtual[{}]", self.stringify(*ty, obj_to_name, var_to_name)) } TypeEnum::TObj { obj_id, params, .. } => { - let name = obj_to_name(*obj_id); + let name = obj_to_name(obj_id.0); if !params.is_empty() { let mut params = params.values().map(|v| self.stringify(*v, obj_to_name, var_to_name)); diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index f200dc18..0e4a32f8 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -78,7 +78,7 @@ impl TestEnvironment { type_mapping.insert( "int".into(), unifier.add_ty(TypeEnum::TObj { - obj_id: 0, + obj_id: DefinitionId(0), fields: HashMap::new().into(), params: HashMap::new(), }), @@ -86,7 +86,7 @@ impl TestEnvironment { type_mapping.insert( "float".into(), unifier.add_ty(TypeEnum::TObj { - obj_id: 1, + obj_id: DefinitionId(1), fields: HashMap::new().into(), params: HashMap::new(), }), @@ -94,7 +94,7 @@ impl TestEnvironment { type_mapping.insert( "bool".into(), unifier.add_ty(TypeEnum::TObj { - obj_id: 2, + obj_id: DefinitionId(2), fields: HashMap::new().into(), params: HashMap::new(), }), @@ -103,7 +103,7 @@ impl TestEnvironment { type_mapping.insert( "Foo".into(), unifier.add_ty(TypeEnum::TObj { - obj_id: 3, + obj_id: DefinitionId(3), fields: [("a".into(), v0)].iter().cloned().collect::>().into(), params: [(id, v0)].iter().cloned().collect(), }), @@ -334,7 +334,7 @@ fn test_virtual() { vars: HashMap::new(), })); let bar = env.unifier.add_ty(TypeEnum::TObj { - obj_id: 5, + obj_id: DefinitionId(5), fields: [("f".to_string(), fun), ("a".to_string(), int)] .iter() .cloned() From c405e46b00c45241dab2c8e61c7efd9276d9d92a Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 7 Aug 2021 10:28:41 +0800 Subject: [PATCH 079/131] moving location and symbol_resolver out from typecheck --- nac3core/src/lib.rs | 2 ++ nac3core/src/{typecheck => }/location.rs | 0 nac3core/src/{typecheck => }/symbol_resolver.rs | 4 ++-- nac3core/src/top_level.rs | 2 +- nac3core/src/typecheck/mod.rs | 2 -- nac3core/src/typecheck/type_inferencer/mod.rs | 2 +- nac3core/src/typecheck/type_inferencer/test.rs | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) rename nac3core/src/{typecheck => }/location.rs (100%) rename nac3core/src/{typecheck => }/symbol_resolver.rs (90%) diff --git a/nac3core/src/lib.rs b/nac3core/src/lib.rs index 8d6db713..6d7de6f5 100644 --- a/nac3core/src/lib.rs +++ b/nac3core/src/lib.rs @@ -2,5 +2,7 @@ #![allow(dead_code)] mod codegen; +mod location; +mod symbol_resolver; mod top_level; mod typecheck; diff --git a/nac3core/src/typecheck/location.rs b/nac3core/src/location.rs similarity index 100% rename from nac3core/src/typecheck/location.rs rename to nac3core/src/location.rs diff --git a/nac3core/src/typecheck/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs similarity index 90% rename from nac3core/src/typecheck/symbol_resolver.rs rename to 
nac3core/src/symbol_resolver.rs index fd2ad3f4..1c9f80c7 100644 --- a/nac3core/src/typecheck/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -1,5 +1,5 @@ -use super::location::Location; -use super::typedef::Type; +use crate::location::Location; +use crate::typecheck::typedef::Type; use crate::top_level::DefinitionId; use rustpython_parser::ast::Expr; diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index a861e791..ef2651cd 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,8 +1,8 @@ use std::{collections::HashMap, sync::Arc}; -use super::typecheck::symbol_resolver::SymbolResolver; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, Unifier}; +use crate::symbol_resolver::SymbolResolver; use inkwell::{builder::Builder, context::Context, module::Module, values::PointerValue}; use parking_lot::RwLock; use rustpython_parser::ast::Stmt; diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index e79cb65a..a75222f5 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,7 +1,5 @@ mod function_check; -pub mod location; mod magic_methods; -pub mod symbol_resolver; pub mod type_inferencer; pub mod typedef; mod unification_table; diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 95ea2027..a3e16bfb 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -4,8 +4,8 @@ use std::iter::once; use std::{cell::RefCell, sync::Arc}; use super::magic_methods::*; -use super::symbol_resolver::SymbolResolver; use super::typedef::{Call, FunSignature, FuncArg, Type, TypeEnum, Unifier}; +use crate::symbol_resolver::SymbolResolver; use itertools::izip; use rustpython_parser::ast::{ self, diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index d88fe6d4..76b7515c 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -1,7 +1,7 @@ -use super::super::location::Location; -use super::super::symbol_resolver::*; use super::super::typedef::*; use super::*; +use crate::location::Location; +use crate::symbol_resolver::*; use crate::top_level::DefinitionId; use indoc::indoc; use itertools::zip; From 34d3317ea01b698e19dd65a5c5a3ee5425b9af3e Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 7 Aug 2021 10:41:53 +0800 Subject: [PATCH 080/131] store operation method signature --- nac3core/src/typecheck/type_inferencer/mod.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index a3e16bfb..e2fdc172 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -182,6 +182,7 @@ impl<'a> Inferencer<'a> { fn build_method_call( &mut self, + location: Location, method: String, obj: Type, params: Vec, @@ -193,6 +194,7 @@ impl<'a> Inferencer<'a> { ret, fun: RefCell::new(None), }); + self.calls.insert(location.into(), call.clone()); let call = self.unifier.add_ty(TypeEnum::TCall(vec![call].into())); let fields = once((method, call)).collect(); let record = self.unifier.add_record(fields); @@ -477,6 +479,7 @@ impl<'a> Inferencer<'a> { let method = binop_name(op); let ret = self.unifier.get_fresh_var().0; self.build_method_call( + left.location, method.to_string(), 
left.custom.unwrap(), vec![right.custom.unwrap()], @@ -491,7 +494,13 @@ impl<'a> Inferencer<'a> { ) -> InferenceResult { let method = unaryop_name(op); let ret = self.unifier.get_fresh_var().0; - self.build_method_call(method.to_string(), operand.custom.unwrap(), vec![], ret) + self.build_method_call( + operand.location, + method.to_string(), + operand.custom.unwrap(), + vec![], + ret, + ) } fn infer_compare( @@ -504,7 +513,7 @@ impl<'a> Inferencer<'a> { for (a, b, c) in izip!(once(left).chain(comparators), comparators, ops) { let method = comparison_name(c).ok_or_else(|| "unsupported comparator".to_string())?.to_string(); - self.build_method_call(method, a.custom.unwrap(), vec![b.custom.unwrap()], boolean)?; + self.build_method_call(a.location, method, a.custom.unwrap(), vec![b.custom.unwrap()], boolean)?; } Ok(boolean) } From 7a38ab31192fe11db1f35f7ca0e68047c73b13da Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 7 Aug 2021 15:06:39 +0800 Subject: [PATCH 081/131] codegen for function call --- nac3core/src/codegen/expr.rs | 154 +++++++++++++++++--- nac3core/src/top_level.rs | 12 +- nac3core/src/typecheck/typedef/mod.rs | 2 + nac3core/src/typecheck/unification_table.rs | 2 +- 4 files changed, 143 insertions(+), 27 deletions(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index 8e629f9b..26ec835c 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -1,12 +1,41 @@ use std::{convert::TryInto, iter::once}; -use crate::top_level::{CodeGenContext, TopLevelDef}; -use crate::typecheck::typedef::{Type, TypeEnum}; -use inkwell::{types::BasicType, values::BasicValueEnum}; +use crate::{ + top_level::DefinitionId, + typecheck::typedef::{Type, TypeEnum}, +}; +use crate::{ + top_level::{CodeGenContext, TopLevelDef}, + typecheck::typedef::FunSignature, +}; +use inkwell::{ + types::{BasicType, BasicTypeEnum}, + values::BasicValueEnum, + AddressSpace, +}; use itertools::{chain, izip, zip, Itertools}; use rustpython_parser::ast::{self, Boolop, Constant, Expr, ExprKind, Operator}; impl<'ctx> CodeGenContext<'ctx> { + fn get_subst_key(&mut self, obj: Option, fun: &FunSignature) -> String { + let mut vars = obj + .map(|ty| { + if let TypeEnum::TObj { params, .. } = &*self.unifier.get_ty(ty) { + params.clone() + } else { + unreachable!() + } + }) + .unwrap_or_default(); + vars.extend(fun.vars.iter()); + let sorted = vars.keys().sorted(); + sorted + .map(|id| { + self.unifier.stringify(vars[id], &mut |id| id.to_string(), &mut |id| id.to_string()) + }) + .join(", ") + } + fn get_attr_index(&mut self, ty: Type, attr: &str) -> usize { let obj_id = match &*self.unifier.get_ty(ty) { TypeEnum::TObj { obj_id, .. } => *obj_id, @@ -22,17 +51,88 @@ impl<'ctx> CodeGenContext<'ctx> { index } + fn get_llvm_type(&mut self, ty: Type) -> BasicTypeEnum<'ctx> { + use TypeEnum::*; + // we assume the type cache should already contain primitive types, + // and they should be passed by value instead of passing as pointer. + self.type_cache.get(&ty).cloned().unwrap_or_else(|| match &*self.unifier.get_ty(ty) { + TObj { obj_id, fields, .. } => { + // a struct with fields in the order of declaration + let defs = self.top_level.definitions.read(); + let definition = defs.get(obj_id.0).unwrap(); + let ty = if let TopLevelDef::Class { fields: fields_list, .. 
} = &*definition.read() + { + let fields = fields.borrow(); + let fields = + fields_list.iter().map(|f| self.get_llvm_type(fields[&f.0])).collect_vec(); + self.ctx + .struct_type(&fields, false) + .ptr_type(AddressSpace::Generic) + .into() + } else { + unreachable!() + }; + ty + } + TTuple { ty } => { + // a struct with fields in the order present in the tuple + let fields = ty.iter().map(|ty| self.get_llvm_type(*ty)).collect_vec(); + self.ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + } + TList { ty } => { + // a struct with an integer and a pointer to an array + let element_type = self.get_llvm_type(*ty); + let fields = [ + self.ctx.i32_type().into(), + element_type.ptr_type(AddressSpace::Generic).into(), + ]; + self.ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + } + _ => unreachable!(), + }) + } + + fn gen_call( + &mut self, + obj: Option<(Type, BasicValueEnum<'ctx>)>, + fun: (&FunSignature, DefinitionId), + params: &[BasicValueEnum<'ctx>], + ret: Type, + ) -> Option> { + let key = self.get_subst_key(obj.map(|(a, _)| a), fun.0); + let defs = self.top_level.definitions.read(); + let definition = defs.get(fun.1.0).unwrap(); + let val = if let TopLevelDef::Function { instance_to_symbol, .. } = &*definition.read() { + // TODO: codegen for function that are not yet generated + let symbol = instance_to_symbol.get(&key).unwrap(); + let fun_val = self.module.get_function(symbol).unwrap_or_else(|| { + let params = fun.0.args.iter().map(|arg| self.get_llvm_type(arg.ty)).collect_vec(); + let fun_ty = if self.unifier.unioned(ret, self.primitives.none) { + self.ctx.void_type().fn_type(¶ms, false) + } else { + self.get_llvm_type(ret).fn_type(¶ms, false) + }; + self.module.add_function(symbol, fun_ty, None) + }); + // TODO: deal with default parameters and reordering based on keys + self.builder.build_call(fun_val, params, "call").try_as_basic_value().left() + } else { + unreachable!() + }; + val + } + fn gen_const(&mut self, value: &Constant, ty: Type) -> BasicValueEnum<'ctx> { match value { Constant::Bool(v) => { - assert!(self.unifier.unioned(ty, self.top_level.primitives.bool)); + assert!(self.unifier.unioned(ty, self.primitives.bool)); let ty = self.ctx.bool_type(); ty.const_int(if *v { 1 } else { 0 }, false).into() } Constant::Int(v) => { - let ty = if self.unifier.unioned(ty, self.top_level.primitives.int32) { + let ty = if self.unifier.unioned(ty, self.primitives.int32) { self.ctx.i32_type() - } else if self.unifier.unioned(ty, self.top_level.primitives.int64) { + } else if self.unifier.unioned(ty, self.primitives.int64) { self.ctx.i64_type() } else { unreachable!(); @@ -40,7 +140,7 @@ impl<'ctx> CodeGenContext<'ctx> { ty.const_int(v.try_into().unwrap(), false).into() } Constant::Float(v) => { - assert!(self.unifier.unioned(ty, self.top_level.primitives.float)); + assert!(self.unifier.unioned(ty, self.primitives.float)); let ty = self.ctx.f64_type(); ty.const_float(*v).into() } @@ -134,7 +234,6 @@ impl<'ctx> CodeGenContext<'ctx> { pub fn gen_expr(&mut self, expr: &Expr>) -> BasicValueEnum<'ctx> { let zero = self.ctx.i32_type().const_int(0, false); - let primitives = &self.top_level.primitives; match &expr.node { ExprKind::Constant { value, .. } => { let ty = expr.custom.unwrap(); @@ -146,25 +245,36 @@ impl<'ctx> CodeGenContext<'ctx> { } ExprKind::List { elts, .. } => { // this shall be optimized later for constant primitive lists... 
+ // we should use memcpy for that instead of generating thousands of stores let elements = elts.iter().map(|x| self.gen_expr(x)).collect_vec(); let ty = if elements.is_empty() { self.ctx.i32_type().into() } else { elements[0].get_type() }; - // this length includes the leading length element + let arr_ptr = self.builder.build_array_alloca( + ty, + self.ctx.i32_type().const_int(elements.len() as u64, false), + "tmparr", + ); let arr_ty = self.ctx.struct_type( - &[self.ctx.i32_type().into(), ty.array_type(elements.len() as u32).into()], + &[ + self.ctx.i32_type().into(), + ty.ptr_type(AddressSpace::Generic).into(), + ], false, ); - let arr_ptr = self.builder.build_alloca(arr_ty, "tmparr"); + let arr_str_ptr = self.builder.build_alloca(arr_ty, "tmparrstr"); unsafe { - let len_ptr = arr_ptr - .const_in_bounds_gep(&[zero, self.ctx.i32_type().const_int(0u64, false)]); self.builder.build_store( - len_ptr, + arr_str_ptr.const_in_bounds_gep(&[zero, zero]), self.ctx.i32_type().const_int(elements.len() as u64, false), ); + self.builder.build_store( + arr_str_ptr + .const_in_bounds_gep(&[zero, self.ctx.i32_type().const_int(1, false)]), + arr_ptr, + ); let arr_offset = self.ctx.i32_type().const_int(1, false); for (i, v) in elements.iter().enumerate() { let ptr = self.builder.build_in_bounds_gep( @@ -175,7 +285,7 @@ impl<'ctx> CodeGenContext<'ctx> { self.builder.build_store(ptr, *v); } } - arr_ptr.into() + arr_str_ptr.into() } ExprKind::Tuple { elts, .. } => { let element_val = elts.iter().map(|x| self.gen_expr(x)).collect_vec(); @@ -266,9 +376,9 @@ impl<'ctx> CodeGenContext<'ctx> { // when doing code generation for function instances if ty1 != ty2 { unimplemented!() - } else if [primitives.int32, primitives.int64].contains(&ty1) { + } else if [self.primitives.int32, self.primitives.int64].contains(&ty1) { self.gen_int_ops(op, left, right) - } else if primitives.float == ty1 { + } else if self.primitives.float == ty1 { self.gen_float_ops(op, left, right) } else { unimplemented!() @@ -277,7 +387,7 @@ impl<'ctx> CodeGenContext<'ctx> { ExprKind::UnaryOp { op, operand } => { let ty = self.unifier.get_representative(operand.custom.unwrap()); let val = self.gen_expr(operand); - if ty == primitives.bool { + if ty == self.primitives.bool { let val = if let BasicValueEnum::IntValue(val) = val { val } else { unreachable!() }; match op { @@ -286,7 +396,7 @@ impl<'ctx> CodeGenContext<'ctx> { } _ => val.into(), } - } else if [primitives.int32, primitives.int64].contains(&ty) { + } else if [self.primitives.int32, self.primitives.int64].contains(&ty) { let val = if let BasicValueEnum::IntValue(val) = val { val } else { unreachable!() }; match op { @@ -303,7 +413,7 @@ impl<'ctx> CodeGenContext<'ctx> { .into(), _ => val.into(), } - } else if ty == primitives.float { + } else if ty == self.primitives.float { let val = if let BasicValueEnum::FloatValue(val) = val { val } else { @@ -334,7 +444,7 @@ impl<'ctx> CodeGenContext<'ctx> { ) .fold(None, |prev, (lhs, rhs, op)| { let ty = lhs.custom.unwrap(); - let current = if [primitives.int32, primitives.int64, primitives.bool] + let current = if [self.primitives.int32, self.primitives.int64, self.primitives.bool] .contains(&ty) { let (lhs, rhs) = @@ -355,7 +465,7 @@ impl<'ctx> CodeGenContext<'ctx> { _ => unreachable!(), }; self.builder.build_int_compare(op, lhs, rhs, "cmp") - } else if ty == primitives.float { + } else if ty == self.primitives.float { let (lhs, rhs) = if let ( BasicValueEnum::FloatValue(lhs), BasicValueEnum::FloatValue(rhs), diff --git 
a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index ef2651cd..e205af28 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -3,7 +3,7 @@ use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, Unifier}; use crate::symbol_resolver::SymbolResolver; -use inkwell::{builder::Builder, context::Context, module::Module, values::PointerValue}; +use inkwell::{builder::Builder, context::Context, module::Module, types::BasicTypeEnum, values::PointerValue}; use parking_lot::RwLock; use rustpython_parser::ast::Stmt; @@ -16,14 +16,17 @@ pub enum TopLevelDef { object_id: DefinitionId, // type variables bounded to the class. type_vars: Vec, - // class fields and method signature. + // class fields fields: Vec<(String, Type)>, // class methods, pointing to the corresponding function definition. - methods: Vec<(String, DefinitionId)>, + methods: Vec<(String, Type, DefinitionId)>, // ancestor classes, including itself. ancestors: Vec, }, Function { + // prefix for symbol, should be unique globally, and not ending with numbers + name: String, + // function signature. signature: Type, /// Function instance to symbol mapping /// Key: string representation of type variable values, sorted by variable ID in ascending @@ -48,7 +51,6 @@ pub struct CodeGenTask { } pub struct TopLevelContext { - pub primitives: PrimitiveStore, pub definitions: Arc>>>, pub unifiers: Arc>>, } @@ -61,4 +63,6 @@ pub struct CodeGenContext<'ctx> { pub unifier: Unifier, pub resolver: Box, pub var_assignment: HashMap>, + pub type_cache: HashMap>, + pub primitives: PrimitiveStore, } diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index daf7fcfe..c880b6c2 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -30,6 +30,8 @@ pub struct Call { pub struct FuncArg { pub name: String, pub ty: Type, + // TODO: change this to an optional value + // for primitive types pub is_optional: bool, } diff --git a/nac3core/src/typecheck/unification_table.rs b/nac3core/src/typecheck/unification_table.rs index 7a95a2a2..7475afce 100644 --- a/nac3core/src/typecheck/unification_table.rs +++ b/nac3core/src/typecheck/unification_table.rs @@ -1,6 +1,6 @@ use std::rc::Rc; -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)] pub struct UnificationKey(usize); pub struct UnificationTable { From 711482d09cc86fd86949da61caa9c8ddd488b635 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 7 Aug 2021 15:30:03 +0800 Subject: [PATCH 082/131] expr codegen cleanup --- nac3core/src/codegen/expr.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index 26ec835c..09a250aa 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -88,6 +88,7 @@ impl<'ctx> CodeGenContext<'ctx> { ]; self.ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() } + TVirtual { .. } => unimplemented!(), _ => unreachable!(), }) } @@ -103,8 +104,10 @@ impl<'ctx> CodeGenContext<'ctx> { let defs = self.top_level.definitions.read(); let definition = defs.get(fun.1.0).unwrap(); let val = if let TopLevelDef::Function { instance_to_symbol, .. 
} = &*definition.read() { - // TODO: codegen for function that are not yet generated - let symbol = instance_to_symbol.get(&key).unwrap(); + let symbol = instance_to_symbol.get(&key).unwrap_or_else(|| { + // TODO: codegen for function that are not yet generated + unimplemented!() + }); let fun_val = self.module.get_function(symbol).unwrap_or_else(|| { let params = fun.0.args.iter().map(|arg| self.get_llvm_type(arg.ty)).collect_vec(); let fun_ty = if self.unifier.unioned(ret, self.primitives.none) { @@ -155,7 +158,7 @@ impl<'ctx> CodeGenContext<'ctx> { let ty = self.ctx.struct_type(&types, false); ty.const_named_struct(&values).into() } - _ => unimplemented!(), + _ => unreachable!() } } @@ -374,11 +377,9 @@ impl<'ctx> CodeGenContext<'ctx> { // we can directly compare the types, because we've got their representatives // which would be unchanged until further unification, which we would never do // when doing code generation for function instances - if ty1 != ty2 { - unimplemented!() - } else if [self.primitives.int32, self.primitives.int64].contains(&ty1) { + if ty1 == ty2 && [self.primitives.int32, self.primitives.int64].contains(&ty1) { self.gen_int_ops(op, left, right) - } else if self.primitives.float == ty1 { + } else if ty1 == ty2 && self.primitives.float == ty1 { self.gen_float_ops(op, left, right) } else { unimplemented!() From 86ca02796b5752fc800a5aa56c57df0cb1df57dd Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 7 Aug 2021 17:25:14 +0800 Subject: [PATCH 083/131] function parameter handling --- nac3core/src/codegen/expr.rs | 122 ++++++++++-------- nac3core/src/symbol_resolver.rs | 8 +- nac3core/src/typecheck/magic_methods.rs | 32 ++--- nac3core/src/typecheck/type_inferencer/mod.rs | 10 +- nac3core/src/typecheck/typedef/mod.rs | 11 +- 5 files changed, 99 insertions(+), 84 deletions(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index 09a250aa..f93975ab 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -1,12 +1,9 @@ -use std::{convert::TryInto, iter::once}; +use std::{collections::HashMap, convert::TryInto, iter::once}; use crate::{ - top_level::DefinitionId, - typecheck::typedef::{Type, TypeEnum}, -}; -use crate::{ - top_level::{CodeGenContext, TopLevelDef}, - typecheck::typedef::FunSignature, + symbol_resolver::SymbolValue, + top_level::{CodeGenContext, DefinitionId, TopLevelDef}, + typecheck::typedef::{FunSignature, Type, TypeEnum}, }; use inkwell::{ types::{BasicType, BasicTypeEnum}, @@ -65,10 +62,7 @@ impl<'ctx> CodeGenContext<'ctx> { let fields = fields.borrow(); let fields = fields_list.iter().map(|f| self.get_llvm_type(fields[&f.0])).collect_vec(); - self.ctx - .struct_type(&fields, false) - .ptr_type(AddressSpace::Generic) - .into() + self.ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() } else { unreachable!() }; @@ -93,16 +87,20 @@ impl<'ctx> CodeGenContext<'ctx> { }) } + fn gen_symbol_val(&mut self, val: &SymbolValue) -> BasicValueEnum<'ctx> { + unimplemented!() + } + fn gen_call( &mut self, obj: Option<(Type, BasicValueEnum<'ctx>)>, fun: (&FunSignature, DefinitionId), - params: &[BasicValueEnum<'ctx>], + params: Vec<(Option, BasicValueEnum<'ctx>)>, ret: Type, ) -> Option> { let key = self.get_subst_key(obj.map(|(a, _)| a), fun.0); let defs = self.top_level.definitions.read(); - let definition = defs.get(fun.1.0).unwrap(); + let definition = defs.get(fun.1 .0).unwrap(); let val = if let TopLevelDef::Function { instance_to_symbol, .. 
} = &*definition.read() { let symbol = instance_to_symbol.get(&key).unwrap_or_else(|| { // TODO: codegen for function that are not yet generated @@ -117,8 +115,19 @@ impl<'ctx> CodeGenContext<'ctx> { }; self.module.add_function(symbol, fun_ty, None) }); - // TODO: deal with default parameters and reordering based on keys - self.builder.build_call(fun_val, params, "call").try_as_basic_value().left() + let mut keys = fun.0.args.clone(); + let mut mapping = HashMap::new(); + for (key, value) in params.into_iter() { + mapping.insert(key.unwrap_or_else(|| keys.remove(0).name), value); + } + // default value handling + for k in keys.into_iter() { + mapping.insert(k.name, self.gen_symbol_val(&k.default_value.unwrap())); + } + // reorder the parameters + let params = + fun.0.args.iter().map(|arg| mapping.remove(&arg.name).unwrap()).collect_vec(); + self.builder.build_call(fun_val, ¶ms, "call").try_as_basic_value().left() } else { unreachable!() }; @@ -158,7 +167,7 @@ impl<'ctx> CodeGenContext<'ctx> { let ty = self.ctx.struct_type(&types, false); ty.const_named_struct(&values).into() } - _ => unreachable!() + _ => unreachable!(), } } @@ -261,10 +270,7 @@ impl<'ctx> CodeGenContext<'ctx> { "tmparr", ); let arr_ty = self.ctx.struct_type( - &[ - self.ctx.i32_type().into(), - ty.ptr_type(AddressSpace::Generic).into(), - ], + &[self.ctx.i32_type().into(), ty.ptr_type(AddressSpace::Generic).into()], false, ); let arr_str_ptr = self.builder.build_alloca(arr_ty, "tmparrstr"); @@ -445,50 +451,52 @@ impl<'ctx> CodeGenContext<'ctx> { ) .fold(None, |prev, (lhs, rhs, op)| { let ty = lhs.custom.unwrap(); - let current = if [self.primitives.int32, self.primitives.int64, self.primitives.bool] - .contains(&ty) - { - let (lhs, rhs) = - if let (BasicValueEnum::IntValue(lhs), BasicValueEnum::IntValue(rhs)) = - (self.gen_expr(lhs), self.gen_expr(rhs)) + let current = + if [self.primitives.int32, self.primitives.int64, self.primitives.bool] + .contains(&ty) + { + let (lhs, rhs) = if let ( + BasicValueEnum::IntValue(lhs), + BasicValueEnum::IntValue(rhs), + ) = (self.gen_expr(lhs), self.gen_expr(rhs)) { (lhs, rhs) } else { unreachable!() }; - let op = match op { - ast::Cmpop::Eq | ast::Cmpop::Is => inkwell::IntPredicate::EQ, - ast::Cmpop::NotEq => inkwell::IntPredicate::NE, - ast::Cmpop::Lt => inkwell::IntPredicate::SLT, - ast::Cmpop::LtE => inkwell::IntPredicate::SLE, - ast::Cmpop::Gt => inkwell::IntPredicate::SGT, - ast::Cmpop::GtE => inkwell::IntPredicate::SGE, - _ => unreachable!(), - }; - self.builder.build_int_compare(op, lhs, rhs, "cmp") - } else if ty == self.primitives.float { - let (lhs, rhs) = if let ( - BasicValueEnum::FloatValue(lhs), - BasicValueEnum::FloatValue(rhs), - ) = (self.gen_expr(lhs), self.gen_expr(rhs)) - { - (lhs, rhs) + let op = match op { + ast::Cmpop::Eq | ast::Cmpop::Is => inkwell::IntPredicate::EQ, + ast::Cmpop::NotEq => inkwell::IntPredicate::NE, + ast::Cmpop::Lt => inkwell::IntPredicate::SLT, + ast::Cmpop::LtE => inkwell::IntPredicate::SLE, + ast::Cmpop::Gt => inkwell::IntPredicate::SGT, + ast::Cmpop::GtE => inkwell::IntPredicate::SGE, + _ => unreachable!(), + }; + self.builder.build_int_compare(op, lhs, rhs, "cmp") + } else if ty == self.primitives.float { + let (lhs, rhs) = if let ( + BasicValueEnum::FloatValue(lhs), + BasicValueEnum::FloatValue(rhs), + ) = (self.gen_expr(lhs), self.gen_expr(rhs)) + { + (lhs, rhs) + } else { + unreachable!() + }; + let op = match op { + ast::Cmpop::Eq | ast::Cmpop::Is => inkwell::FloatPredicate::OEQ, + ast::Cmpop::NotEq => inkwell::FloatPredicate::ONE, + 
ast::Cmpop::Lt => inkwell::FloatPredicate::OLT, + ast::Cmpop::LtE => inkwell::FloatPredicate::OLE, + ast::Cmpop::Gt => inkwell::FloatPredicate::OGT, + ast::Cmpop::GtE => inkwell::FloatPredicate::OGE, + _ => unreachable!(), + }; + self.builder.build_float_compare(op, lhs, rhs, "cmp") } else { - unreachable!() + unimplemented!() }; - let op = match op { - ast::Cmpop::Eq | ast::Cmpop::Is => inkwell::FloatPredicate::OEQ, - ast::Cmpop::NotEq => inkwell::FloatPredicate::ONE, - ast::Cmpop::Lt => inkwell::FloatPredicate::OLT, - ast::Cmpop::LtE => inkwell::FloatPredicate::OLE, - ast::Cmpop::Gt => inkwell::FloatPredicate::OGT, - ast::Cmpop::GtE => inkwell::FloatPredicate::OGE, - _ => unreachable!(), - }; - self.builder.build_float_compare(op, lhs, rhs, "cmp") - } else { - unimplemented!() - }; prev.map(|v| self.builder.build_and(v, current, "cmp")).or(Some(current)) }) .unwrap() diff --git a/nac3core/src/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs index 1c9f80c7..86a43ad7 100644 --- a/nac3core/src/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -3,13 +3,15 @@ use crate::typecheck::typedef::Type; use crate::top_level::DefinitionId; use rustpython_parser::ast::Expr; -pub enum SymbolValue<'a> { +#[derive(Clone, PartialEq)] +pub enum SymbolValue { I32(i32), I64(i64), Double(f64), Bool(bool), - Tuple(&'a [SymbolValue<'a>]), - Bytes(&'a [u8]), + Tuple(Vec), + // we should think about how to implement bytes later... + // Bytes(&'a [u8]), } pub trait SymbolResolver { diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index ff28da5e..9d374394 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -77,7 +77,7 @@ pub fn impl_binop(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type, othe vars: HashMap::new(), args: vec![FuncArg { ty: other, - is_optional: false, + default_value: None, name: "other".into() }] })) @@ -97,7 +97,7 @@ pub fn impl_binop(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type, othe vars: HashMap::new(), args: vec![FuncArg { ty: other, - is_optional: false, + default_value: None, name: "other".into() }] })) @@ -132,7 +132,7 @@ pub fn impl_cmpop(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other vars: HashMap::new(), args: vec![FuncArg { ty: other_ty, - is_optional: false, + default_value: None, name: "other".into() }] })) @@ -144,15 +144,15 @@ pub fn impl_cmpop(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other /// Add, Sub, Mult, Pow pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type) { impl_binop(unifier, store, ty, other_ty, ret_ty, &[ - ast::Operator::Add, - ast::Operator::Sub, + ast::Operator::Add, + ast::Operator::Sub, ast::Operator::Mult, ]) } pub fn impl_pow(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type) { impl_binop(unifier, store, ty, other_ty, ret_ty, &[ - ast::Operator::Pow, + ast::Operator::Pow, ]) } @@ -236,9 +236,9 @@ pub fn impl_eq(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifier) { let PrimitiveStore { - int32: int32_t, - int64: int64_t, - float: float_t, + int32: int32_t, + int64: int64_t, + float: float_t, bool: bool_t, .. 
} = *store; @@ -255,8 +255,8 @@ pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie impl_not(unifier, store, int32_t); impl_comparison(unifier, store, int32_t, int32_t); impl_eq(unifier, store, int32_t); - - /* int64 ======== */ + + /* int64 ======== */ impl_basic_arithmetic(unifier, store, int64_t, &[int64_t], int64_t); impl_pow(unifier, store, int64_t, &[int64_t], int64_t); impl_bitwise_arithmetic(unifier, store, int64_t); @@ -269,8 +269,8 @@ pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie impl_not(unifier, store, int64_t); impl_comparison(unifier, store, int64_t, int64_t); impl_eq(unifier, store, int64_t); - - /* float ======== */ + + /* float ======== */ impl_basic_arithmetic(unifier, store, float_t, &[float_t], float_t); impl_pow(unifier, store, float_t, &[int32_t, float_t], float_t); impl_div(unifier, store, float_t, &[float_t]); @@ -280,8 +280,8 @@ pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifie impl_not(unifier, store, float_t); impl_comparison(unifier, store, float_t, float_t); impl_eq(unifier, store, float_t); - - /* bool ======== */ + + /* bool ======== */ impl_not(unifier, store, bool_t); impl_eq(unifier, store, bool_t); -} \ No newline at end of file +} diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index e2fdc172..7f5bcbc8 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -240,7 +240,7 @@ impl<'a> Inferencer<'a> { let fun = FunSignature { args: fn_args .iter() - .map(|(k, ty)| FuncArg { name: k.clone(), ty: *ty, is_optional: false }) + .map(|(k, ty)| FuncArg { name: k.clone(), ty: *ty, default_value: None }) .collect(), ret, vars: Default::default(), @@ -513,7 +513,13 @@ impl<'a> Inferencer<'a> { for (a, b, c) in izip!(once(left).chain(comparators), comparators, ops) { let method = comparison_name(c).ok_or_else(|| "unsupported comparator".to_string())?.to_string(); - self.build_method_call(a.location, method, a.custom.unwrap(), vec![b.custom.unwrap()], boolean)?; + self.build_method_call( + a.location, + method, + a.custom.unwrap(), + vec![b.custom.unwrap()], + boolean, + )?; } Ok(boolean) } diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index c880b6c2..38e2a9ff 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -6,8 +6,9 @@ use std::iter::once; use std::rc::Rc; use std::sync::{Arc, Mutex}; -use crate::top_level::DefinitionId; use super::unification_table::{UnificationKey, UnificationTable}; +use crate::symbol_resolver::SymbolValue; +use crate::top_level::DefinitionId; #[cfg(test)] mod test; @@ -30,9 +31,7 @@ pub struct Call { pub struct FuncArg { pub name: String, pub ty: Type, - // TODO: change this to an optional value - // for primitive types - pub is_optional: bool, + pub default_value: Option, } #[derive(Clone)] @@ -457,7 +456,7 @@ impl Unifier { let required: Vec = signature .args .iter() - .filter(|v| !v.is_optional) + .filter(|v| v.default_value.is_none()) .map(|v| v.name.clone()) .rev() .collect(); @@ -516,7 +515,7 @@ impl Unifier { if x.name != y.name { return Err("Functions differ in parameter names.".to_string()); } - if x.is_optional != y.is_optional { + if x.default_value != y.default_value { return Err("Functions differ in optional parameters.".to_string()); } self.unify(x.ty, y.ty)?; From 057fcfe3dfb771b2b790b236fec4afb46891eaeb Mon Sep 17 00:00:00 2001 
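The `gen_call` hunk earlier in this patch normalises a mix of positional and keyword arguments against the callee signature before emitting the call: values without a keyword consume parameters front-to-back, named values are stored under their keyword, parameters that were never supplied fall back to `default_value`, and everything is re-emitted in declaration order. Below is a self-contained sketch of that normalisation with simplified stand-ins for `FuncArg`/`SymbolValue`; one deliberate difference is the use of `or_insert_with`, so an explicitly passed keyword is never overwritten by its default.

```rust
use std::collections::HashMap;

// Simplified stand-ins for the FuncArg / SymbolValue types in the patch.
#[derive(Clone, Debug, PartialEq)]
enum Value {
    I32(i32),
    Double(f64),
}

#[derive(Clone)]
struct Arg {
    name: String,
    default_value: Option<Value>,
}

// Normalise a mixed positional/keyword call into the declaration order of the
// callee signature, filling omitted parameters from their defaults.
fn normalize_args(signature: &[Arg], params: Vec<(Option<String>, Value)>) -> Vec<Value> {
    let mut remaining: Vec<Arg> = signature.to_vec();
    let mut mapping: HashMap<String, Value> = HashMap::new();
    for (key, value) in params {
        // positional arguments consume signature entries front-to-back,
        // keyword arguments are stored under their explicit name
        let name = key.unwrap_or_else(|| remaining.remove(0).name);
        mapping.insert(name, value);
    }
    // any parameter still unassigned falls back to its default value
    for arg in signature {
        mapping
            .entry(arg.name.clone())
            .or_insert_with(|| arg.default_value.clone().expect("missing argument"));
    }
    // finally, emit the values in declaration order
    signature.iter().map(|arg| mapping.remove(&arg.name).unwrap()).collect()
}

fn main() {
    let sig = vec![
        Arg { name: "x".into(), default_value: None },
        Arg { name: "y".into(), default_value: Some(Value::Double(0.5)) },
    ];
    // call site: f(1)  ->  f(x=1, y=0.5)
    let out = normalize_args(&sig, vec![(None, Value::I32(1))]);
    assert_eq!(out, vec![Value::I32(1), Value::Double(0.5)]);
}
```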
From: pca006132 Date: Sat, 7 Aug 2021 17:31:01 +0800 Subject: [PATCH 084/131] default parameter value generation --- nac3core/src/codegen/expr.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index f93975ab..ccba23c9 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -88,7 +88,16 @@ impl<'ctx> CodeGenContext<'ctx> { } fn gen_symbol_val(&mut self, val: &SymbolValue) -> BasicValueEnum<'ctx> { - unimplemented!() + match val { + SymbolValue::I32(v) => self.ctx.i32_type().const_int(*v as u64, true).into(), + SymbolValue::I64(v) => self.ctx.i64_type().const_int(*v as u64, true).into(), + SymbolValue::Bool(v) => self.ctx.bool_type().const_int(*v as u64, true).into(), + SymbolValue::Double(v) => self.ctx.f64_type().const_float(*v).into(), + SymbolValue::Tuple(ls) => { + let vals = ls.iter().map(|v| self.gen_symbol_val(v)).collect_vec(); + self.ctx.const_struct(&vals, false).into() + } + } } fn gen_call( From 1ffb7920000ebd4ebed1968cd4e86c834de00208 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Sat, 7 Aug 2021 17:41:48 +0800 Subject: [PATCH 085/131] make tuple a ptr to a struct instead of a struct --- nac3core/src/codegen/expr.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index ccba23c9..ac37dcdd 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -95,7 +95,20 @@ impl<'ctx> CodeGenContext<'ctx> { SymbolValue::Double(v) => self.ctx.f64_type().const_float(*v).into(), SymbolValue::Tuple(ls) => { let vals = ls.iter().map(|v| self.gen_symbol_val(v)).collect_vec(); - self.ctx.const_struct(&vals, false).into() + let fields = vals.iter().map(|v| v.get_type()).collect_vec(); + let ty = self.ctx.struct_type(&fields, false); + let ptr = self.builder.build_alloca(ty, "tuple"); + let zero = self.ctx.i32_type().const_zero(); + unsafe { + for (i, val) in vals.into_iter().enumerate() { + let p = ptr.const_in_bounds_gep(&[ + zero, + self.ctx.i32_type().const_int(i as u64, false), + ]); + self.builder.build_store(p, val); + } + } + ptr.into() } } } From d8c713ce3df7ca4505286a348a5bd3999bfc54bc Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 9 Aug 2021 15:39:50 +0800 Subject: [PATCH 086/131] assignment statement --- nac3core/src/codegen/expr.rs | 16 +++++-- nac3core/src/codegen/helper.rs | 1 - nac3core/src/codegen/mod.rs | 2 +- nac3core/src/codegen/stmt.rs | 88 ++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 6 deletions(-) delete mode 100644 nac3core/src/codegen/helper.rs create mode 100644 nac3core/src/codegen/stmt.rs diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index ac37dcdd..6f03f0d2 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -33,7 +33,7 @@ impl<'ctx> CodeGenContext<'ctx> { .join(", ") } - fn get_attr_index(&mut self, ty: Type, attr: &str) -> usize { + pub fn get_attr_index(&mut self, ty: Type, attr: &str) -> usize { let obj_id = match &*self.unifier.get_ty(ty) { TypeEnum::TObj { obj_id, .. 
} => *obj_id, // we cannot have other types, virtual type should be handled by function calls @@ -48,7 +48,7 @@ impl<'ctx> CodeGenContext<'ctx> { index } - fn get_llvm_type(&mut self, ty: Type) -> BasicTypeEnum<'ctx> { + pub fn get_llvm_type(&mut self, ty: Type) -> BasicTypeEnum<'ctx> { use TypeEnum::*; // we assume the type cache should already contain primitive types, // and they should be passed by value instead of passing as pointer. @@ -275,7 +275,15 @@ impl<'ctx> CodeGenContext<'ctx> { } ExprKind::Name { id, .. } => { let ptr = self.var_assignment.get(id).unwrap(); - self.builder.build_load(*ptr, "load") + let primitives = &self.primitives; + // we should only dereference primitive types + if [primitives.int32, primitives.int64, primitives.float, primitives.bool] + .contains(&self.unifier.get_representative(expr.custom.unwrap())) + { + self.builder.build_load(*ptr, "load") + } else { + (*ptr).into() + } } ExprKind::List { elts, .. } => { // this shall be optimized later for constant primitive lists... @@ -472,7 +480,7 @@ impl<'ctx> CodeGenContext<'ctx> { ops.iter(), ) .fold(None, |prev, (lhs, rhs, op)| { - let ty = lhs.custom.unwrap(); + let ty = self.unifier.get_representative(lhs.custom.unwrap()); let current = if [self.primitives.int32, self.primitives.int64, self.primitives.bool] .contains(&ty) diff --git a/nac3core/src/codegen/helper.rs b/nac3core/src/codegen/helper.rs deleted file mode 100644 index 8b137891..00000000 --- a/nac3core/src/codegen/helper.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index 89a10d8c..95ee4bbf 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -1,2 +1,2 @@ mod expr; -mod helper; +mod stmt; diff --git a/nac3core/src/codegen/stmt.rs b/nac3core/src/codegen/stmt.rs new file mode 100644 index 00000000..1fa65060 --- /dev/null +++ b/nac3core/src/codegen/stmt.rs @@ -0,0 +1,88 @@ +use std::convert::TryInto; + +use crate::{top_level::CodeGenContext, typecheck::typedef::Type}; +use inkwell::{ + types::BasicTypeEnum, + values::{BasicValueEnum, PointerValue}, +}; +use rustpython_parser::ast::{Expr, ExprKind, Stmt, StmtKind}; + +impl<'ctx> CodeGenContext<'ctx> { + fn gen_var(&mut self, ty: Type) -> PointerValue<'ctx> { + let ty = self.get_llvm_type(ty); + let ty = if let BasicTypeEnum::PointerType(ty) = ty { + ty.get_element_type().try_into().unwrap() + } else { + ty + }; + self.builder.build_alloca(ty, "tmp") + } + + fn parse_pattern(&mut self, pattern: &Expr>) -> PointerValue<'ctx> { + // very similar to gen_expr, but we don't do an extra load at the end + // and we flatten nested tuples + match &pattern.node { + ExprKind::Name { id, .. } => { + self.var_assignment.get(id).cloned().unwrap_or_else(|| { + let ptr = self.gen_var(pattern.custom.unwrap()); + self.var_assignment.insert(id.clone(), ptr); + ptr + }) + } + ExprKind::Attribute { value, attr, .. } => { + let index = self.get_attr_index(value.custom.unwrap(), attr); + let val = self.gen_expr(value); + let ptr = if let BasicValueEnum::PointerValue(v) = val { + v + } else { + unreachable!(); + }; + unsafe { + ptr.const_in_bounds_gep(&[ + self.ctx.i32_type().const_zero(), + self.ctx.i32_type().const_int(index as u64, false), + ]) + } + } + ExprKind::Subscript { .. } => unimplemented!(), + _ => unreachable!(), + } + } + + fn gen_assignment(&mut self, target: &Expr>, value: BasicValueEnum<'ctx>) { + if let ExprKind::Tuple { elts, .. 
} = &target.node { + if let BasicValueEnum::PointerValue(ptr) = value { + for (i, elt) in elts.iter().enumerate() { + unsafe { + let t = ptr.const_in_bounds_gep(&[ + self.ctx.i32_type().const_zero(), + self.ctx.i32_type().const_int(i as u64, false), + ]); + let v = self.builder.build_load(t, "tmpload"); + self.gen_assignment(elt, v); + } + } + } else { + unreachable!() + } + } else { + let ptr = self.parse_pattern(target); + self.builder.build_store(ptr, value); + } + } + + pub fn gen_stmt(&mut self, stmt: &Stmt>) { + match &stmt.node { + StmtKind::Expr { value } => { + self.gen_expr(&value); + } + StmtKind::Assign { targets, value, .. } => { + let value = self.gen_expr(&value); + for target in targets.iter() { + self.gen_assignment(target, value); + } + } + _ => unimplemented!(), + } + } +} From 7a90ff5791e5801bc76f680da4fea8082b3b7792 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 9 Aug 2021 16:10:17 +0800 Subject: [PATCH 087/131] while loop constructs --- nac3core/src/codegen/stmt.rs | 42 ++++++++++++++++++++++++++++++++++++ nac3core/src/top_level.rs | 8 ++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/nac3core/src/codegen/stmt.rs b/nac3core/src/codegen/stmt.rs index 1fa65060..897cf1b7 100644 --- a/nac3core/src/codegen/stmt.rs +++ b/nac3core/src/codegen/stmt.rs @@ -82,6 +82,48 @@ impl<'ctx> CodeGenContext<'ctx> { self.gen_assignment(target, value); } } + StmtKind::Continue => { + self.builder.build_unconditional_branch(self.loop_bb.unwrap().0); + } + StmtKind::Break => { + self.builder.build_unconditional_branch(self.loop_bb.unwrap().1); + } + StmtKind::While { test, body, orelse } => { + let current = self.builder.get_insert_block().unwrap().get_parent().unwrap(); + let test_bb = self.ctx.append_basic_block(current, "test"); + let body_bb = self.ctx.append_basic_block(current, "body"); + let cont_bb = self.ctx.append_basic_block(current, "cont"); + // if there is no orelse, we just go to cont_bb + let orelse_bb = if orelse.is_empty() { + cont_bb + } else { + self.ctx.append_basic_block(current, "orelse") + }; + // store loop bb information and restore it later + let loop_bb = self.loop_bb.replace((test_bb, cont_bb)); + self.builder.build_unconditional_branch(test_bb); + self.builder.position_at_end(test_bb); + let test = self.gen_expr(test); + if let BasicValueEnum::IntValue(test) = test { + self.builder.build_conditional_branch(test, body_bb, orelse_bb); + } else { + unreachable!() + }; + self.builder.position_at_end(body_bb); + for stmt in body.iter() { + self.gen_stmt(stmt); + } + self.builder.build_unconditional_branch(test_bb); + if !orelse.is_empty() { + self.builder.position_at_end(orelse_bb); + for stmt in orelse.iter() { + self.gen_stmt(stmt); + } + self.builder.build_unconditional_branch(cont_bb); + } + self.builder.position_at_end(cont_bb); + self.loop_bb = loop_bb; + } _ => unimplemented!(), } } diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index e205af28..cd4af4d8 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -3,7 +3,10 @@ use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, Unifier}; use crate::symbol_resolver::SymbolResolver; -use inkwell::{builder::Builder, context::Context, module::Module, types::BasicTypeEnum, values::PointerValue}; +use inkwell::{ + basic_block::BasicBlock, builder::Builder, context::Context, module::Module, + types::BasicTypeEnum, values::PointerValue, +}; use 
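`gen_assignment` above flattens nested tuple targets by recursing into each element with the matching projected value, and only emits a store once it reaches a name (or attribute/subscript) target. Below is an LLVM-free sketch of that recursion over a toy target/value representation, not the rustpython_parser AST the patch actually matches on.

```rust
use std::collections::HashMap;

#[derive(Debug, PartialEq)]
enum Value { Int(i64), Tuple(Vec<Value>) }

enum Target { Name(String), Tuple(Vec<Target>) }

fn assign(env: &mut HashMap<String, Value>, target: &Target, value: Value) {
    match target {
        Target::Tuple(elts) => {
            // project each element of the tuple value onto its sub-target
            if let Value::Tuple(vals) = value {
                for (elt, v) in elts.iter().zip(vals.into_iter()) {
                    assign(env, elt, v);
                }
            } else {
                unreachable!("type checking guarantees a tuple value here")
            }
        }
        Target::Name(id) => {
            // in the real code this is an alloca looked up or created on demand
            env.insert(id.clone(), value);
        }
    }
}

fn main() {
    let mut env = HashMap::new();
    // a, (b, c) = 1, (2, 3)
    let target = Target::Tuple(vec![
        Target::Name("a".into()),
        Target::Tuple(vec![Target::Name("b".into()), Target::Name("c".into())]),
    ]);
    let value = Value::Tuple(vec![
        Value::Int(1),
        Value::Tuple(vec![Value::Int(2), Value::Int(3)]),
    ]);
    assign(&mut env, &target, value);
    assert_eq!(env["c"], Value::Int(3));
}
```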
parking_lot::RwLock; use rustpython_parser::ast::Stmt; @@ -65,4 +68,7 @@ pub struct CodeGenContext<'ctx> { pub var_assignment: HashMap>, pub type_cache: HashMap>, pub primitives: PrimitiveStore, + // where continue and break should go to respectively + // the first one is the test_bb, and the second one is bb after the loop + pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, } From cc0692a34cd1ecea3ec806198cdf56e98482b5c2 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 9 Aug 2021 16:19:20 +0800 Subject: [PATCH 088/131] modified alloca --- nac3core/src/codegen/stmt.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/nac3core/src/codegen/stmt.rs b/nac3core/src/codegen/stmt.rs index 897cf1b7..b66825cd 100644 --- a/nac3core/src/codegen/stmt.rs +++ b/nac3core/src/codegen/stmt.rs @@ -9,12 +9,8 @@ use rustpython_parser::ast::{Expr, ExprKind, Stmt, StmtKind}; impl<'ctx> CodeGenContext<'ctx> { fn gen_var(&mut self, ty: Type) -> PointerValue<'ctx> { + // should we build the alloca in an initial block? let ty = self.get_llvm_type(ty); - let ty = if let BasicTypeEnum::PointerType(ty) = ty { - ty.get_element_type().try_into().unwrap() - } else { - ty - }; self.builder.build_alloca(ty, "tmp") } @@ -76,6 +72,12 @@ impl<'ctx> CodeGenContext<'ctx> { StmtKind::Expr { value } => { self.gen_expr(&value); } + StmtKind::AnnAssign { target, value, .. } => { + if let Some(value) = value { + let value = self.gen_expr(&value); + self.gen_assignment(target, value); + } + } StmtKind::Assign { targets, value, .. } => { let value = self.gen_expr(&value); for target in targets.iter() { From 4db871c244dff936ac3554b851d8069c695d6f36 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Mon, 9 Aug 2021 16:37:28 +0800 Subject: [PATCH 089/131] put alloca in init block --- nac3core/src/codegen/stmt.rs | 15 +++++++-------- nac3core/src/top_level.rs | 2 ++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/nac3core/src/codegen/stmt.rs b/nac3core/src/codegen/stmt.rs index b66825cd..f2d7dd33 100644 --- a/nac3core/src/codegen/stmt.rs +++ b/nac3core/src/codegen/stmt.rs @@ -1,17 +1,16 @@ -use std::convert::TryInto; - use crate::{top_level::CodeGenContext, typecheck::typedef::Type}; -use inkwell::{ - types::BasicTypeEnum, - values::{BasicValueEnum, PointerValue}, -}; +use inkwell::values::{BasicValueEnum, PointerValue}; use rustpython_parser::ast::{Expr, ExprKind, Stmt, StmtKind}; impl<'ctx> CodeGenContext<'ctx> { fn gen_var(&mut self, ty: Type) -> PointerValue<'ctx> { - // should we build the alloca in an initial block? 
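The `While` lowering above stashes the enclosing loop's `(test_bb, cont_bb)` pair with `Option::replace` before generating the body and restores it afterwards, so `continue`/`break` in nested loops always branch to the innermost targets. Below is a small sketch of just that save/restore discipline, with string labels standing in for LLVM basic blocks.

```rust
#[derive(Default)]
struct Ctx {
    // (target of `continue`, target of `break`) for the innermost loop
    loop_bb: Option<(String, String)>,
}

impl Ctx {
    fn gen_while(&mut self, label: &str, body: impl FnOnce(&mut Ctx)) {
        let test_bb = format!("{label}.test");
        let cont_bb = format!("{label}.cont");
        // save the enclosing loop's targets, install ours
        let outer = self.loop_bb.replace((test_bb, cont_bb));
        body(self);
        // restore the enclosing loop's targets
        self.loop_bb = outer;
    }
}

fn main() {
    let mut ctx = Ctx::default();
    ctx.gen_while("outer", |ctx| {
        ctx.gen_while("inner", |ctx| {
            assert_eq!(ctx.loop_bb.as_ref().unwrap().0, "inner.test");
        });
        // after the inner loop, `continue`/`break` target the outer loop again
        assert_eq!(ctx.loop_bb.as_ref().unwrap().1, "outer.cont");
    });
    assert!(ctx.loop_bb.is_none());
}
```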
+ // put the alloca in init block + let current = self.builder.get_insert_block().unwrap(); + self.builder.position_at_end(self.init_bb); let ty = self.get_llvm_type(ty); - self.builder.build_alloca(ty, "tmp") + let ptr = self.builder.build_alloca(ty, "tmp"); + self.builder.position_at_end(current); + ptr } fn parse_pattern(&mut self, pattern: &Expr>) -> PointerValue<'ctx> { diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index cd4af4d8..d5a3276c 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -68,6 +68,8 @@ pub struct CodeGenContext<'ctx> { pub var_assignment: HashMap>, pub type_cache: HashMap>, pub primitives: PrimitiveStore, + // stores the alloca for variables + pub init_bb: BasicBlock<'ctx>, // where continue and break should go to respectively // the first one is the test_bb, and the second one is bb after the loop pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, From 6ad953f877e969295e6c00af781849806dd7a093 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Mon, 9 Aug 2021 01:43:41 +0800 Subject: [PATCH 090/131] top level class roughly handled, push for review --- nac3core/src/top_level.rs | 132 +++++++++++++++++++++++++++++++++- nac3core/src/typecheck/mod.rs | 2 +- 2 files changed, 132 insertions(+), 2 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index d5a3276c..29238ee6 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,7 +1,8 @@ +use std::default; use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; -use super::typecheck::typedef::{SharedUnifier, Type, Unifier}; +use super::typecheck::typedef::{SharedUnifier, Type, Unifier, TypeEnum}; use crate::symbol_resolver::SymbolResolver; use inkwell::{ basic_block::BasicBlock, builder::Builder, context::Context, module::Module, @@ -74,3 +75,132 @@ pub struct CodeGenContext<'ctx> { // the first one is the test_bb, and the second one is bb after the loop pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, } + +pub struct TopLevelManager<'a> { + pub def_index: usize, + pub ctx: TopLevelContext, + pub resolver: &'a mut Box, + pub primitives: (PrimitiveStore, Unifier) +} + + +use rustpython_parser::ast; +impl<'a> TopLevelManager<'a> { + pub fn make_primitives() -> (PrimitiveStore, Unifier) { + let mut unifier = Unifier::new(); + let int32 = unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(0), // NOTE: what should it be? + fields: HashMap::new().into(), + params: HashMap::new(), + }); + let int64 = unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(1), // NOTE: what should it be? + fields: HashMap::new().into(), + params: HashMap::new(), + }); + let float = unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(2), // NOTE: what should it be? + fields: HashMap::new().into(), + params: HashMap::new(), + }); + let bool = unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(3), // NOTE: what should it be? + fields: HashMap::new().into(), + params: HashMap::new(), + }); + let none = unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(4), // NOTE: what should it be? 
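The `gen_var` change above moves every alloca into a dedicated init block (the usual LLVM practice of keeping allocas near the entry so mem2reg can promote them) by saving the current insert point, emitting the alloca in `init_bb`, and jumping back. Below is a standalone inkwell sketch of the same dance, assuming an inkwell release contemporary with this patch in which `build_alloca`/`build_load` return values directly rather than `Result`.

```rust
use inkwell::context::Context;

fn main() {
    let context = Context::create();
    let module = context.create_module("demo");
    let builder = context.create_builder();
    let i32_t = context.i32_type();
    let function = module.add_function("f", i32_t.fn_type(&[], false), None);
    let init_bb = context.append_basic_block(function, "init");
    let body_bb = context.append_basic_block(function, "body");

    // generate the body as usual...
    builder.position_at_end(body_bb);
    // ...and when a fresh variable is needed, hop back to the init block
    let current = builder.get_insert_block().unwrap();
    builder.position_at_end(init_bb);
    let ptr = builder.build_alloca(i32_t, "tmp");
    builder.position_at_end(current);

    builder.build_store(ptr, i32_t.const_int(1, false));
    let v = builder.build_load(ptr, "load");
    builder.build_return(Some(&v));

    // the init block must eventually fall through to the body
    builder.position_at_end(init_bb);
    builder.build_unconditional_branch(body_bb);
}
```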
+ fields: HashMap::new().into(), + params: HashMap::new(), + }); + let primitives = PrimitiveStore { int32, int64, float, bool, none }; + crate::typecheck::magic_methods::set_primitives_magic_methods(&primitives, &mut unifier); + (primitives, unifier) + } + + pub fn new(resolver: &'a mut Box) -> Self { + TopLevelManager { + def_index: 1, + ctx: TopLevelContext { + definitions: Default::default(), + unifiers: Default::default() + }, + resolver, + primitives: Self::make_primitives() + } + } + + pub fn register_top_level(&mut self, ast: &ast::Stmt<()>) -> Result{ + match &ast.node { + ast::StmtKind::ClassDef { + name, + bases, + keywords, + body, + decorator_list + } => { + // ancestors and type_vars are found using the `bases` field + let mut class_ancestors: Vec = Default::default(); + let mut class_type_vars: Vec = Default::default(); + for base in bases { + match &base.node { + ast::ExprKind::Subscript {value, slice, ..} => { + match &value.node { + ast::ExprKind::Name {id, ..} if id == "Generic" => { + match &slice.node { + ast::ExprKind::Tuple {elts, ..} => { + for e in elts { + class_type_vars.push( + self.resolver. + parse_type_name(e) + .ok_or_else(|| "unkown base class type".to_string())? + ); // FIXME: is it correct to use this? + } + }, + _ => class_type_vars.push( + self.resolver + .parse_type_name(slice) + .ok_or_else(|| "unkown base class type".to_string())? + ) // FIXME: is it correct to use this? + } + }, + _ => return Err("only subscription on keyword Generic is allowed".into()) + } + }, + + ast::ExprKind::Name {id, ..} => { + class_ancestors.push(self.resolver.get_function_def(id)) // FIXME: is it correct to use this? + } + + _ => return Err("unsupported expression in the bases list".into()) + } + } + + // fields and methods are determined using the `body` field + let class_fields: Vec<(String, Type)> = Default::default(); + let class_methods: Vec<(String, Type, DefinitionId)> = Default::default(); + for stmt in body { + match &stmt.node { + ast::StmtKind::FunctionDef {name, .. 
} if name != "__init__" => { + let result = self.register_top_level(stmt)?; + unimplemented!() + }, + + _ => unimplemented!() + } + } + + let defs = self.ctx.definitions.write(); + let index = defs.len(); + + unimplemented!() + }, + + ast::StmtKind::FunctionDef {name, ..} => { + unimplemented!() + } + + _ => Err("only expect function definition and class definition".into()) + } + } +} \ No newline at end of file diff --git a/nac3core/src/typecheck/mod.rs b/nac3core/src/typecheck/mod.rs index a75222f5..db7bcaec 100644 --- a/nac3core/src/typecheck/mod.rs +++ b/nac3core/src/typecheck/mod.rs @@ -1,5 +1,5 @@ mod function_check; -mod magic_methods; +pub mod magic_methods; pub mod type_inferencer; pub mod typedef; mod unification_table; From 82ce816177ef422a4658527e3ce82511aeb84a1a Mon Sep 17 00:00:00 2001 From: ychenfo Date: Tue, 10 Aug 2021 10:33:18 +0800 Subject: [PATCH 091/131] refactored top level parsing, need review --- nac3core/src/symbol_resolver.rs | 11 +- nac3core/src/top_level.rs | 272 ++++++++++++------ .../src/typecheck/type_inferencer/test.rs | 14 +- 3 files changed, 204 insertions(+), 93 deletions(-) diff --git a/nac3core/src/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs index 86a43ad7..7d34423f 100644 --- a/nac3core/src/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -15,10 +15,11 @@ pub enum SymbolValue { } pub trait SymbolResolver { - fn get_symbol_type(&mut self, str: &str) -> Option; - fn parse_type_name(&mut self, expr: &Expr<()>) -> Option; - fn get_function_def(&mut self, str: &str) -> DefinitionId; - fn get_symbol_value(&mut self, str: &str) -> Option; - fn get_symbol_location(&mut self, str: &str) -> Option; + fn get_symbol_type(&self, str: &str) -> Option; + fn parse_type_name(&self, expr: &Expr<()>) -> Option; + fn get_identifier_def(&self, str: &str) -> DefinitionId; + fn get_symbol_value(&self, str: &str) -> Option; + fn get_symbol_location(&self, str: &str) -> Option; + fn get_module_resolver(&self, module_name: &str) -> Option<&dyn SymbolResolver>; // NOTE: for getting imported modules' symbol resolver? // handle function call etc. } diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 29238ee6..8eafc61a 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,4 +1,3 @@ -use std::default; use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; @@ -45,6 +44,9 @@ pub enum TopLevelDef { /// rigid type variables that would be substituted when the function is instantiated. 
instance_to_stmt: HashMap>, usize)>, }, + Initializer { + class_id: Option, + } } pub struct CodeGenTask { @@ -76,40 +78,46 @@ pub struct CodeGenContext<'ctx> { pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, } -pub struct TopLevelManager<'a> { - pub def_index: usize, - pub ctx: TopLevelContext, - pub resolver: &'a mut Box, - pub primitives: (PrimitiveStore, Unifier) -} use rustpython_parser::ast; -impl<'a> TopLevelManager<'a> { +pub struct TopLevelDefInfo<'a> { // like adding some info on top of the TopLevelDef for later parsing the class bases, method, and function sigatures + def: TopLevelDef, // the definition entry + ty: Type, // the entry in the top_level unifier + ast: Option>, // the ast submitted by applications + resolver: Option<&'a dyn SymbolResolver> // the resolver +} +pub struct TopLevelComposer<'a> { + pub definition_list: Vec>, + pub primitives: PrimitiveStore, + pub unifier: Unifier, +} + +impl<'a> TopLevelComposer<'a> { pub fn make_primitives() -> (PrimitiveStore, Unifier) { let mut unifier = Unifier::new(); let int32 = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(0), // NOTE: what should it be? + obj_id: DefinitionId(0), // 0 should be fine fields: HashMap::new().into(), params: HashMap::new(), }); let int64 = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(1), // NOTE: what should it be? + obj_id: DefinitionId(1), // 0 should be fine fields: HashMap::new().into(), params: HashMap::new(), }); let float = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(2), // NOTE: what should it be? + obj_id: DefinitionId(2), // 0 should be fine fields: HashMap::new().into(), params: HashMap::new(), }); let bool = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(3), // NOTE: what should it be? + obj_id: DefinitionId(3), // 0 should be fine fields: HashMap::new().into(), params: HashMap::new(), }); let none = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(4), // NOTE: what should it be? + obj_id: DefinitionId(4), // 0 should be fine fields: HashMap::new().into(), params: HashMap::new(), }); @@ -117,90 +125,188 @@ impl<'a> TopLevelManager<'a> { crate::typecheck::magic_methods::set_primitives_magic_methods(&primitives, &mut unifier); (primitives, unifier) } - - pub fn new(resolver: &'a mut Box) -> Self { - TopLevelManager { - def_index: 1, - ctx: TopLevelContext { - definitions: Default::default(), - unifiers: Default::default() + pub fn new() -> Self { + let primitives = Self::make_primitives(); + let definition_list: Vec> = vec![ + TopLevelDefInfo { + def: Self::make_top_level_class_def(0), + ast: None, + resolver: None, + ty: primitives.0.int32 // just arbitary picked one... }, - resolver, - primitives: Self::make_primitives() + TopLevelDefInfo { + def: Self::make_top_level_class_def(1), + ast: None, + resolver: None, + ty: primitives.0.int64 // just arbitary picked one... + }, + TopLevelDefInfo { + def: Self::make_top_level_class_def(2), + ast: None, + resolver: None, + ty: primitives.0.float // just arbitary picked one... + }, + TopLevelDefInfo { + def: Self::make_top_level_class_def(3), + ast: None, + resolver: None, + ty: primitives.0.bool // just arbitary picked one... + }, + TopLevelDefInfo { + def: Self::make_top_level_class_def(4), + ast: None, + resolver: None, + ty: primitives.0.none // just arbitary picked one... 
+ }, + ]; // the entries for primitive types + TopLevelComposer { + definition_list, + primitives: primitives.0, + unifier: primitives.1 } } - pub fn register_top_level(&mut self, ast: &ast::Stmt<()>) -> Result{ + pub fn make_top_level_class_def(index: usize) -> TopLevelDef { + TopLevelDef::Class { + object_id: DefinitionId(index), + type_vars: Default::default(), + fields: Default::default(), + methods: Default::default(), + ancestors: Default::default(), + } + } + pub fn make_top_level_function_def(name: String, ty: Type) -> TopLevelDef { + TopLevelDef::Function { + name, + signature: ty, + instance_to_symbol: Default::default(), + instance_to_stmt: Default::default() + } + } + + // like to make and return a "primitive" symbol resolver? so that the symbol resolver can later figure out primitive type definitions when passed a primitive type name + pub fn get_primitives_definition(&self) -> Vec<(String, DefinitionId, Type)> { + vec![ + ("int32".into(), DefinitionId(0), self.primitives.int32), + ("int64".into(), DefinitionId(0), self.primitives.int32), + ("float".into(), DefinitionId(0), self.primitives.int32), + ("bool".into(), DefinitionId(0), self.primitives.int32), + ("none".into(), DefinitionId(0), self.primitives.int32), + ] + } + + pub fn register_top_level(&mut self, ast: ast::Stmt<()>, resolver: &'a dyn SymbolResolver) -> Result, String> { match &ast.node { - ast::StmtKind::ClassDef { - name, - bases, - keywords, - body, - decorator_list - } => { - // ancestors and type_vars are found using the `bases` field - let mut class_ancestors: Vec = Default::default(); - let mut class_type_vars: Vec = Default::default(); - for base in bases { - match &base.node { - ast::ExprKind::Subscript {value, slice, ..} => { - match &value.node { - ast::ExprKind::Name {id, ..} if id == "Generic" => { - match &slice.node { - ast::ExprKind::Tuple {elts, ..} => { - for e in elts { - class_type_vars.push( - self.resolver. - parse_type_name(e) - .ok_or_else(|| "unkown base class type".to_string())? - ); // FIXME: is it correct to use this? - } - }, - _ => class_type_vars.push( - self.resolver - .parse_type_name(slice) - .ok_or_else(|| "unkown base class type".to_string())? - ) // FIXME: is it correct to use this? + ast::StmtKind::ClassDef {name, body, ..} => { + let class_name = name.to_string(); + let def_id = self.definition_list.len(); + // add the class to the unifier + let ty = self.unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(def_id), + fields: Default::default(), + params: Default::default() + }); + // add to the definition list + self.definition_list.push( + TopLevelDefInfo { + def: Self::make_top_level_class_def(def_id), + resolver: Some(resolver), + ast: Some(ast), + ty, + } + ); + + // TODO: parse class def body and register class methods into the def list? + // FIXME: module's symbol resolver would not know the name of the class methods, thus cannot return their definition_id? so we have to manage it ourselves? + // or do we return the class method list of (method_name, def_id, type) to application to be used to build symbol resolver? 
<- current implementation + + Ok(vec![(class_name, DefinitionId(def_id), ty)]) // FIXME: need to add class method def + }, + + ast::StmtKind::FunctionDef {name, ..} => { + let fun_name = name.to_string(); + let def_id = self.definition_list.len(); + // add to the unifier + let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { + args: Default::default(), + ret: self.primitives.none, // NOTE: this needs to be changed later + vars: Default::default() + })); + // add to the definition list + self.definition_list.push( + TopLevelDefInfo { + def: Self::make_top_level_function_def( + name.into(), + self.primitives.none // NOTE: this needs to be changed later + ), + resolver: Some(resolver), + ast: Some(ast), + ty, + } + ); + + Ok(vec![(fun_name, DefinitionId(def_id), ty)]) + }, + + _ => Err("only registrations of top level classes/functions are supprted".into()) + } + } + + /// this should be called after all top level classes are registered, and will actually fill in those fields of the previous dummy one + pub fn analyze_top_level(&mut self) -> Result<(), String> { + for mut d in &mut self.definition_list { + if let (Some(ast), Some(resolver)) = (&d.ast, d.resolver) { + match &ast.node { + ast::StmtKind::ClassDef { + name, + bases, + body, + .. + } => { + // ancestors and typevars associate with the class are analyzed by looking into the `bases` ast node + for b in bases { + match &b.node { + ast::ExprKind::Name {id, ..} => { // base class, name directly available inside the module, can use this module's symbol resolver + let def_id = resolver.get_identifier_def(id); + unimplemented!() + }, + ast::ExprKind::Attribute {value, attr, ..} => { // things can be like `class A(BaseModule.Base)`, here we have to get the symbol resolver of the module `BaseModule`? + unimplemented!() // need to change symbol resolver in order to get the symbol resolver of the imported module + }, + ast::ExprKind::Subscript {value, slice, ..} => { // typevars bounded to the class, things like `class A(Generic[T, V])` + if let ast::ExprKind::Name {id, ..} = &value.node { + if id == "Generic" { + // TODO: get typevars + unimplemented!() + } else { + return Err("unknown type var".into()) + } } }, - _ => return Err("only subscription on keyword Generic is allowed".into()) + _ => return Err("not supported".into()) } - }, - - ast::ExprKind::Name {id, ..} => { - class_ancestors.push(self.resolver.get_function_def(id)) // FIXME: is it correct to use this? } - _ => return Err("unsupported expression in the bases list".into()) - } - } - - // fields and methods are determined using the `body` field - let class_fields: Vec<(String, Type)> = Default::default(); - let class_methods: Vec<(String, Type, DefinitionId)> = Default::default(); - for stmt in body { - match &stmt.node { - ast::StmtKind::FunctionDef {name, .. } if name != "__init__" => { - let result = self.register_top_level(stmt)?; + // class method and field are analyzed by looking into the class body ast node + for stmt in body { unimplemented!() - }, + } + }, - _ => unimplemented!() + ast::StmtKind::FunctionDef { + name, + args, + body, + returns, + .. 
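`TopLevelComposer` above hands out `DefinitionId`s simply as indices into its `definition_list`, with the first five slots pre-filled for the primitive types; registering a class or function just pushes a new entry and returns its index. Below is a minimal, self-contained sketch of that registry scheme, using simplified stand-ins rather than the real `TopLevelDef`.

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
struct DefinitionId(usize);

enum Def {
    Class { name: String, ancestors: Vec<DefinitionId> },
    Function { name: String },
}

struct Composer {
    definitions: Vec<Def>,
}

impl Composer {
    fn new() -> Self {
        // ids 0-4 are reserved for int32, int64, float, bool and none
        let primitives: Vec<Def> = ["int32", "int64", "float", "bool", "none"]
            .iter()
            .map(|n| Def::Class { name: (*n).to_string(), ancestors: vec![] })
            .collect();
        Composer { definitions: primitives }
    }

    fn register(&mut self, def: Def) -> DefinitionId {
        // a DefinitionId is just the index the entry was pushed at
        let id = DefinitionId(self.definitions.len());
        self.definitions.push(def);
        id
    }
}

fn main() {
    let mut composer = Composer::new();
    let class_id = composer.register(Def::Class { name: "Foo".into(), ancestors: vec![] });
    let method_id = composer.register(Def::Function { name: "Foo.method".into() });
    assert_eq!(class_id, DefinitionId(5));
    assert_eq!(method_id, DefinitionId(6));
}
```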
+ } => { + unimplemented!() } + + _ => return Err("only expect function and class definitions to be submitted here to be analyzed".into()) } - - let defs = self.ctx.definitions.write(); - let index = defs.len(); - - unimplemented!() - }, - - ast::StmtKind::FunctionDef {name, ..} => { - unimplemented!() } - - _ => Err("only expect function definition and class definition".into()) - } + }; + Ok(()) } } \ No newline at end of file diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 76b7515c..f4d2cd8d 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -15,11 +15,11 @@ struct Resolver { } impl SymbolResolver for Resolver { - fn get_symbol_type(&mut self, str: &str) -> Option { + fn get_symbol_type(&self, str: &str) -> Option { self.identifier_mapping.get(str).cloned() } - fn parse_type_name(&mut self, ty: &ast::Expr<()>) -> Option { + fn parse_type_name(&self, ty: &ast::Expr<()>) -> Option { if let ExprKind::Name { id, .. } = &ty.node { self.class_names.get(id).cloned() } else { @@ -27,15 +27,19 @@ impl SymbolResolver for Resolver { } } - fn get_symbol_value(&mut self, _: &str) -> Option { + fn get_symbol_value(&self, _: &str) -> Option { unimplemented!() } - fn get_symbol_location(&mut self, _: &str) -> Option { + fn get_symbol_location(&self, _: &str) -> Option { unimplemented!() } - fn get_function_def(&mut self, _: &str) -> DefinitionId { + fn get_identifier_def(&self, _: &str) -> DefinitionId { + unimplemented!() + } + + fn get_module_resolver(&self, _: &str) -> Option<&dyn SymbolResolver> { unimplemented!() } } From a73ab922e2e10a365d135e73a23583a804a555b1 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Tue, 10 Aug 2021 21:57:31 +0800 Subject: [PATCH 092/131] cleanup --- nac3core/src/top_level.rs | 150 ++++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 70 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 8eafc61a..a5123986 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,14 +1,14 @@ use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; -use super::typecheck::typedef::{SharedUnifier, Type, Unifier, TypeEnum}; +use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; use crate::symbol_resolver::SymbolResolver; use inkwell::{ basic_block::BasicBlock, builder::Builder, context::Context, module::Module, types::BasicTypeEnum, values::PointerValue, }; use parking_lot::RwLock; -use rustpython_parser::ast::Stmt; +use rustpython_parser::ast::{self, Stmt}; #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] pub struct DefinitionId(pub usize); @@ -45,8 +45,8 @@ pub enum TopLevelDef { instance_to_stmt: HashMap>, usize)>, }, Initializer { - class_id: Option, - } + class_id: DefinitionId, + }, } pub struct CodeGenTask { @@ -78,15 +78,15 @@ pub struct CodeGenContext<'ctx> { pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, } - - -use rustpython_parser::ast; -pub struct TopLevelDefInfo<'a> { // like adding some info on top of the TopLevelDef for later parsing the class bases, method, and function sigatures - def: TopLevelDef, // the definition entry - ty: Type, // the entry in the top_level unifier - ast: Option>, // the ast submitted by applications - resolver: Option<&'a dyn SymbolResolver> // the resolver +pub struct TopLevelDefInfo<'a> { + // like adding some info on top of the TopLevelDef for later parsing the class 
bases, method, + // and function sigatures + def: TopLevelDef, // the definition entry + ty: Type, // the entry in the top_level unifier + ast: Option>, // the ast submitted by applications + resolver: Option<&'a dyn SymbolResolver>, // the resolver } + pub struct TopLevelComposer<'a> { pub definition_list: Vec>, pub primitives: PrimitiveStore, @@ -97,27 +97,27 @@ impl<'a> TopLevelComposer<'a> { pub fn make_primitives() -> (PrimitiveStore, Unifier) { let mut unifier = Unifier::new(); let int32 = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(0), // 0 should be fine + obj_id: DefinitionId(0), fields: HashMap::new().into(), params: HashMap::new(), }); let int64 = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(1), // 0 should be fine + obj_id: DefinitionId(1), fields: HashMap::new().into(), params: HashMap::new(), }); let float = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(2), // 0 should be fine + obj_id: DefinitionId(2), fields: HashMap::new().into(), params: HashMap::new(), }); let bool = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(3), // 0 should be fine + obj_id: DefinitionId(3), fields: HashMap::new().into(), params: HashMap::new(), }); let none = unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(4), // 0 should be fine + obj_id: DefinitionId(4), fields: HashMap::new().into(), params: HashMap::new(), }); @@ -125,6 +125,7 @@ impl<'a> TopLevelComposer<'a> { crate::typecheck::magic_methods::set_primitives_magic_methods(&primitives, &mut unifier); (primitives, unifier) } + pub fn new() -> Self { let primitives = Self::make_primitives(); let definition_list: Vec> = vec![ @@ -132,38 +133,34 @@ impl<'a> TopLevelComposer<'a> { def: Self::make_top_level_class_def(0), ast: None, resolver: None, - ty: primitives.0.int32 // just arbitary picked one... + ty: primitives.0.int32, }, TopLevelDefInfo { def: Self::make_top_level_class_def(1), ast: None, resolver: None, - ty: primitives.0.int64 // just arbitary picked one... + ty: primitives.0.int64, }, TopLevelDefInfo { def: Self::make_top_level_class_def(2), ast: None, resolver: None, - ty: primitives.0.float // just arbitary picked one... + ty: primitives.0.float, }, TopLevelDefInfo { def: Self::make_top_level_class_def(3), ast: None, resolver: None, - ty: primitives.0.bool // just arbitary picked one... + ty: primitives.0.bool, }, TopLevelDefInfo { def: Self::make_top_level_class_def(4), ast: None, resolver: None, - ty: primitives.0.none // just arbitary picked one... + ty: primitives.0.none, }, ]; // the entries for primitive types - TopLevelComposer { - definition_list, - primitives: primitives.0, - unifier: primitives.1 - } + TopLevelComposer { definition_list, primitives: primitives.0, unifier: primitives.1 } } pub fn make_top_level_class_def(index: usize) -> TopLevelDef { @@ -180,11 +177,12 @@ impl<'a> TopLevelComposer<'a> { name, signature: ty, instance_to_symbol: Default::default(), - instance_to_stmt: Default::default() + instance_to_stmt: Default::default(), } } - // like to make and return a "primitive" symbol resolver? so that the symbol resolver can later figure out primitive type definitions when passed a primitive type name + // like to make and return a "primitive" symbol resolver? 
so that the symbol resolver can later + // figure out primitive type definitions when passed a primitive type name pub fn get_primitives_definition(&self) -> Vec<(String, DefinitionId, Type)> { vec![ ("int32".into(), DefinitionId(0), self.primitives.int32), @@ -195,60 +193,63 @@ impl<'a> TopLevelComposer<'a> { ] } - pub fn register_top_level(&mut self, ast: ast::Stmt<()>, resolver: &'a dyn SymbolResolver) -> Result, String> { + pub fn register_top_level( + &mut self, + ast: ast::Stmt<()>, + resolver: &'a dyn SymbolResolver, + ) -> Result, String> { match &ast.node { - ast::StmtKind::ClassDef {name, body, ..} => { + ast::StmtKind::ClassDef { name, body, .. } => { let class_name = name.to_string(); let def_id = self.definition_list.len(); // add the class to the unifier let ty = self.unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(def_id), fields: Default::default(), - params: Default::default() + params: Default::default(), }); // add to the definition list - self.definition_list.push( - TopLevelDefInfo { - def: Self::make_top_level_class_def(def_id), - resolver: Some(resolver), - ast: Some(ast), - ty, - } - ); - + self.definition_list.push(TopLevelDefInfo { + def: Self::make_top_level_class_def(def_id), + resolver: Some(resolver), + ast: Some(ast), + ty, + }); + // TODO: parse class def body and register class methods into the def list? - // FIXME: module's symbol resolver would not know the name of the class methods, thus cannot return their definition_id? so we have to manage it ourselves? - // or do we return the class method list of (method_name, def_id, type) to application to be used to build symbol resolver? <- current implementation + // FIXME: module's symbol resolver would not know the name of the class methods, + // thus cannot return their definition_id? so we have to manage it ourselves? or + // do we return the class method list of (method_name, def_id, type) to application + // to be used to build symbol resolver? <- current implementation Ok(vec![(class_name, DefinitionId(def_id), ty)]) // FIXME: need to add class method def - }, + } - ast::StmtKind::FunctionDef {name, ..} => { + ast::StmtKind::FunctionDef { name, .. } => { let fun_name = name.to_string(); let def_id = self.definition_list.len(); // add to the unifier - let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { - args: Default::default(), - ret: self.primitives.none, // NOTE: this needs to be changed later - vars: Default::default() - })); + let ty = + self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { + args: Default::default(), + ret: self.primitives.none, // NOTE: this needs to be changed later + vars: Default::default(), + })); // add to the definition list - self.definition_list.push( - TopLevelDefInfo { - def: Self::make_top_level_function_def( - name.into(), - self.primitives.none // NOTE: this needs to be changed later - ), - resolver: Some(resolver), - ast: Some(ast), - ty, - } - ); + self.definition_list.push(TopLevelDefInfo { + def: Self::make_top_level_function_def( + name.into(), + self.primitives.none, // NOTE: this needs to be changed later + ), + resolver: Some(resolver), + ast: Some(ast), + ty, + }); Ok(vec![(fun_name, DefinitionId(def_id), ty)]) - }, + } - _ => Err("only registrations of top level classes/functions are supprted".into()) + _ => Err("only registrations of top level classes/functions are supprted".into()), } } @@ -263,17 +264,26 @@ impl<'a> TopLevelComposer<'a> { body, .. 
} => { - // ancestors and typevars associate with the class are analyzed by looking into the `bases` ast node + // ancestors and typevars associate with the class are analyzed by looking + // into the `bases` ast node for b in bases { match &b.node { - ast::ExprKind::Name {id, ..} => { // base class, name directly available inside the module, can use this module's symbol resolver + // base class, name directly available inside the module, can use + // this module's symbol resolver + ast::ExprKind::Name {id, ..} => { let def_id = resolver.get_identifier_def(id); unimplemented!() }, - ast::ExprKind::Attribute {value, attr, ..} => { // things can be like `class A(BaseModule.Base)`, here we have to get the symbol resolver of the module `BaseModule`? - unimplemented!() // need to change symbol resolver in order to get the symbol resolver of the imported module + // things can be like `class A(BaseModule.Base)`, here we have to + // get the symbol resolver of the module `BaseModule`? + ast::ExprKind::Attribute {value, attr, ..} => { + // need to change symbol resolver in order to get the symbol + // resolver of the imported module + unimplemented!() }, - ast::ExprKind::Subscript {value, slice, ..} => { // typevars bounded to the class, things like `class A(Generic[T, V])` + // typevars bounded to the class, things like + // `class A(Generic[T, V])` + ast::ExprKind::Subscript {value, slice, ..} => { if let ast::ExprKind::Name {id, ..} = &value.node { if id == "Generic" { // TODO: get typevars @@ -306,7 +316,7 @@ impl<'a> TopLevelComposer<'a> { _ => return Err("only expect function and class definitions to be submitted here to be analyzed".into()) } } - }; + } Ok(()) } -} \ No newline at end of file +} From 1bec6cf2db9b8006f58b4a48b9a76da9bb99a86c Mon Sep 17 00:00:00 2001 From: ychenfo Date: Tue, 10 Aug 2021 23:49:58 +0800 Subject: [PATCH 093/131] continue working on the top level --- nac3core/src/top_level.rs | 281 ++++++++++++++++++++++++++++++-------- shell.nix | 2 + 2 files changed, 224 insertions(+), 59 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index a5123986..754fca9d 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,3 +1,4 @@ +use std::borrow::Borrow; use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; @@ -78,13 +79,19 @@ pub struct CodeGenContext<'ctx> { pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, } -pub struct TopLevelDefInfo<'a> { + +pub fn name_mangling(mut class_name: String, method_name: &str) -> String { // need to further extend to more name mangling like instantiations of typevar + class_name.push_str(method_name); + class_name +} + +pub struct TopLevelDefInfo<'a> { // like adding some info on top of the TopLevelDef for later parsing the class bases, method, // and function sigatures - def: TopLevelDef, // the definition entry - ty: Type, // the entry in the top_level unifier - ast: Option>, // the ast submitted by applications - resolver: Option<&'a dyn SymbolResolver>, // the resolver + def: TopLevelDef, // the definition entry + ty: Type, // the entry in the top_level unifier + ast: Option>, // the ast submitted by applications, primitives and class methods will have None value here + resolver: Option<&'a dyn SymbolResolver> // the resolver } pub struct TopLevelComposer<'a> { @@ -163,13 +170,14 @@ impl<'a> TopLevelComposer<'a> { TopLevelComposer { definition_list, primitives: primitives.0, unifier: primitives.1 } } + /// already include the definition_id 
of itself inside the ancestors vector pub fn make_top_level_class_def(index: usize) -> TopLevelDef { TopLevelDef::Class { object_id: DefinitionId(index), type_vars: Default::default(), fields: Default::default(), methods: Default::default(), - ancestors: Default::default(), + ancestors: vec![DefinitionId(index)], } } pub fn make_top_level_function_def(name: String, ty: Type) -> TopLevelDef { @@ -186,10 +194,10 @@ impl<'a> TopLevelComposer<'a> { pub fn get_primitives_definition(&self) -> Vec<(String, DefinitionId, Type)> { vec![ ("int32".into(), DefinitionId(0), self.primitives.int32), - ("int64".into(), DefinitionId(0), self.primitives.int32), - ("float".into(), DefinitionId(0), self.primitives.int32), - ("bool".into(), DefinitionId(0), self.primitives.int32), - ("none".into(), DefinitionId(0), self.primitives.int32), + ("int64".into(), DefinitionId(1), self.primitives.int64), + ("float".into(), DefinitionId(2), self.primitives.float), + ("bool".into(), DefinitionId(3), self.primitives.bool), + ("none".into(), DefinitionId(4), self.primitives.none), ] } @@ -201,29 +209,67 @@ impl<'a> TopLevelComposer<'a> { match &ast.node { ast::StmtKind::ClassDef { name, body, .. } => { let class_name = name.to_string(); - let def_id = self.definition_list.len(); + let class_def_id = self.definition_list.len(); + // add the class to the unifier let ty = self.unifier.add_ty(TypeEnum::TObj { - obj_id: DefinitionId(def_id), + obj_id: DefinitionId(class_def_id), fields: Default::default(), params: Default::default(), }); + + let mut ret_vector: Vec<(String, DefinitionId, Type)> = vec![(class_name.clone(), DefinitionId(class_def_id), ty)]; + // parse class def body and register class methods into the def list + // NOTE: module's symbol resolver would not know the name of the class methods, thus cannot return their definition_id? so we have to manage it ourselves? + // or do we return the class method list of (method_name, def_id, type) to application to be used to build symbol resolver? <- current implementation + for b in body { + if let ast::StmtKind::FunctionDef {name, ..} = &b.node { + let fun_name = name_mangling(class_name.clone(), name); + let def_id = self.definition_list.len(); + // add to unifier + let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { + args: Default::default(), + ret: self.primitives.none, + vars: Default::default() + })); + // add to the definition list + self.definition_list.push( + TopLevelDefInfo { + def: Self::make_top_level_function_def(fun_name.clone(), ty), + resolver: Some(resolver), + ty, + ast: None // since it is inside the class def body statments + } + ); + ret_vector.push((fun_name, DefinitionId(def_id), ty)); + + if name == "__init__" { // if it is the contructor, special handling is needed. In the above handling, we still add __init__ function to the class method + self.definition_list.push( + TopLevelDefInfo { + def: TopLevelDef::Initializer { + class_id: DefinitionId(class_def_id) // FIXME: None if have no parameter, Some if same as __init__? + }, + ty: self.primitives.none, // arbitary picked one + ast: None, // it is inside the class def body statments + resolver: Some(resolver) + } + ) + // FIXME: should we return this to the symbol resolver? 
+ } + } else { } // else do nothing + } // add to the definition list - self.definition_list.push(TopLevelDefInfo { - def: Self::make_top_level_class_def(def_id), - resolver: Some(resolver), - ast: Some(ast), - ty, - }); - - // TODO: parse class def body and register class methods into the def list? - // FIXME: module's symbol resolver would not know the name of the class methods, - // thus cannot return their definition_id? so we have to manage it ourselves? or - // do we return the class method list of (method_name, def_id, type) to application - // to be used to build symbol resolver? <- current implementation - - Ok(vec![(class_name, DefinitionId(def_id), ty)]) // FIXME: need to add class method def - } + self.definition_list.push( + TopLevelDefInfo { + def: Self::make_top_level_class_def(class_def_id), + resolver: Some(resolver), + ast: Some(ast), + ty, + } + ); + + Ok(ret_vector) + }, ast::StmtKind::FunctionDef { name, .. } => { let fun_name = name.to_string(); @@ -232,18 +278,18 @@ impl<'a> TopLevelComposer<'a> { let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { args: Default::default(), - ret: self.primitives.none, // NOTE: this needs to be changed later - vars: Default::default(), - })); + ret: self.primitives.none, + vars: Default::default() + })); // add to the definition list self.definition_list.push(TopLevelDefInfo { - def: Self::make_top_level_function_def( - name.into(), - self.primitives.none, // NOTE: this needs to be changed later - ), - resolver: Some(resolver), - ast: Some(ast), - ty, + def: Self::make_top_level_function_def( + name.into(), + self.primitives.none + ), + resolver: Some(resolver), + ast: Some(ast), + ty, }); Ok(vec![(fun_name, DefinitionId(def_id), ty)]) @@ -259,50 +305,137 @@ impl<'a> TopLevelComposer<'a> { if let (Some(ast), Some(resolver)) = (&d.ast, d.resolver) { match &ast.node { ast::StmtKind::ClassDef { - name, bases, body, .. } => { + // get the mutable reference of the entry in the definition list, get the `TopLevelDef` + let (_, + ancestors, + fields, + methods, + type_vars + ) = if let TopLevelDef::Class { + object_id, + ancestors, + fields, + methods, + type_vars + } = &mut d.def { + (object_id, ancestors, fields, methods, type_vars) + } else { unreachable!() }; + + // try to get mutable reference of the entry in the unification table, get the `TypeEnum` + let (params, + fields + ) = if let TypeEnum::TObj { + params, // FIXME: this params is immutable, even if this is mutable, what should the key be, get the original typevar's var_id? + fields, + .. + } = self.unifier.get_ty(d.ty).borrow() { + (params, fields) + } else { unreachable!() }; + // ancestors and typevars associate with the class are analyzed by looking // into the `bases` ast node for b in bases { match &b.node { - // base class, name directly available inside the module, can use - // this module's symbol resolver + // typevars bounded to the class, things like `class A(Generic[T, V, ImportedModule.T])` + // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params + ast::ExprKind::Subscript {value, slice, ..} if { + if let ast::ExprKind::Name {id, ..} = &value.node { + id == "Generic" + } else { false } + } => { + match &slice.node { + // `class Foo(Generic[T, V, P, ImportedModule.T]):` + ast::ExprKind::Tuple {elts, ..} => { + for e in elts { + // TODO: I'd better parse the node to get the Type of the type vars(can have things like: A.B.C.typevar?) 
+ match &e.node { + ast::ExprKind::Name {id, ..} => { + // the def_list + type_vars.push(resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable".to_string())?); + + // the TypeEnum of the class + // FIXME: the `params` destructed above is not mutable, even if this is mutable, what should the key be? + unimplemented!() + }, + + _ => unimplemented!() + } + } + }, + + // `class Foo(Generic[T]):` + ast::ExprKind::Name {id, ..} => { + // the def_list + type_vars.push(resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable".to_string())?); + + // the TypeEnum of the class + // FIXME: the `params` destructed above is not mutable, even if this is mutable, what should the key be? + unimplemented!() + }, + + // `class Foo(Generic[ImportedModule.T])` + ast::ExprKind::Attribute {value, attr, ..} => { + // TODO: + unimplemented!() + }, + + _ => return Err("not supported".into()) // NOTE: it is really all the supported cases? + }; + }, + + // base class, name directly available inside the + // module, can use this module's symbol resolver ast::ExprKind::Name {id, ..} => { let def_id = resolver.get_identifier_def(id); - unimplemented!() + // the definition list + ancestors.push(def_id); }, - // things can be like `class A(BaseModule.Base)`, here we have to - // get the symbol resolver of the module `BaseModule`? + + // base class, things can be like `class A(BaseModule.Base)`, here we have to get the + // symbol resolver of the module `BaseModule`? ast::ExprKind::Attribute {value, attr, ..} => { - // need to change symbol resolver in order to get the symbol - // resolver of the imported module - unimplemented!() - }, - // typevars bounded to the class, things like - // `class A(Generic[T, V])` - ast::ExprKind::Subscript {value, slice, ..} => { if let ast::ExprKind::Name {id, ..} = &value.node { - if id == "Generic" { - // TODO: get typevars - unimplemented!() - } else { - return Err("unknown type var".into()) - } - } + if let Some(base_module_resolver) = resolver.get_module_resolver(id) { + let def_id = base_module_resolver.get_identifier_def(attr); + // the definition list + ancestors.push(def_id); + } else { return Err("unkown imported module".into()) } + } else { return Err("unkown imported module".into()) } + }, + + // `class Foo(ImportedModule.A[int, bool])`, A is a class with associated type variables + ast::ExprKind::Subscript {value, slice, ..} => { + unimplemented!() }, _ => return Err("not supported".into()) } } - // class method and field are analyzed by looking into the class body ast node + // ----------- class method and field are analyzed by looking into the class body ast node ----------- for stmt in body { - unimplemented!() + if let ast::StmtKind::FunctionDef { + name, + args, + body, + returns, + .. + } = &stmt.node { + + } else { } + // do nothing. we do not care about things like this? 
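The bases loop above classifies each entry of a class's base list: a plain name becomes an ancestor, `Generic[...]` contributes type variables (a single name or a tuple of names), and anything else is rejected. Below is a parser-free sketch of that classification over a toy expression type; the real code matches rustpython_parser nodes and resolves names through the symbol resolver instead of returning strings.

```rust
enum Expr {
    Name(String),
    Subscript { value: Box<Expr>, slice: Box<Expr> },
    Tuple(Vec<Expr>),
}

#[derive(Debug, Default, PartialEq)]
struct ClassInfo {
    ancestors: Vec<String>,
    type_vars: Vec<String>,
}

fn analyze_bases(bases: &[Expr]) -> Result<ClassInfo, String> {
    let mut info = ClassInfo::default();
    for base in bases {
        match base {
            // an ordinary base class
            Expr::Name(id) => info.ancestors.push(id.clone()),
            // Generic[T] or Generic[T, V, ...]
            Expr::Subscript { value, slice } => {
                match value.as_ref() {
                    Expr::Name(id) if id == "Generic" => {}
                    _ => return Err("only subscripting `Generic` is allowed here".into()),
                }
                match slice.as_ref() {
                    Expr::Name(v) => info.type_vars.push(v.clone()),
                    Expr::Tuple(elts) => {
                        for e in elts {
                            if let Expr::Name(v) = e {
                                info.type_vars.push(v.clone());
                            } else {
                                return Err("unsupported type variable expression".into());
                            }
                        }
                    }
                    _ => return Err("unsupported type variable expression".into()),
                }
            }
            _ => return Err("unsupported expression in the bases list".into()),
        }
    }
    Ok(info)
}

fn main() {
    let bases = vec![
        Expr::Name("Base".into()),
        Expr::Subscript {
            value: Box::new(Expr::Name("Generic".into())),
            slice: Box::new(Expr::Tuple(vec![Expr::Name("T".into()), Expr::Name("V".into())])),
        },
    ];
    let info = analyze_bases(&bases).unwrap();
    assert_eq!(info.ancestors, vec!["Base".to_string()]);
    assert_eq!(info.type_vars, vec!["T".to_string(), "V".to_string()]);
}
```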
+ // class A: + // a = 3 + // b = [2, 3] + + } }, + // top level function definition ast::StmtKind::FunctionDef { name, args, @@ -320,3 +453,33 @@ impl<'a> TopLevelComposer<'a> { Ok(()) } } + + + +pub fn parse_type_var(input: &ast::Expr, resolver: &dyn SymbolResolver) -> Result { + match &input.node { + ast::ExprKind::Name {id, ..} => { + resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable identifer".to_string()) + }, + + ast::ExprKind::Attribute {value, attr, ..} => { + if let ast::ExprKind::Name {id, ..} = &value.node { + let next_resolver = resolver.get_module_resolver(id).ok_or_else(|| "unknown imported module".to_string())?; + next_resolver.get_symbol_type(attr).ok_or_else(|| "unknown type variable identifer".to_string()) + } else { + unimplemented!() + // recursively resolve attr thing, FIXME: new problem: how do we handle this? + // # A.py + // class A: + // T = TypeVar('T', int, bool) + // pass + // # B.py + // import A + // class B(Generic[A.A.T]): + // pass + } + }, + + _ => Err("not supported".into()) + } +} diff --git a/shell.nix b/shell.nix index 858e68b9..8f4e2ec8 100644 --- a/shell.nix +++ b/shell.nix @@ -6,4 +6,6 @@ in buildInputs = with pkgs; [ llvm_10 clang_10 cargo rustc libffi libxml2 clippy ]; + + RUST_SRC_PATH = "${pkgs.rust.packages.stable.rustPlatform.rustLibSrc}"; } From 43236db9bd38de83412de0ba47d744922d651f13 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Wed, 11 Aug 2021 13:31:59 +0800 Subject: [PATCH 094/131] update some previous work on top level with the clean up --- nac3core/src/top_level.rs | 23 +++++++++++++++-------- shell.nix | 2 -- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 754fca9d..6f1dcbb7 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -180,6 +180,7 @@ impl<'a> TopLevelComposer<'a> { ancestors: vec![DefinitionId(index)], } } + pub fn make_top_level_function_def(name: String, ty: Type) -> TopLevelDef { TopLevelDef::Function { name, @@ -189,8 +190,8 @@ impl<'a> TopLevelComposer<'a> { } } - // like to make and return a "primitive" symbol resolver? so that the symbol resolver can later - // figure out primitive type definitions when passed a primitive type name + // like to make and return a "primitive" symbol resolver? so that the symbol resolver + // can later figure out primitive type definitions when passed a primitive type name pub fn get_primitives_definition(&self) -> Vec<(String, DefinitionId, Type)> { vec![ ("int32".into(), DefinitionId(0), self.primitives.int32), @@ -220,8 +221,11 @@ impl<'a> TopLevelComposer<'a> { let mut ret_vector: Vec<(String, DefinitionId, Type)> = vec![(class_name.clone(), DefinitionId(class_def_id), ty)]; // parse class def body and register class methods into the def list - // NOTE: module's symbol resolver would not know the name of the class methods, thus cannot return their definition_id? so we have to manage it ourselves? - // or do we return the class method list of (method_name, def_id, type) to application to be used to build symbol resolver? <- current implementation + // NOTE: module's symbol resolver would not know the name of the class methods, + // thus cannot return their definition_id? so we have to manage it ourselves? + // or do we return the class method list of (method_name, def_id, type) to + // application to be used to build symbol resolver? 
<- current implementation + // FIXME: better do not return and let symbol resolver to manage the mangled name for b in body { if let ast::StmtKind::FunctionDef {name, ..} = &b.node { let fun_name = name_mangling(class_name.clone(), name); @@ -243,18 +247,20 @@ impl<'a> TopLevelComposer<'a> { ); ret_vector.push((fun_name, DefinitionId(def_id), ty)); - if name == "__init__" { // if it is the contructor, special handling is needed. In the above handling, we still add __init__ function to the class method + // if it is the contructor, special handling is needed. In the above + // handling, we still add __init__ function to the class method + if name == "__init__" { self.definition_list.push( TopLevelDefInfo { def: TopLevelDef::Initializer { - class_id: DefinitionId(class_def_id) // FIXME: None if have no parameter, Some if same as __init__? + class_id: DefinitionId(class_def_id) }, ty: self.primitives.none, // arbitary picked one ast: None, // it is inside the class def body statments resolver: Some(resolver) } ) - // FIXME: should we return this to the symbol resolver? + // FIXME: should we return this to the symbol resolver?, should be yes } } else { } // else do nothing } @@ -415,7 +421,8 @@ impl<'a> TopLevelComposer<'a> { } } - // ----------- class method and field are analyzed by looking into the class body ast node ----------- + // class method and field are analyzed by + // looking into the class body ast node for stmt in body { if let ast::StmtKind::FunctionDef { name, diff --git a/shell.nix b/shell.nix index 8f4e2ec8..858e68b9 100644 --- a/shell.nix +++ b/shell.nix @@ -6,6 +6,4 @@ in buildInputs = with pkgs; [ llvm_10 clang_10 cargo rustc libffi libxml2 clippy ]; - - RUST_SRC_PATH = "${pkgs.rust.packages.stable.rustPlatform.rustLibSrc}"; } From e112354d25d3c16d3c18297f3143bde3b1508c74 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 11 Aug 2021 14:37:26 +0800 Subject: [PATCH 095/131] codegen refactored --- nac3core/src/codegen/expr.rs | 46 ++------- nac3core/src/codegen/mod.rs | 195 +++++++++++++++++++++++++++++++++++ nac3core/src/codegen/stmt.rs | 13 ++- nac3core/src/top_level.rs | 34 +----- 4 files changed, 215 insertions(+), 73 deletions(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index 6f03f0d2..2f4c74db 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -1,8 +1,9 @@ use std::{collections::HashMap, convert::TryInto, iter::once}; +use super::{get_llvm_type, CodeGenContext}; use crate::{ symbol_resolver::SymbolValue, - top_level::{CodeGenContext, DefinitionId, TopLevelDef}, + top_level::{DefinitionId, TopLevelDef}, typecheck::typedef::{FunSignature, Type, TypeEnum}, }; use inkwell::{ @@ -48,45 +49,6 @@ impl<'ctx> CodeGenContext<'ctx> { index } - pub fn get_llvm_type(&mut self, ty: Type) -> BasicTypeEnum<'ctx> { - use TypeEnum::*; - // we assume the type cache should already contain primitive types, - // and they should be passed by value instead of passing as pointer. - self.type_cache.get(&ty).cloned().unwrap_or_else(|| match &*self.unifier.get_ty(ty) { - TObj { obj_id, fields, .. } => { - // a struct with fields in the order of declaration - let defs = self.top_level.definitions.read(); - let definition = defs.get(obj_id.0).unwrap(); - let ty = if let TopLevelDef::Class { fields: fields_list, .. 
} = &*definition.read() - { - let fields = fields.borrow(); - let fields = - fields_list.iter().map(|f| self.get_llvm_type(fields[&f.0])).collect_vec(); - self.ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() - } else { - unreachable!() - }; - ty - } - TTuple { ty } => { - // a struct with fields in the order present in the tuple - let fields = ty.iter().map(|ty| self.get_llvm_type(*ty)).collect_vec(); - self.ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() - } - TList { ty } => { - // a struct with an integer and a pointer to an array - let element_type = self.get_llvm_type(*ty); - let fields = [ - self.ctx.i32_type().into(), - element_type.ptr_type(AddressSpace::Generic).into(), - ]; - self.ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() - } - TVirtual { .. } => unimplemented!(), - _ => unreachable!(), - }) - } - fn gen_symbol_val(&mut self, val: &SymbolValue) -> BasicValueEnum<'ctx> { match val { SymbolValue::I32(v) => self.ctx.i32_type().const_int(*v as u64, true).into(), @@ -113,6 +75,10 @@ impl<'ctx> CodeGenContext<'ctx> { } } + pub fn get_llvm_type(&mut self, ty: Type) -> BasicTypeEnum<'ctx> { + get_llvm_type(self.ctx, &mut self.unifier, self.top_level, &mut self.type_cache, ty) + } + fn gen_call( &mut self, obj: Option<(Type, BasicValueEnum<'ctx>)>, diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index 95ee4bbf..c3fba2db 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -1,2 +1,197 @@ +use crate::{ + symbol_resolver::SymbolResolver, + top_level::{TopLevelContext, TopLevelDef}, + typecheck::{ + type_inferencer::PrimitiveStore, + typedef::{FunSignature, Type, TypeEnum, Unifier}, + }, +}; +use inkwell::{ + basic_block::BasicBlock, + builder::Builder, + context::Context, + module::Module, + types::{BasicType, BasicTypeEnum}, + values::PointerValue, + AddressSpace, +}; +use itertools::Itertools; +use rayon::current_thread_index; +use rustpython_parser::ast::{Stmt, StmtKind}; +use std::collections::HashMap; +use std::sync::Arc; + mod expr; mod stmt; + +pub struct CodeGenContext<'ctx> { + pub ctx: &'ctx Context, + pub builder: Builder<'ctx>, + pub module: Module<'ctx>, + pub top_level: &'ctx TopLevelContext, + pub unifier: Unifier, + pub resolver: Box, + pub var_assignment: HashMap>, + pub type_cache: HashMap>, + pub primitives: PrimitiveStore, + // stores the alloca for variables + pub init_bb: BasicBlock<'ctx>, + // where continue and break should go to respectively + // the first one is the test_bb, and the second one is bb after the loop + pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, +} + +pub struct CodeGenTask { + pub subst: Vec<(Type, Type)>, + pub symbol_name: String, + pub signature: FunSignature, + pub body: Stmt>, + pub unifier_index: usize, + pub resolver: Box, +} + +fn get_llvm_type<'ctx>( + ctx: &'ctx Context, + unifier: &mut Unifier, + top_level: &TopLevelContext, + type_cache: &mut HashMap>, + ty: Type, +) -> BasicTypeEnum<'ctx> { + use TypeEnum::*; + // we assume the type cache should already contain primitive types, + // and they should be passed by value instead of passing as pointer. + type_cache.get(&ty).cloned().unwrap_or_else(|| match &*unifier.get_ty(ty) { + TObj { obj_id, fields, .. } => { + // a struct with fields in the order of declaration + let defs = top_level.definitions.read(); + let definition = defs.get(obj_id.0).unwrap(); + let ty = if let TopLevelDef::Class { fields: fields_list, .. 
} = &*definition.read() { + let fields = fields.borrow(); + let fields = fields_list + .iter() + .map(|f| get_llvm_type(ctx, unifier, top_level, type_cache, fields[&f.0])) + .collect_vec(); + ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + } else { + unreachable!() + }; + ty + } + TTuple { ty } => { + // a struct with fields in the order present in the tuple + let fields = ty + .iter() + .map(|ty| get_llvm_type(ctx, unifier, top_level, type_cache, *ty)) + .collect_vec(); + ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + } + TList { ty } => { + // a struct with an integer and a pointer to an array + let element_type = get_llvm_type(ctx, unifier, top_level, type_cache, *ty); + let fields = + [ctx.i32_type().into(), element_type.ptr_type(AddressSpace::Generic).into()]; + ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + } + TVirtual { .. } => unimplemented!(), + _ => unreachable!(), + }) +} + +pub fn gen_func(task: CodeGenTask, top_level_ctx: Arc) { + // unwrap_or(0) is for unit tests without using rayon + let thread_id = current_thread_index().unwrap_or(0); + let (mut unifier, primitives) = { + let unifiers = top_level_ctx.unifiers.read(); + let (unifier, primitives) = &unifiers[task.unifier_index]; + (Unifier::from_shared_unifier(unifier), *primitives) + }; + let contexts = top_level_ctx.conetexts.read(); + let context = contexts[thread_id].lock(); + + for (a, b) in task.subst.iter() { + // this should be unification between variables and concrete types + // and should not cause any problem... + unifier.unify(*a, *b).unwrap(); + } + + // rebuild primitive store with unique representatives + let primitives = PrimitiveStore { + int32: unifier.get_representative(primitives.int32), + int64: unifier.get_representative(primitives.int64), + float: unifier.get_representative(primitives.float), + bool: unifier.get_representative(primitives.bool), + none: unifier.get_representative(primitives.none), + }; + + let mut type_cache: HashMap<_, _> = [ + (primitives.int32, context.i32_type().into()), + (primitives.int64, context.i64_type().into()), + (primitives.float, context.f64_type().into()), + (primitives.bool, context.bool_type().into()), + ] + .iter() + .cloned() + .collect(); + + let params = task + .signature + .args + .iter() + .map(|arg| { + get_llvm_type(&context, &mut unifier, top_level_ctx.as_ref(), &mut type_cache, arg.ty) + }) + .collect_vec(); + + let fn_type = if unifier.unioned(task.signature.ret, primitives.none) { + context.void_type().fn_type(¶ms, false) + } else { + get_llvm_type( + &context, + &mut unifier, + top_level_ctx.as_ref(), + &mut type_cache, + task.signature.ret, + ) + .fn_type(¶ms, false) + }; + + let builder = context.create_builder(); + let module = context.create_module(&task.symbol_name); + let fn_val = module.add_function(&task.symbol_name, fn_type, None); + let init_bb = context.append_basic_block(fn_val, "init"); + builder.position_at_end(init_bb); + let body_bb = context.append_basic_block(fn_val, "body"); + + let mut var_assignment = HashMap::new(); + for (n, arg) in task.signature.args.iter().enumerate() { + let param = fn_val.get_nth_param(n as u32).unwrap(); + let alloca = builder.build_alloca( + get_llvm_type(&context, &mut unifier, top_level_ctx.as_ref(), &mut type_cache, arg.ty), + &arg.name, + ); + builder.build_store(alloca, param); + var_assignment.insert(arg.name.clone(), alloca); + } + builder.build_unconditional_branch(body_bb); + builder.position_at_end(body_bb); + + let mut 
code_gen_context = CodeGenContext { + ctx: &context, + resolver: task.resolver, + top_level: top_level_ctx.as_ref(), + loop_bb: None, + var_assignment, + type_cache, + primitives, + init_bb, + builder, + module, + unifier, + }; + + if let StmtKind::FunctionDef { body, .. } = &task.body.node { + for stmt in body.iter() { + code_gen_context.gen_stmt(stmt); + } + } +} diff --git a/nac3core/src/codegen/stmt.rs b/nac3core/src/codegen/stmt.rs index f2d7dd33..fa9727f8 100644 --- a/nac3core/src/codegen/stmt.rs +++ b/nac3core/src/codegen/stmt.rs @@ -1,12 +1,14 @@ -use crate::{top_level::CodeGenContext, typecheck::typedef::Type}; -use inkwell::values::{BasicValueEnum, PointerValue}; +use super::CodeGenContext; +use crate::typecheck::typedef::Type; +use inkwell::values::{BasicValue, BasicValueEnum, PointerValue}; use rustpython_parser::ast::{Expr, ExprKind, Stmt, StmtKind}; impl<'ctx> CodeGenContext<'ctx> { fn gen_var(&mut self, ty: Type) -> PointerValue<'ctx> { // put the alloca in init block let current = self.builder.get_insert_block().unwrap(); - self.builder.position_at_end(self.init_bb); + // position before the last branching instruction... + self.builder.position_before(&self.init_bb.get_last_instruction().unwrap()); let ty = self.get_llvm_type(ty); let ptr = self.builder.build_alloca(ty, "tmp"); self.builder.position_at_end(current); @@ -71,6 +73,11 @@ impl<'ctx> CodeGenContext<'ctx> { StmtKind::Expr { value } => { self.gen_expr(&value); } + StmtKind::Return { value } => { + let value = value.as_ref().map(|v| self.gen_expr(&v)); + let value = value.as_ref().map(|v| v as &dyn BasicValue); + self.builder.build_return(value); + } StmtKind::AnnAssign { target, value, .. } => { if let Some(value) = value { let value = self.gen_expr(&value); diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index a5123986..f1ace2d9 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -3,11 +3,8 @@ use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; use crate::symbol_resolver::SymbolResolver; -use inkwell::{ - basic_block::BasicBlock, builder::Builder, context::Context, module::Module, - types::BasicTypeEnum, values::PointerValue, -}; -use parking_lot::RwLock; +use inkwell::context::Context; +use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] @@ -49,33 +46,10 @@ pub enum TopLevelDef { }, } -pub struct CodeGenTask { - pub subst: HashMap, - pub symbol_name: String, - pub body: Stmt>, - pub unifier: SharedUnifier, -} - pub struct TopLevelContext { pub definitions: Arc>>>, - pub unifiers: Arc>>, -} - -pub struct CodeGenContext<'ctx> { - pub ctx: &'ctx Context, - pub builder: Builder<'ctx>, - pub module: Module<'ctx>, - pub top_level: &'ctx TopLevelContext, - pub unifier: Unifier, - pub resolver: Box, - pub var_assignment: HashMap>, - pub type_cache: HashMap>, - pub primitives: PrimitiveStore, - // stores the alloca for variables - pub init_bb: BasicBlock<'ctx>, - // where continue and break should go to respectively - // the first one is the test_bb, and the second one is bb after the loop - pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, + pub unifiers: Arc>>, + pub conetexts: Arc>>>, } pub struct TopLevelDefInfo<'a> { From 42a636b4cee511703ce8882976066a79f51ab15b Mon Sep 17 00:00:00 2001 From: ychenfo Date: Wed, 11 Aug 2021 15:11:51 +0800 Subject: [PATCH 096/131] add Arc> 
and change from Box to Arc, need format and cleanup --- nac3core/src/top_level.rs | 83 ++++++++++--------- nac3core/src/typecheck/type_inferencer/mod.rs | 2 +- .../src/typecheck/type_inferencer/test.rs | 8 +- 3 files changed, 47 insertions(+), 46 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 6f1dcbb7..856222d4 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -8,7 +8,7 @@ use inkwell::{ basic_block::BasicBlock, builder::Builder, context::Context, module::Module, types::BasicTypeEnum, values::PointerValue, }; -use parking_lot::RwLock; +use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] @@ -26,6 +26,8 @@ pub enum TopLevelDef { methods: Vec<(String, Type, DefinitionId)>, // ancestor classes, including itself. ancestors: Vec, + // symbol resolver of the module defined the class, none if it is built-in type + resolver: Option>> }, Function { // prefix for symbol, should be unique globally, and not ending with numbers @@ -44,6 +46,8 @@ pub enum TopLevelDef { /// Value: AST annotated with types together with a unification table index. Could contain /// rigid type variables that would be substituted when the function is instantiated. instance_to_stmt: HashMap>, usize)>, + // symbol resolver of the module defined the class + resolver: Option>> }, Initializer { class_id: DefinitionId, @@ -80,27 +84,28 @@ pub struct CodeGenContext<'ctx> { } -pub fn name_mangling(mut class_name: String, method_name: &str) -> String { // need to further extend to more name mangling like instantiations of typevar +pub fn name_mangling(mut class_name: String, method_name: &str) -> String { + // need to further extend to more name mangling like instantiations of typevar class_name.push_str(method_name); class_name } -pub struct TopLevelDefInfo<'a> { +pub struct TopLevelDefInfo { // like adding some info on top of the TopLevelDef for later parsing the class bases, method, // and function sigatures def: TopLevelDef, // the definition entry ty: Type, // the entry in the top_level unifier ast: Option>, // the ast submitted by applications, primitives and class methods will have None value here - resolver: Option<&'a dyn SymbolResolver> // the resolver + // resolver: Option<&'a dyn SymbolResolver> // the resolver } -pub struct TopLevelComposer<'a> { - pub definition_list: Vec>, +pub struct TopLevelComposer { + pub definition_list: Vec, pub primitives: PrimitiveStore, pub unifier: Unifier, } -impl<'a> TopLevelComposer<'a> { +impl TopLevelComposer { pub fn make_primitives() -> (PrimitiveStore, Unifier) { let mut unifier = Unifier::new(); let int32 = unifier.add_ty(TypeEnum::TObj { @@ -135,35 +140,30 @@ impl<'a> TopLevelComposer<'a> { pub fn new() -> Self { let primitives = Self::make_primitives(); - let definition_list: Vec> = vec![ + let definition_list: Vec = vec![ TopLevelDefInfo { - def: Self::make_top_level_class_def(0), + def: Self::make_top_level_class_def(0, None), ast: None, - resolver: None, ty: primitives.0.int32, }, TopLevelDefInfo { - def: Self::make_top_level_class_def(1), + def: Self::make_top_level_class_def(1, None), ast: None, - resolver: None, ty: primitives.0.int64, }, TopLevelDefInfo { - def: Self::make_top_level_class_def(2), + def: Self::make_top_level_class_def(2, None), ast: None, - resolver: None, ty: primitives.0.float, }, TopLevelDefInfo { - def: Self::make_top_level_class_def(3), + def: Self::make_top_level_class_def(3, None), ast: None, - resolver: None, ty: 
primitives.0.bool, }, TopLevelDefInfo { - def: Self::make_top_level_class_def(4), + def: Self::make_top_level_class_def(4, None), ast: None, - resolver: None, ty: primitives.0.none, }, ]; // the entries for primitive types @@ -171,22 +171,24 @@ impl<'a> TopLevelComposer<'a> { } /// already include the definition_id of itself inside the ancestors vector - pub fn make_top_level_class_def(index: usize) -> TopLevelDef { + pub fn make_top_level_class_def(index: usize, resolver: Option>>) -> TopLevelDef { TopLevelDef::Class { object_id: DefinitionId(index), type_vars: Default::default(), fields: Default::default(), methods: Default::default(), ancestors: vec![DefinitionId(index)], + resolver } } - pub fn make_top_level_function_def(name: String, ty: Type) -> TopLevelDef { + pub fn make_top_level_function_def(name: String, ty: Type, resolver: Option>>) -> TopLevelDef { TopLevelDef::Function { name, signature: ty, instance_to_symbol: Default::default(), instance_to_stmt: Default::default(), + resolver } } @@ -205,7 +207,7 @@ impl<'a> TopLevelComposer<'a> { pub fn register_top_level( &mut self, ast: ast::Stmt<()>, - resolver: &'a dyn SymbolResolver, + resolver: Option>> ) -> Result, String> { match &ast.node { ast::StmtKind::ClassDef { name, body, .. } => { @@ -239,8 +241,7 @@ impl<'a> TopLevelComposer<'a> { // add to the definition list self.definition_list.push( TopLevelDefInfo { - def: Self::make_top_level_function_def(fun_name.clone(), ty), - resolver: Some(resolver), + def: Self::make_top_level_function_def(fun_name.clone(), ty, None), // FIXME: ty, ast: None // since it is inside the class def body statments } @@ -257,7 +258,6 @@ impl<'a> TopLevelComposer<'a> { }, ty: self.primitives.none, // arbitary picked one ast: None, // it is inside the class def body statments - resolver: Some(resolver) } ) // FIXME: should we return this to the symbol resolver?, should be yes @@ -267,8 +267,7 @@ impl<'a> TopLevelComposer<'a> { // add to the definition list self.definition_list.push( TopLevelDefInfo { - def: Self::make_top_level_class_def(class_def_id), - resolver: Some(resolver), + def: Self::make_top_level_class_def(class_def_id, resolver), ast: Some(ast), ty, } @@ -291,9 +290,9 @@ impl<'a> TopLevelComposer<'a> { self.definition_list.push(TopLevelDefInfo { def: Self::make_top_level_function_def( name.into(), - self.primitives.none + self.primitives.none, + resolver ), - resolver: Some(resolver), ast: Some(ast), ty, }); @@ -308,7 +307,7 @@ impl<'a> TopLevelComposer<'a> { /// this should be called after all top level classes are registered, and will actually fill in those fields of the previous dummy one pub fn analyze_top_level(&mut self) -> Result<(), String> { for mut d in &mut self.definition_list { - if let (Some(ast), Some(resolver)) = (&d.ast, d.resolver) { + if let Some(ast) = &d.ast { match &ast.node { ast::StmtKind::ClassDef { bases, @@ -320,15 +319,17 @@ impl<'a> TopLevelComposer<'a> { ancestors, fields, methods, - type_vars + type_vars, + // resolver, ) = if let TopLevelDef::Class { object_id, ancestors, fields, methods, - type_vars + type_vars, + resolver } = &mut d.def { - (object_id, ancestors, fields, methods, type_vars) + (object_id, ancestors, fields, methods, type_vars) // FIXME: this unwrap is not safe } else { unreachable!() }; // try to get mutable reference of the entry in the unification table, get the `TypeEnum` @@ -361,7 +362,7 @@ impl<'a> TopLevelComposer<'a> { match &e.node { ast::ExprKind::Name {id, ..} => { // the def_list - 
type_vars.push(resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable".to_string())?); + // type_vars.push(resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable".to_string())?); FIXME: // the TypeEnum of the class // FIXME: the `params` destructed above is not mutable, even if this is mutable, what should the key be? @@ -376,7 +377,7 @@ impl<'a> TopLevelComposer<'a> { // `class Foo(Generic[T]):` ast::ExprKind::Name {id, ..} => { // the def_list - type_vars.push(resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable".to_string())?); + // type_vars.push(resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable".to_string())?); FIXME: // the TypeEnum of the class // FIXME: the `params` destructed above is not mutable, even if this is mutable, what should the key be? @@ -396,20 +397,20 @@ impl<'a> TopLevelComposer<'a> { // base class, name directly available inside the // module, can use this module's symbol resolver ast::ExprKind::Name {id, ..} => { - let def_id = resolver.get_identifier_def(id); + // let def_id = resolver.get_identifier_def(id); FIXME: // the definition list - ancestors.push(def_id); + // ancestors.push(def_id); }, // base class, things can be like `class A(BaseModule.Base)`, here we have to get the // symbol resolver of the module `BaseModule`? ast::ExprKind::Attribute {value, attr, ..} => { if let ast::ExprKind::Name {id, ..} = &value.node { - if let Some(base_module_resolver) = resolver.get_module_resolver(id) { - let def_id = base_module_resolver.get_identifier_def(attr); - // the definition list - ancestors.push(def_id); - } else { return Err("unkown imported module".into()) } + // if let Some(base_module_resolver) = resolver.get_module_resolver(id) { + // let def_id = base_module_resolver.get_identifier_def(attr); + // // the definition list + // ancestors.push(def_id); + // } else { return Err("unkown imported module".into()) } FIXME: } else { return Err("unkown imported module".into()) } }, diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 7f5bcbc8..d81c0d02 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -38,7 +38,7 @@ pub struct PrimitiveStore { } pub struct FunctionData { - pub resolver: Box, + pub resolver: Arc, pub return_type: Option, pub bound_variables: Vec, } diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index f4d2cd8d..44775c80 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -100,10 +100,10 @@ impl TestEnvironment { let mut identifier_mapping = HashMap::new(); identifier_mapping.insert("None".into(), none); - let resolver = Box::new(Resolver { + let resolver = Arc::new(Resolver { identifier_mapping: identifier_mapping.clone(), class_names: Default::default(), - }) as Box; + }) as Arc; TestEnvironment { unifier, @@ -226,8 +226,8 @@ impl TestEnvironment { .collect(); let resolver = - Box::new(Resolver { identifier_mapping: identifier_mapping.clone(), class_names }) - as Box; + Arc::new(Resolver { identifier_mapping: identifier_mapping.clone(), class_names }) + as Arc; TestEnvironment { unifier, From 99276c8f31bed7f275713e3dcc8c247a43437679 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Wed, 11 Aug 2021 15:18:21 +0800 Subject: [PATCH 097/131] formatted --- nac3core/src/symbol_resolver.rs | 4 +- nac3core/src/top_level.rs | 159 ++++++++------- 
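A side note on the `Option<Arc<dyn SymbolResolver + Send + Sync>>` fields introduced by the previous patch: every definition registered from a module ends up holding a handle to that module's resolver, and later stages run on worker threads, so the borrowed `&'a dyn SymbolResolver` had to become a shared, thread-safe pointer. A minimal sketch of that sharing pattern, with a trimmed-down trait and an invented `DummyResolver` standing in for a real module resolver:

    use std::sync::Arc;

    // Trimmed-down stand-in for nac3core's SymbolResolver trait.
    trait SymbolResolver {
        fn get_identifier_def(&self, name: &str) -> Option<usize>;
    }

    struct DummyResolver;
    impl SymbolResolver for DummyResolver {
        fn get_identifier_def(&self, _name: &str) -> Option<usize> {
            None
        }
    }

    fn main() {
        let resolver: Arc<dyn SymbolResolver + Send + Sync> = Arc::new(DummyResolver);
        // One clone per registered definition; they all share the same resolver.
        let for_class = resolver.clone();
        let for_init = resolver.clone(); // e.g. the mangled "A__init__" method entry
        assert_eq!(Arc::strong_count(&resolver), 3);
        let _ = (for_class.get_identifier_def("A"), for_init.get_identifier_def("A__init__"));
    }

This is also why `TopLevelDefInfo` loses its lifetime parameter in that patch: the resolver is now owned and cloned into each `TopLevelDef` rather than borrowed for the composer's lifetime.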
nac3core/src/typecheck/magic_methods.rs | 261 +++++++++++++----------- 3 files changed, 224 insertions(+), 200 deletions(-) diff --git a/nac3core/src/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs index 7d34423f..8efcd435 100644 --- a/nac3core/src/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -1,6 +1,6 @@ use crate::location::Location; -use crate::typecheck::typedef::Type; use crate::top_level::DefinitionId; +use crate::typecheck::typedef::Type; use rustpython_parser::ast::Expr; #[derive(Clone, PartialEq)] @@ -21,5 +21,5 @@ pub trait SymbolResolver { fn get_symbol_value(&self, str: &str) -> Option; fn get_symbol_location(&self, str: &str) -> Option; fn get_module_resolver(&self, module_name: &str) -> Option<&dyn SymbolResolver>; // NOTE: for getting imported modules' symbol resolver? - // handle function call etc. + // handle function call etc. } diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 856222d4..ea482555 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -27,7 +27,7 @@ pub enum TopLevelDef { // ancestor classes, including itself. ancestors: Vec, // symbol resolver of the module defined the class, none if it is built-in type - resolver: Option>> + resolver: Option>>, }, Function { // prefix for symbol, should be unique globally, and not ending with numbers @@ -47,7 +47,7 @@ pub enum TopLevelDef { /// rigid type variables that would be substituted when the function is instantiated. instance_to_stmt: HashMap>, usize)>, // symbol resolver of the module defined the class - resolver: Option>> + resolver: Option>>, }, Initializer { class_id: DefinitionId, @@ -83,20 +83,19 @@ pub struct CodeGenContext<'ctx> { pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, } - pub fn name_mangling(mut class_name: String, method_name: &str) -> String { // need to further extend to more name mangling like instantiations of typevar class_name.push_str(method_name); class_name } -pub struct TopLevelDefInfo { +pub struct TopLevelDefInfo { // like adding some info on top of the TopLevelDef for later parsing the class bases, method, // and function sigatures - def: TopLevelDef, // the definition entry - ty: Type, // the entry in the top_level unifier - ast: Option>, // the ast submitted by applications, primitives and class methods will have None value here - // resolver: Option<&'a dyn SymbolResolver> // the resolver + def: TopLevelDef, // the definition entry + ty: Type, // the entry in the top_level unifier + ast: Option>, // the ast submitted by applications, primitives and class methods will have None value here + // resolver: Option<&'a dyn SymbolResolver> // the resolver } pub struct TopLevelComposer { @@ -171,28 +170,35 @@ impl TopLevelComposer { } /// already include the definition_id of itself inside the ancestors vector - pub fn make_top_level_class_def(index: usize, resolver: Option>>) -> TopLevelDef { + pub fn make_top_level_class_def( + index: usize, + resolver: Option>>, + ) -> TopLevelDef { TopLevelDef::Class { object_id: DefinitionId(index), type_vars: Default::default(), fields: Default::default(), methods: Default::default(), ancestors: vec![DefinitionId(index)], - resolver + resolver, } } - pub fn make_top_level_function_def(name: String, ty: Type, resolver: Option>>) -> TopLevelDef { + pub fn make_top_level_function_def( + name: String, + ty: Type, + resolver: Option>>, + ) -> TopLevelDef { TopLevelDef::Function { name, signature: ty, instance_to_symbol: Default::default(), instance_to_stmt: Default::default(), - 
resolver + resolver, } } - // like to make and return a "primitive" symbol resolver? so that the symbol resolver + // like to make and return a "primitive" symbol resolver? so that the symbol resolver // can later figure out primitive type definitions when passed a primitive type name pub fn get_primitives_definition(&self) -> Vec<(String, DefinitionId, Type)> { vec![ @@ -207,13 +213,13 @@ impl TopLevelComposer { pub fn register_top_level( &mut self, ast: ast::Stmt<()>, - resolver: Option>> + resolver: Option>>, ) -> Result, String> { match &ast.node { ast::StmtKind::ClassDef { name, body, .. } => { let class_name = name.to_string(); let class_def_id = self.definition_list.len(); - + // add the class to the unifier let ty = self.unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(class_def_id), @@ -221,60 +227,58 @@ impl TopLevelComposer { params: Default::default(), }); - let mut ret_vector: Vec<(String, DefinitionId, Type)> = vec![(class_name.clone(), DefinitionId(class_def_id), ty)]; + let mut ret_vector: Vec<(String, DefinitionId, Type)> = + vec![(class_name.clone(), DefinitionId(class_def_id), ty)]; // parse class def body and register class methods into the def list - // NOTE: module's symbol resolver would not know the name of the class methods, - // thus cannot return their definition_id? so we have to manage it ourselves? - // or do we return the class method list of (method_name, def_id, type) to + // NOTE: module's symbol resolver would not know the name of the class methods, + // thus cannot return their definition_id? so we have to manage it ourselves? + // or do we return the class method list of (method_name, def_id, type) to // application to be used to build symbol resolver? <- current implementation // FIXME: better do not return and let symbol resolver to manage the mangled name for b in body { - if let ast::StmtKind::FunctionDef {name, ..} = &b.node { + if let ast::StmtKind::FunctionDef { name, .. } = &b.node { let fun_name = name_mangling(class_name.clone(), name); let def_id = self.definition_list.len(); // add to unifier - let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { - args: Default::default(), - ret: self.primitives.none, - vars: Default::default() - })); + let ty = self.unifier.add_ty(TypeEnum::TFunc( + crate::typecheck::typedef::FunSignature { + args: Default::default(), + ret: self.primitives.none, + vars: Default::default(), + }, + )); // add to the definition list - self.definition_list.push( - TopLevelDefInfo { - def: Self::make_top_level_function_def(fun_name.clone(), ty, None), // FIXME: - ty, - ast: None // since it is inside the class def body statments - } - ); + self.definition_list.push(TopLevelDefInfo { + def: Self::make_top_level_function_def(fun_name.clone(), ty, None), // FIXME: + ty, + ast: None, // since it is inside the class def body statments + }); ret_vector.push((fun_name, DefinitionId(def_id), ty)); - + // if it is the contructor, special handling is needed. 
In the above // handling, we still add __init__ function to the class method if name == "__init__" { - self.definition_list.push( - TopLevelDefInfo { - def: TopLevelDef::Initializer { - class_id: DefinitionId(class_def_id) - }, - ty: self.primitives.none, // arbitary picked one - ast: None, // it is inside the class def body statments - } - ) + self.definition_list.push(TopLevelDefInfo { + def: TopLevelDef::Initializer { + class_id: DefinitionId(class_def_id), + }, + ty: self.primitives.none, // arbitary picked one + ast: None, // it is inside the class def body statments + }) // FIXME: should we return this to the symbol resolver?, should be yes } - } else { } // else do nothing + } else { + } // else do nothing } // add to the definition list - self.definition_list.push( - TopLevelDefInfo { - def: Self::make_top_level_class_def(class_def_id, resolver), - ast: Some(ast), - ty, - } - ); - + self.definition_list.push(TopLevelDefInfo { + def: Self::make_top_level_class_def(class_def_id, resolver), + ast: Some(ast), + ty, + }); + Ok(ret_vector) - }, + } ast::StmtKind::FunctionDef { name, .. } => { let fun_name = name.to_string(); @@ -283,18 +287,18 @@ impl TopLevelComposer { let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { args: Default::default(), - ret: self.primitives.none, - vars: Default::default() - })); + ret: self.primitives.none, + vars: Default::default(), + })); // add to the definition list self.definition_list.push(TopLevelDefInfo { - def: Self::make_top_level_function_def( - name.into(), - self.primitives.none, - resolver - ), - ast: Some(ast), - ty, + def: Self::make_top_level_function_def( + name.into(), + self.primitives.none, + resolver, + ), + ast: Some(ast), + ty, }); Ok(vec![(fun_name, DefinitionId(def_id), ty)]) @@ -462,20 +466,25 @@ impl TopLevelComposer { } } - - -pub fn parse_type_var(input: &ast::Expr, resolver: &dyn SymbolResolver) -> Result { +pub fn parse_type_var( + input: &ast::Expr, + resolver: &dyn SymbolResolver, +) -> Result { match &input.node { - ast::ExprKind::Name {id, ..} => { - resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable identifer".to_string()) - }, + ast::ExprKind::Name { id, .. } => resolver + .get_symbol_type(id) + .ok_or_else(|| "unknown type variable identifer".to_string()), - ast::ExprKind::Attribute {value, attr, ..} => { - if let ast::ExprKind::Name {id, ..} = &value.node { - let next_resolver = resolver.get_module_resolver(id).ok_or_else(|| "unknown imported module".to_string())?; - next_resolver.get_symbol_type(attr).ok_or_else(|| "unknown type variable identifer".to_string()) + ast::ExprKind::Attribute { value, attr, .. } => { + if let ast::ExprKind::Name { id, .. } = &value.node { + let next_resolver = resolver + .get_module_resolver(id) + .ok_or_else(|| "unknown imported module".to_string())?; + next_resolver + .get_symbol_type(attr) + .ok_or_else(|| "unknown type variable identifer".to_string()) } else { - unimplemented!() + unimplemented!() // recursively resolve attr thing, FIXME: new problem: how do we handle this? 
// # A.py // class A: @@ -486,8 +495,8 @@ pub fn parse_type_var(input: &ast::Expr, resolver: &dyn SymbolResolver) -> // class B(Generic[A.A.T]): // pass } - }, + } - _ => Err("not supported".into()) + _ => Err("not supported".into()), } } diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index 9d374394..29615aa2 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -1,8 +1,11 @@ +use crate::typecheck::{ + type_inferencer::*, + typedef::{FunSignature, FuncArg, Type, TypeEnum, Unifier}, +}; +use rustpython_parser::ast; +use rustpython_parser::ast::{Cmpop, Operator, Unaryop}; use std::borrow::Borrow; use std::collections::HashMap; -use rustpython_parser::ast::{Cmpop, Operator, Unaryop}; -use crate::typecheck::{type_inferencer::*, typedef::{FunSignature, FuncArg, TypeEnum, Unifier, Type}}; -use rustpython_parser::ast; pub fn binop_name(op: &Operator) -> &'static str { match op { @@ -42,206 +45,218 @@ pub fn binop_assign_name(op: &Operator) -> &'static str { pub fn unaryop_name(op: &Unaryop) -> &'static str { match op { - Unaryop::UAdd => "__pos__", - Unaryop::USub => "__neg__", - Unaryop::Not => "__not__", + Unaryop::UAdd => "__pos__", + Unaryop::USub => "__neg__", + Unaryop::Not => "__not__", Unaryop::Invert => "__inv__", } } pub fn comparison_name(op: &Cmpop) -> Option<&'static str> { match op { - Cmpop::Lt => Some("__lt__"), - Cmpop::LtE => Some("__le__"), - Cmpop::Gt => Some("__gt__"), - Cmpop::GtE => Some("__ge__"), - Cmpop::Eq => Some("__eq__"), + Cmpop::Lt => Some("__lt__"), + Cmpop::LtE => Some("__le__"), + Cmpop::Gt => Some("__gt__"), + Cmpop::GtE => Some("__ge__"), + Cmpop::Eq => Some("__eq__"), Cmpop::NotEq => Some("__ne__"), _ => None, } } -pub fn impl_binop(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type, ops: &[ast::Operator]) { - if let TypeEnum::TObj {fields, ..} = unifier.get_ty(ty).borrow() { +pub fn impl_binop( + unifier: &mut Unifier, + _store: &PrimitiveStore, + ty: Type, + other_ty: &[Type], + ret_ty: Type, + ops: &[ast::Operator], +) { + if let TypeEnum::TObj { fields, .. 
} = unifier.get_ty(ty).borrow() { for op in ops { - fields.borrow_mut().insert( - binop_name(op).into(), - { - let other = if other_ty.len() == 1 { - other_ty[0] - } else { - unifier.get_fresh_var_with_range(other_ty).0 - }; - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ret_ty, - vars: HashMap::new(), - args: vec![FuncArg { - ty: other, - default_value: None, - name: "other".into() - }] - })) - } - ); + fields.borrow_mut().insert(binop_name(op).into(), { + let other = if other_ty.len() == 1 { + other_ty[0] + } else { + unifier.get_fresh_var_with_range(other_ty).0 + }; + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: ret_ty, + vars: HashMap::new(), + args: vec![FuncArg { ty: other, default_value: None, name: "other".into() }], + })) + }); - fields.borrow_mut().insert( - binop_assign_name(op).into(), - { - let other = if other_ty.len() == 1 { - other_ty[0] - } else { - unifier.get_fresh_var_with_range(other_ty).0 - }; - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ret_ty, - vars: HashMap::new(), - args: vec![FuncArg { - ty: other, - default_value: None, - name: "other".into() - }] - })) - } - ); + fields.borrow_mut().insert(binop_assign_name(op).into(), { + let other = if other_ty.len() == 1 { + other_ty[0] + } else { + unifier.get_fresh_var_with_range(other_ty).0 + }; + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: ret_ty, + vars: HashMap::new(), + args: vec![FuncArg { ty: other, default_value: None, name: "other".into() }], + })) + }); } - } else { unreachable!("") } + } else { + unreachable!("") + } } -pub fn impl_unaryop(unifier: &mut Unifier, _store: &PrimitiveStore, ty: Type, ret_ty: Type, ops: &[ast::Unaryop]) { - if let TypeEnum::TObj {fields, ..} = unifier.get_ty(ty).borrow() { +pub fn impl_unaryop( + unifier: &mut Unifier, + _store: &PrimitiveStore, + ty: Type, + ret_ty: Type, + ops: &[ast::Unaryop], +) { + if let TypeEnum::TObj { fields, .. } = unifier.get_ty(ty).borrow() { for op in ops { fields.borrow_mut().insert( unaryop_name(op).into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: ret_ty, vars: HashMap::new(), - args: vec![] - })) + args: vec![], + })), ); } - } else { unreachable!() } + } else { + unreachable!() + } } -pub fn impl_cmpop(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type, ops: &[ast::Cmpop]) { - if let TypeEnum::TObj {fields, ..} = unifier.get_ty(ty).borrow() { +pub fn impl_cmpop( + unifier: &mut Unifier, + store: &PrimitiveStore, + ty: Type, + other_ty: Type, + ops: &[ast::Cmpop], +) { + if let TypeEnum::TObj { fields, .. 
} = unifier.get_ty(ty).borrow() { for op in ops { fields.borrow_mut().insert( comparison_name(op).unwrap().into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { ret: store.bool, vars: HashMap::new(), - args: vec![FuncArg { - ty: other_ty, - default_value: None, - name: "other".into() - }] - })) + args: vec![FuncArg { ty: other_ty, default_value: None, name: "other".into() }], + })), ); } - } else { unreachable!() } + } else { + unreachable!() + } } /// Add, Sub, Mult, Pow -pub fn impl_basic_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type) { - impl_binop(unifier, store, ty, other_ty, ret_ty, &[ - ast::Operator::Add, - ast::Operator::Sub, - ast::Operator::Mult, - ]) +pub fn impl_basic_arithmetic( + unifier: &mut Unifier, + store: &PrimitiveStore, + ty: Type, + other_ty: &[Type], + ret_ty: Type, +) { + impl_binop( + unifier, + store, + ty, + other_ty, + ret_ty, + &[ast::Operator::Add, ast::Operator::Sub, ast::Operator::Mult], + ) } -pub fn impl_pow(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type) { - impl_binop(unifier, store, ty, other_ty, ret_ty, &[ - ast::Operator::Pow, - ]) +pub fn impl_pow( + unifier: &mut Unifier, + store: &PrimitiveStore, + ty: Type, + other_ty: &[Type], + ret_ty: Type, +) { + impl_binop(unifier, store, ty, other_ty, ret_ty, &[ast::Operator::Pow]) } /// BitOr, BitXor, BitAnd pub fn impl_bitwise_arithmetic(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - impl_binop(unifier, store, ty, &[ty], ty, &[ - ast::Operator::BitAnd, - ast::Operator::BitOr, - ast::Operator::BitXor, - ]) + impl_binop( + unifier, + store, + ty, + &[ty], + ty, + &[ast::Operator::BitAnd, ast::Operator::BitOr, ast::Operator::BitXor], + ) } /// LShift, RShift pub fn impl_bitwise_shift(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - impl_binop(unifier, store, ty, &[ty], ty, &[ - ast::Operator::LShift, - ast::Operator::RShift, - ]) + impl_binop(unifier, store, ty, &[ty], ty, &[ast::Operator::LShift, ast::Operator::RShift]) } /// Div pub fn impl_div(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type]) { - impl_binop(unifier, store, ty, other_ty, store.float, &[ - ast::Operator::Div, - ]) + impl_binop(unifier, store, ty, other_ty, store.float, &[ast::Operator::Div]) } /// FloorDiv -pub fn impl_floordiv(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type) { - impl_binop(unifier, store, ty, other_ty, ret_ty, &[ - ast::Operator::FloorDiv, - ]) +pub fn impl_floordiv( + unifier: &mut Unifier, + store: &PrimitiveStore, + ty: Type, + other_ty: &[Type], + ret_ty: Type, +) { + impl_binop(unifier, store, ty, other_ty, ret_ty, &[ast::Operator::FloorDiv]) } /// Mod -pub fn impl_mod(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type) { - impl_binop(unifier, store, ty, other_ty, ret_ty, &[ - ast::Operator::Mod, - ]) +pub fn impl_mod( + unifier: &mut Unifier, + store: &PrimitiveStore, + ty: Type, + other_ty: &[Type], + ret_ty: Type, +) { + impl_binop(unifier, store, ty, other_ty, ret_ty, &[ast::Operator::Mod]) } /// UAdd, USub pub fn impl_sign(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - impl_unaryop(unifier, store, ty, ty, &[ - ast::Unaryop::UAdd, - ast::Unaryop::USub, - ]) + impl_unaryop(unifier, store, ty, ty, &[ast::Unaryop::UAdd, ast::Unaryop::USub]) } /// Invert pub fn impl_invert(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - impl_unaryop(unifier, store, ty, ty, &[ - 
ast::Unaryop::Invert, - ]) + impl_unaryop(unifier, store, ty, ty, &[ast::Unaryop::Invert]) } /// Not pub fn impl_not(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - impl_unaryop(unifier, store, ty, store.bool, &[ - ast::Unaryop::Not, - ]) + impl_unaryop(unifier, store, ty, store.bool, &[ast::Unaryop::Not]) } /// Lt, LtE, Gt, GtE pub fn impl_comparison(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type, other_ty: Type) { - impl_cmpop(unifier, store, ty, other_ty, &[ - ast::Cmpop::Lt, - ast::Cmpop::Gt, - ast::Cmpop::LtE, - ast::Cmpop::GtE, - ]) + impl_cmpop( + unifier, + store, + ty, + other_ty, + &[ast::Cmpop::Lt, ast::Cmpop::Gt, ast::Cmpop::LtE, ast::Cmpop::GtE], + ) } /// Eq, NotEq pub fn impl_eq(unifier: &mut Unifier, store: &PrimitiveStore, ty: Type) { - impl_cmpop(unifier, store, ty, ty, &[ - ast::Cmpop::Eq, - ast::Cmpop::NotEq, - ]) + impl_cmpop(unifier, store, ty, ty, &[ast::Cmpop::Eq, ast::Cmpop::NotEq]) } pub fn set_primitives_magic_methods(store: &PrimitiveStore, unifier: &mut Unifier) { - let PrimitiveStore { - int32: int32_t, - int64: int64_t, - float: float_t, - bool: bool_t, - .. - } = *store; + let PrimitiveStore { int32: int32_t, int64: int64_t, float: float_t, bool: bool_t, .. } = + *store; /* int32 ======== */ impl_basic_arithmetic(unifier, store, int32_t, &[int32_t], int32_t); impl_pow(unifier, store, int32_t, &[int32_t], int32_t); From de8b67b6056697306fe01a6479adb2fdcbdca797 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Wed, 11 Aug 2021 17:28:29 +0800 Subject: [PATCH 098/131] refactored symbol resolver --- nac3core/src/symbol_resolver.rs | 131 +++++++++++++++++- nac3core/src/top_level.rs | 34 ----- nac3core/src/typecheck/type_inferencer/mod.rs | 40 ++++-- .../src/typecheck/type_inferencer/test.rs | 94 ++++++++++--- nac3core/src/typecheck/typedef/mod.rs | 2 +- 5 files changed, 221 insertions(+), 80 deletions(-) diff --git a/nac3core/src/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs index 8efcd435..aa01c7dc 100644 --- a/nac3core/src/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -1,6 +1,13 @@ -use crate::location::Location; -use crate::top_level::DefinitionId; -use crate::typecheck::typedef::Type; +use std::cell::RefCell; +use std::collections::HashMap; + +use crate::top_level::{DefinitionId, TopLevelContext, TopLevelDef}; +use crate::typecheck::{ + type_inferencer::PrimitiveStore, + typedef::{Type, Unifier}, +}; +use crate::{location::Location, typecheck::typedef::TypeEnum}; +use itertools::{chain, izip}; use rustpython_parser::ast::Expr; #[derive(Clone, PartialEq)] @@ -15,11 +22,121 @@ pub enum SymbolValue { } pub trait SymbolResolver { - fn get_symbol_type(&self, str: &str) -> Option; - fn parse_type_name(&self, expr: &Expr<()>) -> Option; + fn get_symbol_type( + &self, + unifier: &mut Unifier, + primitives: &PrimitiveStore, + str: &str, + ) -> Option; fn get_identifier_def(&self, str: &str) -> DefinitionId; fn get_symbol_value(&self, str: &str) -> Option; fn get_symbol_location(&self, str: &str) -> Option; - fn get_module_resolver(&self, module_name: &str) -> Option<&dyn SymbolResolver>; // NOTE: for getting imported modules' symbol resolver? - // handle function call etc. + // handle function call etc. +} + +impl dyn SymbolResolver { + pub fn parse_type_annotation( + &self, + top_level: &TopLevelContext, + unifier: &mut Unifier, + primitives: &PrimitiveStore, + expr: &Expr, + ) -> Result { + use rustpython_parser::ast::ExprKind::*; + match &expr.node { + Name { id, .. 
} => match id.as_str() { + "int32" => Ok(primitives.int32), + "int64" => Ok(primitives.int64), + "float" => Ok(primitives.float), + "bool" => Ok(primitives.bool), + "None" => Ok(primitives.none), + x => { + let obj_id = self.get_identifier_def(x); + let defs = top_level.definitions.read(); + let def = defs[obj_id.0].read(); + if let TopLevelDef::Class { fields, methods, type_vars, .. } = &*def { + if !type_vars.is_empty() { + return Err(format!( + "Unexpected number of type parameters: expected {} but got 0", + type_vars.len() + )); + } + let fields = RefCell::new( + chain( + fields.iter().map(|(k, v)| (k.clone(), *v)), + methods.iter().map(|(k, v, _)| (k.clone(), *v)), + ) + .collect(), + ); + Ok(unifier.add_ty(TypeEnum::TObj { + obj_id, + fields, + params: Default::default(), + })) + } else { + Err("Cannot use function name as type".into()) + } + } + }, + Subscript { value, slice, .. } => { + if let Name { id, .. } = &value.node { + if id == "virtual" { + let ty = + self.parse_type_annotation(top_level, unifier, primitives, slice)?; + Ok(unifier.add_ty(TypeEnum::TVirtual { ty })) + } else { + let types = if let Tuple { elts, .. } = &slice.node { + elts.iter() + .map(|v| { + self.parse_type_annotation(top_level, unifier, primitives, v) + }) + .collect::, _>>()? + } else { + vec![self.parse_type_annotation(top_level, unifier, primitives, slice)?] + }; + + let obj_id = self.get_identifier_def(id); + let defs = top_level.definitions.read(); + let def = defs[obj_id.0].read(); + if let TopLevelDef::Class { fields, methods, type_vars, .. } = &*def { + if types.len() != type_vars.len() { + return Err(format!( + "Unexpected number of type parameters: expected {} but got {}", + type_vars.len(), + types.len() + )); + } + let mut subst = HashMap::new(); + for (var, ty) in izip!(type_vars.iter(), types.iter()) { + let id = if let TypeEnum::TVar { id, .. } = &*unifier.get_ty(*var) { + *id + } else { + unreachable!() + }; + subst.insert(id, *ty); + } + let mut fields = fields + .iter() + .map(|(attr, ty)| { + let ty = unifier.subst(*ty, &subst).unwrap_or(*ty); + (attr.clone(), ty) + }) + .collect::>(); + fields.extend(methods.iter().map(|(attr, ty, _)| { + let ty = unifier.subst(*ty, &subst).unwrap_or(*ty); + (attr.clone(), ty) + })); + let fields = RefCell::new(fields); + Ok(unifier.add_ty(TypeEnum::TObj { obj_id, fields, params: subst })) + } else { + Err("Cannot use function name as type".into()) + } + } + } else { + Err("unsupported type expression".into()) + } + } + _ => Err("unsupported type expression".into()), + } + } } diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 9c607955..5497e90d 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -440,37 +440,3 @@ impl TopLevelComposer { } } -pub fn parse_type_var( - input: &ast::Expr, - resolver: &dyn SymbolResolver, -) -> Result { - match &input.node { - ast::ExprKind::Name { id, .. } => resolver - .get_symbol_type(id) - .ok_or_else(|| "unknown type variable identifer".to_string()), - - ast::ExprKind::Attribute { value, attr, .. } => { - if let ast::ExprKind::Name { id, .. } = &value.node { - let next_resolver = resolver - .get_module_resolver(id) - .ok_or_else(|| "unknown imported module".to_string())?; - next_resolver - .get_symbol_type(attr) - .ok_or_else(|| "unknown type variable identifer".to_string()) - } else { - unimplemented!() - // recursively resolve attr thing, FIXME: new problem: how do we handle this? 
- // # A.py - // class A: - // T = TypeVar('T', int, bool) - // pass - // # B.py - // import A - // class B(Generic[A.A.T]): - // pass - } - } - - _ => Err("not supported".into()), - } -} diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index d81c0d02..cdd83a5b 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -5,7 +5,7 @@ use std::{cell::RefCell, sync::Arc}; use super::magic_methods::*; use super::typedef::{Call, FunSignature, FuncArg, Type, TypeEnum, Unifier}; -use crate::symbol_resolver::SymbolResolver; +use crate::{symbol_resolver::SymbolResolver, top_level::TopLevelContext}; use itertools::izip; use rustpython_parser::ast::{ self, @@ -44,6 +44,7 @@ pub struct FunctionData { } pub struct Inferencer<'a> { + pub top_level: &'a TopLevelContext, pub function_data: &'a mut FunctionData, pub unifier: &'a mut Unifier, pub primitives: &'a PrimitiveStore, @@ -81,11 +82,12 @@ impl<'a> fold::Fold<()> for Inferencer<'a> { } else { None }; - let annotation_type = self - .function_data - .resolver - .parse_type_name(annotation.as_ref()) - .ok_or_else(|| "cannot parse type name".to_string())?; + let annotation_type = self.function_data.resolver.parse_type_annotation( + self.top_level, + self.unifier, + &self.primitives, + annotation.as_ref(), + )?; self.unifier.unify(annotation_type, target.custom.unwrap())?; let annotation = Box::new(NaiveFolder().fold_expr(*annotation)?); Located { @@ -235,6 +237,7 @@ impl<'a> Inferencer<'a> { primitives: self.primitives, virtual_checks: self.virtual_checks, calls: self.calls, + top_level: self.top_level, variable_mapping, }; let fun = FunSignature { @@ -275,6 +278,7 @@ impl<'a> Inferencer<'a> { function_data: self.function_data, unifier: self.unifier, virtual_checks: self.virtual_checks, + top_level: self.top_level, variable_mapping, primitives: self.primitives, calls: self.calls, @@ -336,10 +340,12 @@ impl<'a> Inferencer<'a> { } let arg0 = self.fold_expr(args.remove(0))?; let ty = if let Some(arg) = args.pop() { - self.function_data - .resolver - .parse_type_name(&arg) - .ok_or_else(|| "error parsing type".to_string())? + self.function_data.resolver.parse_type_annotation( + self.top_level, + self.unifier, + self.primitives, + &arg, + )? 
} else { self.unifier.get_fresh_var().0 }; @@ -412,11 +418,15 @@ impl<'a> Inferencer<'a> { if let Some(ty) = self.variable_mapping.get(id) { Ok(*ty) } else { - Ok(self.function_data.resolver.get_symbol_type(id).unwrap_or_else(|| { - let ty = self.unifier.get_fresh_var().0; - self.variable_mapping.insert(id.to_string(), ty); - ty - })) + Ok(self + .function_data + .resolver + .get_symbol_type(self.unifier, self.primitives, id) + .unwrap_or_else(|| { + let ty = self.unifier.get_fresh_var().0; + self.variable_mapping.insert(id.to_string(), ty); + ty + })) } } diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 44775c80..8fbd88f9 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -1,30 +1,23 @@ use super::super::typedef::*; use super::*; -use crate::location::Location; use crate::symbol_resolver::*; use crate::top_level::DefinitionId; +use crate::{location::Location, top_level::TopLevelDef}; use indoc::indoc; use itertools::zip; -use rustpython_parser::ast; +use parking_lot::RwLock; use rustpython_parser::parser::parse_program; use test_case::test_case; struct Resolver { - identifier_mapping: HashMap, + id_to_type: HashMap, + id_to_def: HashMap, class_names: HashMap, } impl SymbolResolver for Resolver { - fn get_symbol_type(&self, str: &str) -> Option { - self.identifier_mapping.get(str).cloned() - } - - fn parse_type_name(&self, ty: &ast::Expr<()>) -> Option { - if let ExprKind::Name { id, .. } = &ty.node { - self.class_names.get(id).cloned() - } else { - unimplemented!() - } + fn get_symbol_type(&self, _: &mut Unifier, _: &PrimitiveStore, str: &str) -> Option { + self.id_to_type.get(str).cloned() } fn get_symbol_value(&self, _: &str) -> Option { @@ -35,12 +28,8 @@ impl SymbolResolver for Resolver { unimplemented!() } - fn get_identifier_def(&self, _: &str) -> DefinitionId { - unimplemented!() - } - - fn get_module_resolver(&self, _: &str) -> Option<&dyn SymbolResolver> { - unimplemented!() + fn get_identifier_def(&self, id: &str) -> DefinitionId { + self.id_to_def.get(id).cloned().unwrap() } } @@ -52,6 +41,7 @@ struct TestEnvironment { pub identifier_mapping: HashMap, pub virtual_checks: Vec<(Type, Type)>, pub calls: HashMap>, + pub top_level: TopLevelContext, } impl TestEnvironment { @@ -101,11 +91,17 @@ impl TestEnvironment { identifier_mapping.insert("None".into(), none); let resolver = Arc::new(Resolver { - identifier_mapping: identifier_mapping.clone(), + id_to_type: identifier_mapping.clone(), + id_to_def: Default::default(), class_names: Default::default(), }) as Arc; TestEnvironment { + top_level: TopLevelContext { + definitions: Default::default(), + unifiers: Default::default(), + conetexts: Default::default(), + }, unifier, function_data: FunctionData { resolver, @@ -123,6 +119,7 @@ impl TestEnvironment { fn new() -> TestEnvironment { let mut unifier = Unifier::new(); let mut identifier_mapping = HashMap::new(); + let mut top_level_defs = Vec::new(); let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(0), fields: HashMap::new().into(), @@ -149,6 +146,16 @@ impl TestEnvironment { params: HashMap::new(), }); identifier_mapping.insert("None".into(), none); + for i in 0..5 { + top_level_defs.push(RwLock::new(TopLevelDef::Class { + object_id: DefinitionId(i), + type_vars: Default::default(), + fields: Default::default(), + methods: Default::default(), + ancestors: Default::default(), + resolver: None, + })); + } let primitives = PrimitiveStore { 
int32, int64, float, bool, none }; @@ -159,6 +166,14 @@ impl TestEnvironment { fields: [("a".into(), v0)].iter().cloned().collect::>().into(), params: [(id, v0)].iter().cloned().collect(), }); + top_level_defs.push(RwLock::new(TopLevelDef::Class { + object_id: DefinitionId(5), + type_vars: vec![v0], + fields: [("a".into(), v0)].into(), + methods: Default::default(), + ancestors: Default::default(), + resolver: None, + })); identifier_mapping.insert( "Foo".into(), @@ -183,6 +198,14 @@ impl TestEnvironment { .into(), params: Default::default(), }); + top_level_defs.push(RwLock::new(TopLevelDef::Class { + object_id: DefinitionId(6), + type_vars: Default::default(), + fields: [("a".into(), int32), ("b".into(), fun)].into(), + methods: Default::default(), + ancestors: Default::default(), + resolver: None, + })); identifier_mapping.insert( "Bar".into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { @@ -201,6 +224,14 @@ impl TestEnvironment { .into(), params: Default::default(), }); + top_level_defs.push(RwLock::new(TopLevelDef::Class { + object_id: DefinitionId(7), + type_vars: Default::default(), + fields: [("a".into(), bool), ("b".into(), fun)].into(), + methods: Default::default(), + ancestors: Default::default(), + resolver: None, + })); identifier_mapping.insert( "Bar2".into(), unifier.add_ty(TypeEnum::TFunc(FunSignature { @@ -225,12 +256,28 @@ impl TestEnvironment { .cloned() .collect(); - let resolver = - Arc::new(Resolver { identifier_mapping: identifier_mapping.clone(), class_names }) - as Arc; + let top_level = TopLevelContext { + definitions: Arc::new(RwLock::new(top_level_defs)), + unifiers: Default::default(), + conetexts: Default::default(), + }; + + let resolver = Arc::new(Resolver { + id_to_type: identifier_mapping.clone(), + id_to_def: [ + ("Foo".into(), DefinitionId(5)), + ("Bar".into(), DefinitionId(6)), + ("Bar2".into(), DefinitionId(7)), + ] + .iter() + .cloned() + .collect(), + class_names, + }) as Arc; TestEnvironment { unifier, + top_level, function_data: FunctionData { resolver, bound_variables: Vec::new(), @@ -246,6 +293,7 @@ impl TestEnvironment { fn get_inferencer(&mut self) -> Inferencer { Inferencer { + top_level: &self.top_level, function_data: &mut self.function_data, unifier: &mut self.unifier, variable_mapping: Default::default(), diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 38e2a9ff..5126d842 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -645,7 +645,7 @@ impl Unifier { /// If this returns Some(T), T would be the substituted type. /// If this returns None, the result type would be the original type /// (no substitution has to be done). - fn subst(&mut self, a: Type, mapping: &VarMap) -> Option { + pub fn subst(&mut self, a: Type, mapping: &VarMap) -> Option { use TypeVarMeta::*; let ty = self.unification_table.probe_value(a).clone(); // this function would only be called when we instantiate functions. 
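
At this point in the series the SymbolResolver trait has grown extra parameters: get_symbol_type now receives the unifier and the primitive store, and annotations are parsed through parse_type_annotation against the TopLevelContext rather than the old parse_type_name. The sketch below shows roughly what an implementor looks like under that shape, modelled on the test Resolver in the diff above; the module paths, generic arguments, and the unimplemented stubs are assumptions for illustration rather than a verbatim copy of the crate, and the very next patch changes get_identifier_def to return an Option.

    // Minimal resolver sketch against the trait shape used by these patches.
    // Paths and stub bodies are assumptions; compare the test Resolver above.
    use std::collections::HashMap;

    use nac3core::location::Location;
    use nac3core::symbol_resolver::{SymbolResolver, SymbolValue};
    use nac3core::top_level::DefinitionId;
    use nac3core::typecheck::type_inferencer::PrimitiveStore;
    use nac3core::typecheck::typedef::{Type, Unifier};

    struct MapResolver {
        // identifier name -> type (type variables, top-level function types)
        id_to_type: HashMap<String, Type>,
        // identifier name -> top-level definition id (classes, functions)
        id_to_def: HashMap<String, DefinitionId>,
    }

    impl SymbolResolver for MapResolver {
        fn get_symbol_type(
            &self,
            _: &mut Unifier,
            _: &PrimitiveStore,
            name: &str,
        ) -> Option<Type> {
            self.id_to_type.get(name).cloned()
        }

        fn get_identifier_def(&self, name: &str) -> DefinitionId {
            // panics on unknown names at this point in the series;
            // the next patch relaxes this to Option<DefinitionId>
            self.id_to_def.get(name).cloned().unwrap()
        }

        fn get_symbol_value(&self, _: &str) -> Option<SymbolValue> {
            unimplemented!()
        }

        fn get_symbol_location(&self, _: &str) -> Option<Location> {
            unimplemented!()
        }
    }
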
From d46a4b2d38729ea3a4deb43a8862eab7acb67bdc Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 12 Aug 2021 10:25:32 +0800 Subject: [PATCH 099/131] symbol_resolver: fixed type variable handling --- nac3core/src/symbol_resolver.rs | 63 ++++++++++++------- .../src/typecheck/type_inferencer/test.rs | 4 +- 2 files changed, 42 insertions(+), 25 deletions(-) diff --git a/nac3core/src/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs index aa01c7dc..b6e3b792 100644 --- a/nac3core/src/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -22,19 +22,22 @@ pub enum SymbolValue { } pub trait SymbolResolver { + // get type of type variable identifier or top-level function type fn get_symbol_type( &self, unifier: &mut Unifier, primitives: &PrimitiveStore, str: &str, ) -> Option; - fn get_identifier_def(&self, str: &str) -> DefinitionId; + // get the top-level definition of identifiers + fn get_identifier_def(&self, str: &str) -> Option; fn get_symbol_value(&self, str: &str) -> Option; fn get_symbol_location(&self, str: &str) -> Option; // handle function call etc. } impl dyn SymbolResolver { + // convert type annotation into type pub fn parse_type_annotation( &self, top_level: &TopLevelContext, @@ -52,29 +55,41 @@ impl dyn SymbolResolver { "None" => Ok(primitives.none), x => { let obj_id = self.get_identifier_def(x); - let defs = top_level.definitions.read(); - let def = defs[obj_id.0].read(); - if let TopLevelDef::Class { fields, methods, type_vars, .. } = &*def { - if !type_vars.is_empty() { - return Err(format!( - "Unexpected number of type parameters: expected {} but got 0", - type_vars.len() - )); + if let Some(obj_id) = obj_id { + let defs = top_level.definitions.read(); + let def = defs[obj_id.0].read(); + if let TopLevelDef::Class { fields, methods, type_vars, .. } = &*def { + if !type_vars.is_empty() { + return Err(format!( + "Unexpected number of type parameters: expected {} but got 0", + type_vars.len() + )); + } + let fields = RefCell::new( + chain( + fields.iter().map(|(k, v)| (k.clone(), *v)), + methods.iter().map(|(k, v, _)| (k.clone(), *v)), + ) + .collect(), + ); + Ok(unifier.add_ty(TypeEnum::TObj { + obj_id, + fields, + params: Default::default(), + })) + } else { + Err("Cannot use function name as type".into()) } - let fields = RefCell::new( - chain( - fields.iter().map(|(k, v)| (k.clone(), *v)), - methods.iter().map(|(k, v, _)| (k.clone(), *v)), - ) - .collect(), - ); - Ok(unifier.add_ty(TypeEnum::TObj { - obj_id, - fields, - params: Default::default(), - })) } else { - Err("Cannot use function name as type".into()) + // it could be a type variable + let ty = self + .get_symbol_type(unifier, primitives, x) + .ok_or_else(|| "Cannot use function name as type".to_owned())?; + if let TypeEnum::TVar { .. } = &*unifier.get_ty(ty) { + Ok(ty) + } else { + Err(format!("Unknown type annotation {}", x)) + } } } }, @@ -95,7 +110,9 @@ impl dyn SymbolResolver { vec![self.parse_type_annotation(top_level, unifier, primitives, slice)?] }; - let obj_id = self.get_identifier_def(id); + let obj_id = self + .get_identifier_def(id) + .ok_or_else(|| format!("Unknown type annotation {}", id))?; let defs = top_level.definitions.read(); let def = defs[obj_id.0].read(); if let TopLevelDef::Class { fields, methods, type_vars, .. 
} = &*def { diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 8fbd88f9..ee65dbe3 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -28,8 +28,8 @@ impl SymbolResolver for Resolver { unimplemented!() } - fn get_identifier_def(&self, id: &str) -> DefinitionId { - self.id_to_def.get(id).cloned().unwrap() + fn get_identifier_def(&self, id: &str) -> Option { + self.id_to_def.get(id).cloned() } } From 17ee8fe6d057bc8ae59d7c5691c98946dbab6c64 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Wed, 11 Aug 2021 17:35:23 +0800 Subject: [PATCH 100/131] starting cleaning up and further add Arc --- nac3core/src/top_level.rs | 65 +++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 27 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 5497e90d..1e181cc5 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -63,19 +63,27 @@ pub fn name_mangling(mut class_name: String, method_name: &str) -> String { class_name } +// like adding some info on top of the TopLevelDef for later parsing the class bases, method, +// and function sigatures pub struct TopLevelDefInfo { - // like adding some info on top of the TopLevelDef for later parsing the class bases, method, - // and function sigatures - def: TopLevelDef, // the definition entry - ty: Type, // the entry in the top_level unifier - ast: Option>, // the ast submitted by applications, primitives and class methods will have None value here - // resolver: Option<&'a dyn SymbolResolver> // the resolver + // the definition entry + def: TopLevelDef, + // the entry in the top_level unifier + ty: Type, + // the ast submitted by applications, primitives and + // class methods will have None value here + ast: Option>, } pub struct TopLevelComposer { - pub definition_list: Vec, + // list of top level definitions and their info + pub definition_list: RwLock>, + // primitive store pub primitives: PrimitiveStore, + // start as a primitive unifier, will add more top_level defs inside pub unifier: Unifier, + // class method to definition id + pub class_method_to_def_id: HashMap, } impl TopLevelComposer { @@ -111,8 +119,11 @@ impl TopLevelComposer { (primitives, unifier) } - pub fn new() -> Self { + // return a composer and things to make a "primitive" symbol resolver, so that the symbol + // resolver can later figure out primitive type definitions when passed a primitive type name + pub fn new() -> (Vec<(String, DefinitionId, Type)>, Self) { let primitives = Self::make_primitives(); + // the def list including the entries of primitive info let definition_list: Vec = vec![ TopLevelDefInfo { def: Self::make_top_level_class_def(0, None), @@ -139,8 +150,20 @@ impl TopLevelComposer { ast: None, ty: primitives.0.none, }, - ]; // the entries for primitive types - TopLevelComposer { definition_list, primitives: primitives.0, unifier: primitives.1 } + ]; + let composer = TopLevelComposer { + definition_list: definition_list.into(), + primitives: primitives.0, + unifier: primitives.1, + class_method_to_def_id: Default::default(), + }; + (vec![ + ("int32".into(), DefinitionId(0), composer.primitives.int32), + ("int64".into(), DefinitionId(1), composer.primitives.int64), + ("float".into(), DefinitionId(2), composer.primitives.float), + ("bool".into(), DefinitionId(3), composer.primitives.bool), + ("none".into(), DefinitionId(4), composer.primitives.none), + ], composer) } /// already include 
the definition_id of itself inside the ancestors vector @@ -172,18 +195,6 @@ impl TopLevelComposer { } } - // like to make and return a "primitive" symbol resolver? so that the symbol resolver - // can later figure out primitive type definitions when passed a primitive type name - pub fn get_primitives_definition(&self) -> Vec<(String, DefinitionId, Type)> { - vec![ - ("int32".into(), DefinitionId(0), self.primitives.int32), - ("int64".into(), DefinitionId(1), self.primitives.int64), - ("float".into(), DefinitionId(2), self.primitives.float), - ("bool".into(), DefinitionId(3), self.primitives.bool), - ("none".into(), DefinitionId(4), self.primitives.none), - ] - } - pub fn register_top_level( &mut self, ast: ast::Stmt<()>, @@ -192,7 +203,8 @@ impl TopLevelComposer { match &ast.node { ast::StmtKind::ClassDef { name, body, .. } => { let class_name = name.to_string(); - let class_def_id = self.definition_list.len(); + let def_list = self.definition_list.write(); + let class_def_id = def_list.len(); // add the class to the unifier let ty = self.unifier.add_ty(TypeEnum::TObj { @@ -241,10 +253,10 @@ impl TopLevelComposer { }) // FIXME: should we return this to the symbol resolver?, should be yes } - } else { - } // else do nothing + } } - // add to the definition list + + // add the class to the definition list self.definition_list.push(TopLevelDefInfo { def: Self::make_top_level_class_def(class_def_id, resolver), ast: Some(ast), @@ -439,4 +451,3 @@ impl TopLevelComposer { Ok(()) } } - From 824a5cb01a5b2bcd82fc916814fcc11ed0ca0786 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Thu, 12 Aug 2021 10:50:01 +0800 Subject: [PATCH 101/131] register top level clean up --- nac3core/src/top_level.rs | 124 +++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 63 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 1e181cc5..eaac3621 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -119,8 +119,8 @@ impl TopLevelComposer { (primitives, unifier) } - // return a composer and things to make a "primitive" symbol resolver, so that the symbol - // resolver can later figure out primitive type definitions when passed a primitive type name + /// return a composer and things to make a "primitive" symbol resolver, so that the symbol + /// resolver can later figure out primitive type definitions when passed a primitive type name pub fn new() -> (Vec<(String, DefinitionId, Type)>, Self) { let primitives = Self::make_primitives(); // the def list including the entries of primitive info @@ -199,11 +199,11 @@ impl TopLevelComposer { &mut self, ast: ast::Stmt<()>, resolver: Option>>, - ) -> Result, String> { + ) -> Result<(String, DefinitionId, Type), String> { match &ast.node { ast::StmtKind::ClassDef { name, body, .. } => { let class_name = name.to_string(); - let def_list = self.definition_list.write(); + let mut def_list = self.definition_list.write(); let class_def_id = def_list.len(); // add the class to the unifier @@ -212,19 +212,16 @@ impl TopLevelComposer { fields: Default::default(), params: Default::default(), }); - - let mut ret_vector: Vec<(String, DefinitionId, Type)> = - vec![(class_name.clone(), DefinitionId(class_def_id), ty)]; + // parse class def body and register class methods into the def list - // NOTE: module's symbol resolver would not know the name of the class methods, - // thus cannot return their definition_id? so we have to manage it ourselves? 
- // or do we return the class method list of (method_name, def_id, type) to - // application to be used to build symbol resolver? <- current implementation - // FIXME: better do not return and let symbol resolver to manage the mangled name + // module's symbol resolver would not know the name of the class methods, + // thus cannot return their definition_id? so we have to manage it ourselves + // by using the field `class_method_to_def_id` for b in body { if let ast::StmtKind::FunctionDef { name, .. } = &b.node { let fun_name = name_mangling(class_name.clone(), name); - let def_id = self.definition_list.len(); + let def_id = def_list.len(); + // add to unifier let ty = self.unifier.add_ty(TypeEnum::TFunc( crate::typecheck::typedef::FunSignature { @@ -233,51 +230,61 @@ impl TopLevelComposer { vars: Default::default(), }, )); + // add to the definition list - self.definition_list.push(TopLevelDefInfo { - def: Self::make_top_level_function_def(fun_name.clone(), ty, None), // FIXME: + def_list.push(TopLevelDefInfo { + def: Self::make_top_level_function_def(fun_name.clone(), ty, resolver.clone()), ty, - ast: None, // since it is inside the class def body statments + // since it is inside the class def body statments, the ast is None + ast: None, }); - ret_vector.push((fun_name, DefinitionId(def_id), ty)); + + // class method, do not let the symbol manager manage it, use our own map + // FIXME: maybe not do name magling, use map to map instead + self.class_method_to_def_id.insert( + fun_name, + DefinitionId(def_id) + ); // if it is the contructor, special handling is needed. In the above // handling, we still add __init__ function to the class method if name == "__init__" { - self.definition_list.push(TopLevelDefInfo { + def_list.push(TopLevelDefInfo { def: TopLevelDef::Initializer { class_id: DefinitionId(class_def_id), }, - ty: self.primitives.none, // arbitary picked one - ast: None, // it is inside the class def body statments + // arbitary picked one for the constructor + ty: self.primitives.none, + // it is inside the class def body statments, so None + ast: None, }) - // FIXME: should we return this to the symbol resolver?, should be yes } } } // add the class to the definition list - self.definition_list.push(TopLevelDefInfo { + def_list.push(TopLevelDefInfo { def: Self::make_top_level_class_def(class_def_id, resolver), ast: Some(ast), ty, }); - Ok(ret_vector) - } + Ok((class_name, DefinitionId(class_def_id), ty)) + }, ast::StmtKind::FunctionDef { name, .. 
} => { let fun_name = name.to_string(); - let def_id = self.definition_list.len(); + // add to the unifier - let ty = - self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { - args: Default::default(), - ret: self.primitives.none, - vars: Default::default(), - })); + let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { + args: Default::default(), + ret: self.primitives.none, + vars: Default::default(), + })); + // add to the definition list - self.definition_list.push(TopLevelDefInfo { + let mut def_list = self.definition_list.write(); + def_list.push(TopLevelDefInfo { def: Self::make_top_level_function_def( name.into(), self.primitives.none, @@ -287,7 +294,7 @@ impl TopLevelComposer { ty, }); - Ok(vec![(fun_name, DefinitionId(def_id), ty)]) + Ok((fun_name, DefinitionId(def_list.len() - 1), ty)) } _ => Err("only registrations of top level classes/functions are supprted".into()), @@ -296,7 +303,8 @@ impl TopLevelComposer { /// this should be called after all top level classes are registered, and will actually fill in those fields of the previous dummy one pub fn analyze_top_level(&mut self) -> Result<(), String> { - for mut d in &mut self.definition_list { + for d in self.definition_list.write().iter_mut() { + // only analyze those with ast, and class_method(ast in class def) if let Some(ast) = &d.ast { match &ast.node { ast::StmtKind::ClassDef { @@ -310,23 +318,25 @@ impl TopLevelComposer { fields, methods, type_vars, - // resolver, + resolver, ) = if let TopLevelDef::Class { object_id, ancestors, fields, methods, type_vars, - resolver + resolver: Some(resolver) } = &mut d.def { - (object_id, ancestors, fields, methods, type_vars) // FIXME: this unwrap is not safe + (object_id, ancestors, fields, methods, type_vars, resolver.lock()) } else { unreachable!() }; // try to get mutable reference of the entry in the unification table, get the `TypeEnum` let (params, fields ) = if let TypeEnum::TObj { - params, // FIXME: this params is immutable, even if this is mutable, what should the key be, get the original typevar's var_id? + // FIXME: this params is immutable, even if this is mutable, what + // should the key be, get the original typevar's var_id? + params, fields, .. } = self.unifier.get_ty(d.ty).borrow() { @@ -337,7 +347,9 @@ impl TopLevelComposer { // into the `bases` ast node for b in bases { match &b.node { - // typevars bounded to the class, things like `class A(Generic[T, V, ImportedModule.T])` + // typevars bounded to the class, only support things like `class A(Generic[T, V])`, + // things like `class A(Generic[T, V, ImportedModule.T])` is not supported + // i.e. only simple names are allowed in the subscript // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params ast::ExprKind::Subscript {value, slice, ..} if { if let ast::ExprKind::Name {id, ..} = &value.node { @@ -345,22 +357,10 @@ impl TopLevelComposer { } else { false } } => { match &slice.node { - // `class Foo(Generic[T, V, P, ImportedModule.T]):` + // `class Foo(Generic[T, V, P]):` ast::ExprKind::Tuple {elts, ..} => { for e in elts { - // TODO: I'd better parse the node to get the Type of the type vars(can have things like: A.B.C.typevar?) 
- match &e.node { - ast::ExprKind::Name {id, ..} => { - // the def_list - // type_vars.push(resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable".to_string())?); FIXME: - - // the TypeEnum of the class - // FIXME: the `params` destructed above is not mutable, even if this is mutable, what should the key be? - unimplemented!() - }, - - _ => unimplemented!() - } + // resolver.parse_type_annotation(self.definition_list.) // FIXME: } }, @@ -374,17 +374,11 @@ impl TopLevelComposer { unimplemented!() }, - // `class Foo(Generic[ImportedModule.T])` - ast::ExprKind::Attribute {value, attr, ..} => { - // TODO: - unimplemented!() - }, - - _ => return Err("not supported".into()) // NOTE: it is really all the supported cases? + _ => return Err("not supported, only simple names are allowed in the subscript".into()) }; }, - // base class, name directly available inside the + /* // base class, name directly available inside the // module, can use this module's symbol resolver ast::ExprKind::Name {id, ..} => { // let def_id = resolver.get_identifier_def(id); FIXME: @@ -407,7 +401,9 @@ impl TopLevelComposer { // `class Foo(ImportedModule.A[int, bool])`, A is a class with associated type variables ast::ExprKind::Subscript {value, slice, ..} => { unimplemented!() - }, + }, */ + + // base class is possible in other cases, we parse for thr base class _ => return Err("not supported".into()) } } @@ -444,7 +440,9 @@ impl TopLevelComposer { unimplemented!() } - _ => return Err("only expect function and class definitions to be submitted here to be analyzed".into()) + node => { + return Err("only expect function and class definitions to be submitted here to be analyzed".into()) + } } } } From 3a93e2b048e77045b97a91299702d8aeac48e1da Mon Sep 17 00:00:00 2001 From: ychenfo Date: Thu, 12 Aug 2021 13:17:51 +0800 Subject: [PATCH 102/131] TypeEnum::TObj.param is now RefCell for interior mutability --- nac3core/src/codegen/expr.rs | 2 +- nac3core/src/symbol_resolver.rs | 3 +- nac3core/src/top_level.rs | 67 +++++++++---------- .../src/typecheck/type_inferencer/test.rs | 22 +++--- nac3core/src/typecheck/typedef/mod.rs | 13 ++-- nac3core/src/typecheck/typedef/test.rs | 13 ++-- 6 files changed, 61 insertions(+), 59 deletions(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index 2f4c74db..d21d3ba7 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -19,7 +19,7 @@ impl<'ctx> CodeGenContext<'ctx> { let mut vars = obj .map(|ty| { if let TypeEnum::TObj { params, .. 
} = &*self.unifier.get_ty(ty) { - params.clone() + params.borrow().clone() } else { unreachable!() } diff --git a/nac3core/src/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs index b6e3b792..ff048927 100644 --- a/nac3core/src/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -143,8 +143,7 @@ impl dyn SymbolResolver { let ty = unifier.subst(*ty, &subst).unwrap_or(*ty); (attr.clone(), ty) })); - let fields = RefCell::new(fields); - Ok(unifier.add_ty(TypeEnum::TObj { obj_id, fields, params: subst })) + Ok(unifier.add_ty(TypeEnum::TObj { obj_id, fields: fields.into(), params: subst.into() })) } else { Err("Cannot use function name as type".into()) } diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index eaac3621..98cbacb9 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -57,14 +57,8 @@ pub struct TopLevelContext { pub conetexts: Arc>>>, } -pub fn name_mangling(mut class_name: String, method_name: &str) -> String { - // need to further extend to more name mangling like instantiations of typevar - class_name.push_str(method_name); - class_name -} - -// like adding some info on top of the TopLevelDef for later parsing the class bases, method, -// and function sigatures +// like adding some info on top of the TopLevelDef for +// later parsing the class bases, method, and function sigatures pub struct TopLevelDefInfo { // the definition entry def: TopLevelDef, @@ -82,37 +76,42 @@ pub struct TopLevelComposer { pub primitives: PrimitiveStore, // start as a primitive unifier, will add more top_level defs inside pub unifier: Unifier, - // class method to definition id + // mangled class method name to def_id pub class_method_to_def_id: HashMap, } impl TopLevelComposer { + fn name_mangling(mut class_name: String, method_name: &str) -> String { + class_name.push_str(method_name); + class_name + } + pub fn make_primitives() -> (PrimitiveStore, Unifier) { let mut unifier = Unifier::new(); let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(0), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let int64 = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(1), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let float = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(2), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let bool = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(3), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let none = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(4), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let primitives = PrimitiveStore { int32, int64, float, bool, none }; crate::typecheck::magic_methods::set_primitives_magic_methods(&primitives, &mut unifier); @@ -212,6 +211,14 @@ impl TopLevelComposer { fields: Default::default(), params: Default::default(), }); + + // add the class to the definition list + def_list.push(TopLevelDefInfo { + def: Self::make_top_level_class_def(class_def_id, resolver.clone()), + // NOTE: Temporarily none here since function body need to be read later + ast: None, + ty, + }); // parse class def body and register class methods into the def list // module's symbol resolver would not know the name of the class methods, @@ -219,7 +226,7 @@ impl TopLevelComposer { // by using the field `class_method_to_def_id` for b in body { if let 
ast::StmtKind::FunctionDef { name, .. } = &b.node { - let fun_name = name_mangling(class_name.clone(), name); + let fun_name = Self::name_mangling(class_name.clone(), name); let def_id = def_list.len(); // add to unifier @@ -240,19 +247,14 @@ impl TopLevelComposer { }); // class method, do not let the symbol manager manage it, use our own map - // FIXME: maybe not do name magling, use map to map instead - self.class_method_to_def_id.insert( - fun_name, - DefinitionId(def_id) - ); + self.class_method_to_def_id.insert(fun_name, DefinitionId(def_id)); // if it is the contructor, special handling is needed. In the above // handling, we still add __init__ function to the class method if name == "__init__" { + // FIXME: how can this later be fetched? def_list.push(TopLevelDefInfo { - def: TopLevelDef::Initializer { - class_id: DefinitionId(class_def_id), - }, + def: TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) }, // arbitary picked one for the constructor ty: self.primitives.none, // it is inside the class def body statments, so None @@ -262,13 +264,10 @@ impl TopLevelComposer { } } - // add the class to the definition list - def_list.push(TopLevelDefInfo { - def: Self::make_top_level_class_def(class_def_id, resolver), - ast: Some(ast), - ty, - }); - + // move the ast to the entry of the class in the def_list + def_list.get_mut(class_def_id).unwrap().ast = Some(ast); + + // return Ok((class_name, DefinitionId(class_def_id), ty)) }, @@ -313,28 +312,28 @@ impl TopLevelComposer { .. } => { // get the mutable reference of the entry in the definition list, get the `TopLevelDef` - let (_, + let ( ancestors, fields, methods, type_vars, resolver, ) = if let TopLevelDef::Class { - object_id, + object_id: _, ancestors, fields, methods, type_vars, resolver: Some(resolver) } = &mut d.def { - (object_id, ancestors, fields, methods, type_vars, resolver.lock()) + (ancestors, fields, methods, type_vars, resolver.lock()) } else { unreachable!() }; // try to get mutable reference of the entry in the unification table, get the `TypeEnum` let (params, fields ) = if let TypeEnum::TObj { - // FIXME: this params is immutable, even if this is mutable, what + // FIXME: this params is immutable, and what // should the key be, get the original typevar's var_id? 
params, fields, diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index ee65dbe3..21012952 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -51,27 +51,27 @@ impl TestEnvironment { let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(0), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let int64 = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(1), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let float = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(2), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let bool = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(3), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let none = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(4), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let primitives = PrimitiveStore { int32, int64, float, bool, none }; set_primitives_magic_methods(&primitives, &mut unifier); @@ -123,27 +123,27 @@ impl TestEnvironment { let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(0), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let int64 = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(1), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let float = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(2), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let bool = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(3), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let none = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(4), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); identifier_mapping.insert("None".into(), none); for i in 0..5 { @@ -164,7 +164,7 @@ impl TestEnvironment { let foo_ty = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(5), fields: [("a".into(), v0)].iter().cloned().collect::>().into(), - params: [(id, v0)].iter().cloned().collect(), + params: [(id, v0)].iter().cloned().collect::>().into(), }); top_level_defs.push(RwLock::new(TopLevelDef::Class { object_id: DefinitionId(5), diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 5126d842..979213ad 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -68,7 +68,7 @@ pub enum TypeEnum { TObj { obj_id: DefinitionId, fields: RefCell>, - params: VarMap, + params: RefCell, }, TVirtual { ty: Type, @@ -216,6 +216,7 @@ impl Unifier { } } TypeEnum::TObj { params, .. } => { + let params = params.borrow(); let (keys, params): (Vec<&u32>, Vec<&Type>) = params.iter().unzip(); let params = params .into_iter() @@ -253,7 +254,7 @@ impl Unifier { TList { ty } => self.is_concrete(*ty, allowed_typevars), TTuple { ty } => ty.iter().all(|ty| self.is_concrete(*ty, allowed_typevars)), TObj { params: vars, .. } => { - vars.values().all(|ty| self.is_concrete(*ty, allowed_typevars)) + vars.borrow().values().all(|ty| self.is_concrete(*ty, allowed_typevars)) } // functions are instantiated for each call sites, so the function type can contain // type variables. 
@@ -437,7 +438,7 @@ impl Unifier { if id1 != id2 { return Err(format!("Cannot unify objects with ID {} and {}", id1.0, id2.0)); } - for (x, y) in zip(params1.values(), params2.values()) { + for (x, y) in zip(params1.borrow().values(), params2.borrow().values()) { self.unify(*x, *y)?; } self.set_a_to_b(a, b); @@ -573,6 +574,7 @@ impl Unifier { } TypeEnum::TObj { obj_id, params, .. } => { let name = obj_to_name(obj_id.0); + let params = params.borrow(); if !params.is_empty() { let mut params = params.values().map(|v| self.stringify(*v, obj_to_name, var_to_name)); @@ -679,6 +681,7 @@ impl Unifier { // If the mapping does not contain any type variables in the // parameter list, we don't need to substitute the fields. // This is also used to prevent infinite substitution... + let params = params.borrow(); let need_subst = params.values().any(|v| { let ty = self.unification_table.probe_value(*v); if let TypeEnum::TVar { id, .. } = ty.as_ref() { @@ -693,7 +696,7 @@ impl Unifier { let fields = self .subst_map(&fields.borrow(), mapping) .unwrap_or_else(|| fields.borrow().clone()); - Some(self.add_ty(TypeEnum::TObj { obj_id, params, fields: fields.into() })) + Some(self.add_ty(TypeEnum::TObj { obj_id, params: params.into(), fields: fields.into() })) } else { None } @@ -776,7 +779,7 @@ impl Unifier { self.occur_check(a, *ty)?; } TypeEnum::TObj { params: map, .. } => { - for t in map.values() { + for t in map.borrow().values() { self.occur_check(a, *t)?; } } diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index 0e4a32f8..d782c14f 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -43,7 +43,7 @@ impl Unifier { ( TypeEnum::TObj { obj_id: id1, params: params1, .. }, TypeEnum::TObj { obj_id: id2, params: params2, .. }, - ) => id1 == id2 && self.map_eq(params1, params2), + ) => id1 == id2 && self.map_eq(¶ms1.borrow(), ¶ms2.borrow()), // TCall and TFunc are not yet implemented _ => false, } @@ -80,7 +80,7 @@ impl TestEnvironment { unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(0), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }), ); type_mapping.insert( @@ -88,7 +88,7 @@ impl TestEnvironment { unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(1), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }), ); type_mapping.insert( @@ -96,7 +96,7 @@ impl TestEnvironment { unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(2), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }), ); let (v0, id) = unifier.get_fresh_var(); @@ -105,7 +105,7 @@ impl TestEnvironment { unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(3), fields: [("a".into(), v0)].iter().cloned().collect::>().into(), - params: [(id, v0)].iter().cloned().collect(), + params: [(id, v0)].iter().cloned().collect::>().into(), }), ); @@ -164,6 +164,7 @@ impl TestEnvironment { let mut ty = *self.type_mapping.get(x).unwrap(); let te = self.unifier.get_ty(ty); if let TypeEnum::TObj { params, .. 
} = &*te.as_ref() { + let params = params.borrow(); if !params.is_empty() { assert!(&s[0..1] == "["); let mut p = Vec::new(); @@ -340,7 +341,7 @@ fn test_virtual() { .cloned() .collect::>() .into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let v0 = env.unifier.get_fresh_var().0; let v1 = env.unifier.get_fresh_var().0; From 77943a81178c18889ea7a9be88cb0913e1f505bf Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 12 Aug 2021 13:55:15 +0800 Subject: [PATCH 103/131] added primitive codegen test --- nac3core/src/codegen/expr.rs | 2 +- nac3core/src/codegen/mod.rs | 41 +++--- nac3core/src/codegen/stmt.rs | 2 +- nac3core/src/codegen/test.rs | 246 +++++++++++++++++++++++++++++++++++ 4 files changed, 267 insertions(+), 24 deletions(-) create mode 100644 nac3core/src/codegen/test.rs diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index d21d3ba7..59bb0430 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -14,7 +14,7 @@ use inkwell::{ use itertools::{chain, izip, zip, Itertools}; use rustpython_parser::ast::{self, Boolop, Constant, Expr, ExprKind, Operator}; -impl<'ctx> CodeGenContext<'ctx> { +impl<'ctx, 'a> CodeGenContext<'ctx, 'a> { fn get_subst_key(&mut self, obj: Option, fun: &FunSignature) -> String { let mut vars = obj .map(|ty| { diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index c3fba2db..9ca1cb58 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -16,21 +16,23 @@ use inkwell::{ AddressSpace, }; use itertools::Itertools; -use rayon::current_thread_index; -use rustpython_parser::ast::{Stmt, StmtKind}; +use rustpython_parser::ast::Stmt; use std::collections::HashMap; use std::sync::Arc; mod expr; mod stmt; -pub struct CodeGenContext<'ctx> { +#[cfg(test)] +mod test; + +pub struct CodeGenContext<'ctx, 'a> { pub ctx: &'ctx Context, pub builder: Builder<'ctx>, pub module: Module<'ctx>, - pub top_level: &'ctx TopLevelContext, + pub top_level: &'a TopLevelContext, pub unifier: Unifier, - pub resolver: Box, + pub resolver: Arc, pub var_assignment: HashMap>, pub type_cache: HashMap>, pub primitives: PrimitiveStore, @@ -45,9 +47,9 @@ pub struct CodeGenTask { pub subst: Vec<(Type, Type)>, pub symbol_name: String, pub signature: FunSignature, - pub body: Stmt>, + pub body: Vec>>, pub unifier_index: usize, - pub resolver: Box, + pub resolver: Arc, } fn get_llvm_type<'ctx>( @@ -60,7 +62,7 @@ fn get_llvm_type<'ctx>( use TypeEnum::*; // we assume the type cache should already contain primitive types, // and they should be passed by value instead of passing as pointer. - type_cache.get(&ty).cloned().unwrap_or_else(|| match &*unifier.get_ty(ty) { + type_cache.get(&unifier.get_representative(ty)).cloned().unwrap_or_else(|| match &*unifier.get_ty(ty) { TObj { obj_id, fields, .. 
} => { // a struct with fields in the order of declaration let defs = top_level.definitions.read(); @@ -97,16 +99,13 @@ fn get_llvm_type<'ctx>( }) } -pub fn gen_func(task: CodeGenTask, top_level_ctx: Arc) { +pub fn gen_func<'ctx>(context: &'ctx Context, builder: Builder<'ctx>, module: Module<'ctx>, task: CodeGenTask, top_level_ctx: Arc) -> Module<'ctx> { // unwrap_or(0) is for unit tests without using rayon - let thread_id = current_thread_index().unwrap_or(0); let (mut unifier, primitives) = { let unifiers = top_level_ctx.unifiers.read(); let (unifier, primitives) = &unifiers[task.unifier_index]; (Unifier::from_shared_unifier(unifier), *primitives) }; - let contexts = top_level_ctx.conetexts.read(); - let context = contexts[thread_id].lock(); for (a, b) in task.subst.iter() { // this should be unification between variables and concrete types @@ -124,10 +123,10 @@ pub fn gen_func(task: CodeGenTask, top_level_ctx: Arc) { }; let mut type_cache: HashMap<_, _> = [ - (primitives.int32, context.i32_type().into()), - (primitives.int64, context.i64_type().into()), - (primitives.float, context.f64_type().into()), - (primitives.bool, context.bool_type().into()), + (unifier.get_representative(primitives.int32), context.i32_type().into()), + (unifier.get_representative(primitives.int64), context.i64_type().into()), + (unifier.get_representative(primitives.float), context.f64_type().into()), + (unifier.get_representative(primitives.bool), context.bool_type().into()), ] .iter() .cloned() @@ -155,8 +154,6 @@ pub fn gen_func(task: CodeGenTask, top_level_ctx: Arc) { .fn_type(¶ms, false) }; - let builder = context.create_builder(); - let module = context.create_module(&task.symbol_name); let fn_val = module.add_function(&task.symbol_name, fn_type, None); let init_bb = context.append_basic_block(fn_val, "init"); builder.position_at_end(init_bb); @@ -189,9 +186,9 @@ pub fn gen_func(task: CodeGenTask, top_level_ctx: Arc) { unifier, }; - if let StmtKind::FunctionDef { body, .. 
} = &task.body.node { - for stmt in body.iter() { - code_gen_context.gen_stmt(stmt); - } + for stmt in task.body.iter() { + code_gen_context.gen_stmt(stmt); } + + code_gen_context.module } diff --git a/nac3core/src/codegen/stmt.rs b/nac3core/src/codegen/stmt.rs index fa9727f8..9f468609 100644 --- a/nac3core/src/codegen/stmt.rs +++ b/nac3core/src/codegen/stmt.rs @@ -3,7 +3,7 @@ use crate::typecheck::typedef::Type; use inkwell::values::{BasicValue, BasicValueEnum, PointerValue}; use rustpython_parser::ast::{Expr, ExprKind, Stmt, StmtKind}; -impl<'ctx> CodeGenContext<'ctx> { +impl<'ctx, 'a> CodeGenContext<'ctx, 'a> { fn gen_var(&mut self, ty: Type) -> PointerValue<'ctx> { // put the alloca in init block let current = self.builder.get_insert_block().unwrap(); diff --git a/nac3core/src/codegen/test.rs b/nac3core/src/codegen/test.rs new file mode 100644 index 00000000..c9401f09 --- /dev/null +++ b/nac3core/src/codegen/test.rs @@ -0,0 +1,246 @@ +use super::{gen_func, CodeGenTask}; +use crate::{ + location::Location, + symbol_resolver::{SymbolResolver, SymbolValue}, + top_level::{DefinitionId, TopLevelContext}, + typecheck::{ + magic_methods::set_primitives_magic_methods, + type_inferencer::{CodeLocation, FunctionData, Inferencer, PrimitiveStore}, + typedef::{Call, FunSignature, FuncArg, Type, TypeEnum, Unifier}, + }, +}; +use indoc::indoc; +use inkwell::context::Context; +use parking_lot::RwLock; +use rustpython_parser::{ast::fold::Fold, parser::parse_program}; +use std::collections::HashMap; +use std::sync::Arc; + +#[derive(Clone)] +struct Resolver { + id_to_type: HashMap, + id_to_def: HashMap, + class_names: HashMap, +} + +impl SymbolResolver for Resolver { + fn get_symbol_type(&self, _: &mut Unifier, _: &PrimitiveStore, str: &str) -> Option { + self.id_to_type.get(str).cloned() + } + + fn get_symbol_value(&self, _: &str) -> Option { + unimplemented!() + } + + fn get_symbol_location(&self, _: &str) -> Option { + unimplemented!() + } + + fn get_identifier_def(&self, id: &str) -> Option { + self.id_to_def.get(id).cloned() + } +} + +struct TestEnvironment { + pub unifier: Unifier, + pub function_data: FunctionData, + pub primitives: PrimitiveStore, + pub id_to_name: HashMap, + pub identifier_mapping: HashMap, + pub virtual_checks: Vec<(Type, Type)>, + pub calls: HashMap>, + pub top_level: TopLevelContext, +} + +impl TestEnvironment { + pub fn basic_test_env() -> TestEnvironment { + let mut unifier = Unifier::new(); + + let int32 = unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(0), + fields: HashMap::new().into(), + params: HashMap::new(), + }); + let int64 = unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(1), + fields: HashMap::new().into(), + params: HashMap::new(), + }); + let float = unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(2), + fields: HashMap::new().into(), + params: HashMap::new(), + }); + let bool = unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(3), + fields: HashMap::new().into(), + params: HashMap::new(), + }); + let none = unifier.add_ty(TypeEnum::TObj { + obj_id: DefinitionId(4), + fields: HashMap::new().into(), + params: HashMap::new(), + }); + let primitives = PrimitiveStore { int32, int64, float, bool, none }; + set_primitives_magic_methods(&primitives, &mut unifier); + + let id_to_name = [ + (0, "int32".to_string()), + (1, "int64".to_string()), + (2, "float".to_string()), + (3, "bool".to_string()), + (4, "none".to_string()), + ] + .iter() + .cloned() + .collect(); + + let mut identifier_mapping = HashMap::new(); + 
identifier_mapping.insert("None".into(), none); + + let resolver = Arc::new(Resolver { + id_to_type: identifier_mapping.clone(), + id_to_def: Default::default(), + class_names: Default::default(), + }) as Arc; + + TestEnvironment { + unifier, + top_level: TopLevelContext { + definitions: Default::default(), + unifiers: Default::default(), + conetexts: Default::default(), + }, + function_data: FunctionData { + resolver, + bound_variables: Vec::new(), + return_type: Some(primitives.int32), + }, + primitives, + id_to_name, + identifier_mapping, + virtual_checks: Vec::new(), + calls: HashMap::new(), + } + } + + fn get_inferencer(&mut self) -> Inferencer { + Inferencer { + top_level: &self.top_level, + function_data: &mut self.function_data, + unifier: &mut self.unifier, + variable_mapping: Default::default(), + primitives: &mut self.primitives, + virtual_checks: &mut self.virtual_checks, + calls: &mut self.calls, + } + } +} + +#[test] +fn test_primitives() { + let mut env = TestEnvironment::basic_test_env(); + let context = Context::create(); + let module = context.create_module("test"); + let builder = context.create_builder(); + + let signature = FunSignature { + args: vec![ + FuncArg { name: "a".to_string(), ty: env.primitives.int32, default_value: None }, + FuncArg { name: "b".to_string(), ty: env.primitives.int32, default_value: None }, + ], + ret: env.primitives.int32, + vars: HashMap::new(), + }; + + let mut inferencer = env.get_inferencer(); + let source = indoc! { " + c = a + b + d = a if c == 1 else 0 + return d + "}; + let statements = parse_program(source).unwrap(); + + let statements = statements + .into_iter() + .map(|v| inferencer.fold_stmt(v)) + .collect::, _>>() + .unwrap(); + + let top_level = Arc::new(TopLevelContext { + definitions: Default::default(), + unifiers: Arc::new(RwLock::new(vec![(env.unifier.get_shared_unifier(), env.primitives)])), + conetexts: Default::default(), + }); + + let task = CodeGenTask { + subst: Default::default(), + symbol_name: "testing".to_string(), + body: statements, + unifier_index: 0, + resolver: env.function_data.resolver.clone(), + signature, + }; + + let module = gen_func(&context, builder, module, task, top_level); + // the following IR is equivalent to + // ``` + // ; ModuleID = 'test.ll' + // source_filename = "test" + // + // ; Function Attrs: norecurse nounwind readnone + // define i32 @testing(i32 %0, i32 %1) local_unnamed_addr #0 { + // init: + // %add = add i32 %1, %0 + // %cmp = icmp eq i32 %add, 1 + // %ifexpr = select i1 %cmp, i32 %0, i32 0 + // ret i32 %ifexpr + // } + // + // attributes #0 = { norecurse nounwind readnone } + // ``` + // after O2 optimization + + let expected = indoc! 
{" + ; ModuleID = 'test' + source_filename = \"test\" + + define i32 @testing(i32 %0, i32 %1) { + init: + %a = alloca i32 + store i32 %0, i32* %a + %b = alloca i32 + store i32 %1, i32* %b + %tmp = alloca i32 + %tmp4 = alloca i32 + br label %body + + body: ; preds = %init + %load = load i32, i32* %a + %load1 = load i32, i32* %b + %add = add i32 %load, %load1 + store i32 %add, i32* %tmp + %load2 = load i32, i32* %tmp + %cmp = icmp eq i32 %load2, 1 + br i1 %cmp, label %then, label %else + + then: ; preds = %body + %load3 = load i32, i32* %a + br label %cont + + else: ; preds = %body + br label %cont + + cont: ; preds = %else, %then + %ifexpr = phi i32 [ %load3, %then ], [ 0, %else ] + store i32 %ifexpr, i32* %tmp4 + %load5 = load i32, i32* %tmp4 + ret i32 %load5 + } + "} + .trim(); + let ir = module.print_to_string().to_string(); + println!("src:\n{}", source); + println!("IR:\n{}", ir); + assert_eq!(expected, ir.trim()); +} From 1f6c16e08b17967e5d94e37ef0ac41462d7c12c7 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 12 Aug 2021 13:56:51 +0800 Subject: [PATCH 104/131] fixed compilation failure --- nac3core/src/codegen/test.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nac3core/src/codegen/test.rs b/nac3core/src/codegen/test.rs index c9401f09..bcd58704 100644 --- a/nac3core/src/codegen/test.rs +++ b/nac3core/src/codegen/test.rs @@ -59,27 +59,27 @@ impl TestEnvironment { let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(0), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let int64 = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(1), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let float = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(2), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let bool = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(3), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let none = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(4), fields: HashMap::new().into(), - params: HashMap::new(), + params: HashMap::new().into(), }); let primitives = PrimitiveStore { int32, int64, float, bool, none }; set_primitives_magic_methods(&primitives, &mut unifier); From 8c7ccb626b592101f48e5343f281dea99957eeb1 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 12 Aug 2021 14:44:50 +0800 Subject: [PATCH 105/131] fixed symbol_resolver blanket implementation --- nac3core/src/symbol_resolver.rs | 243 +++++++++++++++++--------------- 1 file changed, 133 insertions(+), 110 deletions(-) diff --git a/nac3core/src/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs index ff048927..3063eaab 100644 --- a/nac3core/src/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -36,8 +36,127 @@ pub trait SymbolResolver { // handle function call etc. } +// convert type annotation into type +pub fn parse_type_annotation( + resolver: &dyn SymbolResolver, + top_level: &TopLevelContext, + unifier: &mut Unifier, + primitives: &PrimitiveStore, + expr: &Expr, +) -> Result { + use rustpython_parser::ast::ExprKind::*; + match &expr.node { + Name { id, .. 
} => match id.as_str() { + "int32" => Ok(primitives.int32), + "int64" => Ok(primitives.int64), + "float" => Ok(primitives.float), + "bool" => Ok(primitives.bool), + "None" => Ok(primitives.none), + x => { + let obj_id = resolver.get_identifier_def(x); + if let Some(obj_id) = obj_id { + let defs = top_level.definitions.read(); + let def = defs[obj_id.0].read(); + if let TopLevelDef::Class { fields, methods, type_vars, .. } = &*def { + if !type_vars.is_empty() { + return Err(format!( + "Unexpected number of type parameters: expected {} but got 0", + type_vars.len() + )); + } + let fields = RefCell::new( + chain( + fields.iter().map(|(k, v)| (k.clone(), *v)), + methods.iter().map(|(k, v, _)| (k.clone(), *v)), + ) + .collect(), + ); + Ok(unifier.add_ty(TypeEnum::TObj { + obj_id, + fields, + params: Default::default(), + })) + } else { + Err("Cannot use function name as type".into()) + } + } else { + // it could be a type variable + let ty = resolver + .get_symbol_type(unifier, primitives, x) + .ok_or_else(|| "Cannot use function name as type".to_owned())?; + if let TypeEnum::TVar { .. } = &*unifier.get_ty(ty) { + Ok(ty) + } else { + Err(format!("Unknown type annotation {}", x)) + } + } + } + }, + Subscript { value, slice, .. } => { + if let Name { id, .. } = &value.node { + if id == "virtual" { + let ty = parse_type_annotation(resolver, top_level, unifier, primitives, slice)?; + Ok(unifier.add_ty(TypeEnum::TVirtual { ty })) + } else { + let types = if let Tuple { elts, .. } = &slice.node { + elts.iter() + .map(|v| parse_type_annotation(resolver, top_level, unifier, primitives, v)) + .collect::, _>>()? + } else { + vec![parse_type_annotation(resolver, top_level, unifier, primitives, slice)?] + }; + + let obj_id = resolver + .get_identifier_def(id) + .ok_or_else(|| format!("Unknown type annotation {}", id))?; + let defs = top_level.definitions.read(); + let def = defs[obj_id.0].read(); + if let TopLevelDef::Class { fields, methods, type_vars, .. } = &*def { + if types.len() != type_vars.len() { + return Err(format!( + "Unexpected number of type parameters: expected {} but got {}", + type_vars.len(), + types.len() + )); + } + let mut subst = HashMap::new(); + for (var, ty) in izip!(type_vars.iter(), types.iter()) { + let id = if let TypeEnum::TVar { id, .. } = &*unifier.get_ty(*var) { + *id + } else { + unreachable!() + }; + subst.insert(id, *ty); + } + let mut fields = fields + .iter() + .map(|(attr, ty)| { + let ty = unifier.subst(*ty, &subst).unwrap_or(*ty); + (attr.clone(), ty) + }) + .collect::>(); + fields.extend(methods.iter().map(|(attr, ty, _)| { + let ty = unifier.subst(*ty, &subst).unwrap_or(*ty); + (attr.clone(), ty) + })); + Ok(unifier.add_ty(TypeEnum::TObj { + obj_id, + fields: fields.into(), + params: subst.into(), + })) + } else { + Err("Cannot use function name as type".into()) + } + } + } else { + Err("unsupported type expression".into()) + } + } + _ => Err("unsupported type expression".into()), + } +} + impl dyn SymbolResolver { - // convert type annotation into type pub fn parse_type_annotation( &self, top_level: &TopLevelContext, @@ -45,114 +164,18 @@ impl dyn SymbolResolver { primitives: &PrimitiveStore, expr: &Expr, ) -> Result { - use rustpython_parser::ast::ExprKind::*; - match &expr.node { - Name { id, .. 
} => match id.as_str() { - "int32" => Ok(primitives.int32), - "int64" => Ok(primitives.int64), - "float" => Ok(primitives.float), - "bool" => Ok(primitives.bool), - "None" => Ok(primitives.none), - x => { - let obj_id = self.get_identifier_def(x); - if let Some(obj_id) = obj_id { - let defs = top_level.definitions.read(); - let def = defs[obj_id.0].read(); - if let TopLevelDef::Class { fields, methods, type_vars, .. } = &*def { - if !type_vars.is_empty() { - return Err(format!( - "Unexpected number of type parameters: expected {} but got 0", - type_vars.len() - )); - } - let fields = RefCell::new( - chain( - fields.iter().map(|(k, v)| (k.clone(), *v)), - methods.iter().map(|(k, v, _)| (k.clone(), *v)), - ) - .collect(), - ); - Ok(unifier.add_ty(TypeEnum::TObj { - obj_id, - fields, - params: Default::default(), - })) - } else { - Err("Cannot use function name as type".into()) - } - } else { - // it could be a type variable - let ty = self - .get_symbol_type(unifier, primitives, x) - .ok_or_else(|| "Cannot use function name as type".to_owned())?; - if let TypeEnum::TVar { .. } = &*unifier.get_ty(ty) { - Ok(ty) - } else { - Err(format!("Unknown type annotation {}", x)) - } - } - } - }, - Subscript { value, slice, .. } => { - if let Name { id, .. } = &value.node { - if id == "virtual" { - let ty = - self.parse_type_annotation(top_level, unifier, primitives, slice)?; - Ok(unifier.add_ty(TypeEnum::TVirtual { ty })) - } else { - let types = if let Tuple { elts, .. } = &slice.node { - elts.iter() - .map(|v| { - self.parse_type_annotation(top_level, unifier, primitives, v) - }) - .collect::, _>>()? - } else { - vec![self.parse_type_annotation(top_level, unifier, primitives, slice)?] - }; - - let obj_id = self - .get_identifier_def(id) - .ok_or_else(|| format!("Unknown type annotation {}", id))?; - let defs = top_level.definitions.read(); - let def = defs[obj_id.0].read(); - if let TopLevelDef::Class { fields, methods, type_vars, .. } = &*def { - if types.len() != type_vars.len() { - return Err(format!( - "Unexpected number of type parameters: expected {} but got {}", - type_vars.len(), - types.len() - )); - } - let mut subst = HashMap::new(); - for (var, ty) in izip!(type_vars.iter(), types.iter()) { - let id = if let TypeEnum::TVar { id, .. 
} = &*unifier.get_ty(*var) { - *id - } else { - unreachable!() - }; - subst.insert(id, *ty); - } - let mut fields = fields - .iter() - .map(|(attr, ty)| { - let ty = unifier.subst(*ty, &subst).unwrap_or(*ty); - (attr.clone(), ty) - }) - .collect::>(); - fields.extend(methods.iter().map(|(attr, ty, _)| { - let ty = unifier.subst(*ty, &subst).unwrap_or(*ty); - (attr.clone(), ty) - })); - Ok(unifier.add_ty(TypeEnum::TObj { obj_id, fields: fields.into(), params: subst.into() })) - } else { - Err("Cannot use function name as type".into()) - } - } - } else { - Err("unsupported type expression".into()) - } - } - _ => Err("unsupported type expression".into()), - } + parse_type_annotation(self, top_level, unifier, primitives, expr) + } +} + +impl dyn SymbolResolver + Send { + pub fn parse_type_annotation( + &self, + top_level: &TopLevelContext, + unifier: &mut Unifier, + primitives: &PrimitiveStore, + expr: &Expr, + ) -> Result { + parse_type_annotation(self, top_level, unifier, primitives, expr) } } From 1db8378f60f573c0016f89b83336555ab18a6322 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 12 Aug 2021 16:36:23 +0800 Subject: [PATCH 106/131] formatting --- nac3core/src/codegen/mod.rs | 71 +++++++++++++++------------ nac3core/src/symbol_resolver.rs | 11 +++-- nac3core/src/typecheck/typedef/mod.rs | 6 ++- 3 files changed, 53 insertions(+), 35 deletions(-) diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index 9ca1cb58..de9ff948 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -62,44 +62,53 @@ fn get_llvm_type<'ctx>( use TypeEnum::*; // we assume the type cache should already contain primitive types, // and they should be passed by value instead of passing as pointer. - type_cache.get(&unifier.get_representative(ty)).cloned().unwrap_or_else(|| match &*unifier.get_ty(ty) { - TObj { obj_id, fields, .. } => { - // a struct with fields in the order of declaration - let defs = top_level.definitions.read(); - let definition = defs.get(obj_id.0).unwrap(); - let ty = if let TopLevelDef::Class { fields: fields_list, .. } = &*definition.read() { - let fields = fields.borrow(); - let fields = fields_list + type_cache.get(&unifier.get_representative(ty)).cloned().unwrap_or_else(|| { + match &*unifier.get_ty(ty) { + TObj { obj_id, fields, .. } => { + // a struct with fields in the order of declaration + let defs = top_level.definitions.read(); + let definition = defs.get(obj_id.0).unwrap(); + let ty = if let TopLevelDef::Class { fields: fields_list, .. 
} = &*definition.read() + { + let fields = fields.borrow(); + let fields = fields_list + .iter() + .map(|f| get_llvm_type(ctx, unifier, top_level, type_cache, fields[&f.0])) + .collect_vec(); + ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + } else { + unreachable!() + }; + ty + } + TTuple { ty } => { + // a struct with fields in the order present in the tuple + let fields = ty .iter() - .map(|f| get_llvm_type(ctx, unifier, top_level, type_cache, fields[&f.0])) + .map(|ty| get_llvm_type(ctx, unifier, top_level, type_cache, *ty)) .collect_vec(); ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() - } else { - unreachable!() - }; - ty + } + TList { ty } => { + // a struct with an integer and a pointer to an array + let element_type = get_llvm_type(ctx, unifier, top_level, type_cache, *ty); + let fields = + [ctx.i32_type().into(), element_type.ptr_type(AddressSpace::Generic).into()]; + ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() + } + TVirtual { .. } => unimplemented!(), + _ => unreachable!(), } - TTuple { ty } => { - // a struct with fields in the order present in the tuple - let fields = ty - .iter() - .map(|ty| get_llvm_type(ctx, unifier, top_level, type_cache, *ty)) - .collect_vec(); - ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() - } - TList { ty } => { - // a struct with an integer and a pointer to an array - let element_type = get_llvm_type(ctx, unifier, top_level, type_cache, *ty); - let fields = - [ctx.i32_type().into(), element_type.ptr_type(AddressSpace::Generic).into()]; - ctx.struct_type(&fields, false).ptr_type(AddressSpace::Generic).into() - } - TVirtual { .. } => unimplemented!(), - _ => unreachable!(), }) } -pub fn gen_func<'ctx>(context: &'ctx Context, builder: Builder<'ctx>, module: Module<'ctx>, task: CodeGenTask, top_level_ctx: Arc) -> Module<'ctx> { +pub fn gen_func<'ctx>( + context: &'ctx Context, + builder: Builder<'ctx>, + module: Module<'ctx>, + task: CodeGenTask, + top_level_ctx: Arc, +) -> Module<'ctx> { // unwrap_or(0) is for unit tests without using rayon let (mut unifier, primitives) = { let unifiers = top_level_ctx.unifiers.read(); diff --git a/nac3core/src/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs index 3063eaab..e539ef14 100644 --- a/nac3core/src/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -95,15 +95,20 @@ pub fn parse_type_annotation( Subscript { value, slice, .. } => { if let Name { id, .. } = &value.node { if id == "virtual" { - let ty = parse_type_annotation(resolver, top_level, unifier, primitives, slice)?; + let ty = + parse_type_annotation(resolver, top_level, unifier, primitives, slice)?; Ok(unifier.add_ty(TypeEnum::TVirtual { ty })) } else { let types = if let Tuple { elts, .. } = &slice.node { elts.iter() - .map(|v| parse_type_annotation(resolver, top_level, unifier, primitives, v)) + .map(|v| { + parse_type_annotation(resolver, top_level, unifier, primitives, v) + }) .collect::, _>>()? } else { - vec![parse_type_annotation(resolver, top_level, unifier, primitives, slice)?] + vec![parse_type_annotation( + resolver, top_level, unifier, primitives, slice, + )?] 
}; let obj_id = resolver diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 979213ad..d2e75c8a 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -696,7 +696,11 @@ impl Unifier { let fields = self .subst_map(&fields.borrow(), mapping) .unwrap_or_else(|| fields.borrow().clone()); - Some(self.add_ty(TypeEnum::TObj { obj_id, params: params.into(), fields: fields.into() })) + Some(self.add_ty(TypeEnum::TObj { + obj_id, + params: params.into(), + fields: fields.into(), + })) } else { None } From cb01c79603c478415713cb8179cdc7fbcff0eee9 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 13 Aug 2021 13:33:59 +0800 Subject: [PATCH 107/131] removed Arc from TypeEnum --- nac3core/src/codegen/mod.rs | 8 ++-- nac3core/src/codegen/test.rs | 8 ++-- nac3core/src/symbol_resolver.rs | 14 +------ nac3core/src/typecheck/type_inferencer/mod.rs | 14 +++---- .../src/typecheck/type_inferencer/test.rs | 6 +-- nac3core/src/typecheck/typedef/mod.rs | 38 +++++++++++++++---- 6 files changed, 50 insertions(+), 38 deletions(-) diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index de9ff948..f78581f8 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -49,7 +49,7 @@ pub struct CodeGenTask { pub signature: FunSignature, pub body: Vec>>, pub unifier_index: usize, - pub resolver: Arc, + pub resolver: Arc, } fn get_llvm_type<'ctx>( @@ -108,7 +108,7 @@ pub fn gen_func<'ctx>( module: Module<'ctx>, task: CodeGenTask, top_level_ctx: Arc, -) -> Module<'ctx> { +) -> (Builder<'ctx>, Module<'ctx>) { // unwrap_or(0) is for unit tests without using rayon let (mut unifier, primitives) = { let unifiers = top_level_ctx.unifiers.read(); @@ -199,5 +199,7 @@ pub fn gen_func<'ctx>( code_gen_context.gen_stmt(stmt); } - code_gen_context.module + let CodeGenContext { builder, module, .. 
} = code_gen_context; + + (builder, module) } diff --git a/nac3core/src/codegen/test.rs b/nac3core/src/codegen/test.rs index bcd58704..701d026a 100644 --- a/nac3core/src/codegen/test.rs +++ b/nac3core/src/codegen/test.rs @@ -6,7 +6,7 @@ use crate::{ typecheck::{ magic_methods::set_primitives_magic_methods, type_inferencer::{CodeLocation, FunctionData, Inferencer, PrimitiveStore}, - typedef::{Call, FunSignature, FuncArg, Type, TypeEnum, Unifier}, + typedef::{CallId, FunSignature, FuncArg, Type, TypeEnum, Unifier}, }, }; use indoc::indoc; @@ -48,7 +48,7 @@ struct TestEnvironment { pub id_to_name: HashMap, pub identifier_mapping: HashMap, pub virtual_checks: Vec<(Type, Type)>, - pub calls: HashMap>, + pub calls: HashMap, pub top_level: TopLevelContext, } @@ -102,7 +102,7 @@ impl TestEnvironment { id_to_type: identifier_mapping.clone(), id_to_def: Default::default(), class_names: Default::default(), - }) as Arc; + }) as Arc; TestEnvironment { unifier, @@ -239,7 +239,7 @@ fn test_primitives() { } "} .trim(); - let ir = module.print_to_string().to_string(); + let ir = module.1.print_to_string().to_string(); println!("src:\n{}", source); println!("IR:\n{}", ir); assert_eq!(expected, ir.trim()); diff --git a/nac3core/src/symbol_resolver.rs b/nac3core/src/symbol_resolver.rs index e539ef14..5f716ee4 100644 --- a/nac3core/src/symbol_resolver.rs +++ b/nac3core/src/symbol_resolver.rs @@ -161,19 +161,7 @@ pub fn parse_type_annotation( } } -impl dyn SymbolResolver { - pub fn parse_type_annotation( - &self, - top_level: &TopLevelContext, - unifier: &mut Unifier, - primitives: &PrimitiveStore, - expr: &Expr, - ) -> Result { - parse_type_annotation(self, top_level, unifier, primitives, expr) - } -} - -impl dyn SymbolResolver + Send { +impl dyn SymbolResolver + Send + Sync { pub fn parse_type_annotation( &self, top_level: &TopLevelContext, diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index cdd83a5b..304431f3 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -3,8 +3,8 @@ use std::convert::{From, TryInto}; use std::iter::once; use std::{cell::RefCell, sync::Arc}; -use super::magic_methods::*; use super::typedef::{Call, FunSignature, FuncArg, Type, TypeEnum, Unifier}; +use super::{magic_methods::*, typedef::CallId}; use crate::{symbol_resolver::SymbolResolver, top_level::TopLevelContext}; use itertools::izip; use rustpython_parser::ast::{ @@ -38,7 +38,7 @@ pub struct PrimitiveStore { } pub struct FunctionData { - pub resolver: Arc, + pub resolver: Arc, pub return_type: Option, pub bound_variables: Vec, } @@ -50,7 +50,7 @@ pub struct Inferencer<'a> { pub primitives: &'a PrimitiveStore, pub virtual_checks: &'a mut Vec<(Type, Type)>, pub variable_mapping: HashMap, - pub calls: &'a mut HashMap>, + pub calls: &'a mut HashMap, } struct NaiveFolder(); @@ -190,13 +190,13 @@ impl<'a> Inferencer<'a> { params: Vec, ret: Type, ) -> InferenceResult { - let call = Arc::new(Call { + let call = self.unifier.add_call(Call { posargs: params, kwargs: HashMap::new(), ret, fun: RefCell::new(None), }); - self.calls.insert(location.into(), call.clone()); + self.calls.insert(location.into(), call); let call = self.unifier.add_ty(TypeEnum::TCall(vec![call].into())); let fields = once((method, call)).collect(); let record = self.unifier.add_record(fields); @@ -398,7 +398,7 @@ impl<'a> Inferencer<'a> { .map(|v| fold::fold_keyword(self, v)) .collect::, _>>()?; let ret = self.unifier.get_fresh_var().0; - let call = 
Arc::new(Call { + let call = self.unifier.add_call(Call { posargs: args.iter().map(|v| v.custom.unwrap()).collect(), kwargs: keywords .iter() @@ -407,7 +407,7 @@ impl<'a> Inferencer<'a> { fun: RefCell::new(None), ret, }); - self.calls.insert(location.into(), call.clone()); + self.calls.insert(location.into(), call); let call = self.unifier.add_ty(TypeEnum::TCall(vec![call].into())); self.unifier.unify(func.custom.unwrap(), call)?; diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 21012952..cebb0e89 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -40,7 +40,7 @@ struct TestEnvironment { pub id_to_name: HashMap, pub identifier_mapping: HashMap, pub virtual_checks: Vec<(Type, Type)>, - pub calls: HashMap>, + pub calls: HashMap, pub top_level: TopLevelContext, } @@ -94,7 +94,7 @@ impl TestEnvironment { id_to_type: identifier_mapping.clone(), id_to_def: Default::default(), class_names: Default::default(), - }) as Arc; + }) as Arc; TestEnvironment { top_level: TopLevelContext { @@ -273,7 +273,7 @@ impl TestEnvironment { .cloned() .collect(), class_names, - }) as Arc; + }) as Arc; TestEnvironment { unifier, diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index d2e75c8a..6f0c34d7 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -16,6 +16,9 @@ mod test; /// Handle for a type, implementated as a key in the unification table. pub type Type = UnificationKey; +#[derive(Clone, Copy, PartialEq, Eq)] +pub struct CallId(usize); + pub type Mapping = HashMap; type VarMap = Mapping; @@ -73,7 +76,7 @@ pub enum TypeEnum { TVirtual { ty: Type, }, - TCall(RefCell>>), + TCall(RefCell>), TFunc(FunSignature), } @@ -92,17 +95,18 @@ impl TypeEnum { } } -pub type SharedUnifier = Arc, u32)>>; +pub type SharedUnifier = Arc, u32, Vec)>>; pub struct Unifier { unification_table: UnificationTable>, + calls: Vec>, var_id: u32, } impl Unifier { /// Get an empty unifier pub fn new() -> Unifier { - Unifier { unification_table: UnificationTable::new(), var_id: 0 } + Unifier { unification_table: UnificationTable::new(), var_id: 0, calls: Vec::new() } } /// Determine if the two types are the same @@ -112,11 +116,19 @@ impl Unifier { pub fn from_shared_unifier(unifier: &SharedUnifier) -> Unifier { let lock = unifier.lock().unwrap(); - Unifier { unification_table: UnificationTable::from_send(&lock.0), var_id: lock.1 } + Unifier { + unification_table: UnificationTable::from_send(&lock.0), + var_id: lock.1, + calls: lock.2.iter().map(|v| Rc::new(v.clone())).collect_vec(), + } } pub fn get_shared_unifier(&self) -> SharedUnifier { - Arc::new(Mutex::new((self.unification_table.get_send(), self.var_id))) + Arc::new(Mutex::new(( + self.unification_table.get_send(), + self.var_id, + self.calls.iter().map(|v| v.as_ref().clone()).collect_vec(), + ))) } /// Register a type to the unifier. @@ -135,6 +147,12 @@ impl Unifier { }) } + pub fn add_call(&mut self, call: Call) -> CallId { + let id = CallId(self.calls.len()); + self.calls.push(Rc::new(call)); + id + } + pub fn get_representative(&mut self, ty: Type) -> Type { self.unification_table.get_representative(ty) } @@ -463,11 +481,11 @@ impl Unifier { .collect(); // we unify every calls to the function signature. 
for c in calls.borrow().iter() { - let Call { posargs, kwargs, ret, fun } = c.as_ref(); + let Call { posargs, kwargs, ret, fun } = &*self.calls[c.0].clone(); let instantiated = self.instantiate_fun(b, signature); - let signature; let r = self.get_ty(instantiated); let r = r.as_ref(); + let signature; if let TypeEnum::TFunc(s) = &*r { signature = s; } else { @@ -765,10 +783,14 @@ impl Unifier { } } TypeEnum::TCall(calls) => { + let call_store = self.calls.clone(); for t in calls .borrow() .iter() - .map(|call| chain!(call.posargs.iter(), call.kwargs.values(), once(&call.ret))) + .map(|call| { + let call = call_store[call.0].as_ref(); + chain!(call.posargs.iter(), call.kwargs.values(), once(&call.ret)) + }) .flatten() { self.occur_check(a, *t)?; From e176aa660d43ad389b862345078ec2a579ffa0a9 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Thu, 12 Aug 2021 14:48:11 +0800 Subject: [PATCH 108/131] commit for pull new symbol resolver --- nac3core/src/top_level.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 98cbacb9..6be9e69c 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -333,8 +333,6 @@ impl TopLevelComposer { let (params, fields ) = if let TypeEnum::TObj { - // FIXME: this params is immutable, and what - // should the key be, get the original typevar's var_id? params, fields, .. @@ -359,7 +357,7 @@ impl TopLevelComposer { // `class Foo(Generic[T, V, P]):` ast::ExprKind::Tuple {elts, ..} => { for e in elts { - // resolver.parse_type_annotation(self.definition_list.) // FIXME: + // let ty_def_id = resolver. } }, @@ -367,9 +365,6 @@ impl TopLevelComposer { ast::ExprKind::Name {id, ..} => { // the def_list // type_vars.push(resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable".to_string())?); FIXME: - - // the TypeEnum of the class - // FIXME: the `params` destructed above is not mutable, even if this is mutable, what should the key be? 
unimplemented!() }, From ba5bb78f11d761b525e542e8635b69c5d07710dc Mon Sep 17 00:00:00 2001 From: ychenfo Date: Fri, 13 Aug 2021 02:38:29 +0800 Subject: [PATCH 109/131] top level parse class base/generic --- nac3core/src/top_level.rs | 585 +++++++++++++++++++++++--------------- 1 file changed, 361 insertions(+), 224 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 6be9e69c..74f531d8 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,9 +1,11 @@ -use std::borrow::Borrow; +use std::borrow::{Borrow, BorrowMut}; +use std::collections::HashSet; use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; use crate::symbol_resolver::SymbolResolver; +use crate::typecheck::typedef::{FunSignature, FuncArg}; use inkwell::context::Context; use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; @@ -57,30 +59,31 @@ pub struct TopLevelContext { pub conetexts: Arc>>>, } -// like adding some info on top of the TopLevelDef for -// later parsing the class bases, method, and function sigatures -pub struct TopLevelDefInfo { - // the definition entry - def: TopLevelDef, - // the entry in the top_level unifier - ty: Type, - // the ast submitted by applications, primitives and - // class methods will have None value here - ast: Option>, -} - pub struct TopLevelComposer { - // list of top level definitions and their info - pub definition_list: RwLock>, + // list of top level definitions, same as top level context + pub definition_list: Arc>>>, + // list of top level Type, the index is same as the field `definition_list` + pub ty_list: RwLock>, + // list of top level ast, the index is same as the field `definition_list` and `ty_list` + pub ast_list: RwLock>>>, + // start as a primitive unifier, will add more top_level defs inside + pub unifier: RwLock, // primitive store pub primitives: PrimitiveStore, - // start as a primitive unifier, will add more top_level defs inside - pub unifier: Unifier, // mangled class method name to def_id - pub class_method_to_def_id: HashMap, + pub class_method_to_def_id: RwLock>, } impl TopLevelComposer { + pub fn to_top_level_context(&self) -> TopLevelContext { + TopLevelContext { + definitions: self.definition_list.clone(), + // FIXME: all the big unifier or? 
+ unifiers: Default::default(), + conetexts: Default::default(), + } + } + fn name_mangling(mut class_name: String, method_name: &str) -> String { class_name.push_str(method_name); class_name @@ -118,51 +121,47 @@ impl TopLevelComposer { (primitives, unifier) } - /// return a composer and things to make a "primitive" symbol resolver, so that the symbol + /// return a composer and things to make a "primitive" symbol resolver, so that the symbol /// resolver can later figure out primitive type definitions when passed a primitive type name pub fn new() -> (Vec<(String, DefinitionId, Type)>, Self) { let primitives = Self::make_primitives(); - // the def list including the entries of primitive info - let definition_list: Vec = vec![ - TopLevelDefInfo { - def: Self::make_top_level_class_def(0, None), - ast: None, - ty: primitives.0.int32, - }, - TopLevelDefInfo { - def: Self::make_top_level_class_def(1, None), - ast: None, - ty: primitives.0.int64, - }, - TopLevelDefInfo { - def: Self::make_top_level_class_def(2, None), - ast: None, - ty: primitives.0.float, - }, - TopLevelDefInfo { - def: Self::make_top_level_class_def(3, None), - ast: None, - ty: primitives.0.bool, - }, - TopLevelDefInfo { - def: Self::make_top_level_class_def(4, None), - ast: None, - ty: primitives.0.none, - }, + + let top_level_def_list = vec![ + RwLock::new(Self::make_top_level_class_def(0, None)), + RwLock::new(Self::make_top_level_class_def(1, None)), + RwLock::new(Self::make_top_level_class_def(2, None)), + RwLock::new(Self::make_top_level_class_def(3, None)), + RwLock::new(Self::make_top_level_class_def(4, None)), ]; - let composer = TopLevelComposer { - definition_list: definition_list.into(), + + let ast_list: Vec>> = vec![None, None, None, None, None]; + + let ty_list: Vec = vec![ + primitives.0.int32, + primitives.0.int64, + primitives.0.float, + primitives.0.bool, + primitives.0.none, + ]; + + let composer = TopLevelComposer { + definition_list: RwLock::new(top_level_def_list).into(), + ty_list: RwLock::new(ty_list), + ast_list: RwLock::new(ast_list), primitives: primitives.0, - unifier: primitives.1, + unifier: primitives.1.into(), class_method_to_def_id: Default::default(), }; - (vec![ - ("int32".into(), DefinitionId(0), composer.primitives.int32), - ("int64".into(), DefinitionId(1), composer.primitives.int64), - ("float".into(), DefinitionId(2), composer.primitives.float), - ("bool".into(), DefinitionId(3), composer.primitives.bool), - ("none".into(), DefinitionId(4), composer.primitives.none), - ], composer) + ( + vec![ + ("int32".into(), DefinitionId(0), composer.primitives.int32), + ("int64".into(), DefinitionId(1), composer.primitives.int64), + ("float".into(), DefinitionId(2), composer.primitives.float), + ("bool".into(), DefinitionId(3), composer.primitives.bool), + ("none".into(), DefinitionId(4), composer.primitives.none), + ], + composer, + ) } /// already include the definition_id of itself inside the ancestors vector @@ -202,24 +201,31 @@ impl TopLevelComposer { match &ast.node { ast::StmtKind::ClassDef { name, body, .. 
} => { let class_name = name.to_string(); - let mut def_list = self.definition_list.write(); + + let (mut def_list, mut ty_list, mut ast_list) = + (self.definition_list.write(), self.ty_list.write(), self.ast_list.write()); + + // will be deleted after tested + assert_eq!(ty_list.len(), def_list.len()); + assert_eq!(def_list.len(), ast_list.len()); + let class_def_id = def_list.len(); // add the class to the unifier - let ty = self.unifier.add_ty(TypeEnum::TObj { + let ty = self.unifier.write().add_ty(TypeEnum::TObj { obj_id: DefinitionId(class_def_id), fields: Default::default(), params: Default::default(), }); - // add the class to the definition list - def_list.push(TopLevelDefInfo { - def: Self::make_top_level_class_def(class_def_id, resolver.clone()), - // NOTE: Temporarily none here since function body need to be read later - ast: None, - ty, - }); - + // add the class to the definition lists + def_list + .push(Self::make_top_level_class_def(class_def_id, resolver.clone()).into()); + ty_list.push(ty); + // since later when registering class method, ast will still be used, + // here push None temporarly, later will push the ast + ast_list.push(None); + // parse class def body and register class methods into the def list // module's symbol resolver would not know the name of the class methods, // thus cannot return their definition_id? so we have to manage it ourselves @@ -228,9 +234,9 @@ impl TopLevelComposer { if let ast::StmtKind::FunctionDef { name, .. } = &b.node { let fun_name = Self::name_mangling(class_name.clone(), name); let def_id = def_list.len(); - + // add to unifier - let ty = self.unifier.add_ty(TypeEnum::TFunc( + let ty = self.unifier.write().add_ty(TypeEnum::TFunc( crate::typecheck::typedef::FunSignature { args: Default::default(), ret: self.primitives.none, @@ -239,60 +245,66 @@ impl TopLevelComposer { )); // add to the definition list - def_list.push(TopLevelDefInfo { - def: Self::make_top_level_function_def(fun_name.clone(), ty, resolver.clone()), - ty, - // since it is inside the class def body statments, the ast is None - ast: None, - }); + def_list.push( + Self::make_top_level_function_def( + fun_name.clone(), + ty, + resolver.clone(), + ) + .into(), + ); + ty_list.push(ty); + // the ast of class method is in the class, push None in to the list here + ast_list.push(None); // class method, do not let the symbol manager manage it, use our own map - self.class_method_to_def_id.insert(fun_name, DefinitionId(def_id)); + self.class_method_to_def_id.write().insert(fun_name, DefinitionId(def_id)); // if it is the contructor, special handling is needed. In the above // handling, we still add __init__ function to the class method if name == "__init__" { - // FIXME: how can this later be fetched? - def_list.push(TopLevelDefInfo { - def: TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) }, - // arbitary picked one for the constructor - ty: self.primitives.none, - // it is inside the class def body statments, so None - ast: None, - }) + // NOTE: how can this later be fetched? 
+ def_list.push( + TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) } + .into(), + ); + // arbitarily push one to make sure the index is correct + ty_list.push(self.primitives.none); + ast_list.push(None); } } } - // move the ast to the entry of the class in the def_list - def_list.get_mut(class_def_id).unwrap().ast = Some(ast); - + // move the ast to the entry of the class in the ast_list + ast_list[class_def_id] = Some(ast); + // return Ok((class_name, DefinitionId(class_def_id), ty)) - }, + } ast::StmtKind::FunctionDef { name, .. } => { let fun_name = name.to_string(); - - // add to the unifier - let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { - args: Default::default(), - ret: self.primitives.none, - vars: Default::default(), - })); - - // add to the definition list - let mut def_list = self.definition_list.write(); - def_list.push(TopLevelDefInfo { - def: Self::make_top_level_function_def( - name.into(), - self.primitives.none, - resolver, - ), - ast: Some(ast), - ty, - }); + // add to the unifier + let ty = self.unifier.write().add_ty(TypeEnum::TFunc( + crate::typecheck::typedef::FunSignature { + args: Default::default(), + ret: self.primitives.none, + vars: Default::default(), + }, + )); + + let (mut def_list, mut ty_list, mut ast_list) = + (self.definition_list.write(), self.ty_list.write(), self.ast_list.write()); + // add to the definition list + def_list.push( + Self::make_top_level_function_def(name.into(), self.primitives.none, resolver) + .into(), + ); + ty_list.push(ty); + ast_list.push(Some(ast)); + + // return Ok((fun_name, DefinitionId(def_list.len() - 1), ty)) } @@ -300,144 +312,269 @@ impl TopLevelComposer { } } - /// this should be called after all top level classes are registered, and will actually fill in those fields of the previous dummy one + /// this should be called after all top level classes are registered, and + /// will actually fill in those fields of the previous dummy one pub fn analyze_top_level(&mut self) -> Result<(), String> { - for d in self.definition_list.write().iter_mut() { - // only analyze those with ast, and class_method(ast in class def) - if let Some(ast) = &d.ast { - match &ast.node { - ast::StmtKind::ClassDef { - bases, - body, + let mut def_list = self.definition_list.write(); + let ty_list = self.ty_list.read(); + let ast_list = self.ast_list.read(); + let mut unifier = self.unifier.write(); + + for (def, ty, ast) in def_list + .iter_mut() + .zip(ty_list.iter()) + .zip(ast_list.iter()) + .map(|((x, y), z)| (x, y, z)) + .collect::, &Type, &Option>)>>() + { + // only analyze those entries with ast, and class_method(whose ast in class def) + match ast { + Some(ast::Located{node: ast::StmtKind::ClassDef { + bases, + body, + name: class_name, + .. + }, .. }) => { + // get the mutable reference of the entry in the + // definition list, get the `TopLevelDef` + let ( + def_ancestors, + def_fields, + def_methods, + def_type_vars, + resolver, + ) = if let TopLevelDef::Class { + object_id: _, + ancestors, + fields, + methods, + type_vars, + resolver: Some(resolver) + } = def.get_mut() { + (ancestors, fields, methods, type_vars, resolver.lock()) + } else { unreachable!() }; + + // try to get mutable reference of the entry in the + // unification table, get the `TypeEnum` + let type_enum = unifier.get_ty(*ty); + let ( + enum_params, + enum_fields + ) = if let TypeEnum::TObj { + params, + fields, .. 
- } => { - // get the mutable reference of the entry in the definition list, get the `TopLevelDef` - let ( - ancestors, - fields, - methods, - type_vars, - resolver, - ) = if let TopLevelDef::Class { - object_id: _, - ancestors, - fields, - methods, - type_vars, - resolver: Some(resolver) - } = &mut d.def { - (ancestors, fields, methods, type_vars, resolver.lock()) - } else { unreachable!() }; + } = type_enum.borrow() { + (params, fields) + } else { unreachable!() }; - // try to get mutable reference of the entry in the unification table, get the `TypeEnum` - let (params, - fields - ) = if let TypeEnum::TObj { - params, - fields, - .. - } = self.unifier.get_ty(d.ty).borrow() { - (params, fields) - } else { unreachable!() }; - - // ancestors and typevars associate with the class are analyzed by looking - // into the `bases` ast node - for b in bases { - match &b.node { - // typevars bounded to the class, only support things like `class A(Generic[T, V])`, - // things like `class A(Generic[T, V, ImportedModule.T])` is not supported - // i.e. only simple names are allowed in the subscript - // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params - ast::ExprKind::Subscript {value, slice, ..} if { - if let ast::ExprKind::Name {id, ..} = &value.node { - id == "Generic" + // ancestors and typevars associate with the class are analyzed by looking + // into the `bases` ast node + // `Generic` should only occur once, use this flag + let mut generic_occured = false; + // TODO: haven't check this yet + let mut occured_type_var: HashSet = Default::default(); + // TODO: haven't check this yet + let mut occured_base: HashSet = Default::default(); + for b in bases { + match &b.node { + // analyze typevars bounded to the class, + // only support things like `class A(Generic[T, V])`, + // things like `class A(Generic[T, V, ImportedModule.T])` is not supported + // i.e. only simple names are allowed in the subscript + // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params + ast::ExprKind::Subscript {value, slice, ..} if { + // can only be `Generic[...]` and this can only appear once + if let ast::ExprKind::Name { id, .. } = &value.node { + if id == "Generic" { + if !generic_occured { + generic_occured = true; + true + } else { + return Err("Only single Generic[...] or Protocol[...] can be in bases".into()) + } } else { false } - } => { - match &slice.node { - // `class Foo(Generic[T, V, P]):` - ast::ExprKind::Tuple {elts, ..} => { - for e in elts { - // let ty_def_id = resolver. 
- } - }, + } else { false } + } => { + match &slice.node { + // `class Foo(Generic[T, V, P]):` multiple element inside the subscript + ast::ExprKind::Tuple {elts, ..} => { + let tys = elts + .iter() + .map(|x| {resolver.parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + x)}) + .collect::, _>>()?; + + let ty_var_ids = tys + .iter() + .map(|t| unifier.get_ty(*t)) + .collect::>() + .iter() + .map(|x| { + let x = x.as_ref(); + if let TypeEnum::TVar {id, ..} = x { + Ok(*id) + } else { + Err("Expect type variabls here".to_string()) + } + }) + .collect::, _>>()?; - // `class Foo(Generic[T]):` - ast::ExprKind::Name {id, ..} => { - // the def_list - // type_vars.push(resolver.get_symbol_type(id).ok_or_else(|| "unknown type variable".to_string())?); FIXME: - unimplemented!() - }, + // write to TypeEnum + for (id, ty) in ty_var_ids.iter().zip(tys.iter()) { + enum_params.borrow_mut().insert(*id, *ty); + } - _ => return Err("not supported, only simple names are allowed in the subscript".into()) + // write to TopLevelDef + for ty in tys{ + def_type_vars.push(ty) + } + }, + + // `class Foo(Generic[T]):`, only single element + _ => { + let ty = resolver.parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + &slice + )?; + + let ty_var_id = if let TypeEnum::TVar { id, .. } = unifier + .get_ty(ty) + .as_ref() { *id } else { + return Err("Expect type variabls here".to_string()) + }; + + // write to TypeEnum + enum_params.borrow_mut().insert(ty_var_id, ty); + + // write to TopLevelDef + def_type_vars.push(ty); + }, + }; + } + + // analyze base classes, which is possible in + // other cases, we parse for the base class + _ => { + let ty = resolver.parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + b + )?; + + let obj_def_id = if let TypeEnum::TObj { obj_id, .. } = unifier + .get_ty(ty) + .as_ref() { + *obj_id + } else { + return Err("Expect concrete classes/types here".into()) }; - }, - - /* // base class, name directly available inside the - // module, can use this module's symbol resolver - ast::ExprKind::Name {id, ..} => { - // let def_id = resolver.get_identifier_def(id); FIXME: - // the definition list - // ancestors.push(def_id); - }, - - // base class, things can be like `class A(BaseModule.Base)`, here we have to get the - // symbol resolver of the module `BaseModule`? - ast::ExprKind::Attribute {value, attr, ..} => { - if let ast::ExprKind::Name {id, ..} = &value.node { - // if let Some(base_module_resolver) = resolver.get_module_resolver(id) { - // let def_id = base_module_resolver.get_identifier_def(attr); - // // the definition list - // ancestors.push(def_id); - // } else { return Err("unkown imported module".into()) } FIXME: - } else { return Err("unkown imported module".into()) } - }, - - // `class Foo(ImportedModule.A[int, bool])`, A is a class with associated type variables - ast::ExprKind::Subscript {value, slice, ..} => { - unimplemented!() - }, */ - // base class is possible in other cases, we parse for thr base class - _ => return Err("not supported".into()) + // write to TopLevelDef + def_ancestors.push(obj_def_id); } } + } - // class method and field are analyzed by - // looking into the class body ast node - for stmt in body { - if let ast::StmtKind::FunctionDef { - name, - args, - body, - returns, - .. 
- } = &stmt.node { + // class method and field are analyzed by + // looking into the class body ast node + // NOTE: should consider parents' method and fields(check re-def and add), + // but we do it later we go over these again after we finish analyze the + // fields/methods as declared in the ast + // method with same name should not occur twice, so use this + let defined_method: HashSet = Default::default(); + for stmt in body { + if let ast::StmtKind::FunctionDef { + name, + args, + body, + returns, + .. + } = &stmt.node { + // build type enum, need FunSignature {args, vars, ret} + // args. Now only args with no default TODO: other kinds of args + let func_args = args.args + .iter() + .map(|x| -> Result { + Ok(FuncArg { + name: x.node.arg.clone(), + ty: resolver.parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + x + .node + .annotation + .as_ref() + .ok_or_else(|| "type annotations required for function parameters".to_string())? + )?, + default_value: None + }) + }) + .collect::, _>>()?; + // vars. find TypeVars used in the argument type annotation + let func_vars = func_args + .iter() + .filter_map(|FuncArg { ty, .. } | { + if let TypeEnum::TVar { id, .. } = unifier.get_ty(*ty).as_ref() { + Some((*id, *ty)) + } else { None } + }) + .collect::>(); + // return type + let func_ret = resolver + .parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + returns + .as_ref() + .ok_or_else(|| "return type annotations required here".to_string())? + .as_ref(), + )?; + // build the TypeEnum + let func_ty = TypeEnum::TFunc(FunSignature { + args: func_args, + vars: func_vars, + ret: func_ret + }); + // TODO: write to the TypeEnum and Def_list + - } else { } + + if name == "__init__" { + // special for constructor, need to look into the fields + // TODO: look into the function body and see + } + } else { // do nothing. we do not care about things like this? // class A: // a = 3 // b = [2, 3] - - } - }, - - // top level function definition - ast::StmtKind::FunctionDef { - name, - args, - body, - returns, - .. - } => { - unimplemented!() } + }, - node => { - return Err("only expect function and class definitions to be submitted here to be analyzed".into()) - } + // top level function definition + Some(ast::Located{node: ast::StmtKind::FunctionDef { + name, + args, + body, + returns, + .. + }, .. 
}) => { + // TODO: + unimplemented!() } + + // only expect class def and function def ast + _ => return Err("only expect function and class definitions to be submitted here to be analyzed".into()) } } Ok(()) From 3f65e1b1335b3e50afe9671ea9610021c18f0eb7 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Fri, 13 Aug 2021 13:55:44 +0800 Subject: [PATCH 110/131] start refactor top_level --- nac3core/src/top_level.rs | 108 ++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 39 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 74f531d8..3e26c670 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -198,17 +198,24 @@ impl TopLevelComposer { ast: ast::Stmt<()>, resolver: Option>>, ) -> Result<(String, DefinitionId, Type), String> { + // get write access to the lists + let ( + mut def_list, + mut ty_list, + mut ast_list + ) = ( + self.definition_list.write(), + self.ty_list.write(), + self.ast_list.write() + ); + + // will be deleted after tested + assert_eq!(ty_list.len(), def_list.len()); + assert_eq!(def_list.len(), ast_list.len()); + match &ast.node { ast::StmtKind::ClassDef { name, body, .. } => { let class_name = name.to_string(); - - let (mut def_list, mut ty_list, mut ast_list) = - (self.definition_list.write(), self.ty_list.write(), self.ast_list.write()); - - // will be deleted after tested - assert_eq!(ty_list.len(), def_list.len()); - assert_eq!(def_list.len(), ast_list.len()); - let class_def_id = def_list.len(); // add the class to the unifier @@ -226,7 +233,7 @@ impl TopLevelComposer { // here push None temporarly, later will push the ast ast_list.push(None); - // parse class def body and register class methods into the def list + // parse class def body and register class methods into the def list. // module's symbol resolver would not know the name of the class methods, // thus cannot return their definition_id? 
so we have to manage it ourselves // by using the field `class_method_to_def_id` @@ -236,13 +243,11 @@ impl TopLevelComposer { let def_id = def_list.len(); // add to unifier - let ty = self.unifier.write().add_ty(TypeEnum::TFunc( - crate::typecheck::typedef::FunSignature { - args: Default::default(), - ret: self.primitives.none, - vars: Default::default(), - }, - )); + let ty = self.unifier.write().add_ty(TypeEnum::TFunc(FunSignature { + args: Default::default(), + ret: self.primitives.none, + vars: Default::default(), + })); // add to the definition list def_list.push( @@ -286,16 +291,12 @@ impl TopLevelComposer { let fun_name = name.to_string(); // add to the unifier - let ty = self.unifier.write().add_ty(TypeEnum::TFunc( - crate::typecheck::typedef::FunSignature { - args: Default::default(), - ret: self.primitives.none, - vars: Default::default(), - }, - )); + let ty = self.unifier.write().add_ty(TypeEnum::TFunc(FunSignature { + args: Default::default(), + ret: self.primitives.none, + vars: Default::default(), + })); - let (mut def_list, mut ty_list, mut ast_list) = - (self.definition_list.write(), self.ty_list.write(), self.ast_list.write()); // add to the definition list def_list.push( Self::make_top_level_function_def(name.into(), self.primitives.none, resolver) @@ -312,6 +313,23 @@ impl TopLevelComposer { } } + pub fn analyze_top_level_class_type_var(&mut self) -> Result<(), String> { + let mut def_list = self.definition_list.write(); + let ty_list = self.ty_list.read(); + let ast_list = self.ast_list.read(); + let mut unifier = self.unifier.write(); + + for (def, ty, ast) in def_list + .iter_mut() + .zip(ty_list.iter()) + .zip(ast_list.iter()) + .map(|((x, y), z)| (x, y, z)) + .collect::, &Type, &Option>)>>() { + unimplemented!() + }; + unimplemented!() + } + /// this should be called after all top level classes are registered, and /// will actually fill in those fields of the previous dummy one pub fn analyze_top_level(&mut self) -> Result<(), String> { @@ -401,21 +419,22 @@ impl TopLevelComposer { ast::ExprKind::Tuple {elts, ..} => { let tys = elts .iter() - .map(|x| {resolver.parse_type_annotation( + // here parse_type_annotation should be fine, + // since we only expect type vars, which is not relevant + // to the top-level parsing + .map(|x| resolver.parse_type_annotation( &self.to_top_level_context(), unifier.borrow_mut(), &self.primitives, - x)}) + x)) .collect::, _>>()?; let ty_var_ids = tys .iter() - .map(|t| unifier.get_ty(*t)) - .collect::>() - .iter() - .map(|x| { - let x = x.as_ref(); - if let TypeEnum::TVar {id, ..} = x { + .map(|t| { + let tmp = unifier.get_ty(*t); + // make sure it is type var + if let TypeEnum::TVar {id, ..} = tmp.as_ref() { Ok(*id) } else { Err("Expect type variabls here".to_string()) @@ -460,6 +479,14 @@ impl TopLevelComposer { // analyze base classes, which is possible in // other cases, we parse for the base class + // FIXME: calling parse_type_annotation here might cause some problem + // when the base class is parametrized `BaseClass[int, bool]`, since the + // analysis of type var of some class is not done yet. 
+ // we can first only look at the name, and later check the + // parameter when others are done + // Or + // first get all the class' type var analyzed, and then + // analyze the base class _ => { let ty = resolver.parse_type_annotation( &self.to_top_level_context(), @@ -491,7 +518,7 @@ impl TopLevelComposer { let defined_method: HashSet = Default::default(); for stmt in body { if let ast::StmtKind::FunctionDef { - name, + name: func_name, args, body, returns, @@ -539,16 +566,19 @@ impl TopLevelComposer { .as_ref(), )?; // build the TypeEnum - let func_ty = TypeEnum::TFunc(FunSignature { + let func_type_sig = FunSignature { args: func_args, vars: func_vars, ret: func_ret - }); - // TODO: write to the TypeEnum and Def_list + }; + + // write to the TypeEnum and Def_list (by replacing the ty with the new Type created above) + let func_name_mangled = Self::name_mangling(class_name.clone(), func_name); + let def_id = self.class_method_to_def_id.read()[&func_name_mangled]; + unimplemented!(); - - if name == "__init__" { + if func_name == "__init__" { // special for constructor, need to look into the fields // TODO: look into the function body and see } From 33391c55c241ac20af3f472ac1d516e5f373000d Mon Sep 17 00:00:00 2001 From: ychenfo Date: Fri, 13 Aug 2021 14:22:49 +0800 Subject: [PATCH 111/131] add Sync bound to Symbol resolver in top level --- nac3core/src/top_level.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 3e26c670..e502903c 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -26,7 +26,7 @@ pub enum TopLevelDef { // ancestor classes, including itself. ancestors: Vec, // symbol resolver of the module defined the class, none if it is built-in type - resolver: Option>>, + resolver: Option>>, }, Function { // prefix for symbol, should be unique globally, and not ending with numbers @@ -46,7 +46,7 @@ pub enum TopLevelDef { /// rigid type variables that would be substituted when the function is instantiated. 
instance_to_stmt: HashMap>, usize)>, // symbol resolver of the module defined the class - resolver: Option>>, + resolver: Option>>, }, Initializer { class_id: DefinitionId, @@ -167,7 +167,7 @@ impl TopLevelComposer { /// already include the definition_id of itself inside the ancestors vector pub fn make_top_level_class_def( index: usize, - resolver: Option>>, + resolver: Option>>, ) -> TopLevelDef { TopLevelDef::Class { object_id: DefinitionId(index), @@ -182,7 +182,7 @@ impl TopLevelComposer { pub fn make_top_level_function_def( name: String, ty: Type, - resolver: Option>>, + resolver: Option>>, ) -> TopLevelDef { TopLevelDef::Function { name, @@ -196,7 +196,7 @@ impl TopLevelComposer { pub fn register_top_level( &mut self, ast: ast::Stmt<()>, - resolver: Option>>, + resolver: Option>>, ) -> Result<(String, DefinitionId, Type), String> { // get write access to the lists let ( From e2adf82229bbf54730859b267329eedf6e0959b1 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 13 Aug 2021 14:48:46 +0800 Subject: [PATCH 112/131] threadpool for parallel code generation --- nac3core/src/codegen/mod.rs | 109 +++++++++++++++ nac3core/src/codegen/test.rs | 125 +++++++++--------- nac3core/src/top_level.rs | 26 ++-- .../src/typecheck/type_inferencer/test.rs | 4 +- 4 files changed, 184 insertions(+), 80 deletions(-) diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index f78581f8..e25b6fe1 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -6,6 +6,7 @@ use crate::{ typedef::{FunSignature, Type, TypeEnum, Unifier}, }, }; +use crossbeam::channel::{unbounded, Receiver, Sender}; use inkwell::{ basic_block::BasicBlock, builder::Builder, @@ -16,9 +17,11 @@ use inkwell::{ AddressSpace, }; use itertools::Itertools; +use parking_lot::{Condvar, Mutex}; use rustpython_parser::ast::Stmt; use std::collections::HashMap; use std::sync::Arc; +use std::thread; mod expr; mod stmt; @@ -43,6 +46,112 @@ pub struct CodeGenContext<'ctx, 'a> { pub loop_bb: Option<(BasicBlock<'ctx>, BasicBlock<'ctx>)>, } +type Fp = Box; + +pub struct WithCall { + fp: Fp, +} + +impl WithCall { + pub fn new(fp: Fp) -> WithCall { + WithCall { fp } + } + + pub fn run<'ctx>(&self, m: &Module<'ctx>) { + (self.fp)(m) + } +} + +pub struct WorkerRegistry { + sender: Arc>>, + receiver: Arc>>, + task_count: Mutex, + thread_count: usize, + wait_condvar: Condvar, +} + +impl WorkerRegistry { + pub fn create_workers( + names: &[&str], + top_level_ctx: Arc, + f: Arc, + ) -> Arc { + let (sender, receiver) = unbounded(); + let task_count = Mutex::new(0); + let wait_condvar = Condvar::new(); + + let registry = Arc::new(WorkerRegistry { + sender: Arc::new(sender), + receiver: Arc::new(receiver), + thread_count: names.len(), + task_count, + wait_condvar, + }); + + for name in names.iter() { + let top_level_ctx = top_level_ctx.clone(); + let registry = registry.clone(); + let name = name.to_string(); + let f = f.clone(); + thread::spawn(move || { + registry.worker_thread(name, top_level_ctx, f); + }); + } + registry + } + + pub fn wait_tasks_complete(&self) { + { + let mut count = self.task_count.lock(); + while *count != 0 { + self.wait_condvar.wait(&mut count); + } + } + for _ in 0..self.thread_count { + self.sender.send(None).unwrap(); + } + { + let mut count = self.task_count.lock(); + while *count != self.thread_count { + self.wait_condvar.wait(&mut count); + } + } + } + + pub fn add_task(&self, task: CodeGenTask) { + *self.task_count.lock() += 1; + self.sender.send(Some(task)).unwrap(); + } + + fn worker_thread( 
+ &self, + module_name: String, + top_level_ctx: Arc, + f: Arc, + ) { + let context = Context::create(); + let mut builder = context.create_builder(); + let mut module = context.create_module(&module_name); + + while let Some(task) = self.receiver.recv().unwrap() { + let result = gen_func(&context, builder, module, task, top_level_ctx.clone()); + builder = result.0; + module = result.1; + + println!("{}", *self.task_count.lock()); + *self.task_count.lock() -= 1; + self.wait_condvar.notify_all(); + } + + // do whatever... + let mut lock = self.task_count.lock(); + module.verify().unwrap(); + f.run(&module); + *lock += 1; + self.wait_condvar.notify_all(); + } +} + pub struct CodeGenTask { pub subst: Vec<(Type, Type)>, pub symbol_name: String, diff --git a/nac3core/src/codegen/test.rs b/nac3core/src/codegen/test.rs index 701d026a..54982897 100644 --- a/nac3core/src/codegen/test.rs +++ b/nac3core/src/codegen/test.rs @@ -1,5 +1,6 @@ -use super::{gen_func, CodeGenTask}; +use super::{CodeGenTask, WorkerRegistry}; use crate::{ + codegen::WithCall, location::Location, symbol_resolver::{SymbolResolver, SymbolValue}, top_level::{DefinitionId, TopLevelContext}, @@ -10,7 +11,6 @@ use crate::{ }, }; use indoc::indoc; -use inkwell::context::Context; use parking_lot::RwLock; use rustpython_parser::{ast::fold::Fold, parser::parse_program}; use std::collections::HashMap; @@ -109,7 +109,7 @@ impl TestEnvironment { top_level: TopLevelContext { definitions: Default::default(), unifiers: Default::default(), - conetexts: Default::default(), + // conetexts: Default::default(), }, function_data: FunctionData { resolver, @@ -140,10 +140,7 @@ impl TestEnvironment { #[test] fn test_primitives() { let mut env = TestEnvironment::basic_test_env(); - let context = Context::create(); - let module = context.create_module("test"); - let builder = context.create_builder(); - + let threads = ["test"]; let signature = FunSignature { args: vec![ FuncArg { name: "a".to_string(), ty: env.primitives.int32, default_value: None }, @@ -170,9 +167,8 @@ fn test_primitives() { let top_level = Arc::new(TopLevelContext { definitions: Default::default(), unifiers: Arc::new(RwLock::new(vec![(env.unifier.get_shared_unifier(), env.primitives)])), - conetexts: Default::default(), + // conetexts: Default::default(), }); - let task = CodeGenTask { subst: Default::default(), symbol_name: "testing".to_string(), @@ -182,65 +178,66 @@ fn test_primitives() { signature, }; - let module = gen_func(&context, builder, module, task, top_level); - // the following IR is equivalent to - // ``` - // ; ModuleID = 'test.ll' - // source_filename = "test" - // - // ; Function Attrs: norecurse nounwind readnone - // define i32 @testing(i32 %0, i32 %1) local_unnamed_addr #0 { - // init: - // %add = add i32 %1, %0 - // %cmp = icmp eq i32 %add, 1 - // %ifexpr = select i1 %cmp, i32 %0, i32 0 - // ret i32 %ifexpr - // } - // - // attributes #0 = { norecurse nounwind readnone } - // ``` - // after O2 optimization + let f = Arc::new(WithCall::new(Box::new(|module| { + // the following IR is equivalent to + // ``` + // ; ModuleID = 'test.ll' + // source_filename = "test" + // + // ; Function Attrs: norecurse nounwind readnone + // define i32 @testing(i32 %0, i32 %1) local_unnamed_addr #0 { + // init: + // %add = add i32 %1, %0 + // %cmp = icmp eq i32 %add, 1 + // %ifexpr = select i1 %cmp, i32 %0, i32 0 + // ret i32 %ifexpr + // } + // + // attributes #0 = { norecurse nounwind readnone } + // ``` + // after O2 optimization - let expected = indoc! 
{" - ; ModuleID = 'test' - source_filename = \"test\" + let expected = indoc! {" + ; ModuleID = 'test' + source_filename = \"test\" - define i32 @testing(i32 %0, i32 %1) { - init: - %a = alloca i32 - store i32 %0, i32* %a - %b = alloca i32 - store i32 %1, i32* %b - %tmp = alloca i32 - %tmp4 = alloca i32 - br label %body + define i32 @testing(i32 %0, i32 %1) { + init: + %a = alloca i32 + store i32 %0, i32* %a + %b = alloca i32 + store i32 %1, i32* %b + %tmp = alloca i32 + %tmp4 = alloca i32 + br label %body - body: ; preds = %init - %load = load i32, i32* %a - %load1 = load i32, i32* %b - %add = add i32 %load, %load1 - store i32 %add, i32* %tmp - %load2 = load i32, i32* %tmp - %cmp = icmp eq i32 %load2, 1 - br i1 %cmp, label %then, label %else + body: ; preds = %init + %load = load i32, i32* %a + %load1 = load i32, i32* %b + %add = add i32 %load, %load1 + store i32 %add, i32* %tmp + %load2 = load i32, i32* %tmp + %cmp = icmp eq i32 %load2, 1 + br i1 %cmp, label %then, label %else - then: ; preds = %body - %load3 = load i32, i32* %a - br label %cont + then: ; preds = %body + %load3 = load i32, i32* %a + br label %cont - else: ; preds = %body - br label %cont + else: ; preds = %body + br label %cont - cont: ; preds = %else, %then - %ifexpr = phi i32 [ %load3, %then ], [ 0, %else ] - store i32 %ifexpr, i32* %tmp4 - %load5 = load i32, i32* %tmp4 - ret i32 %load5 - } - "} - .trim(); - let ir = module.1.print_to_string().to_string(); - println!("src:\n{}", source); - println!("IR:\n{}", ir); - assert_eq!(expected, ir.trim()); + cont: ; preds = %else, %then + %ifexpr = phi i32 [ %load3, %then ], [ 0, %else ] + store i32 %ifexpr, i32* %tmp4 + %load5 = load i32, i32* %tmp4 + ret i32 %load5 + } + "} + .trim(); + assert_eq!(expected, module.print_to_string().to_str().unwrap().trim()); + }))); + let registry = WorkerRegistry::create_workers(&threads, top_level, f); + registry.add_task(task); + registry.wait_tasks_complete(); } diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 98cbacb9..3fb3a045 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -4,7 +4,6 @@ use std::{collections::HashMap, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; use crate::symbol_resolver::SymbolResolver; -use inkwell::context::Context; use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; @@ -54,17 +53,16 @@ pub enum TopLevelDef { pub struct TopLevelContext { pub definitions: Arc>>>, pub unifiers: Arc>>, - pub conetexts: Arc>>>, } -// like adding some info on top of the TopLevelDef for +// like adding some info on top of the TopLevelDef for // later parsing the class bases, method, and function sigatures pub struct TopLevelDefInfo { // the definition entry def: TopLevelDef, // the entry in the top_level unifier ty: Type, - // the ast submitted by applications, primitives and + // the ast submitted by applications, primitives and // class methods will have None value here ast: Option>, } @@ -118,7 +116,7 @@ impl TopLevelComposer { (primitives, unifier) } - /// return a composer and things to make a "primitive" symbol resolver, so that the symbol + /// return a composer and things to make a "primitive" symbol resolver, so that the symbol /// resolver can later figure out primitive type definitions when passed a primitive type name pub fn new() -> (Vec<(String, DefinitionId, Type)>, Self) { let primitives = Self::make_primitives(); @@ -150,7 +148,7 @@ impl TopLevelComposer 
{ ty: primitives.0.none, }, ]; - let composer = TopLevelComposer { + let composer = TopLevelComposer { definition_list: definition_list.into(), primitives: primitives.0, unifier: primitives.1, @@ -219,7 +217,7 @@ impl TopLevelComposer { ast: None, ty, }); - + // parse class def body and register class methods into the def list // module's symbol resolver would not know the name of the class methods, // thus cannot return their definition_id? so we have to manage it ourselves @@ -228,7 +226,7 @@ impl TopLevelComposer { if let ast::StmtKind::FunctionDef { name, .. } = &b.node { let fun_name = Self::name_mangling(class_name.clone(), name); let def_id = def_list.len(); - + // add to unifier let ty = self.unifier.add_ty(TypeEnum::TFunc( crate::typecheck::typedef::FunSignature { @@ -266,21 +264,21 @@ impl TopLevelComposer { // move the ast to the entry of the class in the def_list def_list.get_mut(class_def_id).unwrap().ast = Some(ast); - + // return Ok((class_name, DefinitionId(class_def_id), ty)) }, ast::StmtKind::FunctionDef { name, .. } => { let fun_name = name.to_string(); - + // add to the unifier let ty = self.unifier.add_ty(TypeEnum::TFunc(crate::typecheck::typedef::FunSignature { args: Default::default(), ret: self.primitives.none, vars: Default::default(), })); - + // add to the definition list let mut def_list = self.definition_list.write(); def_list.push(TopLevelDefInfo { @@ -333,7 +331,7 @@ impl TopLevelComposer { let (params, fields ) = if let TypeEnum::TObj { - // FIXME: this params is immutable, and what + // FIXME: this params is immutable, and what // should the key be, get the original typevar's var_id? params, fields, @@ -346,7 +344,7 @@ impl TopLevelComposer { // into the `bases` ast node for b in bases { match &b.node { - // typevars bounded to the class, only support things like `class A(Generic[T, V])`, + // typevars bounded to the class, only support things like `class A(Generic[T, V])`, // things like `class A(Generic[T, V, ImportedModule.T])` is not supported // i.e. 
only simple names are allowed in the subscript // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params @@ -401,7 +399,7 @@ impl TopLevelComposer { ast::ExprKind::Subscript {value, slice, ..} => { unimplemented!() }, */ - + // base class is possible in other cases, we parse for thr base class _ => return Err("not supported".into()) } diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index cebb0e89..9ec63676 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -100,7 +100,7 @@ impl TestEnvironment { top_level: TopLevelContext { definitions: Default::default(), unifiers: Default::default(), - conetexts: Default::default(), + // conetexts: Default::default(), }, unifier, function_data: FunctionData { @@ -259,7 +259,7 @@ impl TestEnvironment { let top_level = TopLevelContext { definitions: Arc::new(RwLock::new(top_level_defs)), unifiers: Default::default(), - conetexts: Default::default(), + // conetexts: Default::default(), }; let resolver = Arc::new(Resolver { From d30918bea011908b5e249a6e5927d83a7c1ce62e Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 13 Aug 2021 16:20:14 +0800 Subject: [PATCH 113/131] worker thread panic handling --- nac3core/src/codegen/mod.rs | 45 +++++++++++++++++++++++++++++------- nac3core/src/codegen/test.rs | 4 ++-- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/nac3core/src/codegen/mod.rs b/nac3core/src/codegen/mod.rs index e25b6fe1..4df3ee99 100644 --- a/nac3core/src/codegen/mod.rs +++ b/nac3core/src/codegen/mod.rs @@ -20,7 +20,10 @@ use itertools::Itertools; use parking_lot::{Condvar, Mutex}; use rustpython_parser::ast::Stmt; use std::collections::HashMap; -use std::sync::Arc; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; use std::thread; mod expr; @@ -57,7 +60,7 @@ impl WithCall { WithCall { fp } } - pub fn run<'ctx>(&self, m: &Module<'ctx>) { + pub fn run<'ctx>(&self, m: &Module<'ctx>) { (self.fp)(m) } } @@ -65,6 +68,7 @@ impl WithCall { pub struct WorkerRegistry { sender: Arc>>, receiver: Arc>>, + panicked: AtomicBool, task_count: Mutex, thread_count: usize, wait_condvar: Condvar, @@ -75,7 +79,7 @@ impl WorkerRegistry { names: &[&str], top_level_ctx: Arc, f: Arc, - ) -> Arc { + ) -> (Arc, Vec>) { let (sender, receiver) = unbounded(); let task_count = Mutex::new(0); let wait_condvar = Condvar::new(); @@ -84,26 +88,44 @@ impl WorkerRegistry { sender: Arc::new(sender), receiver: Arc::new(receiver), thread_count: names.len(), + panicked: AtomicBool::new(false), task_count, wait_condvar, }); + let mut handles = Vec::new(); for name in names.iter() { let top_level_ctx = top_level_ctx.clone(); let registry = registry.clone(); + let registry2 = registry.clone(); let name = name.to_string(); let f = f.clone(); - thread::spawn(move || { + let handle = thread::spawn(move || { registry.worker_thread(name, top_level_ctx, f); }); + let handle = thread::spawn(move || { + if let Err(e) = handle.join() { + if let Some(e) = e.downcast_ref::<&'static str>() { + eprintln!("Got an error: {}", e); + } else { + eprintln!("Got an unknown error: {:?}", e); + } + registry2.panicked.store(true, Ordering::SeqCst); + registry2.wait_condvar.notify_all(); + } + }); + handles.push(handle); } - registry + (registry, handles) } - pub fn wait_tasks_complete(&self) { + pub fn wait_tasks_complete(&self, handles: Vec>) { { let mut count = self.task_count.lock(); while *count != 0 { + if 
self.panicked.load(Ordering::SeqCst) { + break; + } self.wait_condvar.wait(&mut count); } } @@ -113,9 +135,18 @@ impl WorkerRegistry { { let mut count = self.task_count.lock(); while *count != self.thread_count { + if self.panicked.load(Ordering::SeqCst) { + break; + } self.wait_condvar.wait(&mut count); } } + for handle in handles { + handle.join().unwrap(); + } + if self.panicked.load(Ordering::SeqCst) { + panic!("tasks panicked"); + } } pub fn add_task(&self, task: CodeGenTask) { @@ -137,8 +168,6 @@ impl WorkerRegistry { let result = gen_func(&context, builder, module, task, top_level_ctx.clone()); builder = result.0; module = result.1; - - println!("{}", *self.task_count.lock()); *self.task_count.lock() -= 1; self.wait_condvar.notify_all(); } diff --git a/nac3core/src/codegen/test.rs b/nac3core/src/codegen/test.rs index 54982897..e56c7b45 100644 --- a/nac3core/src/codegen/test.rs +++ b/nac3core/src/codegen/test.rs @@ -237,7 +237,7 @@ fn test_primitives() { .trim(); assert_eq!(expected, module.print_to_string().to_str().unwrap().trim()); }))); - let registry = WorkerRegistry::create_workers(&threads, top_level, f); + let (registry, handles) = WorkerRegistry::create_workers(&threads, top_level, f); registry.add_task(task); - registry.wait_tasks_complete(); + registry.wait_tasks_complete(handles); } From d3ad894521e90e2a4f14e92c2c7241c303d6cefe Mon Sep 17 00:00:00 2001 From: pca006132 Date: Fri, 13 Aug 2021 16:30:33 +0800 Subject: [PATCH 114/131] removed code comment --- nac3core/src/typecheck/type_inferencer/test.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 9ec63676..e85269ea 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -100,7 +100,6 @@ impl TestEnvironment { top_level: TopLevelContext { definitions: Default::default(), unifiers: Default::default(), - // conetexts: Default::default(), }, unifier, function_data: FunctionData { @@ -259,7 +258,6 @@ impl TestEnvironment { let top_level = TopLevelContext { definitions: Arc::new(RwLock::new(top_level_defs)), unifiers: Default::default(), - // conetexts: Default::default(), }; let resolver = Arc::new(Resolver { From d8c3c063ecb5cfc17200bdf7c9782f178bce793e Mon Sep 17 00:00:00 2001 From: ychenfo Date: Mon, 16 Aug 2021 09:46:55 +0800 Subject: [PATCH 115/131] split top level handling in several functions --- nac3core/src/top_level.rs | 574 +++++++++++++++++--------------------- 1 file changed, 251 insertions(+), 323 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 5143f997..ed1b24f4 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -60,8 +60,6 @@ pub struct TopLevelContext { pub struct TopLevelComposer { // list of top level definitions, same as top level context pub definition_list: Arc>>>, - // list of top level Type, the index is same as the field `definition_list` - pub ty_list: RwLock>, // list of top level ast, the index is same as the field `definition_list` and `ty_list` pub ast_list: RwLock>>>, // start as a primitive unifier, will add more top_level defs inside @@ -70,6 +68,8 @@ pub struct TopLevelComposer { pub primitives: PrimitiveStore, // mangled class method name to def_id pub class_method_to_def_id: RwLock>, + // record the def id of the classes whoses fields and methods are to be analyzed + pub to_be_analyzed_class: RwLock>, } impl TopLevelComposer { @@ -133,21 +133,13 @@ impl 
TopLevelComposer { let ast_list: Vec>> = vec![None, None, None, None, None]; - let ty_list: Vec = vec![ - primitives.0.int32, - primitives.0.int64, - primitives.0.float, - primitives.0.bool, - primitives.0.none, - ]; - let composer = TopLevelComposer { definition_list: RwLock::new(top_level_def_list).into(), - ty_list: RwLock::new(ty_list), ast_list: RwLock::new(ast_list), primitives: primitives.0, unifier: primitives.1.into(), class_method_to_def_id: Default::default(), + to_be_analyzed_class: Default::default(), }; ( vec![ @@ -190,17 +182,20 @@ impl TopLevelComposer { } } + /// step 0, register, just remeber the names of top level classes/function pub fn register_top_level( &mut self, ast: ast::Stmt<()>, resolver: Option>>, - ) -> Result<(String, DefinitionId, Type), String> { - // get write access to the lists - let (mut def_list, mut ty_list, mut ast_list) = - (self.definition_list.write(), self.ty_list.write(), self.ast_list.write()); - - // will be deleted after tested - assert_eq!(ty_list.len(), def_list.len()); + ) -> Result<(String, DefinitionId), String> { + let ( + mut def_list, + mut ast_list + ) = ( + self.definition_list.write(), + self.ast_list.write() + ); + assert_eq!(def_list.len(), ast_list.len()); match &ast.node { @@ -208,25 +203,17 @@ impl TopLevelComposer { let class_name = name.to_string(); let class_def_id = def_list.len(); - // add the class to the unifier - let ty = self.unifier.write().add_ty(TypeEnum::TObj { - obj_id: DefinitionId(class_def_id), - fields: Default::default(), - params: Default::default(), - }); - // add the class to the definition lists def_list .push(Self::make_top_level_class_def(class_def_id, resolver.clone()).into()); - ty_list.push(ty); // since later when registering class method, ast will still be used, - // here push None temporarly, later will push the ast + // here push None temporarly, later will move the ast inside ast_list.push(None); // parse class def body and register class methods into the def list. // module's symbol resolver would not know the name of the class methods, // thus cannot return their definition_id? so we have to manage it ourselves - // by using the field `class_method_to_def_id` + // by using `class_method_to_def_id` for b in body { if let ast::StmtKind::FunctionDef { name, .. } = &b.node { let fun_name = Self::name_mangling(class_name.clone(), name); @@ -248,356 +235,297 @@ impl TopLevelComposer { ) .into(), ); - ty_list.push(ty); // the ast of class method is in the class, push None in to the list here ast_list.push(None); // class method, do not let the symbol manager manage it, use our own map self.class_method_to_def_id.write().insert(fun_name, DefinitionId(def_id)); - - // if it is the contructor, special handling is needed. In the above - // handling, we still add __init__ function to the class method - if name == "__init__" { - // NOTE: how can this later be fetched? 
- def_list.push( - TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) } - .into(), - ); - // arbitarily push one to make sure the index is correct - ty_list.push(self.primitives.none); - ast_list.push(None); - } } } // move the ast to the entry of the class in the ast_list ast_list[class_def_id] = Some(ast); - // return - Ok((class_name, DefinitionId(class_def_id), ty)) + // put the constructor into the def_list + def_list.push( + TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) } + .into(), + ); + ast_list.push(None); + + // class, put its def_id into the to be analyzed set + let mut to_be_analyzed = self.to_be_analyzed_class.write(); + to_be_analyzed.push(DefinitionId(class_def_id)); + + + Ok((class_name, DefinitionId(class_def_id))) } ast::StmtKind::FunctionDef { name, .. } => { let fun_name = name.to_string(); - // add to the unifier - let ty = self.unifier.write().add_ty(TypeEnum::TFunc(FunSignature { - args: Default::default(), - ret: self.primitives.none, - vars: Default::default(), - })); - // add to the definition list def_list.push( Self::make_top_level_function_def(name.into(), self.primitives.none, resolver) .into(), ); - ty_list.push(ty); ast_list.push(Some(ast)); // return - Ok((fun_name, DefinitionId(def_list.len() - 1), ty)) + Ok((fun_name, DefinitionId(def_list.len() - 1))) } _ => Err("only registrations of top level classes/functions are supprted".into()), } } - pub fn analyze_top_level_class_type_var(&mut self) -> Result<(), String> { + /// step 1, analyze the type vars associated with top level class + fn analyze_top_level_class_type_var(&mut self) -> Result<(), String> { let mut def_list = self.definition_list.write(); - let ty_list = self.ty_list.read(); let ast_list = self.ast_list.read(); let mut unifier = self.unifier.write(); - for (def, ty, ast) in def_list + for (class_def, class_ast) in def_list .iter_mut() - .zip(ty_list.iter()) .zip(ast_list.iter()) - .map(|((x, y), z)| (x, y, z)) - .collect::, &Type, &Option>)>>() - { - unimplemented!() - } - unimplemented!() - } - - /// this should be called after all top level classes are registered, and - /// will actually fill in those fields of the previous dummy one - pub fn analyze_top_level(&mut self) -> Result<(), String> { - let mut def_list = self.definition_list.write(); - let ty_list = self.ty_list.read(); - let ast_list = self.ast_list.read(); - let mut unifier = self.unifier.write(); - - for (def, ty, ast) in def_list - .iter_mut() - .zip(ty_list.iter()) - .zip(ast_list.iter()) - .map(|((x, y), z)| (x, y, z)) - .collect::, &Type, &Option>)>>() - { - // only analyze those entries with ast, and class_method(whose ast in class def) - match ast { - Some(ast::Located{node: ast::StmtKind::ClassDef { - bases, - body, - name: class_name, + .collect::, &Option>)>>() { + // only deal with class def here + let ( + class_bases, + class_def_type_vars, + class_resolver + ) = { + if let TopLevelDef::Class { + type_vars, + resolver, .. - }, .. 
}) => { - // get the mutable reference of the entry in the - // definition list, get the `TopLevelDef` - let ( - def_ancestors, - def_fields, - def_methods, - def_type_vars, - resolver, - ) = if let TopLevelDef::Class { - object_id: _, - ancestors, - fields, - methods, - type_vars, - resolver: Some(resolver) - } = def.get_mut() { - (ancestors, fields, methods, type_vars, resolver.lock()) - } else { unreachable!() }; - - // try to get mutable reference of the entry in the - // unification table, get the `TypeEnum` - let type_enum = unifier.get_ty(*ty); - let ( - enum_params, - enum_fields - ) = if let TypeEnum::TObj { - params, - fields, + } = class_def.get_mut() { + if let Some(ast::Located {node: ast::StmtKind::ClassDef { + bases, .. - } = type_enum.borrow() { - (params, fields) - } else { unreachable!() }; - - // ancestors and typevars associate with the class are analyzed by looking - // into the `bases` ast node - // `Generic` should only occur once, use this flag - let mut generic_occured = false; - // TODO: haven't check this yet - let mut occured_type_var: HashSet = Default::default(); - // TODO: haven't check this yet - let mut occured_base: HashSet = Default::default(); - for b in bases { - match &b.node { - // analyze typevars bounded to the class, - // only support things like `class A(Generic[T, V])`, - // things like `class A(Generic[T, V, ImportedModule.T])` is not supported - // i.e. only simple names are allowed in the subscript - // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params - ast::ExprKind::Subscript {value, slice, ..} if { - // can only be `Generic[...]` and this can only appear once - if let ast::ExprKind::Name { id, .. } = &value.node { - if id == "Generic" { - if !generic_occured { - generic_occured = true; - true - } else { - return Err("Only single Generic[...] or Protocol[...] can be in bases".into()) - } - } else { false } - } else { false } - } => { - match &slice.node { - // `class Foo(Generic[T, V, P]):` multiple element inside the subscript - ast::ExprKind::Tuple {elts, ..} => { - let tys = elts - .iter() - // here parse_type_annotation should be fine, - // since we only expect type vars, which is not relevant - // to the top-level parsing - .map(|x| resolver.parse_type_annotation( - &self.to_top_level_context(), - unifier.borrow_mut(), - &self.primitives, - x)) - .collect::, _>>()?; - - let ty_var_ids = tys - .iter() - .map(|t| { - let tmp = unifier.get_ty(*t); - // make sure it is type var - if let TypeEnum::TVar {id, ..} = tmp.as_ref() { - Ok(*id) - } else { - Err("Expect type variabls here".to_string()) - } - }) - .collect::, _>>()?; - - // write to TypeEnum - for (id, ty) in ty_var_ids.iter().zip(tys.iter()) { - enum_params.borrow_mut().insert(*id, *ty); - } - - // write to TopLevelDef - for ty in tys{ - def_type_vars.push(ty) - } - }, - - // `class Foo(Generic[T]):`, only single element - _ => { - let ty = resolver.parse_type_annotation( - &self.to_top_level_context(), - unifier.borrow_mut(), - &self.primitives, - &slice - )?; - - let ty_var_id = if let TypeEnum::TVar { id, .. 
} = unifier - .get_ty(ty) - .as_ref() { *id } else { - return Err("Expect type variabls here".to_string()) - }; - - // write to TypeEnum - enum_params.borrow_mut().insert(ty_var_id, ty); - - // write to TopLevelDef - def_type_vars.push(ty); - }, - }; - } - - // analyze base classes, which is possible in - // other cases, we parse for the base class - // FIXME: calling parse_type_annotation here might cause some problem - // when the base class is parametrized `BaseClass[int, bool]`, since the - // analysis of type var of some class is not done yet. - // we can first only look at the name, and later check the - // parameter when others are done - // Or - // first get all the class' type var analyzed, and then - // analyze the base class - _ => { - let ty = resolver.parse_type_annotation( - &self.to_top_level_context(), - unifier.borrow_mut(), - &self.primitives, - b - )?; - - let obj_def_id = if let TypeEnum::TObj { obj_id, .. } = unifier - .get_ty(ty) - .as_ref() { - *obj_id - } else { - return Err("Expect concrete classes/types here".into()) - }; - - // write to TopLevelDef - def_ancestors.push(obj_def_id); - } - } - } - - // class method and field are analyzed by - // looking into the class body ast node - // NOTE: should consider parents' method and fields(check re-def and add), - // but we do it later we go over these again after we finish analyze the - // fields/methods as declared in the ast - // method with same name should not occur twice, so use this - let defined_method: HashSet = Default::default(); - for stmt in body { - if let ast::StmtKind::FunctionDef { - name: func_name, - args, - body, - returns, - .. - } = &stmt.node { - // build type enum, need FunSignature {args, vars, ret} - // args. Now only args with no default TODO: other kinds of args - let func_args = args.args + }, .. }) = class_ast { + (bases, type_vars, resolver) + } else { unreachable!("must be both class") } + } else { continue } + }; + + let mut generic_occured = false; + for b in class_bases { + match &b.node { + // analyze typevars bounded to the class, + // only support things like `class A(Generic[T, V])`, + // things like `class A(Generic[T, V, ImportedModule.T])` is not supported + // i.e. only simple names are allowed in the subscript + // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params + ast::ExprKind::Subscript {value, slice, ..} if { + // can only be `Generic[...]` and this can only appear once + if let ast::ExprKind::Name { id, .. } = &value.node { + if id == "Generic" { + if !generic_occured { + generic_occured = true; + true + } else { + return Err("Only single Generic[...] can be in bases".into()) + } + } else { false } + } else { false } + } => { + // if `class A(Generic[T, V, G])` + if let ast::ExprKind::Tuple { elts, .. } = &slice.node { + // parse the type vars + let type_vars = elts .iter() - .map(|x| -> Result { - Ok(FuncArg { - name: x.node.arg.clone(), - ty: resolver.parse_type_annotation( + .map(|e| + class_resolver + .as_ref() + .unwrap() + .lock() + .parse_type_annotation( &self.to_top_level_context(), unifier.borrow_mut(), &self.primitives, - x - .node - .annotation - .as_ref() - .ok_or_else(|| "type annotations required for function parameters".to_string())? - )?, - default_value: None - }) - }) - .collect::, _>>()?; - // vars. 
find TypeVars used in the argument type annotation - let func_vars = func_args + e) + ) + .collect::, _>>()?; + + // check if all are unique type vars + let mut occured_type_var_id: HashSet = HashSet::new(); + let all_unique_type_var = type_vars .iter() - .filter_map(|FuncArg { ty, .. } | { - if let TypeEnum::TVar { id, .. } = unifier.get_ty(*ty).as_ref() { - Some((*id, *ty)) - } else { None } - }) - .collect::>(); - // return type - let func_ret = resolver - .parse_type_annotation( + .all(|x| { + let ty = unifier.get_ty(*x); + if let TypeEnum::TVar {id, ..} = ty.as_ref() { + occured_type_var_id.insert(*id) + } else { false } + }); + + if !all_unique_type_var { return Err("expect unique type variables".into()) } + + // add to TopLevelDef + class_def_type_vars.extend(type_vars); + + // `class A(Generic[T])` + } else { + let ty = + class_resolver + .as_ref() + .unwrap() + .lock() + .parse_type_annotation( &self.to_top_level_context(), unifier.borrow_mut(), &self.primitives, - returns - .as_ref() - .ok_or_else(|| "return type annotations required here".to_string())? - .as_ref(), + &slice )?; - // build the TypeEnum - let func_type_sig = FunSignature { - args: func_args, - vars: func_vars, - ret: func_ret - }; - - // write to the TypeEnum and Def_list (by replacing the ty with the new Type created above) - let func_name_mangled = Self::name_mangling(class_name.clone(), func_name); - let def_id = self.class_method_to_def_id.read()[&func_name_mangled]; - unimplemented!(); - - - if func_name == "__init__" { - // special for constructor, need to look into the fields - // TODO: look into the function body and see - } - } else { - // do nothing. we do not care about things like this? - // class A: - // a = 3 - // b = [2, 3] + // check if it is type var + let is_type_var = matches!( + unifier.get_ty(ty).as_ref(), + &TypeEnum::TVar { .. } + ); + if !is_type_var { return Err("expect type variable here".into()) } + + // add to TopLevelDef + class_def_type_vars.push(ty); } } - }, + + // if others, do nothing in this function + _ => continue + } + } + + }; + Ok(()) + } - // top level function definition - Some(ast::Located{node: ast::StmtKind::FunctionDef { - name, - args, - body, - returns, + /// step 2, base classes. Need to separate step1 and step2 for this reason: + /// `class B(Generic[T, V]); + /// class A(B[int, bool])` + /// if the type var associated with class `B` has not been handled properly, + /// the parse of type annotation of `B[int, bool]` will fail + fn analyze_top_level_class_bases(&mut self) -> Result<(), String> { + let mut def_list = self.definition_list.write(); + let ast_list = self.ast_list.read(); + let mut unifier = self.unifier.write(); + + for (class_def, class_ast) in def_list + .iter_mut() + .zip(ast_list.iter()) + .collect::, &Option>)>>() { + let ( + class_bases, + class_ancestors, + class_resolver + ) = { + if let TopLevelDef::Class { + ancestors, + resolver, .. - }, .. }) => { - // TODO: - unimplemented!() + } = class_def.get_mut() { + if let Some(ast::Located {node: ast::StmtKind::ClassDef { + bases, + .. + }, .. 
}) = class_ast { + (bases, ancestors, resolver) + } else { unreachable!("must be both class") } + } else { continue } + }; + for b in class_bases { + // type vars have already been handled, so skip on `Generic[...]` + if let ast::ExprKind::Subscript {value, ..} = &b.node { + if let ast::ExprKind::Name {id, ..} = &value.node { + if id == "Generic" { continue } + } + } + // get the def id of the base class + let base_ty = class_resolver.as_ref().unwrap().lock().parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + b + )?; + let base_id = + if let TypeEnum::TObj {obj_id, ..} = unifier.get_ty(base_ty).as_ref() { + *obj_id + } else { return Err("expect concrete class/type to be base class".into()) }; + + // write to the class ancestors + class_ancestors.push(base_id); + } + + }; + Ok(()) + } + + /// step 3, class_fields + fn analyze_top_level_class_fields_methods(&mut self) -> Result<(), String> { + let mut def_list = self.definition_list.write(); + let ast_list = self.ast_list.read(); + let mut unifier = self.unifier.write(); + let class_method_to_def_id = self.class_method_to_def_id.read(); + let mut to_be_analyzed_class = self.to_be_analyzed_class.write(); + + while !to_be_analyzed_class.is_empty() { + let ind = to_be_analyzed_class.remove(0).0; + + let (class_def, class_ast) = ( + &mut def_list[ind], &ast_list[ind] + ); + let ( + class_name, + class_fields, + class_methods, + class_resolver, + class_body + ) = { + if let TopLevelDef::Class { + resolver, + fields, + methods, + .. + } = class_def.get_mut() { + if let Some(ast::Located {node: ast::StmtKind::ClassDef { + name, + body, + .. + }, .. }) = class_ast { + (name, fields, methods, resolver, body) + } else { unreachable!("must be both class") } + } else { continue } + }; + for b in class_body { + if let ast::StmtKind::FunctionDef { + args: func_args, + body: func_body, + name: func_name, + returns: func_returns, + .. + } = &b.node { + // unwrap should not fail + let method_def_id = + class_method_to_def_id + .get(&Self::name_mangling( + class_name.into(), + func_name) + ).unwrap(); + + let a = &def_list[method_def_id.0]; + } else { + // what should we do with `class A: a = 3`? 
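                        // One possible handling (a sketch, not what this commit does): treat an
                        // annotated class-level assignment as a class attribute, e.g.
                        //   } else if let ast::StmtKind::AnnAssign { target, annotation, .. } = &b.node {
                        //       // parse `annotation` with class_resolver and record the
                        //       // (attribute name, type) pair next to the instance fields
                        //   }
                        // while a bare `a = 3` with no annotation is rejected or left for inference.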
+ continue } - // only expect class def and function def ast - _ => return Err("only expect function and class definitions to be submitted here to be analyzed".into()) } } Ok(()) + + } + + fn analyze_top_level_inheritance(&mut self) -> Result<(), String> { + unimplemented!() + } + + fn analyze_top_level_field_instantiation(&mut self) -> Result<(), String> { + unimplemented!() } } From 3734663188b0551831a12be868561fb90ba1ce26 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Mon, 16 Aug 2021 13:49:10 +0800 Subject: [PATCH 116/131] add RefCell to FunSignature in TypeEnum --- nac3core/src/top_level.rs | 53 ++++++++++++------- nac3core/src/typecheck/magic_methods.rs | 8 +-- nac3core/src/typecheck/type_inferencer/mod.rs | 2 +- .../src/typecheck/type_inferencer/test.rs | 8 +-- nac3core/src/typecheck/typedef/mod.rs | 26 +++++---- nac3core/src/typecheck/typedef/test.rs | 2 +- 6 files changed, 59 insertions(+), 40 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index ed1b24f4..afec33fe 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,6 +1,5 @@ -use std::borrow::{Borrow, BorrowMut}; -use std::collections::HashSet; -use std::{collections::HashMap, sync::Arc}; +use std::borrow::BorrowMut; +use std::{collections::HashMap, collections::HashSet, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; @@ -52,6 +51,16 @@ pub enum TopLevelDef { }, } +impl TopLevelDef { + fn get_function_type(&self) -> Result { + if let Self::Function { signature, .. } = self { + Ok(*signature) + } else { + Err("only expect function def here".into()) + } + } +} + pub struct TopLevelContext { pub definitions: Arc>>>, pub unifiers: Arc>>, @@ -219,18 +228,15 @@ impl TopLevelComposer { let fun_name = Self::name_mangling(class_name.clone(), name); let def_id = def_list.len(); - // add to unifier - let ty = self.unifier.write().add_ty(TypeEnum::TFunc(FunSignature { - args: Default::default(), - ret: self.primitives.none, - vars: Default::default(), - })); - // add to the definition list def_list.push( Self::make_top_level_function_def( fun_name.clone(), - ty, + self.unifier.write().add_ty(TypeEnum::TFunc(FunSignature { + args: Default::default(), + ret: self.primitives.none.into(), + vars: Default::default(), + }.into())), resolver.clone(), ) .into(), @@ -309,7 +315,7 @@ impl TopLevelComposer { } else { continue } }; - let mut generic_occured = false; + let mut is_generic = false; for b in class_bases { match &b.node { // analyze typevars bounded to the class, @@ -321,8 +327,8 @@ impl TopLevelComposer { // can only be `Generic[...]` and this can only appear once if let ast::ExprKind::Name { id, .. } = &value.node { if id == "Generic" { - if !generic_occured { - generic_occured = true; + if !is_generic { + is_generic = true; true } else { return Err("Only single Generic[...] can be in bases".into()) @@ -467,10 +473,10 @@ impl TopLevelComposer { while !to_be_analyzed_class.is_empty() { let ind = to_be_analyzed_class.remove(0).0; - let (class_def, class_ast) = ( &mut def_list[ind], &ast_list[ind] ); + let ( class_name, class_fields, @@ -491,7 +497,10 @@ impl TopLevelComposer { }, .. 
}) = class_ast { (name, fields, methods, resolver, body) } else { unreachable!("must be both class") } - } else { continue } + } else { + to_be_analyzed_class.push(DefinitionId(ind)); + continue + } }; for b in class_body { if let ast::StmtKind::FunctionDef { @@ -508,13 +517,19 @@ impl TopLevelComposer { class_name.into(), func_name) ).unwrap(); - - let a = &def_list[method_def_id.0]; + let method_def = def_list[method_def_id.0].write(); + let method_ty = method_def.get_function_type()?; + let method_signature = unifier.get_ty(method_ty); + + if let TypeEnum::TFunc(sig) = method_signature.as_ref() { + let mut sig = &mut *sig.borrow_mut(); + } else { unreachable!() } + + } else { // what should we do with `class A: a = 3`? continue } - } } Ok(()) diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index 29615aa2..d7eb0dbe 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -84,7 +84,7 @@ pub fn impl_binop( ret: ret_ty, vars: HashMap::new(), args: vec![FuncArg { ty: other, default_value: None, name: "other".into() }], - })) + }.into())) }); fields.borrow_mut().insert(binop_assign_name(op).into(), { @@ -97,7 +97,7 @@ pub fn impl_binop( ret: ret_ty, vars: HashMap::new(), args: vec![FuncArg { ty: other, default_value: None, name: "other".into() }], - })) + }.into())) }); } } else { @@ -120,7 +120,7 @@ pub fn impl_unaryop( ret: ret_ty, vars: HashMap::new(), args: vec![], - })), + }.into())), ); } } else { @@ -143,7 +143,7 @@ pub fn impl_cmpop( ret: store.bool, vars: HashMap::new(), args: vec![FuncArg { ty: other_ty, default_value: None, name: "other".into() }], - })), + }.into())), ); } } else { diff --git a/nac3core/src/typecheck/type_inferencer/mod.rs b/nac3core/src/typecheck/type_inferencer/mod.rs index 304431f3..72c47f60 100644 --- a/nac3core/src/typecheck/type_inferencer/mod.rs +++ b/nac3core/src/typecheck/type_inferencer/mod.rs @@ -258,7 +258,7 @@ impl<'a> Inferencer<'a> { Ok(Located { location, node: ExprKind::Lambda { args: args.into(), body: body.into() }, - custom: Some(self.unifier.add_ty(TypeEnum::TFunc(fun))), + custom: Some(self.unifier.add_ty(TypeEnum::TFunc(fun.into()))), }) } diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index e85269ea..29eafdb4 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -180,14 +180,14 @@ impl TestEnvironment { args: vec![], ret: foo_ty, vars: [(id, v0)].iter().cloned().collect(), - })), + }.into())), ); let fun = unifier.add_ty(TypeEnum::TFunc(FunSignature { args: vec![], ret: int32, vars: Default::default(), - })); + }.into())); let bar = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(6), fields: [("a".into(), int32), ("b".into(), fun)] @@ -211,7 +211,7 @@ impl TestEnvironment { args: vec![], ret: bar, vars: Default::default(), - })), + }.into())), ); let bar2 = unifier.add_ty(TypeEnum::TObj { @@ -237,7 +237,7 @@ impl TestEnvironment { args: vec![], ret: bar2, vars: Default::default(), - })), + }.into())), ); let class_names = [("Bar".into(), bar), ("Bar2".into(), bar2)].iter().cloned().collect(); diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 6f0c34d7..05b2336b 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -1,5 +1,5 @@ use itertools::{chain, zip, Itertools}; -use std::borrow::Cow; +use std::borrow::{Borrow, Cow}; use 
std::cell::RefCell; use std::collections::HashMap; use std::iter::once; @@ -77,7 +77,7 @@ pub enum TypeEnum { ty: Type, }, TCall(RefCell>), - TFunc(FunSignature), + TFunc(RefCell), } impl TypeEnum { @@ -472,7 +472,7 @@ impl Unifier { } (TCall(calls), TFunc(signature)) => { self.occur_check(a, b)?; - let required: Vec = signature + let required: Vec = signature.borrow() .args .iter() .filter(|v| v.default_value.is_none()) @@ -482,7 +482,7 @@ impl Unifier { // we unify every calls to the function signature. for c in calls.borrow().iter() { let Call { posargs, kwargs, ret, fun } = &*self.calls[c.0].clone(); - let instantiated = self.instantiate_fun(b, signature); + let instantiated = self.instantiate_fun(b, &*signature.borrow()); let r = self.get_ty(instantiated); let r = r.as_ref(); let signature; @@ -495,9 +495,9 @@ impl Unifier { // arguments) are provided, and do not provide the same argument twice. let mut required = required.clone(); let mut all_names: Vec<_> = - signature.args.iter().map(|v| (v.name.clone(), v.ty)).rev().collect(); + signature.borrow().args.iter().map(|v| (v.name.clone(), v.ty)).rev().collect(); for (i, t) in posargs.iter().enumerate() { - if signature.args.len() <= i { + if signature.borrow().args.len() <= i { return Err("Too many arguments.".to_string()); } if !required.is_empty() { @@ -518,12 +518,13 @@ impl Unifier { if !required.is_empty() { return Err("Expected more arguments".to_string()); } - self.unify(*ret, signature.ret)?; + self.unify(*ret, signature.borrow().ret)?; *fun.borrow_mut() = Some(instantiated); } self.set_a_to_b(a, b); } (TFunc(sign1), TFunc(sign2)) => { + let (sign1, sign2) = (&*sign1.borrow(), &*sign2.borrow()); if !sign1.vars.is_empty() || !sign2.vars.is_empty() { return Err("Polymorphic function pointer is prohibited.".to_string()); } @@ -604,13 +605,14 @@ impl Unifier { TypeEnum::TCall { .. 
} => "call".to_owned(), TypeEnum::TFunc(signature) => { let params = signature + .borrow() .args .iter() .map(|arg| { format!("{}={}", arg.name, self.stringify(arg.ty, obj_to_name, var_to_name)) }) .join(", "); - let ret = self.stringify(signature.ret, obj_to_name, var_to_name); + let ret = self.stringify(signature.borrow().ret, obj_to_name, var_to_name); format!("fn[[{}], {}]", params, ret) } } @@ -723,7 +725,8 @@ impl Unifier { None } } - TypeEnum::TFunc(FunSignature { args, ret, vars: params }) => { + TypeEnum::TFunc(sig) => { + let FunSignature { args, ret, vars: params } = &*sig.borrow(); let new_params = self.subst_map(params, mapping); let new_ret = self.subst(*ret, mapping); let mut new_args = Cow::from(args); @@ -738,7 +741,7 @@ impl Unifier { let params = new_params.unwrap_or_else(|| params.clone()); let ret = new_ret.unwrap_or_else(|| *ret); let args = new_args.into_owned(); - Some(self.add_ty(TypeEnum::TFunc(FunSignature { args, ret, vars: params }))) + Some(self.add_ty(TypeEnum::TFunc(FunSignature { args, ret, vars: params }.into()))) } else { None } @@ -809,7 +812,8 @@ impl Unifier { self.occur_check(a, *t)?; } } - TypeEnum::TFunc(FunSignature { args, ret, vars: params }) => { + TypeEnum::TFunc(sig) => { + let FunSignature { args, ret, vars: params } = &*sig.borrow(); for t in chain!(args.iter().map(|v| &v.ty), params.values(), once(ret)) { self.occur_check(a, *t)?; } diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index d782c14f..3aaa7891 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -333,7 +333,7 @@ fn test_virtual() { args: vec![], ret: int, vars: HashMap::new(), - })); + }.into())); let bar = env.unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(5), fields: [("f".to_string(), fun), ("a".to_string(), int)] From eb814dd8c3b5e013674c372544d890d156d69774 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Mon, 16 Aug 2021 13:57:21 +0800 Subject: [PATCH 117/131] clean unused use --- nac3core/src/typecheck/typedef/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 05b2336b..66f66354 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -1,5 +1,5 @@ use itertools::{chain, zip, Itertools}; -use std::borrow::{Borrow, Cow}; +use std::borrow::Cow; use std::cell::RefCell; use std::collections::HashMap; use std::iter::once; From 79ce13722a6dd7575fbe4e1c6811d3398360bfc7 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Mon, 16 Aug 2021 17:40:12 +0800 Subject: [PATCH 118/131] partially parsed class methods nad fields --- nac3core/src/top_level.rs | 179 ++++++++++++++++++++++++++++---------- 1 file changed, 134 insertions(+), 45 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index afec33fe..ab90730b 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -5,6 +5,7 @@ use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; use crate::symbol_resolver::SymbolResolver; use crate::typecheck::typedef::{FunSignature, FuncArg}; +use itertools::chain; use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; @@ -163,6 +164,7 @@ impl TopLevelComposer { } /// already include the definition_id of itself inside the ancestors vector + /// when first regitering, the type_vars, fields, methods, ancestors are invalid pub fn 
make_top_level_class_def( index: usize, resolver: Option>>, @@ -177,6 +179,7 @@ impl TopLevelComposer { } } + /// when first registering, the type is a invalid value pub fn make_top_level_function_def( name: String, ty: Type, @@ -463,7 +466,7 @@ impl TopLevelComposer { Ok(()) } - /// step 3, class_fields + /// step 3, class fields and methods fn analyze_top_level_class_fields_methods(&mut self) -> Result<(), String> { let mut def_list = self.definition_list.write(); let ast_list = self.ast_list.read(); @@ -472,68 +475,154 @@ impl TopLevelComposer { let mut to_be_analyzed_class = self.to_be_analyzed_class.write(); while !to_be_analyzed_class.is_empty() { - let ind = to_be_analyzed_class.remove(0).0; - let (class_def, class_ast) = ( - &mut def_list[ind], &ast_list[ind] - ); - - let ( - class_name, - class_fields, - class_methods, - class_resolver, - class_body - ) = { - if let TopLevelDef::Class { - resolver, - fields, - methods, - .. - } = class_def.get_mut() { - if let Some(ast::Located {node: ast::StmtKind::ClassDef { - name, - body, + let class_ind = to_be_analyzed_class.remove(0).0; + let (class_name, class_body) = { + let class_ast = &ast_list[class_ind]; + if let Some( + ast::Located { node: + ast::StmtKind::ClassDef { + name, + body, + .. + }, .. - }, .. }) = class_ast { - (name, fields, methods, resolver, body) - } else { unreachable!("must be both class") } - } else { - to_be_analyzed_class.push(DefinitionId(ind)); - continue - } + } + ) = class_ast { + (name, body) + } else { unreachable!("should be class def ast") } }; + + let class_methods_parsing_result: Vec<(String, Type, DefinitionId)> = Default::default(); + let class_fields_parsing_result: Vec<(String, Type)> = Default::default(); for b in class_body { if let ast::StmtKind::FunctionDef { - args: func_args, - body: func_body, - name: func_name, - returns: func_returns, + args: method_args_ast, + body: method_body_ast, + name: method_name, + returns: method_returns_ast, .. } = &b.node { - // unwrap should not fail - let method_def_id = - class_method_to_def_id + let (class_def, method_def) = { + // unwrap should not fail + let method_ind = class_method_to_def_id .get(&Self::name_mangling( class_name.into(), - func_name) - ).unwrap(); - let method_def = def_list[method_def_id.0].write(); - let method_ty = method_def.get_function_type()?; - let method_signature = unifier.get_ty(method_ty); + method_name) + ).unwrap().0; + + // split the def_list to two parts to get the + // mutable reference to both the method and the class + assert_ne!(method_ind, class_ind); + let min_ind = (if method_ind > class_ind { class_ind } else { method_ind }) + 1; + let (head_slice, + tail_slice + ) = def_list.split_at_mut(min_ind); + let (new_method_ind, new_class_ind) = ( + if method_ind >= min_ind { method_ind - min_ind } else { method_ind }, + if class_ind >= min_ind { class_ind - min_ind } else { class_ind } + ); + if new_class_ind == class_ind { + (&mut head_slice[new_class_ind], &mut tail_slice[new_method_ind]) + } else { + (&mut tail_slice[new_class_ind], &mut head_slice[new_method_ind]) + } + }; + let ( + class_fields, + class_methods, + class_resolver + ) = { + if let TopLevelDef::Class { + resolver, + fields, + methods, + .. 
+ } = class_def.get_mut() { + (fields, methods, resolver) + } else { unreachable!("must be class def here") } + }; - if let TypeEnum::TFunc(sig) = method_signature.as_ref() { - let mut sig = &mut *sig.borrow_mut(); - } else { unreachable!() } + let arg_tys = method_args_ast + .args + .iter() + .map(|x| -> Result { + let annotation = x + .node + .annotation + .as_ref() + .ok_or_else(|| "type annotation for function parameter is needed".to_string())? + .as_ref(); + let ty = class_resolver + .as_ref() + .unwrap() + .lock() + .parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + annotation + )?; + Ok(ty) + }) + .collect::, _>>()?; + let ret_ty = method_returns_ast + .as_ref() + .and_then(|x| { + Some( + class_resolver + .as_ref() + .unwrap() + .lock() + .parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + x.as_ref() + ) + ) + }).unwrap()?; + + let all_tys_ok = { + let ret_ty_iter = vec![ret_ty]; + let ret_ty_iter = ret_ty_iter.iter(); + let mut all_tys = chain!(arg_tys.iter(), ret_ty_iter); + all_tys.all(|x| { + let type_enum = unifier.get_ty(*x); + match type_enum.as_ref() { + TypeEnum::TObj {obj_id, ..} => { + !to_be_analyzed_class.contains(obj_id) + }, + TypeEnum::TVirtual { ty } => { + if let TypeEnum::TObj {obj_id, ..} = unifier.get_ty(*ty).as_ref() { + !to_be_analyzed_class.contains(obj_id) + } else { unreachable!() } + }, + _ => unreachable!() + } + } + ) + }; + if all_tys_ok { + // TODO: put related value to the `class_methods_parsing_result` + unimplemented!() + } else { + to_be_analyzed_class.push(DefinitionId(class_ind)); + // TODO: go to the next WHILE loop + unimplemented!() + } } else { // what should we do with `class A: a = 3`? continue } } + + // TODO: now it should be confirmed that every + // methods and fields of the class can be correctly typed, put the results + // into the actual def_list and the unifier } Ok(()) - } fn analyze_top_level_inheritance(&mut self) -> Result<(), String> { From fa40fd73c640e9264c6e79f82215c0862c08be91 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Mon, 16 Aug 2021 20:17:08 +0800 Subject: [PATCH 119/131] formatted --- nac3core/src/top_level.rs | 353 +++++++++--------- nac3core/src/typecheck/magic_methods.rs | 59 ++- .../src/typecheck/type_inferencer/test.rs | 37 +- nac3core/src/typecheck/typedef/mod.rs | 18 +- nac3core/src/typecheck/typedef/test.rs | 8 +- 5 files changed, 240 insertions(+), 235 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index ab90730b..feec273d 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -200,14 +200,8 @@ impl TopLevelComposer { ast: ast::Stmt<()>, resolver: Option>>, ) -> Result<(String, DefinitionId), String> { - let ( - mut def_list, - mut ast_list - ) = ( - self.definition_list.write(), - self.ast_list.write() - ); - + let (mut def_list, mut ast_list) = (self.definition_list.write(), self.ast_list.write()); + assert_eq!(def_list.len(), ast_list.len()); match &ast.node { @@ -235,11 +229,14 @@ impl TopLevelComposer { def_list.push( Self::make_top_level_function_def( fun_name.clone(), - self.unifier.write().add_ty(TypeEnum::TFunc(FunSignature { - args: Default::default(), - ret: self.primitives.none.into(), - vars: Default::default(), - }.into())), + self.unifier.write().add_ty(TypeEnum::TFunc( + FunSignature { + args: Default::default(), + ret: self.primitives.none.into(), + vars: Default::default(), + } + .into(), + )), resolver.clone(), ) .into(), @@ -256,17 
+253,14 @@ impl TopLevelComposer { ast_list[class_def_id] = Some(ast); // put the constructor into the def_list - def_list.push( - TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) } - .into(), - ); + def_list + .push(TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) }.into()); ast_list.push(None); // class, put its def_id into the to be analyzed set let mut to_be_analyzed = self.to_be_analyzed_class.write(); to_be_analyzed.push(DefinitionId(class_def_id)); - Ok((class_name, DefinitionId(class_def_id))) } @@ -297,27 +291,24 @@ impl TopLevelComposer { for (class_def, class_ast) in def_list .iter_mut() .zip(ast_list.iter()) - .collect::, &Option>)>>() { + .collect::, &Option>)>>() + { // only deal with class def here - let ( - class_bases, - class_def_type_vars, - class_resolver - ) = { - if let TopLevelDef::Class { - type_vars, - resolver, - .. - } = class_def.get_mut() { - if let Some(ast::Located {node: ast::StmtKind::ClassDef { - bases, - .. - }, .. }) = class_ast { + let (class_bases, class_def_type_vars, class_resolver) = { + if let TopLevelDef::Class { type_vars, resolver, .. } = class_def.get_mut() { + if let Some(ast::Located { + node: ast::StmtKind::ClassDef { bases, .. }, .. + }) = class_ast + { (bases, type_vars, resolver) - } else { unreachable!("must be both class") } - } else { continue } - }; - + } else { + unreachable!("must be both class") + } + } else { + continue; + } + }; + let mut is_generic = false; for b in class_bases { match &b.node { @@ -326,84 +317,86 @@ impl TopLevelComposer { // things like `class A(Generic[T, V, ImportedModule.T])` is not supported // i.e. only simple names are allowed in the subscript // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params - ast::ExprKind::Subscript {value, slice, ..} if { - // can only be `Generic[...]` and this can only appear once - if let ast::ExprKind::Name { id, .. } = &value.node { - if id == "Generic" { - if !is_generic { - is_generic = true; - true + ast::ExprKind::Subscript { value, slice, .. } + if { + // can only be `Generic[...]` and this can only appear once + if let ast::ExprKind::Name { id, .. } = &value.node { + if id == "Generic" { + if !is_generic { + is_generic = true; + true + } else { + return Err( + "Only single Generic[...] can be in bases".into() + ); + } } else { - return Err("Only single Generic[...] can be in bases".into()) + false } - } else { false } - } else { false } - } => { + } else { + false + } + } => + { // if `class A(Generic[T, V, G])` if let ast::ExprKind::Tuple { elts, .. } = &slice.node { // parse the type vars let type_vars = elts .iter() - .map(|e| - class_resolver - .as_ref() - .unwrap() - .lock() - .parse_type_annotation( + .map(|e| { + class_resolver.as_ref().unwrap().lock().parse_type_annotation( &self.to_top_level_context(), unifier.borrow_mut(), &self.primitives, - e) - ) + e, + ) + }) .collect::, _>>()?; - + // check if all are unique type vars let mut occured_type_var_id: HashSet = HashSet::new(); - let all_unique_type_var = type_vars - .iter() - .all(|x| { - let ty = unifier.get_ty(*x); - if let TypeEnum::TVar {id, ..} = ty.as_ref() { - occured_type_var_id.insert(*id) - } else { false } - }); - - if !all_unique_type_var { return Err("expect unique type variables".into()) } - + let all_unique_type_var = type_vars.iter().all(|x| { + let ty = unifier.get_ty(*x); + if let TypeEnum::TVar { id, .. 
} = ty.as_ref() { + occured_type_var_id.insert(*id) + } else { + false + } + }); + + if !all_unique_type_var { + return Err("expect unique type variables".into()); + } + // add to TopLevelDef class_def_type_vars.extend(type_vars); - + // `class A(Generic[T])` } else { - let ty = - class_resolver - .as_ref() - .unwrap() - .lock() - .parse_type_annotation( + let ty = + class_resolver.as_ref().unwrap().lock().parse_type_annotation( &self.to_top_level_context(), unifier.borrow_mut(), &self.primitives, - &slice + &slice, )?; // check if it is type var - let is_type_var = matches!( - unifier.get_ty(ty).as_ref(), - &TypeEnum::TVar { .. } - ); - if !is_type_var { return Err("expect type variable here".into()) } - + let is_type_var = + matches!(unifier.get_ty(ty).as_ref(), &TypeEnum::TVar { .. }); + if !is_type_var { + return Err("expect type variable here".into()); + } + // add to TopLevelDef class_def_type_vars.push(ty); } } - + // if others, do nothing in this function - _ => continue + _ => continue, } } - - }; + } Ok(()) } @@ -420,30 +413,29 @@ impl TopLevelComposer { for (class_def, class_ast) in def_list .iter_mut() .zip(ast_list.iter()) - .collect::, &Option>)>>() { - let ( - class_bases, - class_ancestors, - class_resolver - ) = { - if let TopLevelDef::Class { - ancestors, - resolver, - .. - } = class_def.get_mut() { - if let Some(ast::Located {node: ast::StmtKind::ClassDef { - bases, - .. - }, .. }) = class_ast { + .collect::, &Option>)>>() + { + let (class_bases, class_ancestors, class_resolver) = { + if let TopLevelDef::Class { ancestors, resolver, .. } = class_def.get_mut() { + if let Some(ast::Located { + node: ast::StmtKind::ClassDef { bases, .. }, .. + }) = class_ast + { (bases, ancestors, resolver) - } else { unreachable!("must be both class") } - } else { continue } - }; + } else { + unreachable!("must be both class") + } + } else { + continue; + } + }; for b in class_bases { // type vars have already been handled, so skip on `Generic[...]` - if let ast::ExprKind::Subscript {value, ..} = &b.node { - if let ast::ExprKind::Name {id, ..} = &value.node { - if id == "Generic" { continue } + if let ast::ExprKind::Subscript { value, .. } = &b.node { + if let ast::ExprKind::Name { id, .. } = &value.node { + if id == "Generic" { + continue; + } } } // get the def id of the base class @@ -451,18 +443,19 @@ impl TopLevelComposer { &self.to_top_level_context(), unifier.borrow_mut(), &self.primitives, - b + b, )?; - let base_id = - if let TypeEnum::TObj {obj_id, ..} = unifier.get_ty(base_ty).as_ref() { + let base_id = + if let TypeEnum::TObj { obj_id, .. } = unifier.get_ty(base_ty).as_ref() { *obj_id - } else { return Err("expect concrete class/type to be base class".into()) }; - + } else { + return Err("expect concrete class/type to be base class".into()); + }; + // write to the class ancestors class_ancestors.push(base_id); } - - }; + } Ok(()) } @@ -478,21 +471,18 @@ impl TopLevelComposer { let class_ind = to_be_analyzed_class.remove(0).0; let (class_name, class_body) = { let class_ast = &ast_list[class_ind]; - if let Some( - ast::Located { node: - ast::StmtKind::ClassDef { - name, - body, - .. - }, - .. - } - ) = class_ast { + if let Some(ast::Located { + node: ast::StmtKind::ClassDef { name, body, .. }, .. 
+ }) = class_ast + { (name, body) - } else { unreachable!("should be class def ast") } + } else { + unreachable!("should be class def ast") + } }; - let class_methods_parsing_result: Vec<(String, Type, DefinitionId)> = Default::default(); + let class_methods_parsing_result: Vec<(String, Type, DefinitionId)> = + Default::default(); let class_fields_parsing_result: Vec<(String, Type)> = Default::default(); for b in class_body { if let ast::StmtKind::FunctionDef { @@ -501,25 +491,24 @@ impl TopLevelComposer { name: method_name, returns: method_returns_ast, .. - } = &b.node { + } = &b.node + { let (class_def, method_def) = { // unwrap should not fail let method_ind = class_method_to_def_id - .get(&Self::name_mangling( - class_name.into(), - method_name) - ).unwrap().0; - + .get(&Self::name_mangling(class_name.into(), method_name)) + .unwrap() + .0; + // split the def_list to two parts to get the // mutable reference to both the method and the class assert_ne!(method_ind, class_ind); - let min_ind = (if method_ind > class_ind { class_ind } else { method_ind }) + 1; - let (head_slice, - tail_slice - ) = def_list.split_at_mut(min_ind); + let min_ind = + (if method_ind > class_ind { class_ind } else { method_ind }) + 1; + let (head_slice, tail_slice) = def_list.split_at_mut(min_ind); let (new_method_ind, new_class_ind) = ( if method_ind >= min_ind { method_ind - min_ind } else { method_ind }, - if class_ind >= min_ind { class_ind - min_ind } else { class_ind } + if class_ind >= min_ind { class_ind - min_ind } else { class_ind }, ); if new_class_ind == class_ind { (&mut head_slice[new_class_ind], &mut tail_slice[new_method_ind]) @@ -527,19 +516,14 @@ impl TopLevelComposer { (&mut tail_slice[new_class_ind], &mut head_slice[new_method_ind]) } }; - let ( - class_fields, - class_methods, - class_resolver - ) = { - if let TopLevelDef::Class { - resolver, - fields, - methods, - .. - } = class_def.get_mut() { + let (class_fields, class_methods, class_resolver) = { + if let TopLevelDef::Class { resolver, fields, methods, .. } = + class_def.get_mut() + { (fields, methods, resolver) - } else { unreachable!("must be class def here") } + } else { + unreachable!("must be class def here") + } }; let arg_tys = method_args_ast @@ -550,18 +534,17 @@ impl TopLevelComposer { .node .annotation .as_ref() - .ok_or_else(|| "type annotation for function parameter is needed".to_string())? + .ok_or_else(|| { + "type annotation for function parameter is needed".to_string() + })? 
.as_ref(); - let ty = class_resolver - .as_ref() - .unwrap() - .lock() - .parse_type_annotation( + let ty = + class_resolver.as_ref().unwrap().lock().parse_type_annotation( &self.to_top_level_context(), unifier.borrow_mut(), &self.primitives, - annotation + annotation, )?; Ok(ty) }) @@ -569,39 +552,37 @@ impl TopLevelComposer { let ret_ty = method_returns_ast .as_ref() .and_then(|x| { - Some( - class_resolver - .as_ref() - .unwrap() - .lock() - .parse_type_annotation( - &self.to_top_level_context(), - unifier.borrow_mut(), - &self.primitives, - x.as_ref() - ) - ) - }).unwrap()?; - + Some(class_resolver.as_ref().unwrap().lock().parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + x.as_ref(), + )) + }) + .unwrap()?; + let all_tys_ok = { let ret_ty_iter = vec![ret_ty]; let ret_ty_iter = ret_ty_iter.iter(); let mut all_tys = chain!(arg_tys.iter(), ret_ty_iter); all_tys.all(|x| { - let type_enum = unifier.get_ty(*x); - match type_enum.as_ref() { - TypeEnum::TObj {obj_id, ..} => { - !to_be_analyzed_class.contains(obj_id) - }, - TypeEnum::TVirtual { ty } => { - if let TypeEnum::TObj {obj_id, ..} = unifier.get_ty(*ty).as_ref() { - !to_be_analyzed_class.contains(obj_id) - } else { unreachable!() } - }, - _ => unreachable!() + let type_enum = unifier.get_ty(*x); + match type_enum.as_ref() { + TypeEnum::TObj { obj_id, .. } => { + !to_be_analyzed_class.contains(obj_id) } + TypeEnum::TVirtual { ty } => { + if let TypeEnum::TObj { obj_id, .. } = + unifier.get_ty(*ty).as_ref() + { + !to_be_analyzed_class.contains(obj_id) + } else { + unreachable!() + } + } + _ => unreachable!(), } - ) + }) }; if all_tys_ok { @@ -614,21 +595,21 @@ impl TopLevelComposer { } } else { // what should we do with `class A: a = 3`? - continue + continue; } } - // TODO: now it should be confirmed that every + // TODO: now it should be confirmed that every // methods and fields of the class can be correctly typed, put the results // into the actual def_list and the unifier } Ok(()) } - + fn analyze_top_level_inheritance(&mut self) -> Result<(), String> { unimplemented!() } - + fn analyze_top_level_field_instantiation(&mut self) -> Result<(), String> { unimplemented!() } diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index d7eb0dbe..7a9fbd6d 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -80,11 +80,18 @@ pub fn impl_binop( } else { unifier.get_fresh_var_with_range(other_ty).0 }; - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ret_ty, - vars: HashMap::new(), - args: vec![FuncArg { ty: other, default_value: None, name: "other".into() }], - }.into())) + unifier.add_ty(TypeEnum::TFunc( + FunSignature { + ret: ret_ty, + vars: HashMap::new(), + args: vec![FuncArg { + ty: other, + default_value: None, + name: "other".into(), + }], + } + .into(), + )) }); fields.borrow_mut().insert(binop_assign_name(op).into(), { @@ -93,11 +100,18 @@ pub fn impl_binop( } else { unifier.get_fresh_var_with_range(other_ty).0 }; - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ret_ty, - vars: HashMap::new(), - args: vec![FuncArg { ty: other, default_value: None, name: "other".into() }], - }.into())) + unifier.add_ty(TypeEnum::TFunc( + FunSignature { + ret: ret_ty, + vars: HashMap::new(), + args: vec![FuncArg { + ty: other, + default_value: None, + name: "other".into(), + }], + } + .into(), + )) }); } } else { @@ -116,11 +130,9 @@ pub fn impl_unaryop( for op in ops { fields.borrow_mut().insert( 
unaryop_name(op).into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ret_ty, - vars: HashMap::new(), - args: vec![], - }.into())), + unifier.add_ty(TypeEnum::TFunc( + FunSignature { ret: ret_ty, vars: HashMap::new(), args: vec![] }.into(), + )), ); } } else { @@ -139,11 +151,18 @@ pub fn impl_cmpop( for op in ops { fields.borrow_mut().insert( comparison_name(op).unwrap().into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: store.bool, - vars: HashMap::new(), - args: vec![FuncArg { ty: other_ty, default_value: None, name: "other".into() }], - }.into())), + unifier.add_ty(TypeEnum::TFunc( + FunSignature { + ret: store.bool, + vars: HashMap::new(), + args: vec![FuncArg { + ty: other_ty, + default_value: None, + name: "other".into(), + }], + } + .into(), + )), ); } } else { diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 29eafdb4..8cae9564 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -176,18 +176,19 @@ impl TestEnvironment { identifier_mapping.insert( "Foo".into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - args: vec![], - ret: foo_ty, - vars: [(id, v0)].iter().cloned().collect(), - }.into())), + unifier.add_ty(TypeEnum::TFunc( + FunSignature { + args: vec![], + ret: foo_ty, + vars: [(id, v0)].iter().cloned().collect(), + } + .into(), + )), ); - let fun = unifier.add_ty(TypeEnum::TFunc(FunSignature { - args: vec![], - ret: int32, - vars: Default::default(), - }.into())); + let fun = unifier.add_ty(TypeEnum::TFunc( + FunSignature { args: vec![], ret: int32, vars: Default::default() }.into(), + )); let bar = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(6), fields: [("a".into(), int32), ("b".into(), fun)] @@ -207,11 +208,9 @@ impl TestEnvironment { })); identifier_mapping.insert( "Bar".into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - args: vec![], - ret: bar, - vars: Default::default(), - }.into())), + unifier.add_ty(TypeEnum::TFunc( + FunSignature { args: vec![], ret: bar, vars: Default::default() }.into(), + )), ); let bar2 = unifier.add_ty(TypeEnum::TObj { @@ -233,11 +232,9 @@ impl TestEnvironment { })); identifier_mapping.insert( "Bar2".into(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - args: vec![], - ret: bar2, - vars: Default::default(), - }.into())), + unifier.add_ty(TypeEnum::TFunc( + FunSignature { args: vec![], ret: bar2, vars: Default::default() }.into(), + )), ); let class_names = [("Bar".into(), bar), ("Bar2".into(), bar2)].iter().cloned().collect(); diff --git a/nac3core/src/typecheck/typedef/mod.rs b/nac3core/src/typecheck/typedef/mod.rs index 66f66354..621ea65b 100644 --- a/nac3core/src/typecheck/typedef/mod.rs +++ b/nac3core/src/typecheck/typedef/mod.rs @@ -472,7 +472,8 @@ impl Unifier { } (TCall(calls), TFunc(signature)) => { self.occur_check(a, b)?; - let required: Vec = signature.borrow() + let required: Vec = signature + .borrow() .args .iter() .filter(|v| v.default_value.is_none()) @@ -494,8 +495,13 @@ impl Unifier { // we check to make sure that all required arguments (those without default // arguments) are provided, and do not provide the same argument twice. 
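                // `required` tracks parameter names that still need a value: each positional
                // argument pops one entry, and if anything is left after all call arguments
                // are consumed, the call fails with "Expected more arguments".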
let mut required = required.clone(); - let mut all_names: Vec<_> = - signature.borrow().args.iter().map(|v| (v.name.clone(), v.ty)).rev().collect(); + let mut all_names: Vec<_> = signature + .borrow() + .args + .iter() + .map(|v| (v.name.clone(), v.ty)) + .rev() + .collect(); for (i, t) in posargs.iter().enumerate() { if signature.borrow().args.len() <= i { return Err("Too many arguments.".to_string()); @@ -741,7 +747,11 @@ impl Unifier { let params = new_params.unwrap_or_else(|| params.clone()); let ret = new_ret.unwrap_or_else(|| *ret); let args = new_args.into_owned(); - Some(self.add_ty(TypeEnum::TFunc(FunSignature { args, ret, vars: params }.into()))) + Some( + self.add_ty(TypeEnum::TFunc( + FunSignature { args, ret, vars: params }.into(), + )), + ) } else { None } diff --git a/nac3core/src/typecheck/typedef/test.rs b/nac3core/src/typecheck/typedef/test.rs index 3aaa7891..2972b3f9 100644 --- a/nac3core/src/typecheck/typedef/test.rs +++ b/nac3core/src/typecheck/typedef/test.rs @@ -329,11 +329,9 @@ fn test_invalid_unification( fn test_virtual() { let mut env = TestEnvironment::new(); let int = env.parse("int", &HashMap::new()); - let fun = env.unifier.add_ty(TypeEnum::TFunc(FunSignature { - args: vec![], - ret: int, - vars: HashMap::new(), - }.into())); + let fun = env.unifier.add_ty(TypeEnum::TFunc( + FunSignature { args: vec![], ret: int, vars: HashMap::new() }.into(), + )); let bar = env.unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(5), fields: [("f".to_string(), fun), ("a".to_string(), int)] From a94145348a9ad8ce117dbb60a46c6f83ccf37509 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Tue, 17 Aug 2021 11:06:45 +0800 Subject: [PATCH 120/131] fix on comments and redundant code, start handling 'self' things --- nac3core/src/top_level.rs | 136 ++++++++++++++++++++++++++------------ 1 file changed, 94 insertions(+), 42 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index feec273d..844be24c 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -70,7 +70,7 @@ pub struct TopLevelContext { pub struct TopLevelComposer { // list of top level definitions, same as top level context pub definition_list: Arc>>>, - // list of top level ast, the index is same as the field `definition_list` and `ty_list` + // list of top level ast, the index is same as the field `definition_list` pub ast_list: RwLock>>>, // start as a primitive unifier, will add more top_level defs inside pub unifier: RwLock, @@ -232,7 +232,7 @@ impl TopLevelComposer { self.unifier.write().add_ty(TypeEnum::TFunc( FunSignature { args: Default::default(), - ret: self.primitives.none.into(), + ret: self.primitives.none, vars: Default::default(), } .into(), @@ -319,25 +319,15 @@ impl TopLevelComposer { // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params ast::ExprKind::Subscript { value, slice, .. } if { - // can only be `Generic[...]` and this can only appear once - if let ast::ExprKind::Name { id, .. } = &value.node { - if id == "Generic" { - if !is_generic { - is_generic = true; - true - } else { - return Err( - "Only single Generic[...] can be in bases".into() - ); - } - } else { - false - } - } else { - false - } + matches!(&value.node, ast::ExprKind::Name { id, .. } if id == "Generic") } => { + if !is_generic { + is_generic = true; + } else { + return Err("Only single Generic[...] can be in bases".into()); + } + // if `class A(Generic[T, V, G])` if let ast::ExprKind::Tuple { elts, .. 
} = &slice.node { // parse the type vars @@ -530,36 +520,93 @@ impl TopLevelComposer { .args .iter() .map(|x| -> Result { - let annotation = x - .node - .annotation - .as_ref() - .ok_or_else(|| { - "type annotation for function parameter is needed".to_string() - })? - .as_ref(); - - let ty = - class_resolver.as_ref().unwrap().lock().parse_type_annotation( - &self.to_top_level_context(), - unifier.borrow_mut(), - &self.primitives, - annotation, - )?; - Ok(ty) + if x.node.arg != "self" { + let annotation = x + .node + .annotation + .as_ref() + .ok_or_else(|| { + "type annotation for function parameter is needed".to_string() + })? + .as_ref(); + + let ty = + class_resolver.as_ref().unwrap().lock().parse_type_annotation( + &self.to_top_level_context(), + unifier.borrow_mut(), + &self.primitives, + annotation, + )?; + Ok(ty) + } else { + // TODO: handle self, how + unimplemented!() + } }) .collect::, _>>()?; - let ret_ty = method_returns_ast + + let ret_ty = if method_name != "__init__" { + method_returns_ast .as_ref() - .and_then(|x| { - Some(class_resolver.as_ref().unwrap().lock().parse_type_annotation( + .map(|x| + class_resolver.as_ref().unwrap().lock().parse_type_annotation( &self.to_top_level_context(), unifier.borrow_mut(), &self.primitives, x.as_ref(), - )) - }) - .unwrap()?; + ) + ) + .ok_or_else(|| "return type annotation needed".to_string())?? + } else { + // TODO: self type, how + unimplemented!() + }; + + // handle fields + if method_name == "__init__" { + for body in method_body_ast { + match &body.node { + ast::StmtKind::AnnAssign { + target, + annotation, + .. + } if { + if let ast::ExprKind::Attribute { + value, + attr, + .. + } = &target.node { + if let ast::ExprKind::Name {id, ..} = &value.node { + id == "self" + } else { false } + } else { false } + } => { + // TODO: record this field with its type + }, + + // TODO: exclude those without type annotation + ast::StmtKind::Assign { + targets, + .. + } if { + if let ast::ExprKind::Attribute { + value, + attr, + .. + } = &targets[0].node { + if let ast::ExprKind::Name {id, ..} = &value.node { + id == "self" + } else { false } + } else { false } + } => { + unimplemented!() + }, + + // do nothing + _ => { } + } + } + } let all_tys_ok = { let ret_ty_iter = vec![ret_ty]; @@ -580,6 +627,7 @@ impl TopLevelComposer { unreachable!() } } + TypeEnum::TVar { .. 
} => true, _ => unreachable!(), } }) @@ -610,6 +658,10 @@ impl TopLevelComposer { unimplemented!() } + fn analyze_top_level_function(&mut self) -> Result<(), String> { + unimplemented!() + } + fn analyze_top_level_field_instantiation(&mut self) -> Result<(), String> { unimplemented!() } From 276daa03f7e59c93984a01f833a4d1afede49018 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Tue, 17 Aug 2021 14:01:18 +0800 Subject: [PATCH 121/131] start refactorinng for less redundancy --- nac3core/src/top_level.rs | 58 +++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 844be24c..3910ef72 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,4 +1,5 @@ use std::borrow::BorrowMut; +use std::ops::Deref; use std::{collections::HashMap, collections::HashSet, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; @@ -67,25 +68,36 @@ pub struct TopLevelContext { pub unifiers: Arc>>, } +impl TopLevelContext { + pub fn get_def_list<'a>(&'a self) -> Vec<&'a TopLevelDef> { + let list = self.definitions.read(); + let list = list.deref(); + let list = list.iter().map(|x| { + x.read().deref() + }).collect::>(); + list + } +} + pub struct TopLevelComposer { // list of top level definitions, same as top level context - pub definition_list: Arc>>>, + pub definition_list: Arc>>, // list of top level ast, the index is same as the field `definition_list` - pub ast_list: RwLock>>>, + pub ast_list: Vec>>, // start as a primitive unifier, will add more top_level defs inside - pub unifier: RwLock, + pub unifier: Unifier, // primitive store pub primitives: PrimitiveStore, // mangled class method name to def_id - pub class_method_to_def_id: RwLock>, + pub class_method_to_def_id: HashMap, // record the def id of the classes whoses fields and methods are to be analyzed - pub to_be_analyzed_class: RwLock>, + pub to_be_analyzed_class: Vec, } impl TopLevelComposer { pub fn to_top_level_context(&self) -> TopLevelContext { TopLevelContext { - definitions: self.definition_list.clone(), + definitions: RwLock::new(self.definition_list.).into(), // FIXME: all the big unifier or? 
unifiers: Default::default(), } @@ -144,8 +156,8 @@ impl TopLevelComposer { let ast_list: Vec>> = vec![None, None, None, None, None]; let composer = TopLevelComposer { - definition_list: RwLock::new(top_level_def_list).into(), - ast_list: RwLock::new(ast_list), + definition_list: top_level_def_list, + ast_list, primitives: primitives.0, unifier: primitives.1.into(), class_method_to_def_id: Default::default(), @@ -200,7 +212,7 @@ impl TopLevelComposer { ast: ast::Stmt<()>, resolver: Option>>, ) -> Result<(String, DefinitionId), String> { - let (mut def_list, mut ast_list) = (self.definition_list.write(), self.ast_list.write()); + let (mut def_list, mut ast_list) = (&mut self.definition_list, &mut self.ast_list); assert_eq!(def_list.len(), ast_list.len()); @@ -229,7 +241,7 @@ impl TopLevelComposer { def_list.push( Self::make_top_level_function_def( fun_name.clone(), - self.unifier.write().add_ty(TypeEnum::TFunc( + self.unifier.add_ty(TypeEnum::TFunc( FunSignature { args: Default::default(), ret: self.primitives.none, @@ -245,7 +257,7 @@ impl TopLevelComposer { ast_list.push(None); // class method, do not let the symbol manager manage it, use our own map - self.class_method_to_def_id.write().insert(fun_name, DefinitionId(def_id)); + self.class_method_to_def_id.insert(fun_name, DefinitionId(def_id)); } } @@ -258,7 +270,7 @@ impl TopLevelComposer { ast_list.push(None); // class, put its def_id into the to be analyzed set - let mut to_be_analyzed = self.to_be_analyzed_class.write(); + let mut to_be_analyzed = self.to_be_analyzed_class; to_be_analyzed.push(DefinitionId(class_def_id)); Ok((class_name, DefinitionId(class_def_id))) @@ -284,9 +296,9 @@ impl TopLevelComposer { /// step 1, analyze the type vars associated with top level class fn analyze_top_level_class_type_var(&mut self) -> Result<(), String> { - let mut def_list = self.definition_list.write(); - let ast_list = self.ast_list.read(); - let mut unifier = self.unifier.write(); + let mut def_list = &mut self.definition_list; + let ast_list = &self.ast_list; + let mut unifier = &mut self.unifier; for (class_def, class_ast) in def_list .iter_mut() @@ -396,9 +408,9 @@ impl TopLevelComposer { /// if the type var associated with class `B` has not been handled properly, /// the parse of type annotation of `B[int, bool]` will fail fn analyze_top_level_class_bases(&mut self) -> Result<(), String> { - let mut def_list = self.definition_list.write(); - let ast_list = self.ast_list.read(); - let mut unifier = self.unifier.write(); + let mut def_list = &mut self.definition_list; + let ast_list = &self.ast_list; + let mut unifier = &mut self.unifier; for (class_def, class_ast) in def_list .iter_mut() @@ -451,11 +463,11 @@ impl TopLevelComposer { /// step 3, class fields and methods fn analyze_top_level_class_fields_methods(&mut self) -> Result<(), String> { - let mut def_list = self.definition_list.write(); - let ast_list = self.ast_list.read(); - let mut unifier = self.unifier.write(); - let class_method_to_def_id = self.class_method_to_def_id.read(); - let mut to_be_analyzed_class = self.to_be_analyzed_class.write(); + let mut def_list = &mut self.definition_list; + let ast_list = &self.ast_list; + let mut unifier = &mut self.unifier; + let class_method_to_def_id = &self.class_method_to_def_id; + let mut to_be_analyzed_class = &mut self.to_be_analyzed_class; while !to_be_analyzed_class.is_empty() { let class_ind = to_be_analyzed_class.remove(0).0; From 619963dc8c55bc24d5166d3fdf5a0f47a2b553ae Mon Sep 17 00:00:00 2001 From: ychenfo Date: Tue, 17 Aug 
2021 16:36:06 +0800 Subject: [PATCH 122/131] removed locks in toplevelcomposer --- nac3core/src/top_level.rs | 60 ++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 3910ef72..3be1d16d 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -68,20 +68,10 @@ pub struct TopLevelContext { pub unifiers: Arc>>, } -impl TopLevelContext { - pub fn get_def_list<'a>(&'a self) -> Vec<&'a TopLevelDef> { - let list = self.definitions.read(); - let list = list.deref(); - let list = list.iter().map(|x| { - x.read().deref() - }).collect::>(); - list - } -} pub struct TopLevelComposer { // list of top level definitions, same as top level context - pub definition_list: Arc>>, + pub definition_list: Arc>>>, // list of top level ast, the index is same as the field `definition_list` pub ast_list: Vec>>, // start as a primitive unifier, will add more top_level defs inside @@ -97,7 +87,7 @@ pub struct TopLevelComposer { impl TopLevelComposer { pub fn to_top_level_context(&self) -> TopLevelContext { TopLevelContext { - definitions: RwLock::new(self.definition_list.).into(), + definitions: self.definition_list.clone(), // FIXME: all the big unifier or? unifiers: Default::default(), } @@ -156,7 +146,7 @@ impl TopLevelComposer { let ast_list: Vec>> = vec![None, None, None, None, None]; let composer = TopLevelComposer { - definition_list: top_level_def_list, + definition_list: RwLock::new(top_level_def_list).into(), ast_list, primitives: primitives.0, unifier: primitives.1.into(), @@ -212,7 +202,7 @@ impl TopLevelComposer { ast: ast::Stmt<()>, resolver: Option>>, ) -> Result<(String, DefinitionId), String> { - let (mut def_list, mut ast_list) = (&mut self.definition_list, &mut self.ast_list); + let (mut def_list, ast_list) = (self.definition_list.write(), &mut self.ast_list); assert_eq!(def_list.len(), ast_list.len()); @@ -270,7 +260,7 @@ impl TopLevelComposer { ast_list.push(None); // class, put its def_id into the to be analyzed set - let mut to_be_analyzed = self.to_be_analyzed_class; + let to_be_analyzed = &mut self.to_be_analyzed_class; to_be_analyzed.push(DefinitionId(class_def_id)); Ok((class_name, DefinitionId(class_def_id))) @@ -296,9 +286,11 @@ impl TopLevelComposer { /// step 1, analyze the type vars associated with top level class fn analyze_top_level_class_type_var(&mut self) -> Result<(), String> { - let mut def_list = &mut self.definition_list; + let mut def_list = self.definition_list.write(); let ast_list = &self.ast_list; - let mut unifier = &mut self.unifier; + let converted_top_level = &self.to_top_level_context(); + let primitives = &self.primitives; + let unifier = &mut self.unifier; for (class_def, class_ast) in def_list .iter_mut() @@ -347,9 +339,9 @@ impl TopLevelComposer { .iter() .map(|e| { class_resolver.as_ref().unwrap().lock().parse_type_annotation( - &self.to_top_level_context(), + converted_top_level, unifier.borrow_mut(), - &self.primitives, + primitives, e, ) }) @@ -377,9 +369,9 @@ impl TopLevelComposer { } else { let ty = class_resolver.as_ref().unwrap().lock().parse_type_annotation( - &self.to_top_level_context(), + converted_top_level, unifier.borrow_mut(), - &self.primitives, + primitives, &slice, )?; // check if it is type var @@ -408,9 +400,11 @@ impl TopLevelComposer { /// if the type var associated with class `B` has not been handled properly, /// the parse of type annotation of `B[int, bool]` will fail fn analyze_top_level_class_bases(&mut self) 
-> Result<(), String> { - let mut def_list = &mut self.definition_list; + let mut def_list = self.definition_list.write(); let ast_list = &self.ast_list; - let mut unifier = &mut self.unifier; + let converted_top_level = &self.to_top_level_context(); + let primitives = &self.primitives; + let unifier = &mut self.unifier; for (class_def, class_ast) in def_list .iter_mut() @@ -442,9 +436,9 @@ impl TopLevelComposer { } // get the def id of the base class let base_ty = class_resolver.as_ref().unwrap().lock().parse_type_annotation( - &self.to_top_level_context(), + converted_top_level, unifier.borrow_mut(), - &self.primitives, + primitives, b, )?; let base_id = @@ -463,11 +457,13 @@ impl TopLevelComposer { /// step 3, class fields and methods fn analyze_top_level_class_fields_methods(&mut self) -> Result<(), String> { - let mut def_list = &mut self.definition_list; + let mut def_list = self.definition_list.write(); let ast_list = &self.ast_list; - let mut unifier = &mut self.unifier; + let converted_top_level = &self.to_top_level_context(); let class_method_to_def_id = &self.class_method_to_def_id; - let mut to_be_analyzed_class = &mut self.to_be_analyzed_class; + let primitives = &self.primitives; + let to_be_analyzed_class = &mut self.to_be_analyzed_class; + let unifier = &mut self.unifier; while !to_be_analyzed_class.is_empty() { let class_ind = to_be_analyzed_class.remove(0).0; @@ -544,9 +540,9 @@ impl TopLevelComposer { let ty = class_resolver.as_ref().unwrap().lock().parse_type_annotation( - &self.to_top_level_context(), + converted_top_level, unifier.borrow_mut(), - &self.primitives, + primitives, annotation, )?; Ok(ty) @@ -562,9 +558,9 @@ impl TopLevelComposer { .as_ref() .map(|x| class_resolver.as_ref().unwrap().lock().parse_type_annotation( - &self.to_top_level_context(), + converted_top_level, unifier.borrow_mut(), - &self.primitives, + primitives, x.as_ref(), ) ) From 4fcd48e4c86a1b3b53e5636de4ef8bc1e84e9007 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Wed, 18 Aug 2021 10:01:11 +0800 Subject: [PATCH 123/131] try to use def list ast tuple and remove method_to_def_id map --- nac3core/src/top_level.rs | 185 ++++++++++-------- .../src/typecheck/type_inferencer/test.rs | 10 +- 2 files changed, 103 insertions(+), 92 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 3be1d16d..6b573ec7 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -1,12 +1,12 @@ use std::borrow::BorrowMut; -use std::ops::Deref; +use std::ops::{Deref, DerefMut}; use std::{collections::HashMap, collections::HashSet, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; use crate::symbol_resolver::SymbolResolver; use crate::typecheck::typedef::{FunSignature, FuncArg}; -use itertools::chain; +use itertools::{Itertools, chain}; use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; @@ -64,16 +64,14 @@ impl TopLevelDef { } pub struct TopLevelContext { - pub definitions: Arc>>>, + pub definitions: Arc>>>>, pub unifiers: Arc>>, } pub struct TopLevelComposer { // list of top level definitions, same as top level context - pub definition_list: Arc>>>, - // list of top level ast, the index is same as the field `definition_list` - pub ast_list: Vec>>, + pub definition_ast_list: Arc>, Option>)>>>, // start as a primitive unifier, will add more top_level defs inside pub unifier: Unifier, // primitive store @@ -86,8 +84,14 @@ pub struct TopLevelComposer { impl 
TopLevelComposer { pub fn to_top_level_context(&self) -> TopLevelContext { + let def_list = self + .definition_ast_list + .read() + .iter() + .map(|(x, _)| x.clone()) + .collect::>(); TopLevelContext { - definitions: self.definition_list.clone(), + definitions: RwLock::new(def_list).into(), // FIXME: all the big unifier or? unifiers: Default::default(), } @@ -136,18 +140,19 @@ impl TopLevelComposer { let primitives = Self::make_primitives(); let top_level_def_list = vec![ - RwLock::new(Self::make_top_level_class_def(0, None)), - RwLock::new(Self::make_top_level_class_def(1, None)), - RwLock::new(Self::make_top_level_class_def(2, None)), - RwLock::new(Self::make_top_level_class_def(3, None)), - RwLock::new(Self::make_top_level_class_def(4, None)), + Arc::new(RwLock::new(Self::make_top_level_class_def(0, None))), + Arc::new(RwLock::new(Self::make_top_level_class_def(1, None))), + Arc::new(RwLock::new(Self::make_top_level_class_def(2, None))), + Arc::new(RwLock::new(Self::make_top_level_class_def(3, None))), + Arc::new(RwLock::new(Self::make_top_level_class_def(4, None))), ]; let ast_list: Vec>> = vec![None, None, None, None, None]; let composer = TopLevelComposer { - definition_list: RwLock::new(top_level_def_list).into(), - ast_list, + definition_ast_list: RwLock::new( + top_level_def_list.into_iter().zip(ast_list).collect_vec() + ).into(), primitives: primitives.0, unifier: primitives.1.into(), class_method_to_def_id: Default::default(), @@ -202,62 +207,77 @@ impl TopLevelComposer { ast: ast::Stmt<()>, resolver: Option>>, ) -> Result<(String, DefinitionId), String> { - let (mut def_list, ast_list) = (self.definition_list.write(), &mut self.ast_list); - - assert_eq!(def_list.len(), ast_list.len()); - + let mut def_list = self.definition_ast_list.write(); match &ast.node { ast::StmtKind::ClassDef { name, body, .. } => { let class_name = name.to_string(); let class_def_id = def_list.len(); // add the class to the definition lists - def_list - .push(Self::make_top_level_class_def(class_def_id, resolver.clone()).into()); // since later when registering class method, ast will still be used, // here push None temporarly, later will move the ast inside - ast_list.push(None); + let mut class_def_ast = ( + Arc::new(RwLock::new( + Self::make_top_level_class_def(class_def_id, resolver.clone()) + )), + None + ); // parse class def body and register class methods into the def list. // module's symbol resolver would not know the name of the class methods, - // thus cannot return their definition_id? so we have to manage it ourselves - // by using `class_method_to_def_id` + // thus cannot return their definition_id + let mut class_method_name_def_ids: Vec<(String, Arc>, DefinitionId)> = Vec::new(); + let mut class_method_index_offset = 0; for b in body { - if let ast::StmtKind::FunctionDef { name, .. } = &b.node { - let fun_name = Self::name_mangling(class_name.clone(), name); - let def_id = def_list.len(); + if let ast::StmtKind::FunctionDef { name: method_name, .. 
} = &b.node { + let method_name = Self::name_mangling(class_name.clone(), method_name); + let method_def_id = def_list.len() + { + class_method_index_offset += 1; + class_method_index_offset + }; - // add to the definition list - def_list.push( - Self::make_top_level_function_def( - fun_name.clone(), - self.unifier.add_ty(TypeEnum::TFunc( - FunSignature { - args: Default::default(), - ret: self.primitives.none, - vars: Default::default(), - } - .into(), - )), - resolver.clone(), - ) - .into(), - ); + // dummy method define here // the ast of class method is in the class, push None in to the list here - ast_list.push(None); - - // class method, do not let the symbol manager manage it, use our own map - self.class_method_to_def_id.insert(fun_name, DefinitionId(def_id)); + class_method_name_def_ids.push(( + method_name.clone(), + RwLock::new(Self::make_top_level_function_def( + method_name.clone(), + self.primitives.none, + resolver.clone(), + )).into(), + DefinitionId(method_def_id) + )); + } + } + // move the ast to the entry of the class in the ast_list + class_def_ast.1 = Some(ast); + + // put methods into the class def + { + let mut class_def = class_def_ast.0.write(); + let class_def_methods = + if let TopLevelDef::Class { methods, .. } = class_def.deref_mut() { + methods + } else { unimplemented!() }; + for (name, _, id) in &class_method_name_def_ids { + class_def_methods.push((name.into(), self.primitives.none, *id)); } } - // move the ast to the entry of the class in the ast_list - ast_list[class_def_id] = Some(ast); + // now class_def_ast and class_method_def_ast_ids are ok, put them into actual def list in correct order + def_list.push(class_def_ast); + for (_, def, _) in class_method_name_def_ids { + def_list.push((def, None)); + } // put the constructor into the def_list def_list - .push(TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) }.into()); - ast_list.push(None); + .push(( + RwLock::new( + TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) } + ).into(), + None + )); // class, put its def_id into the to be analyzed set let to_be_analyzed = &mut self.to_be_analyzed_class; @@ -270,11 +290,11 @@ impl TopLevelComposer { let fun_name = name.to_string(); // add to the definition list - def_list.push( - Self::make_top_level_function_def(name.into(), self.primitives.none, resolver) + def_list.push(( + RwLock::new(Self::make_top_level_function_def(name.into(), self.primitives.none, resolver)) .into(), - ); - ast_list.push(Some(ast)); + Some(ast) + )); // return Ok((fun_name, DefinitionId(def_list.len() - 1))) @@ -286,20 +306,17 @@ impl TopLevelComposer { /// step 1, analyze the type vars associated with top level class fn analyze_top_level_class_type_var(&mut self) -> Result<(), String> { - let mut def_list = self.definition_list.write(); - let ast_list = &self.ast_list; + let mut def_list = self.definition_ast_list.write(); let converted_top_level = &self.to_top_level_context(); let primitives = &self.primitives; let unifier = &mut self.unifier; - for (class_def, class_ast) in def_list - .iter_mut() - .zip(ast_list.iter()) - .collect::, &Option>)>>() + for (class_def, class_ast) in def_list.iter_mut() { // only deal with class def here - let (class_bases, class_def_type_vars, class_resolver) = { - if let TopLevelDef::Class { type_vars, resolver, .. } = class_def.get_mut() { + let mut class_def = class_def.write(); + let (class_bases_ast, class_def_type_vars, class_resolver) = { + if let TopLevelDef::Class { type_vars, resolver, .. 
} = class_def.deref_mut() { if let Some(ast::Located { node: ast::StmtKind::ClassDef { bases, .. }, .. }) = class_ast @@ -312,9 +329,10 @@ impl TopLevelComposer { continue; } }; + let class_resolver = class_resolver.as_ref().unwrap().lock(); let mut is_generic = false; - for b in class_bases { + for b in class_bases_ast { match &b.node { // analyze typevars bounded to the class, // only support things like `class A(Generic[T, V])`, @@ -322,10 +340,7 @@ impl TopLevelComposer { // i.e. only simple names are allowed in the subscript // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params ast::ExprKind::Subscript { value, slice, .. } - if { - matches!(&value.node, ast::ExprKind::Name { id, .. } if id == "Generic") - } => - { + if matches!(&value.node, ast::ExprKind::Name { id, .. } if id == "Generic") => { if !is_generic { is_generic = true; } else { @@ -338,7 +353,7 @@ impl TopLevelComposer { let type_vars = elts .iter() .map(|e| { - class_resolver.as_ref().unwrap().lock().parse_type_annotation( + class_resolver.parse_type_annotation( converted_top_level, unifier.borrow_mut(), primitives, @@ -368,7 +383,7 @@ impl TopLevelComposer { // `class A(Generic[T])` } else { let ty = - class_resolver.as_ref().unwrap().lock().parse_type_annotation( + class_resolver.parse_type_annotation( converted_top_level, unifier.borrow_mut(), primitives, @@ -400,19 +415,16 @@ impl TopLevelComposer { /// if the type var associated with class `B` has not been handled properly, /// the parse of type annotation of `B[int, bool]` will fail fn analyze_top_level_class_bases(&mut self) -> Result<(), String> { - let mut def_list = self.definition_list.write(); - let ast_list = &self.ast_list; + let mut def_list = self.definition_ast_list.write(); let converted_top_level = &self.to_top_level_context(); let primitives = &self.primitives; let unifier = &mut self.unifier; - for (class_def, class_ast) in def_list - .iter_mut() - .zip(ast_list.iter()) - .collect::, &Option>)>>() + for (class_def, class_ast) in def_list.iter_mut() { + let mut class_def = class_def.write(); let (class_bases, class_ancestors, class_resolver) = { - if let TopLevelDef::Class { ancestors, resolver, .. } = class_def.get_mut() { + if let TopLevelDef::Class { ancestors, resolver, .. } = class_def.deref_mut() { if let Some(ast::Located { node: ast::StmtKind::ClassDef { bases, .. }, .. }) = class_ast @@ -425,6 +437,7 @@ impl TopLevelComposer { continue; } }; + let class_resolver = class_resolver.as_ref().unwrap().lock(); for b in class_bases { // type vars have already been handled, so skip on `Generic[...]` if let ast::ExprKind::Subscript { value, .. 
} = &b.node { @@ -435,7 +448,7 @@ impl TopLevelComposer { } } // get the def id of the base class - let base_ty = class_resolver.as_ref().unwrap().lock().parse_type_annotation( + let base_ty = class_resolver.parse_type_annotation( converted_top_level, unifier.borrow_mut(), primitives, @@ -457,31 +470,29 @@ impl TopLevelComposer { /// step 3, class fields and methods fn analyze_top_level_class_fields_methods(&mut self) -> Result<(), String> { - let mut def_list = self.definition_list.write(); - let ast_list = &self.ast_list; + let mut def_list = self.definition_ast_list.write(); let converted_top_level = &self.to_top_level_context(); - let class_method_to_def_id = &self.class_method_to_def_id; let primitives = &self.primitives; let to_be_analyzed_class = &mut self.to_be_analyzed_class; let unifier = &mut self.unifier; while !to_be_analyzed_class.is_empty() { let class_ind = to_be_analyzed_class.remove(0).0; - let (class_name, class_body) = { - let class_ast = &ast_list[class_ind]; + let (class_name, class_body, classs_def) = { + let class_ast = def_list[class_ind].1.as_ref(); if let Some(ast::Located { node: ast::StmtKind::ClassDef { name, body, .. }, .. }) = class_ast { - (name, body) + let class_def = def_list[class_ind].0; + (name, body, class_def) } else { unreachable!("should be class def ast") } }; - let class_methods_parsing_result: Vec<(String, Type, DefinitionId)> = - Default::default(); - let class_fields_parsing_result: Vec<(String, Type)> = Default::default(); + let class_methods_parsing_result: Vec<(String, Type, DefinitionId)> = vec![]; + let class_fields_parsing_result: Vec<(String, Type)> = vec![]; for b in class_body { if let ast::StmtKind::FunctionDef { args: method_args_ast, @@ -516,7 +527,7 @@ impl TopLevelComposer { }; let (class_fields, class_methods, class_resolver) = { if let TopLevelDef::Class { resolver, fields, methods, .. 
} = - class_def.get_mut() + class_def.0.get_mut() { (fields, methods, resolver) } else { diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index 8cae9564..ca1ee78c 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -118,7 +118,7 @@ impl TestEnvironment { fn new() -> TestEnvironment { let mut unifier = Unifier::new(); let mut identifier_mapping = HashMap::new(); - let mut top_level_defs = Vec::new(); + let mut top_level_defs: Vec>> = Vec::new(); let int32 = unifier.add_ty(TypeEnum::TObj { obj_id: DefinitionId(0), fields: HashMap::new().into(), @@ -153,7 +153,7 @@ impl TestEnvironment { methods: Default::default(), ancestors: Default::default(), resolver: None, - })); + }).into()); } let primitives = PrimitiveStore { int32, int64, float, bool, none }; @@ -172,7 +172,7 @@ impl TestEnvironment { methods: Default::default(), ancestors: Default::default(), resolver: None, - })); + }).into()); identifier_mapping.insert( "Foo".into(), @@ -205,7 +205,7 @@ impl TestEnvironment { methods: Default::default(), ancestors: Default::default(), resolver: None, - })); + }).into()); identifier_mapping.insert( "Bar".into(), unifier.add_ty(TypeEnum::TFunc( @@ -229,7 +229,7 @@ impl TestEnvironment { methods: Default::default(), ancestors: Default::default(), resolver: None, - })); + }).into()); identifier_mapping.insert( "Bar2".into(), unifier.add_ty(TypeEnum::TFunc( From 529442590fe81050c3788e22699ffd531604a505 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Wed, 18 Aug 2021 16:28:17 +0800 Subject: [PATCH 124/131] some parsing of top level class fields and methods --- nac3core/src/top_level.rs | 367 +++++++++++++++++++++----------------- 1 file changed, 201 insertions(+), 166 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 6b573ec7..06507437 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -4,9 +4,9 @@ use std::{collections::HashMap, collections::HashSet, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; -use crate::symbol_resolver::SymbolResolver; +use crate::{symbol_resolver::SymbolResolver, typecheck::typedef::Mapping}; use crate::typecheck::typedef::{FunSignature, FuncArg}; -use itertools::{Itertools, chain}; +use itertools::Itertools; use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; @@ -154,7 +154,7 @@ impl TopLevelComposer { top_level_def_list.into_iter().zip(ast_list).collect_vec() ).into(), primitives: primitives.0, - unifier: primitives.1.into(), + unifier: primitives.1, class_method_to_def_id: Default::default(), to_be_analyzed_class: Default::default(), }; @@ -252,22 +252,11 @@ impl TopLevelComposer { // move the ast to the entry of the class in the ast_list class_def_ast.1 = Some(ast); - // put methods into the class def - { - let mut class_def = class_def_ast.0.write(); - let class_def_methods = - if let TopLevelDef::Class { methods, .. 
} = class_def.deref_mut() { - methods - } else { unimplemented!() }; - for (name, _, id) in &class_method_name_def_ids { - class_def_methods.push((name.into(), self.primitives.none, *id)); - } - } - // now class_def_ast and class_method_def_ast_ids are ok, put them into actual def list in correct order def_list.push(class_def_ast); - for (_, def, _) in class_method_name_def_ids { + for (name, def, id) in class_method_name_def_ids { def_list.push((def, None)); + self.class_method_to_def_id.insert(name, id); } // put the constructor into the def_list @@ -280,8 +269,7 @@ impl TopLevelComposer { )); // class, put its def_id into the to be analyzed set - let to_be_analyzed = &mut self.to_be_analyzed_class; - to_be_analyzed.push(DefinitionId(class_def_id)); + self.to_be_analyzed_class.push(DefinitionId(class_def_id)); Ok((class_name, DefinitionId(class_def_id))) } @@ -461,38 +449,50 @@ impl TopLevelComposer { return Err("expect concrete class/type to be base class".into()); }; - // write to the class ancestors - class_ancestors.push(base_id); + // write to the class ancestors, make sure the uniqueness + if !class_ancestors.contains(&base_id) { + class_ancestors.push(base_id); + } else { + return Err("cannot specify the same base class twice".into()) + } } } Ok(()) } /// step 3, class fields and methods + // FIXME: need analyze base classes here + // FIXME: how to deal with self type + // FIXME: how to prevent cycles fn analyze_top_level_class_fields_methods(&mut self) -> Result<(), String> { - let mut def_list = self.definition_ast_list.write(); + let mut def_ast_list = self.definition_ast_list.write(); let converted_top_level = &self.to_top_level_context(); let primitives = &self.primitives; let to_be_analyzed_class = &mut self.to_be_analyzed_class; let unifier = &mut self.unifier; + + 'class: loop{ + if to_be_analyzed_class.is_empty() { break; } - while !to_be_analyzed_class.is_empty() { let class_ind = to_be_analyzed_class.remove(0).0; - let (class_name, class_body, classs_def) = { - let class_ast = def_list[class_ind].1.as_ref(); + let (class_name, class_body, class_resolver) = { + let (class_def, class_ast) = &mut def_ast_list[class_ind]; if let Some(ast::Located { node: ast::StmtKind::ClassDef { name, body, .. }, .. - }) = class_ast + }) = class_ast.as_ref() { - let class_def = def_list[class_ind].0; - (name, body, class_def) + if let TopLevelDef::Class { resolver, .. } = class_def.write().deref() { + (name, body, resolver.as_ref().unwrap().clone()) + } else { unreachable!() } } else { unreachable!("should be class def ast") } }; - let class_methods_parsing_result: Vec<(String, Type, DefinitionId)> = vec![]; - let class_fields_parsing_result: Vec<(String, Type)> = vec![]; + // need these vectors to check re-defining methods, class fields + // and store the parsed result in case some method cannot be typed for now + let mut class_methods_parsing_result: Vec<(String, Type, DefinitionId)> = vec![]; + let mut class_fields_parsing_result: Vec<(String, Type)> = vec![]; for b in class_body { if let ast::StmtKind::FunctionDef { args: method_args_ast, @@ -502,181 +502,193 @@ impl TopLevelComposer { .. 
} = &b.node { - let (class_def, method_def) = { - // unwrap should not fail - let method_ind = class_method_to_def_id - .get(&Self::name_mangling(class_name.into(), method_name)) - .unwrap() - .0; - - // split the def_list to two parts to get the - // mutable reference to both the method and the class - assert_ne!(method_ind, class_ind); - let min_ind = - (if method_ind > class_ind { class_ind } else { method_ind }) + 1; - let (head_slice, tail_slice) = def_list.split_at_mut(min_ind); - let (new_method_ind, new_class_ind) = ( - if method_ind >= min_ind { method_ind - min_ind } else { method_ind }, - if class_ind >= min_ind { class_ind - min_ind } else { class_ind }, - ); - if new_class_ind == class_ind { - (&mut head_slice[new_class_ind], &mut tail_slice[new_method_ind]) - } else { - (&mut tail_slice[new_class_ind], &mut head_slice[new_method_ind]) - } - }; - let (class_fields, class_methods, class_resolver) = { - if let TopLevelDef::Class { resolver, fields, methods, .. } = - class_def.0.get_mut() - { - (fields, methods, resolver) - } else { - unreachable!("must be class def here") - } - }; - - let arg_tys = method_args_ast - .args - .iter() - .map(|x| -> Result { - if x.node.arg != "self" { - let annotation = x + let arg_name_tys: Vec<(String, Type)> = { + let mut result = vec![]; + for a in &method_args_ast.args { + if a.node.arg != "self" { + let annotation = a .node .annotation .as_ref() .ok_or_else(|| { "type annotation for function parameter is needed".to_string() - })? - .as_ref(); + })?.as_ref(); let ty = - class_resolver.as_ref().unwrap().lock().parse_type_annotation( + class_resolver.as_ref().lock().parse_type_annotation( converted_top_level, unifier.borrow_mut(), primitives, annotation, )?; - Ok(ty) + if !Self::check_ty_analyzed(ty, unifier, to_be_analyzed_class) { + to_be_analyzed_class.push(DefinitionId(class_ind)); + continue 'class; + } + result.push((a.node.arg.to_string(), ty)); } else { // TODO: handle self, how unimplemented!() } + } + result + }; + + let method_type_var = + arg_name_tys + .iter() + .filter_map(|(_, ty)| { + let ty_enum = unifier.get_ty(*ty); + if let TypeEnum::TVar { id, .. } = ty_enum.as_ref() { + Some((*id, *ty)) + } else { None } }) - .collect::, _>>()?; + .collect::>(); - let ret_ty = if method_name != "__init__" { - method_returns_ast - .as_ref() - .map(|x| - class_resolver.as_ref().unwrap().lock().parse_type_annotation( - converted_top_level, - unifier.borrow_mut(), - primitives, - x.as_ref(), + let ret_ty = { + if method_name != "__init__" { + let ty = method_returns_ast + .as_ref() + .map(|x| + class_resolver.as_ref().lock().parse_type_annotation( + converted_top_level, + unifier.borrow_mut(), + primitives, + x.as_ref(), + ) ) - ) - .ok_or_else(|| "return type annotation needed".to_string())?? - } else { - // TODO: self type, how - unimplemented!() + .ok_or_else(|| "return type annotation error".to_string())??; + if !Self::check_ty_analyzed(ty, unifier, to_be_analyzed_class) { + to_be_analyzed_class.push(DefinitionId(class_ind)); + continue 'class; + } else { ty } + } else { + // TODO: __init__ function, self type, how + unimplemented!() + } }; // handle fields - if method_name == "__init__" { - for body in method_body_ast { - match &body.node { - ast::StmtKind::AnnAssign { - target, - annotation, - .. 
- } if { - if let ast::ExprKind::Attribute { - value, - attr, + let class_field_name_tys: Option> = + if method_name == "__init__" { + let mut result: Vec<(String, Type)> = vec![]; + for body in method_body_ast { + match &body.node { + ast::StmtKind::AnnAssign { + target, + annotation, .. - } = &target.node { - if let ast::ExprKind::Name {id, ..} = &value.node { - id == "self" + } if { + if let ast::ExprKind::Attribute { + value, .. + } = &target.node { + matches!( + &value.node, + ast::ExprKind::Name { id, .. } if id == "self") } else { false } - } else { false } - } => { - // TODO: record this field with its type - }, + } => { + let field_ty = class_resolver.as_ref().lock().parse_type_annotation( + converted_top_level, + unifier.borrow_mut(), + primitives, + annotation.as_ref())?; + if !Self::check_ty_analyzed(field_ty, unifier, to_be_analyzed_class) { + to_be_analyzed_class.push(DefinitionId(class_ind)); + continue 'class; + } else { + result.push(( + if let ast::ExprKind::Attribute { + attr, .. + } = &target.node { + attr.to_string() + } else { unreachable!() }, + field_ty + )) } + }, - // TODO: exclude those without type annotation - ast::StmtKind::Assign { - targets, - .. - } if { - if let ast::ExprKind::Attribute { - value, - attr, - .. - } = &targets[0].node { - if let ast::ExprKind::Name {id, ..} = &value.node { - id == "self" + // exclude those without type annotation + ast::StmtKind::Assign { + targets, .. + } if { + if let ast::ExprKind::Attribute { + value, .. + } = &targets[0].node { + matches!( + &value.node, + ast::ExprKind::Name {id, ..} if id == "self") } else { false } - } else { false } - } => { - unimplemented!() - }, + } => { + return Err("class fields type annotation needed".into()) + }, - // do nothing - _ => { } - } - } + // do nothing + _ => { } + } + }; + Some(result) + } else { None }; + + // current method all type ok, put the current method into the list + if class_methods_parsing_result + .iter() + .any(|(name, _, _)| name == method_name) { + return Err("duplicate method definition".into()) + } else { + class_methods_parsing_result.push(( + method_name.clone(), + unifier.add_ty(TypeEnum::TFunc(FunSignature { + ret: ret_ty, + args: arg_name_tys.into_iter().map(|(name, ty)| { + FuncArg { + name, + ty, + default_value: None + } + }).collect_vec(), + vars: method_type_var + }.into())), + *self.class_method_to_def_id.get(&Self::name_mangling(class_name.clone(), method_name)).unwrap() + )) } - let all_tys_ok = { - let ret_ty_iter = vec![ret_ty]; - let ret_ty_iter = ret_ty_iter.iter(); - let mut all_tys = chain!(arg_tys.iter(), ret_ty_iter); - all_tys.all(|x| { - let type_enum = unifier.get_ty(*x); - match type_enum.as_ref() { - TypeEnum::TObj { obj_id, .. } => { - !to_be_analyzed_class.contains(obj_id) - } - TypeEnum::TVirtual { ty } => { - if let TypeEnum::TObj { obj_id, .. } = - unifier.get_ty(*ty).as_ref() - { - !to_be_analyzed_class.contains(obj_id) - } else { - unreachable!() - } - } - TypeEnum::TVar { .. } => true, - _ => unreachable!(), - } - }) - }; - - if all_tys_ok { - // TODO: put related value to the `class_methods_parsing_result` - unimplemented!() - } else { - to_be_analyzed_class.push(DefinitionId(class_ind)); - // TODO: go to the next WHILE loop - unimplemented!() + // put the fiedlds inside + if let Some(class_field_name_tys) = class_field_name_tys { + assert!(class_fields_parsing_result.is_empty()); + class_fields_parsing_result.extend(class_field_name_tys); } } else { // what should we do with `class A: a = 3`? 
+ // do nothing, continue the for loop to iterate class ast continue; } - } - - // TODO: now it should be confirmed that every + }; + + // now it should be confirmed that every // methods and fields of the class can be correctly typed, put the results - // into the actual def_list and the unifier - } + // into the actual class def method and fields field + let (class_def, _) = &def_ast_list[class_ind]; + let mut class_def = class_def.write(); + if let TopLevelDef::Class { fields, methods, .. } = class_def.deref_mut() { + for (ref n, ref t) in class_fields_parsing_result { + fields.push((n.clone(), *t)); + } + for (n, t, id) in &class_methods_parsing_result { + methods.push((n.clone(), *t, *id)); + } + } else { unreachable!() } + + // change the signature field of the class methods + for (_, ty, id) in &class_methods_parsing_result { + let (method_def, _) = &def_ast_list[id.0]; + let mut method_def = method_def.write(); + if let TopLevelDef::Function { signature, .. } = method_def.deref_mut() { + *signature = *ty; + } + } + }; Ok(()) } - fn analyze_top_level_inheritance(&mut self) -> Result<(), String> { - unimplemented!() - } - fn analyze_top_level_function(&mut self) -> Result<(), String> { unimplemented!() } @@ -684,4 +696,27 @@ impl TopLevelComposer { fn analyze_top_level_field_instantiation(&mut self) -> Result<(), String> { unimplemented!() } + + fn check_ty_analyzed(ty: Type, + unifier: &mut Unifier, + to_be_analyzed: &[DefinitionId]) -> bool + { + let type_enum = unifier.get_ty(ty); + match type_enum.as_ref() { + TypeEnum::TObj { obj_id, .. } => { + !to_be_analyzed.contains(obj_id) + } + TypeEnum::TVirtual { ty } => { + if let TypeEnum::TObj { obj_id, .. } = + unifier.get_ty(*ty).as_ref() + { + !to_be_analyzed.contains(obj_id) + } else { + unreachable!() + } + } + TypeEnum::TVar { .. } => true, + _ => unreachable!(), + } + } } From 6279dbb589614b61180eb18a7556e0017c6a6e0d Mon Sep 17 00:00:00 2001 From: ychenfo Date: Wed, 18 Aug 2021 16:33:50 +0800 Subject: [PATCH 125/131] formating --- nac3core/src/top_level.rs | 320 +++++++++--------- .../src/typecheck/type_inferencer/test.rs | 76 +++-- 2 files changed, 213 insertions(+), 183 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 06507437..ade98dd1 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -4,8 +4,8 @@ use std::{collections::HashMap, collections::HashSet, sync::Arc}; use super::typecheck::type_inferencer::PrimitiveStore; use super::typecheck::typedef::{SharedUnifier, Type, TypeEnum, Unifier}; -use crate::{symbol_resolver::SymbolResolver, typecheck::typedef::Mapping}; use crate::typecheck::typedef::{FunSignature, FuncArg}; +use crate::{symbol_resolver::SymbolResolver, typecheck::typedef::Mapping}; use itertools::Itertools; use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; @@ -68,7 +68,6 @@ pub struct TopLevelContext { pub unifiers: Arc>>, } - pub struct TopLevelComposer { // list of top level definitions, same as top level context pub definition_ast_list: Arc>, Option>)>>>, @@ -84,12 +83,8 @@ pub struct TopLevelComposer { impl TopLevelComposer { pub fn to_top_level_context(&self) -> TopLevelContext { - let def_list = self - .definition_ast_list - .read() - .iter() - .map(|(x, _)| x.clone()) - .collect::>(); + let def_list = + self.definition_ast_list.read().iter().map(|(x, _)| x.clone()).collect::>(); TopLevelContext { definitions: RwLock::new(def_list).into(), // FIXME: all the big unifier or? 
@@ -151,8 +146,9 @@ impl TopLevelComposer { let composer = TopLevelComposer { definition_ast_list: RwLock::new( - top_level_def_list.into_iter().zip(ast_list).collect_vec() - ).into(), + top_level_def_list.into_iter().zip(ast_list).collect_vec(), + ) + .into(), primitives: primitives.0, unifier: primitives.1, class_method_to_def_id: Default::default(), @@ -217,16 +213,21 @@ impl TopLevelComposer { // since later when registering class method, ast will still be used, // here push None temporarly, later will move the ast inside let mut class_def_ast = ( - Arc::new(RwLock::new( - Self::make_top_level_class_def(class_def_id, resolver.clone()) - )), - None + Arc::new(RwLock::new(Self::make_top_level_class_def( + class_def_id, + resolver.clone(), + ))), + None, ); // parse class def body and register class methods into the def list. // module's symbol resolver would not know the name of the class methods, // thus cannot return their definition_id - let mut class_method_name_def_ids: Vec<(String, Arc>, DefinitionId)> = Vec::new(); + let mut class_method_name_def_ids: Vec<( + String, + Arc>, + DefinitionId, + )> = Vec::new(); let mut class_method_index_offset = 0; for b in body { if let ast::StmtKind::FunctionDef { name: method_name, .. } = &b.node { @@ -244,14 +245,15 @@ impl TopLevelComposer { method_name.clone(), self.primitives.none, resolver.clone(), - )).into(), - DefinitionId(method_def_id) + )) + .into(), + DefinitionId(method_def_id), )); } } // move the ast to the entry of the class in the ast_list class_def_ast.1 = Some(ast); - + // now class_def_ast and class_method_def_ast_ids are ok, put them into actual def list in correct order def_list.push(class_def_ast); for (name, def, id) in class_method_name_def_ids { @@ -260,13 +262,11 @@ impl TopLevelComposer { } // put the constructor into the def_list - def_list - .push(( - RwLock::new( - TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) } - ).into(), - None - )); + def_list.push(( + RwLock::new(TopLevelDef::Initializer { class_id: DefinitionId(class_def_id) }) + .into(), + None, + )); // class, put its def_id into the to be analyzed set self.to_be_analyzed_class.push(DefinitionId(class_def_id)); @@ -279,9 +279,13 @@ impl TopLevelComposer { // add to the definition list def_list.push(( - RwLock::new(Self::make_top_level_function_def(name.into(), self.primitives.none, resolver)) - .into(), - Some(ast) + RwLock::new(Self::make_top_level_function_def( + name.into(), + self.primitives.none, + resolver, + )) + .into(), + Some(ast), )); // return @@ -299,8 +303,7 @@ impl TopLevelComposer { let primitives = &self.primitives; let unifier = &mut self.unifier; - for (class_def, class_ast) in def_list.iter_mut() - { + for (class_def, class_ast) in def_list.iter_mut() { // only deal with class def here let mut class_def = class_def.write(); let (class_bases_ast, class_def_type_vars, class_resolver) = { @@ -327,8 +330,8 @@ impl TopLevelComposer { // things like `class A(Generic[T, V, ImportedModule.T])` is not supported // i.e. only simple names are allowed in the subscript // should update the TopLevelDef::Class.typevars and the TypeEnum::TObj.params - ast::ExprKind::Subscript { value, slice, .. } - if matches!(&value.node, ast::ExprKind::Name { id, .. } if id == "Generic") => { + ast::ExprKind::Subscript { value, slice, .. } if matches!(&value.node, ast::ExprKind::Name { id, .. 
} if id == "Generic") => + { if !is_generic { is_generic = true; } else { @@ -370,13 +373,12 @@ impl TopLevelComposer { // `class A(Generic[T])` } else { - let ty = - class_resolver.parse_type_annotation( - converted_top_level, - unifier.borrow_mut(), - primitives, - &slice, - )?; + let ty = class_resolver.parse_type_annotation( + converted_top_level, + unifier.borrow_mut(), + primitives, + &slice, + )?; // check if it is type var let is_type_var = matches!(unifier.get_ty(ty).as_ref(), &TypeEnum::TVar { .. }); @@ -408,8 +410,7 @@ impl TopLevelComposer { let primitives = &self.primitives; let unifier = &mut self.unifier; - for (class_def, class_ast) in def_list.iter_mut() - { + for (class_def, class_ast) in def_list.iter_mut() { let mut class_def = class_def.write(); let (class_bases, class_ancestors, class_resolver) = { if let TopLevelDef::Class { ancestors, resolver, .. } = class_def.deref_mut() { @@ -453,7 +454,7 @@ impl TopLevelComposer { if !class_ancestors.contains(&base_id) { class_ancestors.push(base_id); } else { - return Err("cannot specify the same base class twice".into()) + return Err("cannot specify the same base class twice".into()); } } } @@ -470,9 +471,11 @@ impl TopLevelComposer { let primitives = &self.primitives; let to_be_analyzed_class = &mut self.to_be_analyzed_class; let unifier = &mut self.unifier; - - 'class: loop{ - if to_be_analyzed_class.is_empty() { break; } + + 'class: loop { + if to_be_analyzed_class.is_empty() { + break; + } let class_ind = to_be_analyzed_class.remove(0).0; let (class_name, class_body, class_resolver) = { @@ -483,7 +486,9 @@ impl TopLevelComposer { { if let TopLevelDef::Class { resolver, .. } = class_def.write().deref() { (name, body, resolver.as_ref().unwrap().clone()) - } else { unreachable!() } + } else { + unreachable!() + } } else { unreachable!("should be class def ast") } @@ -511,16 +516,17 @@ impl TopLevelComposer { .annotation .as_ref() .ok_or_else(|| { - "type annotation for function parameter is needed".to_string() - })?.as_ref(); - - let ty = - class_resolver.as_ref().lock().parse_type_annotation( - converted_top_level, - unifier.borrow_mut(), - primitives, - annotation, - )?; + "type annotation for function parameter is needed" + .to_string() + })? + .as_ref(); + + let ty = class_resolver.as_ref().lock().parse_type_annotation( + converted_top_level, + unifier.borrow_mut(), + primitives, + annotation, + )?; if !Self::check_ty_analyzed(ty, unifier, to_be_analyzed_class) { to_be_analyzed_class.push(DefinitionId(class_ind)); continue 'class; @@ -534,120 +540,137 @@ impl TopLevelComposer { result }; - let method_type_var = - arg_name_tys + let method_type_var = arg_name_tys .iter() .filter_map(|(_, ty)| { let ty_enum = unifier.get_ty(*ty); if let TypeEnum::TVar { id, .. 
} = ty_enum.as_ref() { Some((*id, *ty)) - } else { None } + } else { + None + } }) .collect::>(); - + let ret_ty = { if method_name != "__init__" { let ty = method_returns_ast - .as_ref() - .map(|x| - class_resolver.as_ref().lock().parse_type_annotation( - converted_top_level, - unifier.borrow_mut(), - primitives, - x.as_ref(), - ) - ) - .ok_or_else(|| "return type annotation error".to_string())??; + .as_ref() + .map(|x| { + class_resolver.as_ref().lock().parse_type_annotation( + converted_top_level, + unifier.borrow_mut(), + primitives, + x.as_ref(), + ) + }) + .ok_or_else(|| "return type annotation error".to_string())??; if !Self::check_ty_analyzed(ty, unifier, to_be_analyzed_class) { to_be_analyzed_class.push(DefinitionId(class_ind)); continue 'class; - } else { ty } + } else { + ty + } } else { // TODO: __init__ function, self type, how unimplemented!() } }; - + // handle fields - let class_field_name_tys: Option> = - if method_name == "__init__" { - let mut result: Vec<(String, Type)> = vec![]; - for body in method_body_ast { - match &body.node { - ast::StmtKind::AnnAssign { - target, - annotation, - .. - } if { - if let ast::ExprKind::Attribute { - value, .. - } = &target.node { + let class_field_name_tys: Option> = if method_name + == "__init__" + { + let mut result: Vec<(String, Type)> = vec![]; + for body in method_body_ast { + match &body.node { + ast::StmtKind::AnnAssign { target, annotation, .. } + if { + if let ast::ExprKind::Attribute { value, .. } = &target.node + { matches!( &value.node, ast::ExprKind::Name { id, .. } if id == "self") - } else { false } - } => { - let field_ty = class_resolver.as_ref().lock().parse_type_annotation( + } else { + false + } + } => + { + let field_ty = + class_resolver.as_ref().lock().parse_type_annotation( converted_top_level, unifier.borrow_mut(), primitives, - annotation.as_ref())?; - if !Self::check_ty_analyzed(field_ty, unifier, to_be_analyzed_class) { - to_be_analyzed_class.push(DefinitionId(class_ind)); - continue 'class; - } else { - result.push(( - if let ast::ExprKind::Attribute { - attr, .. - } = &target.node { - attr.to_string() - } else { unreachable!() }, - field_ty - )) } - }, + annotation.as_ref(), + )?; + if !Self::check_ty_analyzed( + field_ty, + unifier, + to_be_analyzed_class, + ) { + to_be_analyzed_class.push(DefinitionId(class_ind)); + continue 'class; + } else { + result.push(( + if let ast::ExprKind::Attribute { attr, .. } = + &target.node + { + attr.to_string() + } else { + unreachable!() + }, + field_ty, + )) + } + } - // exclude those without type annotation - ast::StmtKind::Assign { - targets, .. - } if { - if let ast::ExprKind::Attribute { - value, .. - } = &targets[0].node { + // exclude those without type annotation + ast::StmtKind::Assign { targets, .. } + if { + if let ast::ExprKind::Attribute { value, .. 
} = + &targets[0].node + { matches!( &value.node, ast::ExprKind::Name {id, ..} if id == "self") - } else { false } - } => { - return Err("class fields type annotation needed".into()) - }, - - // do nothing - _ => { } + } else { + false + } + } => + { + return Err("class fields type annotation needed".into()) } - }; - Some(result) - } else { None }; - + + // do nothing + _ => {} + } + } + Some(result) + } else { + None + }; + // current method all type ok, put the current method into the list - if class_methods_parsing_result - .iter() - .any(|(name, _, _)| name == method_name) { - return Err("duplicate method definition".into()) + if class_methods_parsing_result.iter().any(|(name, _, _)| name == method_name) { + return Err("duplicate method definition".into()); } else { class_methods_parsing_result.push(( method_name.clone(), - unifier.add_ty(TypeEnum::TFunc(FunSignature { - ret: ret_ty, - args: arg_name_tys.into_iter().map(|(name, ty)| { - FuncArg { - name, - ty, - default_value: None - } - }).collect_vec(), - vars: method_type_var - }.into())), - *self.class_method_to_def_id.get(&Self::name_mangling(class_name.clone(), method_name)).unwrap() + unifier.add_ty(TypeEnum::TFunc( + FunSignature { + ret: ret_ty, + args: arg_name_tys + .into_iter() + .map(|(name, ty)| FuncArg { name, ty, default_value: None }) + .collect_vec(), + vars: method_type_var, + } + .into(), + )), + *self + .class_method_to_def_id + .get(&Self::name_mangling(class_name.clone(), method_name)) + .unwrap(), )) } @@ -661,8 +684,8 @@ impl TopLevelComposer { // do nothing, continue the for loop to iterate class ast continue; } - }; - + } + // now it should be confirmed that every // methods and fields of the class can be correctly typed, put the results // into the actual class def method and fields field @@ -675,8 +698,10 @@ impl TopLevelComposer { for (n, t, id) in &class_methods_parsing_result { methods.push((n.clone(), *t, *id)); } - } else { unreachable!() } - + } else { + unreachable!() + } + // change the signature field of the class methods for (_, ty, id) in &class_methods_parsing_result { let (method_def, _) = &def_ast_list[id.0]; @@ -685,7 +710,7 @@ impl TopLevelComposer { *signature = *ty; } } - }; + } Ok(()) } @@ -697,19 +722,12 @@ impl TopLevelComposer { unimplemented!() } - fn check_ty_analyzed(ty: Type, - unifier: &mut Unifier, - to_be_analyzed: &[DefinitionId]) -> bool - { + fn check_ty_analyzed(ty: Type, unifier: &mut Unifier, to_be_analyzed: &[DefinitionId]) -> bool { let type_enum = unifier.get_ty(ty); match type_enum.as_ref() { - TypeEnum::TObj { obj_id, .. } => { - !to_be_analyzed.contains(obj_id) - } + TypeEnum::TObj { obj_id, .. } => !to_be_analyzed.contains(obj_id), TypeEnum::TVirtual { ty } => { - if let TypeEnum::TObj { obj_id, .. } = - unifier.get_ty(*ty).as_ref() - { + if let TypeEnum::TObj { obj_id, .. 
} = unifier.get_ty(*ty).as_ref() { !to_be_analyzed.contains(obj_id) } else { unreachable!() diff --git a/nac3core/src/typecheck/type_inferencer/test.rs b/nac3core/src/typecheck/type_inferencer/test.rs index ca1ee78c..6952ef1e 100644 --- a/nac3core/src/typecheck/type_inferencer/test.rs +++ b/nac3core/src/typecheck/type_inferencer/test.rs @@ -146,14 +146,17 @@ impl TestEnvironment { }); identifier_mapping.insert("None".into(), none); for i in 0..5 { - top_level_defs.push(RwLock::new(TopLevelDef::Class { - object_id: DefinitionId(i), - type_vars: Default::default(), - fields: Default::default(), - methods: Default::default(), - ancestors: Default::default(), - resolver: None, - }).into()); + top_level_defs.push( + RwLock::new(TopLevelDef::Class { + object_id: DefinitionId(i), + type_vars: Default::default(), + fields: Default::default(), + methods: Default::default(), + ancestors: Default::default(), + resolver: None, + }) + .into(), + ); } let primitives = PrimitiveStore { int32, int64, float, bool, none }; @@ -165,14 +168,17 @@ impl TestEnvironment { fields: [("a".into(), v0)].iter().cloned().collect::>().into(), params: [(id, v0)].iter().cloned().collect::>().into(), }); - top_level_defs.push(RwLock::new(TopLevelDef::Class { - object_id: DefinitionId(5), - type_vars: vec![v0], - fields: [("a".into(), v0)].into(), - methods: Default::default(), - ancestors: Default::default(), - resolver: None, - }).into()); + top_level_defs.push( + RwLock::new(TopLevelDef::Class { + object_id: DefinitionId(5), + type_vars: vec![v0], + fields: [("a".into(), v0)].into(), + methods: Default::default(), + ancestors: Default::default(), + resolver: None, + }) + .into(), + ); identifier_mapping.insert( "Foo".into(), @@ -198,14 +204,17 @@ impl TestEnvironment { .into(), params: Default::default(), }); - top_level_defs.push(RwLock::new(TopLevelDef::Class { - object_id: DefinitionId(6), - type_vars: Default::default(), - fields: [("a".into(), int32), ("b".into(), fun)].into(), - methods: Default::default(), - ancestors: Default::default(), - resolver: None, - }).into()); + top_level_defs.push( + RwLock::new(TopLevelDef::Class { + object_id: DefinitionId(6), + type_vars: Default::default(), + fields: [("a".into(), int32), ("b".into(), fun)].into(), + methods: Default::default(), + ancestors: Default::default(), + resolver: None, + }) + .into(), + ); identifier_mapping.insert( "Bar".into(), unifier.add_ty(TypeEnum::TFunc( @@ -222,14 +231,17 @@ impl TestEnvironment { .into(), params: Default::default(), }); - top_level_defs.push(RwLock::new(TopLevelDef::Class { - object_id: DefinitionId(7), - type_vars: Default::default(), - fields: [("a".into(), bool), ("b".into(), fun)].into(), - methods: Default::default(), - ancestors: Default::default(), - resolver: None, - }).into()); + top_level_defs.push( + RwLock::new(TopLevelDef::Class { + object_id: DefinitionId(7), + type_vars: Default::default(), + fields: [("a".into(), bool), ("b".into(), fun)].into(), + methods: Default::default(), + ancestors: Default::default(), + resolver: None, + }) + .into(), + ); identifier_mapping.insert( "Bar2".into(), unifier.add_ty(TypeEnum::TFunc( From 9cb07e6f04a281158ad931e20c95953f2b5cea06 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Wed, 18 Aug 2021 17:32:55 +0800 Subject: [PATCH 126/131] start to handle base inheritance methods, fields --- nac3core/src/top_level.rs | 51 +++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs index 
ade98dd1..681700be 100644 --- a/nac3core/src/top_level.rs +++ b/nac3core/src/top_level.rs @@ -10,7 +10,7 @@ use itertools::Itertools; use parking_lot::{Mutex, RwLock}; use rustpython_parser::ast::{self, Stmt}; -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] +#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] pub struct DefinitionId(pub usize); pub enum TopLevelDef { @@ -462,9 +462,9 @@ impl TopLevelComposer { } /// step 3, class fields and methods - // FIXME: need analyze base classes here - // FIXME: how to deal with self type - // FIXME: how to prevent cycles + // FIXME: analyze base classes here + // FIXME: deal with self type + // NOTE: prevent cycles only roughly done fn analyze_top_level_class_fields_methods(&mut self) -> Result<(), String> { let mut def_ast_list = self.definition_ast_list.write(); let converted_top_level = &self.to_top_level_context(); @@ -472,20 +472,27 @@ impl TopLevelComposer { let to_be_analyzed_class = &mut self.to_be_analyzed_class; let unifier = &mut self.unifier; + // NOTE: roughly prevent infinite loop + let mut max_iter = to_be_analyzed_class.len() * 4; 'class: loop { - if to_be_analyzed_class.is_empty() { + if to_be_analyzed_class.is_empty() && { max_iter -= 1; max_iter > 0 } { break; } let class_ind = to_be_analyzed_class.remove(0).0; - let (class_name, class_body, class_resolver) = { + let (class_name, + class_body_ast, + class_bases_ast, + class_resolver, + class_ancestors + ) = { let (class_def, class_ast) = &mut def_ast_list[class_ind]; if let Some(ast::Located { - node: ast::StmtKind::ClassDef { name, body, .. }, .. + node: ast::StmtKind::ClassDef { name, body, bases, .. }, .. }) = class_ast.as_ref() { - if let TopLevelDef::Class { resolver, .. } = class_def.write().deref() { - (name, body, resolver.as_ref().unwrap().clone()) + if let TopLevelDef::Class { resolver, ancestors, .. 
From 4b38fe66a2bdee560149bade5b2e311791e21444 Mon Sep 17 00:00:00 2001
From: ychenfo
Date: Wed, 18 Aug 2021 17:33:48 +0800
Subject: [PATCH 127/131] format

---
 nac3core/src/top_level.rs | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/nac3core/src/top_level.rs b/nac3core/src/top_level.rs
index 681700be..8560cfb3 100644
--- a/nac3core/src/top_level.rs
+++ b/nac3core/src/top_level.rs
@@ -475,23 +475,24 @@
         // NOTE: roughly prevent infinite loop
         let mut max_iter = to_be_analyzed_class.len() * 4;
         'class: loop {
-            if to_be_analyzed_class.is_empty() && { max_iter -= 1; max_iter > 0 } {
+            if to_be_analyzed_class.is_empty() && {
+                max_iter -= 1;
+                max_iter > 0
+            } {
                 break;
             }
 
             let class_ind = to_be_analyzed_class.remove(0).0;
-            let (class_name,
-                class_body_ast,
-                class_bases_ast,
-                class_resolver,
-                class_ancestors
-            ) = {
+            let (class_name, class_body_ast, class_bases_ast, class_resolver, class_ancestors) = {
                 let (class_def, class_ast) = &mut def_ast_list[class_ind];
                 if let Some(ast::Located {
-                    node: ast::StmtKind::ClassDef { name, body, bases, .. }, ..
+                    node: ast::StmtKind::ClassDef { name, body, bases, .. },
+                    ..
                 }) = class_ast.as_ref()
                 {
-                    if let TopLevelDef::Class { resolver, ancestors, .. } = class_def.write().deref() {
+                    if let TopLevelDef::Class { resolver, ancestors, .. } =
+                        class_def.write().deref()
+                    {
                         (name, body, bases, resolver.as_ref().unwrap().clone(), ancestors.clone())
                     } else {
                         unreachable!()
@@ -502,7 +503,8 @@
             };
 
             let all_base_class_analyzed = {
-                let not_yet_analyzed = to_be_analyzed_class.clone().into_iter().collect::<HashSet<_>>();
+                let not_yet_analyzed =
+                    to_be_analyzed_class.clone().into_iter().collect::<HashSet<_>>();
                 let base = class_ancestors.clone().into_iter().collect::<HashSet<_>>();
                 let intersection = not_yet_analyzed.intersection(&base).collect_vec();
                 intersection.is_empty()
@@ -517,11 +519,16 @@
             let class_bases_ty = class_bases_ast
                 .iter()
                 .filter_map(|x| {
-                    class_resolver.as_ref().lock().parse_type_annotation(
-                        converted_top_level,
-                        unifier.borrow_mut(),
-                        primitives,
-                        x).ok()
+                    class_resolver
+                        .as_ref()
+                        .lock()
+                        .parse_type_annotation(
+                            converted_top_level,
+                            unifier.borrow_mut(),
+                            primitives,
+                            x,
+                        )
+                        .ok()
                 })
                 .collect_vec();
 
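The two patches above defer any class whose bases still sit in the to-be-analyzed list and put a hard cap on the number of passes, so a cycle among base classes cannot make the loop spin forever. The same worklist idea in a standalone sketch; the names here are illustrative and are not the nac3core API:

use std::collections::HashSet;

// Take the front of the worklist; if any of its bases is still pending, push it
// back and retry later. The iteration budget mirrors the `max_iter` counter in
// the patch and turns a dependency cycle into an error instead of a hang.
fn analyze_in_base_order(mut pending: Vec<(usize, Vec<usize>)>) -> Result<Vec<usize>, String> {
    let mut done = Vec::new();
    let mut budget = pending.len() * 4;
    while !pending.is_empty() {
        if budget == 0 {
            return Err("probable cycle among base classes".into());
        }
        budget -= 1;
        let (id, bases) = pending.remove(0);
        let still_pending: HashSet<usize> = pending.iter().map(|(i, _)| *i).collect();
        if bases.iter().any(|b| still_pending.contains(b)) {
            // A base has not been analyzed yet; defer this class.
            pending.push((id, bases));
            continue;
        }
        // All bases are analyzed, so this class can be analyzed now.
        done.push(id);
    }
    Ok(done)
}

fn main() {
    // Class 7 inherits from 6 and 6 from 5; even listed in reverse order,
    // class 5 is analyzed first.
    let order = analyze_in_base_order(vec![(7, vec![6]), (6, vec![5]), (5, vec![])]);
    assert_eq!(order.unwrap(), vec![5, 6, 7]);
}

The fixed budget only guarantees termination; a fuller implementation would also notice that a whole pass made no progress and report which classes form the cycle.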
From f8a697e3d455cb9935271329041a005217f6c00c Mon Sep 17 00:00:00 2001
From: Sebastien Bourdeauducq
Date: Thu, 19 Aug 2021 11:14:35 +0800
Subject: [PATCH 128/131] switch to LLVM 11

---
 Cargo.lock          | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 nac3core/Cargo.toml |  3 +--
 shell.nix           |  2 +-
 3 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index ff33e7cf..1d50ff1a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -311,7 +311,8 @@ dependencies = [
  "either",
  "inkwell_internals",
  "libc",
- "llvm-sys",
+ "llvm-sys 100.2.1",
+ "llvm-sys 110.0.1",
  "once_cell",
  "parking_lot",
  "regex",
@@ -421,7 +422,20 @@ dependencies = [
  "cc",
  "lazy_static",
  "libc",
  "regex",
- "semver",
+ "semver 0.9.0",
 ]
 
+[[package]]
+name = "llvm-sys"
+version = "110.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21ede189444b8c78907e5d36da5dabcf153170fcff9c1dba48afc4b33c7e19f0"
+dependencies = [
+ "cc",
+ "lazy_static",
+ "libc",
+ "regex",
+ "semver 0.11.0",
+]
+
@@ -599,6 +613,15 @@ dependencies = [
  "proc-macro-hack",
 ]
 
+[[package]]
+name = "pest"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53"
+dependencies = [
+ "ucd-trie",
+]
+
 [[package]]
 name = "petgraph"
 version = "0.5.1"
@@ -889,7 +912,16 @@ version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
 dependencies = [
- "semver-parser",
+ "semver-parser 0.7.0",
+]
+
+[[package]]
+name = "semver"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6"
+dependencies = [
+ "semver-parser 0.10.2",
 ]
 
@@ -898,6 +930,15 @@ version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
 
+[[package]]
+name = "semver-parser"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7"
+dependencies = [
+ "pest",
+]
+
 [[package]]
 name = "siphasher"
 version = "0.3.5"
@@ -966,6 +1007,12 @@ dependencies = [
  "crunchy",
 ]
 
+[[package]]
+name = "ucd-trie"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c"
+
 [[package]]
 name = "unic-char-property"
 version = "0.9.0"
diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml
index c0581cc1..bbd553e4 100644
--- a/nac3core/Cargo.toml
+++ b/nac3core/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2018"
 [dependencies]
 num-bigint = "0.3"
 num-traits = "0.2"
-inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] }
+inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm11-0"] }
 rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" }
 itertools = "0.10.1"
 crossbeam = "0.8.1"
@@ -17,4 +17,3 @@ rayon = "1.5.1"
 [dev-dependencies]
 test-case = "1.2.0"
 indoc = "1.0"
-
diff --git a/shell.nix b/shell.nix
index 858e68b9..35be2055 100644
--- a/shell.nix
+++ b/shell.nix
@@ -4,6 +4,6 @@ in pkgs.stdenv.mkDerivation {
   name = "nac3-env";
   buildInputs = with pkgs; [
-    llvm_10 clang_10 cargo rustc libffi libxml2 clippy
+    llvm_11 clang_11 cargo rustc libffi libxml2 clippy
   ];
 }
"0.9.0" diff --git a/nac3core/Cargo.toml b/nac3core/Cargo.toml index c0581cc1..bbd553e4 100644 --- a/nac3core/Cargo.toml +++ b/nac3core/Cargo.toml @@ -7,7 +7,7 @@ edition = "2018" [dependencies] num-bigint = "0.3" num-traits = "0.2" -inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm10-0"] } +inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm11-0"] } rustpython-parser = { git = "https://github.com/RustPython/RustPython", branch = "master" } itertools = "0.10.1" crossbeam = "0.8.1" @@ -17,4 +17,3 @@ rayon = "1.5.1" [dev-dependencies] test-case = "1.2.0" indoc = "1.0" - diff --git a/shell.nix b/shell.nix index 858e68b9..35be2055 100644 --- a/shell.nix +++ b/shell.nix @@ -4,6 +4,6 @@ in pkgs.stdenv.mkDerivation { name = "nac3-env"; buildInputs = with pkgs; [ - llvm_10 clang_10 cargo rustc libffi libxml2 clippy + llvm_11 clang_11 cargo rustc libffi libxml2 clippy ]; } From c238c264e7253e18b4b72ebdd926e404730cead7 Mon Sep 17 00:00:00 2001 From: ychenfo Date: Thu, 19 Aug 2021 11:18:58 +0800 Subject: [PATCH 129/131] add type vars to the primitive binop function def --- nac3core/src/typecheck/magic_methods.rs | 33 +++++++++++++------------ 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/nac3core/src/typecheck/magic_methods.rs b/nac3core/src/typecheck/magic_methods.rs index 7a9fbd6d..30e3c753 100644 --- a/nac3core/src/typecheck/magic_methods.rs +++ b/nac3core/src/typecheck/magic_methods.rs @@ -66,26 +66,32 @@ pub fn comparison_name(op: &Cmpop) -> Option<&'static str> { pub fn impl_binop( unifier: &mut Unifier, - _store: &PrimitiveStore, + store: &PrimitiveStore, ty: Type, other_ty: &[Type], ret_ty: Type, ops: &[ast::Operator], ) { if let TypeEnum::TObj { fields, .. 
From 6e424a6a3eb4754ccd156e14ebce8580cd115675 Mon Sep 17 00:00:00 2001
From: pca006132
Date: Thu, 19 Aug 2021 11:32:22 +0800
Subject: [PATCH 130/131] fixed codegen test

---
 nac3core/src/codegen/test.rs | 64 ++++++++++++++++++++++++++++++++++------------------------------
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/nac3core/src/codegen/test.rs b/nac3core/src/codegen/test.rs
index e56c7b45..72ea9a75 100644
--- a/nac3core/src/codegen/test.rs
+++ b/nac3core/src/codegen/test.rs
@@ -151,6 +151,8 @@ fn test_primitives() {
     };
 
     let mut inferencer = env.get_inferencer();
+    inferencer.variable_mapping.insert("a".into(), inferencer.primitives.int32);
+    inferencer.variable_mapping.insert("b".into(), inferencer.primitives.int32);
    let source = indoc! { "
        c = a + b
        d = a if c == 1 else 0
@@ -163,6 +165,8 @@
        .map(|v| inferencer.fold_stmt(v))
        .collect::<Result<Vec<_>, _>>()
        .unwrap();
+    let mut identifiers = vec!["a".to_string(), "b".to_string()];
+    inferencer.check_block(&statements, &mut identifiers).unwrap();
 
     let top_level = Arc::new(TopLevelContext {
         definitions: Default::default(),
@@ -198,41 +202,41 @@
     // after O2 optimization
     let expected = indoc! {"
-        ; ModuleID = 'test'
-        source_filename = \"test\"
+            ; ModuleID = 'test'
+            source_filename = \"test\"
 
-        define i32 @testing(i32 %0, i32 %1) {
-        init:
-          %a = alloca i32
-          store i32 %0, i32* %a
-          %b = alloca i32
-          store i32 %1, i32* %b
-          %tmp = alloca i32
-          %tmp4 = alloca i32
-          br label %body
+            define i32 @testing(i32 %0, i32 %1) {
+            init:
+              %a = alloca i32, align 4
+              store i32 %0, i32* %a, align 4
+              %b = alloca i32, align 4
+              store i32 %1, i32* %b, align 4
+              %tmp = alloca i32, align 4
+              %tmp4 = alloca i32, align 4
+              br label %body
 
-        body:                                             ; preds = %init
-          %load = load i32, i32* %a
-          %load1 = load i32, i32* %b
-          %add = add i32 %load, %load1
-          store i32 %add, i32* %tmp
-          %load2 = load i32, i32* %tmp
-          %cmp = icmp eq i32 %load2, 1
-          br i1 %cmp, label %then, label %else
+            body:                                             ; preds = %init
+              %load = load i32, i32* %a, align 4
+              %load1 = load i32, i32* %b, align 4
+              %add = add i32 %load, %load1
+              store i32 %add, i32* %tmp, align 4
+              %load2 = load i32, i32* %tmp, align 4
+              %cmp = icmp eq i32 %load2, 1
+              br i1 %cmp, label %then, label %else
 
-        then:                                             ; preds = %body
-          %load3 = load i32, i32* %a
-          br label %cont
+            then:                                             ; preds = %body
+              %load3 = load i32, i32* %a, align 4
+              br label %cont
 
-        else:                                             ; preds = %body
-          br label %cont
+            else:                                             ; preds = %body
+              br label %cont
 
-        cont:                                             ; preds = %else, %then
-          %ifexpr = phi i32 [ %load3, %then ], [ 0, %else ]
-          store i32 %ifexpr, i32* %tmp4
-          %load5 = load i32, i32* %tmp4
-          ret i32 %load5
-        }
+            cont:                                             ; preds = %else, %then
+              %ifexpr = phi i32 [ %load3, %then ], [ 0, %else ]
+              store i32 %ifexpr, i32* %tmp4, align 4
+              %load5 = load i32, i32* %tmp4, align 4
+              ret i32 %load5
+            }
     "}
     .trim();
     assert_eq!(expected, module.print_to_string().to_str().unwrap().trim());
{" - ; ModuleID = 'test' - source_filename = \"test\" + ; ModuleID = 'test' + source_filename = \"test\" - define i32 @testing(i32 %0, i32 %1) { - init: - %a = alloca i32 - store i32 %0, i32* %a - %b = alloca i32 - store i32 %1, i32* %b - %tmp = alloca i32 - %tmp4 = alloca i32 - br label %body + define i32 @testing(i32 %0, i32 %1) { + init: + %a = alloca i32, align 4 + store i32 %0, i32* %a, align 4 + %b = alloca i32, align 4 + store i32 %1, i32* %b, align 4 + %tmp = alloca i32, align 4 + %tmp4 = alloca i32, align 4 + br label %body - body: ; preds = %init - %load = load i32, i32* %a - %load1 = load i32, i32* %b - %add = add i32 %load, %load1 - store i32 %add, i32* %tmp - %load2 = load i32, i32* %tmp - %cmp = icmp eq i32 %load2, 1 - br i1 %cmp, label %then, label %else + body: ; preds = %init + %load = load i32, i32* %a, align 4 + %load1 = load i32, i32* %b, align 4 + %add = add i32 %load, %load1 + store i32 %add, i32* %tmp, align 4 + %load2 = load i32, i32* %tmp, align 4 + %cmp = icmp eq i32 %load2, 1 + br i1 %cmp, label %then, label %else - then: ; preds = %body - %load3 = load i32, i32* %a - br label %cont + then: ; preds = %body + %load3 = load i32, i32* %a, align 4 + br label %cont - else: ; preds = %body - br label %cont + else: ; preds = %body + br label %cont - cont: ; preds = %else, %then - %ifexpr = phi i32 [ %load3, %then ], [ 0, %else ] - store i32 %ifexpr, i32* %tmp4 - %load5 = load i32, i32* %tmp4 - ret i32 %load5 - } + cont: ; preds = %else, %then + %ifexpr = phi i32 [ %load3, %then ], [ 0, %else ] + store i32 %ifexpr, i32* %tmp4, align 4 + %load5 = load i32, i32* %tmp4, align 4 + ret i32 %load5 + } "} .trim(); assert_eq!(expected, module.print_to_string().to_str().unwrap().trim()); From d1215bf5acef353dbf715cc6ad2b5a0c9e7f5bf0 Mon Sep 17 00:00:00 2001 From: pca006132 Date: Thu, 19 Aug 2021 11:45:33 +0800 Subject: [PATCH 131/131] nac3core/codegen/expr: fixed typo --- nac3core/src/codegen/expr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nac3core/src/codegen/expr.rs b/nac3core/src/codegen/expr.rs index 59bb0430..81a2161b 100644 --- a/nac3core/src/codegen/expr.rs +++ b/nac3core/src/codegen/expr.rs @@ -372,7 +372,7 @@ impl<'ctx, 'a> CodeGenContext<'ctx, 'a> { } ExprKind::BinOp { op, left, right } => { let ty1 = self.unifier.get_representative(left.custom.unwrap()); - let ty2 = self.unifier.get_representative(left.custom.unwrap()); + let ty2 = self.unifier.get_representative(right.custom.unwrap()); let left = self.gen_expr(left); let right = self.gen_expr(right);