diff --git a/data/image/no/image1.jpeg b/data/image/combined/image1.jpeg similarity index 100% rename from data/image/no/image1.jpeg rename to data/image/combined/image1.jpeg diff --git a/data/image/no/image10.jpg b/data/image/combined/image10.jpg similarity index 100% rename from data/image/no/image10.jpg rename to data/image/combined/image10.jpg diff --git a/data/image/combined/image100.jpg b/data/image/combined/image100.jpg new file mode 100755 index 0000000..aba41ac Binary files /dev/null and b/data/image/combined/image100.jpg differ diff --git a/data/image/combined/image101.jpg b/data/image/combined/image101.jpg new file mode 100755 index 0000000..754a902 Binary files /dev/null and b/data/image/combined/image101.jpg differ diff --git a/data/image/combined/image102.jpg b/data/image/combined/image102.jpg new file mode 100755 index 0000000..3b6db0d Binary files /dev/null and b/data/image/combined/image102.jpg differ diff --git a/data/image/combined/image103.jpg b/data/image/combined/image103.jpg new file mode 100755 index 0000000..d0a3156 Binary files /dev/null and b/data/image/combined/image103.jpg differ diff --git a/data/image/combined/image104.jpg b/data/image/combined/image104.jpg new file mode 100755 index 0000000..756cbd1 Binary files /dev/null and b/data/image/combined/image104.jpg differ diff --git a/data/image/combined/image105.jpg b/data/image/combined/image105.jpg new file mode 100755 index 0000000..6d2eeba Binary files /dev/null and b/data/image/combined/image105.jpg differ diff --git a/data/image/combined/image106.jpg b/data/image/combined/image106.jpg new file mode 100755 index 0000000..de1f187 Binary files /dev/null and b/data/image/combined/image106.jpg differ diff --git a/data/image/combined/image107.png b/data/image/combined/image107.png new file mode 100755 index 0000000..05fdc05 Binary files /dev/null and b/data/image/combined/image107.png differ diff --git a/data/image/combined/image108.jpg b/data/image/combined/image108.jpg new file mode 100755 index 0000000..f3e56a0 Binary files /dev/null and b/data/image/combined/image108.jpg differ diff --git a/data/image/combined/image109.jpg b/data/image/combined/image109.jpg new file mode 100755 index 0000000..0e7db19 Binary files /dev/null and b/data/image/combined/image109.jpg differ diff --git a/data/image/no/image48.jpg b/data/image/combined/image11.jpg similarity index 100% rename from data/image/no/image48.jpg rename to data/image/combined/image11.jpg diff --git a/data/image/combined/image110.jpg b/data/image/combined/image110.jpg new file mode 100755 index 0000000..19b3a1c Binary files /dev/null and b/data/image/combined/image110.jpg differ diff --git a/data/image/combined/image111.jpg b/data/image/combined/image111.jpg new file mode 100755 index 0000000..2fc8d46 Binary files /dev/null and b/data/image/combined/image111.jpg differ diff --git a/data/image/combined/image112.jpg b/data/image/combined/image112.jpg new file mode 100755 index 0000000..38c140c Binary files /dev/null and b/data/image/combined/image112.jpg differ diff --git a/data/image/combined/image113.JPG b/data/image/combined/image113.JPG new file mode 100755 index 0000000..f0eef62 Binary files /dev/null and b/data/image/combined/image113.JPG differ diff --git a/data/image/combined/image114.jpg b/data/image/combined/image114.jpg new file mode 100755 index 0000000..9358481 Binary files /dev/null and b/data/image/combined/image114.jpg differ diff --git a/data/image/combined/image115.JPG b/data/image/combined/image115.JPG new file mode 100755 index 
0000000..ab0a3e4 Binary files /dev/null and b/data/image/combined/image115.JPG differ diff --git a/data/image/combined/image116.JPG b/data/image/combined/image116.JPG new file mode 100755 index 0000000..c950d36 Binary files /dev/null and b/data/image/combined/image116.JPG differ diff --git a/data/image/combined/image117.jpg b/data/image/combined/image117.jpg new file mode 100755 index 0000000..a4db55d Binary files /dev/null and b/data/image/combined/image117.jpg differ diff --git a/data/image/combined/image118.jpg b/data/image/combined/image118.jpg new file mode 100755 index 0000000..f023a76 Binary files /dev/null and b/data/image/combined/image118.jpg differ diff --git a/data/image/combined/image119.jpg b/data/image/combined/image119.jpg new file mode 100755 index 0000000..0d46dbd Binary files /dev/null and b/data/image/combined/image119.jpg differ diff --git a/data/image/no/image49.jpg b/data/image/combined/image12.jpg similarity index 100% rename from data/image/no/image49.jpg rename to data/image/combined/image12.jpg diff --git a/data/image/combined/image120.JPG b/data/image/combined/image120.JPG new file mode 100755 index 0000000..bd33641 Binary files /dev/null and b/data/image/combined/image120.JPG differ diff --git a/data/image/combined/image121.jpg b/data/image/combined/image121.jpg new file mode 100755 index 0000000..e492883 Binary files /dev/null and b/data/image/combined/image121.jpg differ diff --git a/data/image/combined/image122.jpg b/data/image/combined/image122.jpg new file mode 100755 index 0000000..280fd68 Binary files /dev/null and b/data/image/combined/image122.jpg differ diff --git a/data/image/combined/image123.jpg b/data/image/combined/image123.jpg new file mode 100755 index 0000000..bc72451 Binary files /dev/null and b/data/image/combined/image123.jpg differ diff --git a/data/image/combined/image124.jpg b/data/image/combined/image124.jpg new file mode 100755 index 0000000..87832a2 Binary files /dev/null and b/data/image/combined/image124.jpg differ diff --git a/data/image/combined/image125.jpg b/data/image/combined/image125.jpg new file mode 100755 index 0000000..14e1152 Binary files /dev/null and b/data/image/combined/image125.jpg differ diff --git a/data/image/combined/image126.jpg b/data/image/combined/image126.jpg new file mode 100755 index 0000000..82943f7 Binary files /dev/null and b/data/image/combined/image126.jpg differ diff --git a/data/image/combined/image127.jpg b/data/image/combined/image127.jpg new file mode 100755 index 0000000..e324779 Binary files /dev/null and b/data/image/combined/image127.jpg differ diff --git a/data/image/combined/image128.jpg b/data/image/combined/image128.jpg new file mode 100755 index 0000000..d9ca35e Binary files /dev/null and b/data/image/combined/image128.jpg differ diff --git a/data/image/combined/image129.jpg b/data/image/combined/image129.jpg new file mode 100755 index 0000000..1324e1f Binary files /dev/null and b/data/image/combined/image129.jpg differ diff --git a/data/image/no/image58.jpg b/data/image/combined/image13.jpg similarity index 100% rename from data/image/no/image58.jpg rename to data/image/combined/image13.jpg diff --git a/data/image/combined/image130.jpg b/data/image/combined/image130.jpg new file mode 100755 index 0000000..4caa390 Binary files /dev/null and b/data/image/combined/image130.jpg differ diff --git a/data/image/combined/image131.jpg b/data/image/combined/image131.jpg new file mode 100755 index 0000000..48a8754 Binary files /dev/null and b/data/image/combined/image131.jpg differ diff --git 
a/data/image/combined/image132.jpg b/data/image/combined/image132.jpg new file mode 100755 index 0000000..b2051e4 Binary files /dev/null and b/data/image/combined/image132.jpg differ diff --git a/data/image/combined/image133.JPG b/data/image/combined/image133.JPG new file mode 100755 index 0000000..e685ddf Binary files /dev/null and b/data/image/combined/image133.JPG differ diff --git a/data/image/combined/image134.jpg b/data/image/combined/image134.jpg new file mode 100755 index 0000000..d7b8f39 Binary files /dev/null and b/data/image/combined/image134.jpg differ diff --git a/data/image/combined/image135.jpg b/data/image/combined/image135.jpg new file mode 100755 index 0000000..0f0db0b Binary files /dev/null and b/data/image/combined/image135.jpg differ diff --git a/data/image/combined/image136.jpg b/data/image/combined/image136.jpg new file mode 100755 index 0000000..764bfb2 Binary files /dev/null and b/data/image/combined/image136.jpg differ diff --git a/data/image/combined/image137.JPG b/data/image/combined/image137.JPG new file mode 100755 index 0000000..a7a2a7c Binary files /dev/null and b/data/image/combined/image137.JPG differ diff --git a/data/image/combined/image138.jpg b/data/image/combined/image138.jpg new file mode 100755 index 0000000..7b0f027 Binary files /dev/null and b/data/image/combined/image138.jpg differ diff --git a/data/image/combined/image139.jpg b/data/image/combined/image139.jpg new file mode 100755 index 0000000..3a32081 Binary files /dev/null and b/data/image/combined/image139.jpg differ diff --git a/data/image/no/image59.JPG b/data/image/combined/image14.jpg similarity index 100% rename from data/image/no/image59.JPG rename to data/image/combined/image14.jpg diff --git a/data/image/combined/image140.JPG b/data/image/combined/image140.JPG new file mode 100755 index 0000000..fc8e22d Binary files /dev/null and b/data/image/combined/image140.JPG differ diff --git a/data/image/combined/image141.JPG b/data/image/combined/image141.JPG new file mode 100755 index 0000000..76d9be2 Binary files /dev/null and b/data/image/combined/image141.JPG differ diff --git a/data/image/combined/image142.jpg b/data/image/combined/image142.jpg new file mode 100755 index 0000000..f0f4baf Binary files /dev/null and b/data/image/combined/image142.jpg differ diff --git a/data/image/combined/image143.JPG b/data/image/combined/image143.JPG new file mode 100755 index 0000000..e492883 Binary files /dev/null and b/data/image/combined/image143.JPG differ diff --git a/data/image/combined/image144.JPG b/data/image/combined/image144.JPG new file mode 100755 index 0000000..5d6725d Binary files /dev/null and b/data/image/combined/image144.JPG differ diff --git a/data/image/combined/image145.JPG b/data/image/combined/image145.JPG new file mode 100755 index 0000000..5fbfe5c Binary files /dev/null and b/data/image/combined/image145.JPG differ diff --git a/data/image/combined/image146.jpg b/data/image/combined/image146.jpg new file mode 100755 index 0000000..3202ad9 Binary files /dev/null and b/data/image/combined/image146.jpg differ diff --git a/data/image/combined/image147.jpg b/data/image/combined/image147.jpg new file mode 100755 index 0000000..af5768c Binary files /dev/null and b/data/image/combined/image147.jpg differ diff --git a/data/image/combined/image148.jpg b/data/image/combined/image148.jpg new file mode 100755 index 0000000..147dd02 Binary files /dev/null and b/data/image/combined/image148.jpg differ diff --git a/data/image/combined/image149.jpg b/data/image/combined/image149.jpg new file 
mode 100755 index 0000000..6646dd3 Binary files /dev/null and b/data/image/combined/image149.jpg differ diff --git a/data/image/no/image60.JPG b/data/image/combined/image15.jpg similarity index 100% rename from data/image/no/image60.JPG rename to data/image/combined/image15.jpg diff --git a/data/image/combined/image150.jpg b/data/image/combined/image150.jpg new file mode 100755 index 0000000..059d0e5 Binary files /dev/null and b/data/image/combined/image150.jpg differ diff --git a/data/image/combined/image151.jpg b/data/image/combined/image151.jpg new file mode 100755 index 0000000..60dbd8b Binary files /dev/null and b/data/image/combined/image151.jpg differ diff --git a/data/image/combined/image152.JPG b/data/image/combined/image152.JPG new file mode 100755 index 0000000..eca668f Binary files /dev/null and b/data/image/combined/image152.JPG differ diff --git a/data/image/combined/image153.JPG b/data/image/combined/image153.JPG new file mode 100755 index 0000000..64989a1 Binary files /dev/null and b/data/image/combined/image153.JPG differ diff --git a/data/image/combined/image154.jpg b/data/image/combined/image154.jpg new file mode 100755 index 0000000..e675c8d Binary files /dev/null and b/data/image/combined/image154.jpg differ diff --git a/data/image/combined/image155.jpg b/data/image/combined/image155.jpg new file mode 100755 index 0000000..5436c77 Binary files /dev/null and b/data/image/combined/image155.jpg differ diff --git a/data/image/combined/image156.JPG b/data/image/combined/image156.JPG new file mode 100755 index 0000000..0801934 Binary files /dev/null and b/data/image/combined/image156.JPG differ diff --git a/data/image/combined/image157.jpg b/data/image/combined/image157.jpg new file mode 100755 index 0000000..e6e8ad2 Binary files /dev/null and b/data/image/combined/image157.jpg differ diff --git a/data/image/combined/image158.jpg b/data/image/combined/image158.jpg new file mode 100755 index 0000000..3076799 Binary files /dev/null and b/data/image/combined/image158.jpg differ diff --git a/data/image/combined/image159.jpg b/data/image/combined/image159.jpg new file mode 100755 index 0000000..2150d0e Binary files /dev/null and b/data/image/combined/image159.jpg differ diff --git a/data/image/no/image61.jpg b/data/image/combined/image16.jpg similarity index 100% rename from data/image/no/image61.jpg rename to data/image/combined/image16.jpg diff --git a/data/image/combined/image160.jpg b/data/image/combined/image160.jpg new file mode 100755 index 0000000..5e7bf87 Binary files /dev/null and b/data/image/combined/image160.jpg differ diff --git a/data/image/combined/image161.JPG b/data/image/combined/image161.JPG new file mode 100755 index 0000000..d516ed8 Binary files /dev/null and b/data/image/combined/image161.JPG differ diff --git a/data/image/combined/image162.JPG b/data/image/combined/image162.JPG new file mode 100755 index 0000000..433c6d3 Binary files /dev/null and b/data/image/combined/image162.JPG differ diff --git a/data/image/combined/image163.jpg b/data/image/combined/image163.jpg new file mode 100755 index 0000000..93a7c42 Binary files /dev/null and b/data/image/combined/image163.jpg differ diff --git a/data/image/combined/image164.JPG b/data/image/combined/image164.JPG new file mode 100755 index 0000000..c6f7993 Binary files /dev/null and b/data/image/combined/image164.JPG differ diff --git a/data/image/combined/image165.JPG b/data/image/combined/image165.JPG new file mode 100755 index 0000000..0c0c7a9 Binary files /dev/null and b/data/image/combined/image165.JPG 
differ diff --git a/data/image/combined/image166.JPG b/data/image/combined/image166.JPG new file mode 100755 index 0000000..dfa3a22 Binary files /dev/null and b/data/image/combined/image166.JPG differ diff --git a/data/image/combined/image167.JPG b/data/image/combined/image167.JPG new file mode 100755 index 0000000..8c5a446 Binary files /dev/null and b/data/image/combined/image167.JPG differ diff --git a/data/image/combined/image168.jpg b/data/image/combined/image168.jpg new file mode 100755 index 0000000..078c7fd Binary files /dev/null and b/data/image/combined/image168.jpg differ diff --git a/data/image/combined/image169.JPG b/data/image/combined/image169.JPG new file mode 100755 index 0000000..cd7805c Binary files /dev/null and b/data/image/combined/image169.JPG differ diff --git a/data/image/no/image62.JPG b/data/image/combined/image17.jpg similarity index 100% rename from data/image/no/image62.JPG rename to data/image/combined/image17.jpg diff --git a/data/image/combined/image170.JPG b/data/image/combined/image170.JPG new file mode 100755 index 0000000..234060f Binary files /dev/null and b/data/image/combined/image170.JPG differ diff --git a/data/image/combined/image171.JPG b/data/image/combined/image171.JPG new file mode 100755 index 0000000..72cdb90 Binary files /dev/null and b/data/image/combined/image171.JPG differ diff --git a/data/image/combined/image172.JPG b/data/image/combined/image172.JPG new file mode 100755 index 0000000..8019ae3 Binary files /dev/null and b/data/image/combined/image172.JPG differ diff --git a/data/image/combined/image173.jpg b/data/image/combined/image173.jpg new file mode 100755 index 0000000..21d5941 Binary files /dev/null and b/data/image/combined/image173.jpg differ diff --git a/data/image/combined/image174.JPG b/data/image/combined/image174.JPG new file mode 100755 index 0000000..b4219ac Binary files /dev/null and b/data/image/combined/image174.JPG differ diff --git a/data/image/combined/image175.jpg b/data/image/combined/image175.jpg new file mode 100755 index 0000000..48fa256 Binary files /dev/null and b/data/image/combined/image175.jpg differ diff --git a/data/image/combined/image176.JPG b/data/image/combined/image176.JPG new file mode 100755 index 0000000..9eb7c71 Binary files /dev/null and b/data/image/combined/image176.JPG differ diff --git a/data/image/combined/image177.jpg b/data/image/combined/image177.jpg new file mode 100755 index 0000000..eaaa871 Binary files /dev/null and b/data/image/combined/image177.jpg differ diff --git a/data/image/combined/image178.JPG b/data/image/combined/image178.JPG new file mode 100755 index 0000000..b77fec8 Binary files /dev/null and b/data/image/combined/image178.JPG differ diff --git a/data/image/combined/image179.JPG b/data/image/combined/image179.JPG new file mode 100755 index 0000000..4058e0d Binary files /dev/null and b/data/image/combined/image179.JPG differ diff --git a/data/image/no/image63.JPG b/data/image/combined/image18.jpg similarity index 100% rename from data/image/no/image63.JPG rename to data/image/combined/image18.jpg diff --git a/data/image/combined/image180.jpg b/data/image/combined/image180.jpg new file mode 100755 index 0000000..eb9e5ea Binary files /dev/null and b/data/image/combined/image180.jpg differ diff --git a/data/image/combined/image181.JPG b/data/image/combined/image181.JPG new file mode 100755 index 0000000..0d6cafc Binary files /dev/null and b/data/image/combined/image181.JPG differ diff --git a/data/image/combined/image182.JPG b/data/image/combined/image182.JPG new file 
mode 100755 index 0000000..8edaefd Binary files /dev/null and b/data/image/combined/image182.JPG differ diff --git a/data/image/no/image11.jpg b/data/image/combined/image183.jpg similarity index 100% rename from data/image/no/image11.jpg rename to data/image/combined/image183.jpg diff --git a/data/image/no/image12.jpg b/data/image/combined/image184.jpg similarity index 100% rename from data/image/no/image12.jpg rename to data/image/combined/image184.jpg diff --git a/data/image/no/image13.jpg b/data/image/combined/image185.jpg similarity index 100% rename from data/image/no/image13.jpg rename to data/image/combined/image185.jpg diff --git a/data/image/no/image14.jpg b/data/image/combined/image186.jpg similarity index 100% rename from data/image/no/image14.jpg rename to data/image/combined/image186.jpg diff --git a/data/image/no/image15.jpg b/data/image/combined/image187.jpg similarity index 100% rename from data/image/no/image15.jpg rename to data/image/combined/image187.jpg diff --git a/data/image/no/image16.jpg b/data/image/combined/image188.jpg similarity index 100% rename from data/image/no/image16.jpg rename to data/image/combined/image188.jpg diff --git a/data/image/no/image17.jpg b/data/image/combined/image189.jpg similarity index 100% rename from data/image/no/image17.jpg rename to data/image/combined/image189.jpg diff --git a/data/image/no/image64.jpg b/data/image/combined/image19.jpg similarity index 100% rename from data/image/no/image64.jpg rename to data/image/combined/image19.jpg diff --git a/data/image/no/image18.jpg b/data/image/combined/image190.jpg similarity index 100% rename from data/image/no/image18.jpg rename to data/image/combined/image190.jpg diff --git a/data/image/no/image19.jpg b/data/image/combined/image191.jpg similarity index 100% rename from data/image/no/image19.jpg rename to data/image/combined/image191.jpg diff --git a/data/image/no/image20.jpg b/data/image/combined/image192.jpg similarity index 100% rename from data/image/no/image20.jpg rename to data/image/combined/image192.jpg diff --git a/data/image/no/image21.jpg b/data/image/combined/image193.jpg similarity index 100% rename from data/image/no/image21.jpg rename to data/image/combined/image193.jpg diff --git a/data/image/no/image22.jpg b/data/image/combined/image194.jpg similarity index 100% rename from data/image/no/image22.jpg rename to data/image/combined/image194.jpg diff --git a/data/image/no/image23.jpg b/data/image/combined/image195.jpg similarity index 100% rename from data/image/no/image23.jpg rename to data/image/combined/image195.jpg diff --git a/data/image/no/image24.jpg b/data/image/combined/image196.jpg similarity index 100% rename from data/image/no/image24.jpg rename to data/image/combined/image196.jpg diff --git a/data/image/no/image25.jpg b/data/image/combined/image197.jpg similarity index 100% rename from data/image/no/image25.jpg rename to data/image/combined/image197.jpg diff --git a/data/image/no/image26.jpg b/data/image/combined/image198.jpg similarity index 100% rename from data/image/no/image26.jpg rename to data/image/combined/image198.jpg diff --git a/data/image/no/image27.jpg b/data/image/combined/image199.jpg similarity index 100% rename from data/image/no/image27.jpg rename to data/image/combined/image199.jpg diff --git a/data/image/no/image2.jpeg b/data/image/combined/image2.jpeg similarity index 100% rename from data/image/no/image2.jpeg rename to data/image/combined/image2.jpeg diff --git a/data/image/no/image65.jpg b/data/image/combined/image20.jpg similarity index 
100% rename from data/image/no/image65.jpg rename to data/image/combined/image20.jpg diff --git a/data/image/no/image28.jpg b/data/image/combined/image200.jpg similarity index 100% rename from data/image/no/image28.jpg rename to data/image/combined/image200.jpg diff --git a/data/image/no/image29.jpg b/data/image/combined/image201.jpg similarity index 100% rename from data/image/no/image29.jpg rename to data/image/combined/image201.jpg diff --git a/data/image/no/image30.jpg b/data/image/combined/image202.jpg similarity index 100% rename from data/image/no/image30.jpg rename to data/image/combined/image202.jpg diff --git a/data/image/no/image31.jpg b/data/image/combined/image203.jpg similarity index 100% rename from data/image/no/image31.jpg rename to data/image/combined/image203.jpg diff --git a/data/image/no/image32.jpg b/data/image/combined/image204.jpg similarity index 100% rename from data/image/no/image32.jpg rename to data/image/combined/image204.jpg diff --git a/data/image/no/image33.jpg b/data/image/combined/image205.jpg similarity index 100% rename from data/image/no/image33.jpg rename to data/image/combined/image205.jpg diff --git a/data/image/no/image34.jpg b/data/image/combined/image206.jpg similarity index 100% rename from data/image/no/image34.jpg rename to data/image/combined/image206.jpg diff --git a/data/image/no/image35.jpg b/data/image/combined/image207.jpg similarity index 100% rename from data/image/no/image35.jpg rename to data/image/combined/image207.jpg diff --git a/data/image/no/image36.jpg b/data/image/combined/image208.jpg similarity index 100% rename from data/image/no/image36.jpg rename to data/image/combined/image208.jpg diff --git a/data/image/no/image37.jpg b/data/image/combined/image209.jpg similarity index 100% rename from data/image/no/image37.jpg rename to data/image/combined/image209.jpg diff --git a/data/image/no/image66.jpg b/data/image/combined/image21.jpg similarity index 100% rename from data/image/no/image66.jpg rename to data/image/combined/image21.jpg diff --git a/data/image/no/image38.jpg b/data/image/combined/image210.jpg similarity index 100% rename from data/image/no/image38.jpg rename to data/image/combined/image210.jpg diff --git a/data/image/no/image39.jpg b/data/image/combined/image211.jpg similarity index 100% rename from data/image/no/image39.jpg rename to data/image/combined/image211.jpg diff --git a/data/image/no/image40.jpg b/data/image/combined/image212.jpg similarity index 100% rename from data/image/no/image40.jpg rename to data/image/combined/image212.jpg diff --git a/data/image/no/image41.jpg b/data/image/combined/image213.jpg similarity index 100% rename from data/image/no/image41.jpg rename to data/image/combined/image213.jpg diff --git a/data/image/no/image42.jpg b/data/image/combined/image214.JPG similarity index 100% rename from data/image/no/image42.jpg rename to data/image/combined/image214.JPG diff --git a/data/image/no/image43.jpg b/data/image/combined/image215.JPG similarity index 100% rename from data/image/no/image43.jpg rename to data/image/combined/image215.JPG diff --git a/data/image/no/image44.jpg b/data/image/combined/image216.JPG similarity index 100% rename from data/image/no/image44.jpg rename to data/image/combined/image216.JPG diff --git a/data/image/no/image45.jpg b/data/image/combined/image217.jpg similarity index 100% rename from data/image/no/image45.jpg rename to data/image/combined/image217.jpg diff --git a/data/image/no/image46.jpg b/data/image/combined/image218.jpg similarity index 100% rename from 
data/image/no/image46.jpg rename to data/image/combined/image218.jpg diff --git a/data/image/no/image47.jpeg b/data/image/combined/image219.jpeg similarity index 100% rename from data/image/no/image47.jpeg rename to data/image/combined/image219.jpeg diff --git a/data/image/no/image52.jpg b/data/image/combined/image22.jpg similarity index 100% rename from data/image/no/image52.jpg rename to data/image/combined/image22.jpg diff --git a/data/image/no/image50.JPG b/data/image/combined/image220.jpg similarity index 100% rename from data/image/no/image50.JPG rename to data/image/combined/image220.jpg diff --git a/data/image/no/image51.JPG b/data/image/combined/image221.jpg similarity index 100% rename from data/image/no/image51.JPG rename to data/image/combined/image221.jpg diff --git a/data/image/no/image67.jpg b/data/image/combined/image222.JPG similarity index 100% rename from data/image/no/image67.jpg rename to data/image/combined/image222.JPG diff --git a/data/image/no/image4.jpg b/data/image/combined/image223.jpg similarity index 100% rename from data/image/no/image4.jpg rename to data/image/combined/image223.jpg diff --git a/data/image/no/image54.jpg b/data/image/combined/image224.jpg similarity index 100% rename from data/image/no/image54.jpg rename to data/image/combined/image224.jpg diff --git a/data/image/no/image55.jpg b/data/image/combined/image225.jpg similarity index 100% rename from data/image/no/image55.jpg rename to data/image/combined/image225.jpg diff --git a/data/image/no/image56.jpg b/data/image/combined/image226.jpg similarity index 100% rename from data/image/no/image56.jpg rename to data/image/combined/image226.jpg diff --git a/data/image/no/image57.jpg b/data/image/combined/image227.jpg similarity index 100% rename from data/image/no/image57.jpg rename to data/image/combined/image227.jpg diff --git a/data/image/no/image68.jpeg b/data/image/combined/image228.jpeg similarity index 100% rename from data/image/no/image68.jpeg rename to data/image/combined/image228.jpeg diff --git a/data/image/no/image69.jpg b/data/image/combined/image229.JPG similarity index 100% rename from data/image/no/image69.jpg rename to data/image/combined/image229.JPG diff --git a/data/image/no/image71.jpg b/data/image/combined/image23.jpg similarity index 100% rename from data/image/no/image71.jpg rename to data/image/combined/image23.jpg diff --git a/data/image/no/image70.jpeg b/data/image/combined/image230.jpeg similarity index 100% rename from data/image/no/image70.jpeg rename to data/image/combined/image230.jpeg diff --git a/data/image/no/image76.jpeg b/data/image/combined/image231.jpeg similarity index 100% rename from data/image/no/image76.jpeg rename to data/image/combined/image231.jpeg diff --git a/data/image/no/image78.jpg b/data/image/combined/image232.jpg similarity index 100% rename from data/image/no/image78.jpg rename to data/image/combined/image232.jpg diff --git a/data/image/no/image79.jpg b/data/image/combined/image233.jpg similarity index 100% rename from data/image/no/image79.jpg rename to data/image/combined/image233.jpg diff --git a/data/image/no/image80.jpg b/data/image/combined/image234.jpg similarity index 100% rename from data/image/no/image80.jpg rename to data/image/combined/image234.jpg diff --git a/data/image/no/image81.jpg b/data/image/combined/image235.jpg similarity index 100% rename from data/image/no/image81.jpg rename to data/image/combined/image235.jpg diff --git a/data/image/no/image82.jpg b/data/image/combined/image236.jpg similarity index 100% rename from 
data/image/no/image82.jpg rename to data/image/combined/image236.jpg diff --git a/data/image/no/image83.jpg b/data/image/combined/image237.jpg similarity index 100% rename from data/image/no/image83.jpg rename to data/image/combined/image237.jpg diff --git a/data/image/no/image84.jpg b/data/image/combined/image238.JPG similarity index 100% rename from data/image/no/image84.jpg rename to data/image/combined/image238.JPG diff --git a/data/image/no/image85.jpg b/data/image/combined/image239.JPG similarity index 100% rename from data/image/no/image85.jpg rename to data/image/combined/image239.JPG diff --git a/data/image/no/image72.png b/data/image/combined/image24.png similarity index 100% rename from data/image/no/image72.png rename to data/image/combined/image24.png diff --git a/data/image/no/image86.jpg b/data/image/combined/image240.JPG similarity index 100% rename from data/image/no/image86.jpg rename to data/image/combined/image240.JPG diff --git a/data/image/no/image87.jpg b/data/image/combined/image241.JPG similarity index 100% rename from data/image/no/image87.jpg rename to data/image/combined/image241.JPG diff --git a/data/image/no/image90.jpg b/data/image/combined/image242.jpg similarity index 100% rename from data/image/no/image90.jpg rename to data/image/combined/image242.jpg diff --git a/data/image/no/image91.jpg b/data/image/combined/image243.jpg similarity index 100% rename from data/image/no/image91.jpg rename to data/image/combined/image243.jpg diff --git a/data/image/no/image92.jpg b/data/image/combined/image244.jpg similarity index 100% rename from data/image/no/image92.jpg rename to data/image/combined/image244.jpg diff --git a/data/image/no/image93.jpg b/data/image/combined/image245.jpg similarity index 100% rename from data/image/no/image93.jpg rename to data/image/combined/image245.jpg diff --git a/data/image/combined/image246.jpg b/data/image/combined/image246.jpg new file mode 100755 index 0000000..6a82b3b Binary files /dev/null and b/data/image/combined/image246.jpg differ diff --git a/data/image/combined/image247.jpg b/data/image/combined/image247.jpg new file mode 100755 index 0000000..d6cb1cd Binary files /dev/null and b/data/image/combined/image247.jpg differ diff --git a/data/image/combined/image248.jpg b/data/image/combined/image248.jpg new file mode 100755 index 0000000..08ec3fa Binary files /dev/null and b/data/image/combined/image248.jpg differ diff --git a/data/image/combined/image249.jpg b/data/image/combined/image249.jpg new file mode 100755 index 0000000..d945f3c Binary files /dev/null and b/data/image/combined/image249.jpg differ diff --git a/data/image/no/image73.jpg b/data/image/combined/image25.jpg similarity index 100% rename from data/image/no/image73.jpg rename to data/image/combined/image25.jpg diff --git a/data/image/combined/image250.JPG b/data/image/combined/image250.JPG new file mode 100755 index 0000000..059d0e5 Binary files /dev/null and b/data/image/combined/image250.JPG differ diff --git a/data/image/combined/image251.JPG b/data/image/combined/image251.JPG new file mode 100755 index 0000000..81d8624 Binary files /dev/null and b/data/image/combined/image251.JPG differ diff --git a/data/image/combined/image252.JPG b/data/image/combined/image252.JPG new file mode 100755 index 0000000..64989a1 Binary files /dev/null and b/data/image/combined/image252.JPG differ diff --git a/data/image/combined/image253.JPG b/data/image/combined/image253.JPG new file mode 100755 index 0000000..433f468 Binary files /dev/null and 
b/data/image/combined/image253.JPG differ diff --git a/data/image/no/image74.jpg b/data/image/combined/image26.jpg similarity index 100% rename from data/image/no/image74.jpg rename to data/image/combined/image26.jpg diff --git a/data/image/no/image75.jpg b/data/image/combined/image27.jpg similarity index 100% rename from data/image/no/image75.jpg rename to data/image/combined/image27.jpg diff --git a/data/image/no/image77.jpg b/data/image/combined/image28.jpg similarity index 100% rename from data/image/no/image77.jpg rename to data/image/combined/image28.jpg diff --git a/data/image/no/image88.jpg b/data/image/combined/image29.jpg similarity index 100% rename from data/image/no/image88.jpg rename to data/image/combined/image29.jpg diff --git a/data/image/no/image3.jpg b/data/image/combined/image3.jpg similarity index 100% rename from data/image/no/image3.jpg rename to data/image/combined/image3.jpg diff --git a/data/image/no/image89.jpg b/data/image/combined/image30.jpg similarity index 100% rename from data/image/no/image89.jpg rename to data/image/combined/image30.jpg diff --git a/data/image/no/image94.jpg b/data/image/combined/image31.jpg similarity index 100% rename from data/image/no/image94.jpg rename to data/image/combined/image31.jpg diff --git a/data/image/no/image95.jpg b/data/image/combined/image32.jpg similarity index 100% rename from data/image/no/image95.jpg rename to data/image/combined/image32.jpg diff --git a/data/image/no/image96.jpg b/data/image/combined/image33.jpg similarity index 100% rename from data/image/no/image96.jpg rename to data/image/combined/image33.jpg diff --git a/data/image/no/image97.jpg b/data/image/combined/image34.jpg similarity index 100% rename from data/image/no/image97.jpg rename to data/image/combined/image34.jpg diff --git a/data/image/no/image98.jpg b/data/image/combined/image35.jpg similarity index 100% rename from data/image/no/image98.jpg rename to data/image/combined/image35.jpg diff --git a/data/image/combined/image36.jpg b/data/image/combined/image36.jpg new file mode 100755 index 0000000..d248b62 Binary files /dev/null and b/data/image/combined/image36.jpg differ diff --git a/data/image/combined/image37.jpg b/data/image/combined/image37.jpg new file mode 100755 index 0000000..7c8cdd9 Binary files /dev/null and b/data/image/combined/image37.jpg differ diff --git a/data/image/combined/image38.jpg b/data/image/combined/image38.jpg new file mode 100755 index 0000000..4270441 Binary files /dev/null and b/data/image/combined/image38.jpg differ diff --git a/data/image/combined/image39.jpg b/data/image/combined/image39.jpg new file mode 100755 index 0000000..c4a14c6 Binary files /dev/null and b/data/image/combined/image39.jpg differ diff --git a/data/image/no/image53.jpg b/data/image/combined/image4.jpg similarity index 100% rename from data/image/no/image53.jpg rename to data/image/combined/image4.jpg diff --git a/data/image/combined/image40.jpg b/data/image/combined/image40.jpg new file mode 100755 index 0000000..e2dc65d Binary files /dev/null and b/data/image/combined/image40.jpg differ diff --git a/data/image/combined/image41.jpg b/data/image/combined/image41.jpg new file mode 100755 index 0000000..e9902b7 Binary files /dev/null and b/data/image/combined/image41.jpg differ diff --git a/data/image/combined/image42.jpg b/data/image/combined/image42.jpg new file mode 100755 index 0000000..3788f35 Binary files /dev/null and b/data/image/combined/image42.jpg differ diff --git a/data/image/combined/image43.jpg b/data/image/combined/image43.jpg 
new file mode 100755 index 0000000..81d8624 Binary files /dev/null and b/data/image/combined/image43.jpg differ diff --git a/data/image/combined/image44.jpg b/data/image/combined/image44.jpg new file mode 100755 index 0000000..60ec08b Binary files /dev/null and b/data/image/combined/image44.jpg differ diff --git a/data/image/combined/image45.jpg b/data/image/combined/image45.jpg new file mode 100755 index 0000000..4adc331 Binary files /dev/null and b/data/image/combined/image45.jpg differ diff --git a/data/image/combined/image46.jpg b/data/image/combined/image46.jpg new file mode 100755 index 0000000..38dfc74 Binary files /dev/null and b/data/image/combined/image46.jpg differ diff --git a/data/image/combined/image47.jpg b/data/image/combined/image47.jpg new file mode 100755 index 0000000..a124b46 Binary files /dev/null and b/data/image/combined/image47.jpg differ diff --git a/data/image/combined/image48.jpg b/data/image/combined/image48.jpg new file mode 100755 index 0000000..8f34060 Binary files /dev/null and b/data/image/combined/image48.jpg differ diff --git a/data/image/combined/image49.jpg b/data/image/combined/image49.jpg new file mode 100755 index 0000000..aedac22 Binary files /dev/null and b/data/image/combined/image49.jpg differ diff --git a/data/image/no/image5.jpg b/data/image/combined/image5.jpg similarity index 100% rename from data/image/no/image5.jpg rename to data/image/combined/image5.jpg diff --git a/data/image/combined/image50.JPG b/data/image/combined/image50.JPG new file mode 100755 index 0000000..a2cb1f6 Binary files /dev/null and b/data/image/combined/image50.JPG differ diff --git a/data/image/combined/image51.JPG b/data/image/combined/image51.JPG new file mode 100755 index 0000000..1213ec5 Binary files /dev/null and b/data/image/combined/image51.JPG differ diff --git a/data/image/combined/image52.jpg b/data/image/combined/image52.jpg new file mode 100755 index 0000000..fdb6058 Binary files /dev/null and b/data/image/combined/image52.jpg differ diff --git a/data/image/combined/image53.jpg b/data/image/combined/image53.jpg new file mode 100755 index 0000000..814e9f0 Binary files /dev/null and b/data/image/combined/image53.jpg differ diff --git a/data/image/combined/image54.jpg b/data/image/combined/image54.jpg new file mode 100755 index 0000000..9aa4a76 Binary files /dev/null and b/data/image/combined/image54.jpg differ diff --git a/data/image/combined/image55.jpg b/data/image/combined/image55.jpg new file mode 100755 index 0000000..3c345e4 Binary files /dev/null and b/data/image/combined/image55.jpg differ diff --git a/data/image/combined/image56.jpg b/data/image/combined/image56.jpg new file mode 100755 index 0000000..def6bd2 Binary files /dev/null and b/data/image/combined/image56.jpg differ diff --git a/data/image/combined/image57.jpg b/data/image/combined/image57.jpg new file mode 100755 index 0000000..151ea30 Binary files /dev/null and b/data/image/combined/image57.jpg differ diff --git a/data/image/combined/image58.jpg b/data/image/combined/image58.jpg new file mode 100755 index 0000000..c8018cf Binary files /dev/null and b/data/image/combined/image58.jpg differ diff --git a/data/image/combined/image59.JPG b/data/image/combined/image59.JPG new file mode 100755 index 0000000..bd33641 Binary files /dev/null and b/data/image/combined/image59.JPG differ diff --git a/data/image/no/image6.jpg b/data/image/combined/image6.jpg similarity index 100% rename from data/image/no/image6.jpg rename to data/image/combined/image6.jpg diff --git a/data/image/combined/image60.JPG 
b/data/image/combined/image60.JPG new file mode 100755 index 0000000..64989a1 Binary files /dev/null and b/data/image/combined/image60.JPG differ diff --git a/data/image/combined/image61.jpg b/data/image/combined/image61.jpg new file mode 100755 index 0000000..d8769b6 Binary files /dev/null and b/data/image/combined/image61.jpg differ diff --git a/data/image/combined/image62.JPG b/data/image/combined/image62.JPG new file mode 100755 index 0000000..b30810a Binary files /dev/null and b/data/image/combined/image62.JPG differ diff --git a/data/image/combined/image63.JPG b/data/image/combined/image63.JPG new file mode 100755 index 0000000..d945f3c Binary files /dev/null and b/data/image/combined/image63.JPG differ diff --git a/data/image/combined/image64.jpg b/data/image/combined/image64.jpg new file mode 100755 index 0000000..def6bd2 Binary files /dev/null and b/data/image/combined/image64.jpg differ diff --git a/data/image/combined/image65.jpg b/data/image/combined/image65.jpg new file mode 100755 index 0000000..0ef39f2 Binary files /dev/null and b/data/image/combined/image65.jpg differ diff --git a/data/image/combined/image66.jpg b/data/image/combined/image66.jpg new file mode 100755 index 0000000..51eab15 Binary files /dev/null and b/data/image/combined/image66.jpg differ diff --git a/data/image/combined/image67.jpg b/data/image/combined/image67.jpg new file mode 100755 index 0000000..4d6fc19 Binary files /dev/null and b/data/image/combined/image67.jpg differ diff --git a/data/image/combined/image68.jpg b/data/image/combined/image68.jpg new file mode 100755 index 0000000..ef6fcc6 Binary files /dev/null and b/data/image/combined/image68.jpg differ diff --git a/data/image/combined/image69.jpg b/data/image/combined/image69.jpg new file mode 100755 index 0000000..9c3425b Binary files /dev/null and b/data/image/combined/image69.jpg differ diff --git a/data/image/no/image7.jpg b/data/image/combined/image7.jpg similarity index 100% rename from data/image/no/image7.jpg rename to data/image/combined/image7.jpg diff --git a/data/image/combined/image70.JPG b/data/image/combined/image70.JPG new file mode 100755 index 0000000..7d34db0 Binary files /dev/null and b/data/image/combined/image70.JPG differ diff --git a/data/image/combined/image71.jpg b/data/image/combined/image71.jpg new file mode 100755 index 0000000..dbd4a54 Binary files /dev/null and b/data/image/combined/image71.jpg differ diff --git a/data/image/combined/image72.JPG b/data/image/combined/image72.JPG new file mode 100755 index 0000000..747fdd0 Binary files /dev/null and b/data/image/combined/image72.JPG differ diff --git a/data/image/combined/image73.jpg b/data/image/combined/image73.jpg new file mode 100755 index 0000000..53fa58c Binary files /dev/null and b/data/image/combined/image73.jpg differ diff --git a/data/image/combined/image74.jpg b/data/image/combined/image74.jpg new file mode 100755 index 0000000..1a05c44 Binary files /dev/null and b/data/image/combined/image74.jpg differ diff --git a/data/image/combined/image75.jpg b/data/image/combined/image75.jpg new file mode 100755 index 0000000..127ee2b Binary files /dev/null and b/data/image/combined/image75.jpg differ diff --git a/data/image/combined/image76.jpg b/data/image/combined/image76.jpg new file mode 100755 index 0000000..8784fee Binary files /dev/null and b/data/image/combined/image76.jpg differ diff --git a/data/image/combined/image77.jpg b/data/image/combined/image77.jpg new file mode 100755 index 0000000..f848823 Binary files /dev/null and 
b/data/image/combined/image77.jpg differ diff --git a/data/image/combined/image78.jpg b/data/image/combined/image78.jpg new file mode 100755 index 0000000..def6bd2 Binary files /dev/null and b/data/image/combined/image78.jpg differ diff --git a/data/image/combined/image79.jpg b/data/image/combined/image79.jpg new file mode 100755 index 0000000..8b48ebe Binary files /dev/null and b/data/image/combined/image79.jpg differ diff --git a/data/image/no/image8.jpg b/data/image/combined/image8.jpg similarity index 100% rename from data/image/no/image8.jpg rename to data/image/combined/image8.jpg diff --git a/data/image/combined/image80.jpg b/data/image/combined/image80.jpg new file mode 100755 index 0000000..4a14791 Binary files /dev/null and b/data/image/combined/image80.jpg differ diff --git a/data/image/combined/image81.jpg b/data/image/combined/image81.jpg new file mode 100755 index 0000000..140fbff Binary files /dev/null and b/data/image/combined/image81.jpg differ diff --git a/data/image/combined/image82.jpg b/data/image/combined/image82.jpg new file mode 100755 index 0000000..d8a184e Binary files /dev/null and b/data/image/combined/image82.jpg differ diff --git a/data/image/combined/image83.jpg b/data/image/combined/image83.jpg new file mode 100755 index 0000000..f50dfd7 Binary files /dev/null and b/data/image/combined/image83.jpg differ diff --git a/data/image/combined/image84.jpg b/data/image/combined/image84.jpg new file mode 100755 index 0000000..287cb5b Binary files /dev/null and b/data/image/combined/image84.jpg differ diff --git a/data/image/combined/image85.jpg b/data/image/combined/image85.jpg new file mode 100755 index 0000000..151ea30 Binary files /dev/null and b/data/image/combined/image85.jpg differ diff --git a/data/image/combined/image86.jpg b/data/image/combined/image86.jpg new file mode 100755 index 0000000..55dc972 Binary files /dev/null and b/data/image/combined/image86.jpg differ diff --git a/data/image/combined/image87.jpg b/data/image/combined/image87.jpg new file mode 100755 index 0000000..3419656 Binary files /dev/null and b/data/image/combined/image87.jpg differ diff --git a/data/image/combined/image88.jpg b/data/image/combined/image88.jpg new file mode 100755 index 0000000..ec593b4 Binary files /dev/null and b/data/image/combined/image88.jpg differ diff --git a/data/image/combined/image89.jpg b/data/image/combined/image89.jpg new file mode 100755 index 0000000..1213ec5 Binary files /dev/null and b/data/image/combined/image89.jpg differ diff --git a/data/image/no/image9.jpg b/data/image/combined/image9.jpg similarity index 100% rename from data/image/no/image9.jpg rename to data/image/combined/image9.jpg diff --git a/data/image/combined/image90.jpg b/data/image/combined/image90.jpg new file mode 100755 index 0000000..def6bd2 Binary files /dev/null and b/data/image/combined/image90.jpg differ diff --git a/data/image/combined/image91.jpg b/data/image/combined/image91.jpg new file mode 100755 index 0000000..59190e4 Binary files /dev/null and b/data/image/combined/image91.jpg differ diff --git a/data/image/combined/image92.jpg b/data/image/combined/image92.jpg new file mode 100755 index 0000000..3318347 Binary files /dev/null and b/data/image/combined/image92.jpg differ diff --git a/data/image/combined/image93.jpg b/data/image/combined/image93.jpg new file mode 100755 index 0000000..8b48ebe Binary files /dev/null and b/data/image/combined/image93.jpg differ diff --git a/data/image/combined/image94.jpg b/data/image/combined/image94.jpg new file mode 100755 index 
0000000..45334c1 Binary files /dev/null and b/data/image/combined/image94.jpg differ diff --git a/data/image/combined/image95.jpg b/data/image/combined/image95.jpg new file mode 100755 index 0000000..2659da0 Binary files /dev/null and b/data/image/combined/image95.jpg differ diff --git a/data/image/combined/image96.jpg b/data/image/combined/image96.jpg new file mode 100755 index 0000000..e2b78b0 Binary files /dev/null and b/data/image/combined/image96.jpg differ diff --git a/data/image/combined/image97.jpg b/data/image/combined/image97.jpg new file mode 100755 index 0000000..68cb73e Binary files /dev/null and b/data/image/combined/image97.jpg differ diff --git a/data/image/combined/image98.jpg b/data/image/combined/image98.jpg new file mode 100755 index 0000000..0f0db0b Binary files /dev/null and b/data/image/combined/image98.jpg differ diff --git a/data/image/combined/image99.jpg b/data/image/combined/image99.jpg new file mode 100755 index 0000000..640d411 Binary files /dev/null and b/data/image/combined/image99.jpg differ
diff --git a/gen/mxm_gen.cpp b/gen/mxm_gen.cpp
new file mode 100644
index 0000000..c3d268d
--- /dev/null
+++ b/gen/mxm_gen.cpp
@@ -0,0 +1,185 @@
+// g++ -std=c++11 -Llib -I../include -I../src mxm_gen.cpp -ltaco;./a.out
+
+#include "taco.h"
+#include "taco/index_notation/transformations.h"
+#include "codegen/codegen_c.h"
+#include "taco/lower/lower.h"
+
+#include <vector>
+#include <string>
+#include <sstream>
+#include <fstream>
+#include <limits>
+
+using namespace taco;
+
+ir::Expr orImpl(const std::vector<ir::Expr>& v) {
+  return ir::Or::make(v[0], v[1]);
+}
+Func OrOp("or", orImpl, {Annihilator(true), Identity(false), Commutative(), Associative()});
+
+ir::Expr andImpl(const std::vector<ir::Expr>& v) {
+  return ir::And::make(v[0], v[1]);
+}
+Func AndOp("and", andImpl, {Annihilator(false), Identity(true), Commutative(), Associative()});
+
+ir::Expr addImpl(const std::vector<ir::Expr>& v) {
+  return ir::Add::make(v[0], v[1]);
+}
+Func AddOp("add", addImpl, {Annihilator(std::numeric_limits<double>::infinity()), Identity(0), Commutative(), Associative()});
+
+ir::Expr minImpl(const std::vector<ir::Expr>& v) {
+  return ir::Min::make(v[0], v[1]);
+}
+Func MinOp("min", minImpl, {Identity(std::numeric_limits<double>::infinity()), Commutative(), Associative()});
+
+ir::Expr maskImpl(const std::vector<ir::Expr>& v) {
+  return v[0];
+}
+struct MaskAlgebra {
+  IterationAlgebra operator()(const std::vector<IndexExpr>& r) {
+    return Intersect(r[0], Complement(r[1]));
+  }
+};
+Func MaskOp("mask", maskImpl, MaskAlgebra());
+
+static bool compare(std::vector<IndexVar> vars1, std::vector<IndexVar> vars2) {
+  return vars1 == vars2;
+}
+
+static IndexStmt optimizeSpGEMM(IndexStmt stmt) {
+  if (!isa<Forall>(stmt)) {
+    return stmt;
+  }
+  Forall foralli = to<Forall>(stmt);
+  IndexVar i = foralli.getIndexVar();
+
+  if (!isa<Forall>(foralli.getStmt())) {
+    return stmt;
+  }
+  Forall forallk = to<Forall>(foralli.getStmt());
+  IndexVar k = forallk.getIndexVar();
+
+  if (!isa<Forall>(forallk.getStmt())) {
+    return stmt;
+  }
+  Forall forallj = to<Forall>(forallk.getStmt());
+  IndexVar j = forallj.getIndexVar();
+
+  if (!isa<Assignment>(forallj.getStmt())) {
+    return stmt;
+  }
+  Assignment assignment = to<Assignment>(forallj.getStmt());
+  IndexExpr reduceOp = assignment.getOperator();
+
+  if (!isa<Call>(assignment.getRhs())) {
+    return stmt;
+  }
+  Call mul = to<Call>(assignment.getRhs());
+
+  taco_iassert(isa<Access>(assignment.getLhs()));
+  if (!isa<Access>(mul.getArgs()[0])) {
+    return stmt;
+  }
+  if (!isa<Access>(mul.getArgs()[1])) {
+    return stmt;
+  }
+
+  Access Aaccess = to<Access>(assignment.getLhs());
+  Access Baccess = to<Access>(mul.getArgs()[0]);
+  Access Caccess = to<Access>(mul.getArgs()[1]);
+
+  if (Aaccess.getIndexVars().size() != 2 ||
+      Baccess.getIndexVars().size() != 2 ||
+      Caccess.getIndexVars().size() != 2) {
+    return stmt;
+  }
+
+  if (!compare(Aaccess.getIndexVars(), {i,j}) ||
+      !compare(Baccess.getIndexVars(), {i,k}) ||
+      !compare(Caccess.getIndexVars(), {k,j})) {
+    return stmt;
+  }
+
+  TensorVar A = Aaccess.getTensorVar();
+  if (A.getFormat().getModeFormats()[0].getName() != "dense" ||
+      A.getFormat().getModeFormats()[1].getName() != "compressed" ||
+      A.getFormat().getModeOrdering()[0] != 0 ||
+      A.getFormat().getModeOrdering()[1] != 1) {
+    return stmt;
+  }
+
+  // I think we can do a linear combination of rows as long as there are no
+  // permutations in the format and the level formats are ordered. The
+  // i -> k -> j loops should iterate over the data structures without issue.
+  TensorVar B = Baccess.getTensorVar();
+  if (!B.getFormat().getModeFormats()[0].isOrdered() && A.getFormat().getModeFormats()[0].isOrdered() ||
+      !B.getFormat().getModeFormats()[1].isOrdered() && A.getFormat().getModeFormats()[1].isOrdered() ||
+      B.getFormat().getModeOrdering()[0] != 0 ||
+      B.getFormat().getModeOrdering()[1] != 1) {
+    return stmt;
+  }
+
+  TensorVar C = Caccess.getTensorVar();
+  if (!C.getFormat().getModeFormats()[0].isOrdered() && A.getFormat().getModeFormats()[0].isOrdered() ||
+      !C.getFormat().getModeFormats()[1].isOrdered() && A.getFormat().getModeFormats()[1].isOrdered() ||
+      C.getFormat().getModeOrdering()[0] != 0 ||
+      C.getFormat().getModeOrdering()[1] != 1) {
+    return stmt;
+  }
+
+  // It's an SpMM statement, so return an optimized SpMM statement.
+  TensorVar w("w",
+              Type(A.getType().getDataType(),
+                   {A.getType().getShape().getDimension(1)}),
+              taco::dense,
+              A.getFill());
+  return forall(i,
+                where(forall(j,
+                             A(i,j) = w(j)),
+                      forall(k,
+                             forall(j,
+                                    Assignment(w(j), mul, reduceOp)))));
+}
+
+void printToFile(std::string filename, IndexStmt stmt) {
+  std::stringstream source;
+
+  std::shared_ptr<ir::CodeGen> codegen = ir::CodeGen::init_default(source, ir::CodeGen::ImplementationGen);
+  ir::Stmt compute = lower(stmt, "evaluate", true, true);
+  codegen->compile(compute, true);
+
+  std::ofstream source_file;
+  source_file.open(filename + ".c");
+  source_file << source.str();
+  source_file.close();
+}
+
+Format UCSR({Dense, Compressed(ModeFormat::NOT_ORDERED)});
+Format UZCSR({Dense, Compressed({ModeFormat::NOT_ORDERED, ModeFormat::ZEROLESS})});
+
+int main() {
+  IndexVar i("i"), j("j"), k("k");
+#if 1
+  Tensor<double> A("A", {200, 200}, UCSR, std::numeric_limits<double>::infinity());
+  Tensor<double> B("B", {200, 200}, UCSR, std::numeric_limits<double>::infinity());
+  Tensor<double> C("C", {200, 200}, UCSR, std::numeric_limits<double>::infinity());
+  A(i,j) = Reduction(MinOp(), k, AddOp(B(i,k), C(k,j)));
+#else
+  Tensor<bool> A("A", {200, 200}, UCSR);
+  Tensor<bool> B("B", {200, 200}, UZCSR);
+  Tensor<bool> C("C", {200, 200}, UZCSR);
+  A(i,j) = Reduction(OrOp(), k, AndOp(B(i,k), C(k,j)));
+#endif
+  IndexStmt stmt = A.getAssignment().concretize();
+  stmt = reorderLoopsTopologically(stmt);
+  stmt = optimizeSpGEMM(stmt);
+  stmt = stmt.assemble(A.getTensorVar(), AssembleStrategy::Insert);
+  IndexVar qi = to<Forall>(to<Assemble>(stmt).getQueries()).getIndexVar();
+  stmt = stmt.parallelize(i, ParallelUnit::CPUThread,
+                          OutputRaceStrategy::NoRaces)
+             .parallelize(qi, ParallelUnit::CPUThread,
+                          OutputRaceStrategy::NoRaces);
+  stmt = scalarPromote(stmt);
+  printToFile("mxm", stmt);
+  return 0;
+}
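For reference, the #if 1 branch above builds A(i,j) = Reduction(MinOp(), k, AddOp(B(i,k), C(k,j))), a matrix product over the (min, +) semiring with infinity as the fill value, while the #else branch targets the boolean (or, and) semiring. A minimal NumPy sketch of the (min, +) product the generated mxm.c kernel is intended to compute, using small dense arrays purely for illustration (the minplus_matmul helper is not part of this patch):

import numpy as np

def minplus_matmul(B, C):
    # A[i, j] = min over k of B[i, k] + C[k, j]; np.inf plays the role of the
    # semiring's annihilator/fill value, mirroring the generator's setup.
    A = np.full((B.shape[0], C.shape[1]), np.inf)
    for k in range(B.shape[1]):
        A = np.minimum(A, B[:, [k]] + C[[k], :])
    return A

B = np.full((4, 4), np.inf); np.fill_diagonal(B, 1.0)
C = np.full((4, 4), np.inf); np.fill_diagonal(C, 2.0)
print(minplus_matmul(B, C))  # 3.0 on the diagonal, inf elsewhere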
diff --git a/numpy/conftest.py b/numpy/conftest.py
index 610f155..df8483f 100644
--- a/numpy/conftest.py
+++ b/numpy/conftest.py
@@ -1,11 +1,13 @@
 import pytest
 
 @pytest.fixture
 def tacoBench(benchmark):
-    def f(func, extra_info = None):
+    def f(func, extra_info = None, save_ret_val = False):
         # Take statistics based on 10 rounds.
         if extra_info is not None:
             for k, v in extra_info.items():
                 benchmark.extra_info[k] = v
+        if save_ret_val:
+            benchmark.extra_info["return"] = func()
         benchmark.pedantic(func, rounds=10, iterations=1, warmup_rounds=1)
     return f
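The save_ret_val flag added above lets a benchmark stash the benchmarked function's return value under extra_info["return"], which is how the minmax benchmarks later in this patch record their reduced result alongside the timings. A hypothetical sketch of how a test opts in (bench_sum_example and its toy matrix are stand-ins, not part of the patch):

import numpy as np
import pytest

@pytest.mark.parametrize("n", [100])
def bench_sum_example(tacoBench, n):
    matrix = np.ones((n, n))
    extra_info = {"nnz": int(np.count_nonzero(matrix))}
    def bench():
        return matrix.sum()
    # With save_ret_val=True the fixture calls bench() once, stores the result
    # in benchmark.extra_info["return"], and then times it as usual.
    tacoBench(bench, extra_info, save_ret_val=True)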
pt1): + loader = ImagePydataSparseTensorLoader() + sparse_bin_img1 = safeCastPydataTensorToInts(loader.sparse_image(num, pt1, 1)) + sparse_bin_img2 = safeCastPydataTensorToInts(loader.sparse_image(num, pt1+0.05, 2)) + sparse_bin_window = loader.sparse_window(num, 3) + bin_img1 = loader.dense_image(num, pt1, 1) + bin_img2 = loader.dense_image(num, pt1 + 0.05, 2) + bin_window = loader.dense_window(num) + + def sparse_bench(): + return testOp(sparse_bin_img1, sparse_bin_img2, sparse_bin_window).astype('int') + + def dense_bench(): + return testOp(bin_img1, bin_img2, bin_window).astype('int') + + ret = tacoBench(sparse_bench) + sparse_xor_img = sparse_bench() + xor_img = dense_bench() + + # Write result to TNS file to see what's different + shape = xor_img.shape + result = sparse.COO.from_numpy(xor_img, fill_value=0) + dok = sparse.DOK(result) + TnsFileDumper().dump_dict_to_file(shape, dok.data, os.path.join("temp", "numpy-result-{}.tns".format(num))) + + + num_elements = float(np.prod(bin_img1.shape)) + f = sparse_xor_img.fill_value + print("shape1", sparse_bin_img1.shape) + print("shape2", sparse_bin_img2.shape) + print("sparse img1 nnz =", sparse_bin_img1.nnz, " ", np.sum(bin_img1 != 0)) + print("sparse img2 nnz =", sparse_bin_img2.nnz, " ", np.sum(bin_img2 != 0)) + print("sparse win nnz =", sparse_bin_window.nnz, " ", np.sum(bin_window != 0)) + print("Total num elements", num_elements) + print("Fill value", f) + print("Sparse xor NNF = ", sparse_xor_img.nnz, "\t", "Dense xor NNF = ", np.sum(xor_img != int(f))) + print("Dense xor NNZ = ", np.sum(xor_img != 0)) + assert(sparse_xor_img.nnz == np.sum(xor_img != 1)) + + +#@pytest.mark.skip(reason="for getting the input matrices statistics only") +@pytest.mark.parametrize("num", list(range(1, 253))) +@pytest.mark.parametrize("pt1", [0.75]) +def bench_edge_detection_statistics(tacoBench, num, pt1): + loader = ImagePydataSparseTensorLoader() + sparse_bin_img1 = safeCastPydataTensorToInts(loader.sparse_image(num, pt1, 1)) + sparse_bin_img2 = safeCastPydataTensorToInts(loader.sparse_image(num, pt1+0.05, 2)) + sparse_bin_window = loader.sparse_window(num, 3) + + print(sparse_bin_img1.shape) + print(sparse_bin_img2.shape) + extra_info = dict() + extra_info['nnz1'] = sparse_bin_img1.nnz + extra_info['nnz2'] = sparse_bin_img2.nnz + extra_info['nnz3'] = sparse_bin_window.nnz + extra_info['dimx'] = sparse_bin_window.shape[0] + extra_info['dimy'] = sparse_bin_window.shape[1] + + def sparse_bench(): + sbi1 = np.logical_and(sparse_bin_img1, sparse_bin_window) + sbi2 = np.logical_and(sparse_bin_img2, sparse_bin_window) + sparse_xor_img = np.logical_xor(sbi1, sbi2).astype('int') + return sparse_xor_img + + tacoBench(sparse_bench, extra_info) + +@pytest.mark.skip(reason="For image generation only") +@pytest.mark.parametrize("num", [42, 44, 50, 63, 92]) +@pytest.mark.parametrize("pt1", [0.75]) +def bench_edge_detection_plot(tacoBench, num, pt1, plot): + loader = ImagePydataSparseTensorLoader() + sparse_bin_img1 = safeCastPydataTensorToInts(loader.sparse_image(num, pt1, 1)) + sparse_bin_img2 = safeCastPydataTensorToInts(loader.sparse_image(num, pt1+0.05, 2)) + sparse_bin_window = loader.sparse_window(num, 3) + bin_img1 = loader.dense_image(num, pt1, 1) + bin_img2 = loader.dense_image(num, pt1 + 0.05, 2) + bin_window = loader.dense_window(num) + + if plot: + print(sparse_bin_img1.shape) + print(sparse_bin_img2.shape) + + def sparse_bench(): + sbi1 = np.logical_and(sparse_bin_img1, sparse_bin_window) + sbi2 = np.logical_and(sparse_bin_img2, sparse_bin_window) + 
sparse_xor_img = np.logical_xor(sbi1, sbi2).astype('int') + return sparse_xor_img + + def xor(): + xor_img = np.logical_xor(sparse_bin_img1, sparse_bin_img2).astype('int') + return xor_img + + ret = tacoBench(sparse_bench) + sparse_xor_img = sparse_bench() + xor_img = xor() + + if plot: + num_elements = float(np.prod(bin_img1.shape)) + print("Sparse xor NNZ = ", sparse_xor_img.nnz, "\t", "Dense xor NNZ = ", np.sum(xor_img != 0)) + print("Sparsity img 1 ", np.sum(bin_img1 != 0) / num_elements) + print("Sparsity img 2 ", np.sum(bin_img2 != 0) / num_elements) + print("Sparsity xor ", np.sum(xor_img != 0) / num_elements) + sparse_xor_img = sparse_xor_img.todense() + t1 = round(loader.max[num]*pt1, 2) + t2 = round(loader.max[num]*(pt1 + 0.05), 2) + #plot_image(loader.img[num], bin_img1, bin_img2, xor_img.todense(), sparse_xor_img, t1, t2, bin_window) -if __name__=="__main__": - main() diff --git a/numpy/minmax.py b/numpy/minmax.py new file mode 100644 index 0000000..2c6b4d0 --- /dev/null +++ b/numpy/minmax.py @@ -0,0 +1,56 @@ +import numpy as np +from scipy.sparse import random, csr_matrix +import sparse +import pytest +import os +from util import MinMaxPydataSparseTensorLoader, MinMaxScipySparseTensorLoader + +@pytest.mark.parametrize("dims", [1, 3, 5]) +def bench_pydata_minmax(tacoBench, dims): + loader = MinMaxPydataSparseTensorLoader() + dims_list = [20] + [20] + [43 for ele in range(dims)] + + matrix = loader.tensor(dims_list) + extra_info = dict() + extra_info["nnz"] = matrix.nnz + def bench(): + reduced = matrix + for m in range(len(dims_list)): + if m % 2 == 0: + reduced = np.max(reduced, -1) + else: + reduced = np.min(reduced, -1) + return reduced + tacoBench(bench, extra_info, True) + +@pytest.mark.parametrize("dims", [1, 3, 5]) +def bench_scipy_minmax(tacoBench, dims): + loader = MinMaxScipySparseTensorLoader() + dims_list = [20] + [20] + [43 for ele in range(dims)] + + matrix = loader.tensor(dims_list) + extra_info = dict() + extra_info["nnz"] = matrix.nnz + def bench(): + reduced = matrix + for m in range(len(dims_list)): + if m % 2 == 0: + reduced = reduced.min(-1) + else: + reduced = reduced.max(-1) + return reduced + tacoBench(bench, extra_info, True) + +@pytest.mark.skip(reason="Only to get matrix statistics") +@pytest.mark.parametrize("dims", [1, 3, 5]) +def bench_minmax_statistics(tacoBench, dims): + loader = MinMaxPydataSparseTensorLoader() + dims_list = [20] + [20] + [43 for ele in range(dims)] + matrix = loader.tensor(dims_list) + + extra_info = dict() + extra_info["nnz"] = matrix.nnz + + def nop(): + return 0 + tacoBench(nop, extra_info) diff --git a/numpy/ufuncs.py b/numpy/ufuncs.py index 15fa531..7fa5819 100644 --- a/numpy/ufuncs.py +++ b/numpy/ufuncs.py @@ -170,7 +170,7 @@ def bench(): tacoBench(bench, extra_info) # Run benchmarks against the SuiteSparse collection. -@pytest.mark.parametrize("ufunc", [numpy.logical_xor, numpy.ldexp, numpy.right_shift]) +@pytest.mark.parametrize("ufunc", [numpy.power, numpy.logical_xor, numpy.ldexp, numpy.right_shift]) def bench_pydata_suitesparse_ufunc_sparse(tacoBench, ufunc): tensor = SuiteSparseTensor(os.getenv('SUITESPARSE_TENSOR_PATH')) ssTensor, other = inputCache.load(tensor, True) diff --git a/numpy/util.py b/numpy/util.py index 8fa593d..6d305c1 100644 --- a/numpy/util.py +++ b/numpy/util.py @@ -5,7 +5,9 @@ import glob import numpy import cv2 -import matplotlib.pyplot as plt + +# NEEDS TO BE COMMENTED OUT FOR LANKA +# import matplotlib.pyplot as plt # Get the path to the directory holding random tensors. 
Error out # if this isn't set. @@ -45,7 +47,7 @@ class TnsFileDumper: def __init__(self): pass - def dump_dict_to_file(self, shape, data, path): + def dump_dict_to_file(self, shape, data, path, write_shape = False): # Sort the data so that the output is deterministic. sorted_data = sorted([list(coords) + [value] for coords, value in data.items()]) with open(path, 'w+') as f: @@ -54,9 +56,10 @@ def dump_dict_to_file(self, shape, data, path): strings = coords + [str(line[-1])] f.write(" ".join(strings)) f.write("\n") - shape_strings = [str(elem) for elem in shape] + ['0'] - f.write(" ".join(shape_strings)) - f.write("\n") + if write_shape: + shape_strings = [str(elem) for elem in shape] + ['0'] + f.write(" ".join(shape_strings)) + f.write("\n") # ScipySparseTensorLoader loads a sparse tensor from a file into a # scipy.sparse CSR matrix. @@ -298,10 +301,7 @@ def safeCastPydataTensorToInts(tensor): # load_image loads an image with the correct color format for the numpy/image.py # benchmark def load_image(image_folder, num): - if image_folder == 'no': - image_folder = "./data/image/no" - else: - image_folder = "./data/image/yes" + image_folder = "./data/image/combined" name = "image" + str(num) + '.' file_names = [fn for fn in os.listdir(image_folder) @@ -311,32 +311,6 @@ def load_image(image_folder, num): img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) return img -# plot_image plots the given original, binned, xor, and sparse xor images -# for the numpy/image.py. Used for debugging only with the --plot flag -def plot_image(img, img1, img2, xor_img, sparse_xor_img, t1, t2, window=None): - f, ax = plt.subplots(2, 3) - ax[0, 0].imshow(img1, 'gray') - ax[0, 0].title.set_text("Binned Image 1. t1 = " + str(t1)) - - ax[0, 1].imshow(img2, 'gray') - ax[0, 1].title.set_text("Binned Image 2. 
t2 = " + str(t2)) - - ax[1, 0].imshow(img, 'gray') - ax[1, 0].title.set_text("Saturdated Image") - - ax[1, 1].imshow(xor_img, 'gray') - ax[1, 1].title.set_text("XOR Image") - - ax[1, 2].imshow(sparse_xor_img, 'gray') - ax[1, 2].title.set_text("Sparse XOR Image") - - if window is not None: - ax[0, 2].imshow(window, 'gray') - ax[0, 2].title.set_text("Fused Window Image") - - f.tight_layout() - plt.show() - # thresh thresholdes the given image by a threshold def thresh(images, t=85): if len(images.shape) < 3: @@ -399,7 +373,8 @@ def sparse_image(self, num, pt, variant=None, path='no'): bin_img = self.dense_image(num, pt, variant, path) result = sparse.COO.from_numpy(bin_img) dok = sparse.DOK(result) - TnsFileDumper().dump_dict_to_file(self.shape[num], dok.data, key) + write_shape = bin_img.flat[-1] == 0 + TnsFileDumper().dump_dict_to_file(self.shape[num], dok.data, key, write_shape) return result # sparse_window and dense_window must be called after the image calls @@ -416,7 +391,8 @@ def sparse_window(self, num, variant=3): result_np = self.dense_window(num) result = sparse.COO.from_numpy(result_np) dok = sparse.DOK(result) - TnsFileDumper().dump_dict_to_file(shape, dok.data, key) + write_shape = result_np.flat[-1] == 0 + TnsFileDumper().dump_dict_to_file(shape, dok.data, key, write_shape) return result def dense_window(self, num): @@ -424,10 +400,130 @@ def dense_window(self, num): result_np = numpy.zeros(shape) m0 = int(shape[0] / 2) m1 = int(shape[1] / 2) - dm0 = int(0.2*m0) - dm1 = int(0.2*m1) + dm0 = int(0.1*m0) + dm1 = int(0.1*m1) result_np[m0+dm0:m0+3*dm0, m1+dm1:m1+3*dm1] = 1 result_np[m0-3*dm0:m0-dm0, m1+dm1:m1+3*dm1] = 1 result_np[m0-3*dm0:m0-dm0, m1-3*dm1:m1-dm1] = 1 result_np[m0+dm0:m0+3*dm0, m1-3*dm1:m1-dm1] = 1 return result_np + +# NEEDS TO BE COMMENTED OUT FOR LANKA +# plot_image plots the given original, binned, xor, and sparse xor images +# for the numpy/image.py. Used for debugging only with the --plot flag +#def plot_image(img, img1, img2, xor_img, sparse_xor_img, t1, t2, window=None): +# f, ax = plt.subplots(2, 3) +# ax[0, 0].imshow(img, 'gray') +# ax[0, 0].title.set_text("Original Image") +# +# ax[0, 1].imshow(img1, 'gray') +# ax[0, 1].title.set_text("Binned Image 1. t = " + str(t1)) +# +# ax[0, 2].imshow(img2, 'gray') +# ax[0, 2].title.set_text("Binned Image 2. t = " + str(t2)) +# +# ax[1, 0].imshow(xor_img, 'gray') +# ax[1, 0].title.set_text("Edge Detected Image") +# +# ax[1, 2].imshow(sparse_xor_img, 'gray') +# ax[1, 2].title.set_text("Masked Edge Image") +# +# if window is not None: +# ax[1, 1].imshow(window, 'gray') +# ax[1, 1].title.set_text("ROI Mask") +# +# f.tight_layout() +# plt.show() + +# construct_minmax_tensor_key constructs a unique key that represents +# an image tensor parameterized by the tensor order +# The key itself is formatted by the string 'minmax', followed by the +# tensor order. For example, a parameter of 3 +# would have a key of minmax-3.tns. 
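# A concrete illustration of the key layout built below (illustrative values only;
# TENSOR_PATH is the tensor directory taken from TACO_TENSOR_PATH):
#   construct_minmax_tensor_key([20, 20, 43])    -> <TENSOR_PATH>/minmax/minmax-3.tns
#   construct_minmax_tensor_key([20, 20, 43], 1) -> <TENSOR_PATH>/minmax/minmax-3-1.tns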
+def construct_minmax_tensor_key(dims, variant=None): + path = TENSOR_PATH + name = "minmax" + if variant is None: + key = "{}-{}.tns".format(name, len(dims)) + else: + key = "{}-{}-{}.tns".format(name,len(dims), variant) + return os.path.join(path, name, key) + +def generate_crds_helper(shape, level, crds): + sampling = 0.1 + num = 4 + std = 2 + + if level == len(shape) - 1: + return crds + else: + result = [] + d = shape[level] + for c in crds: + # Get number of locations + num_locs = int(sampling*d) + # Get location uniformly of where to sample around + locs = numpy.random.rand(num_locs)*d + + # sample around each location using a normal distribution around that value with a std of 2 + for loc in locs: + points = std * numpy.random.randn(num) + loc + points = points.astype('int') + points = numpy.clip(points, 0, d - 1) + for p in points: + result.append(c+[p]) + + return generate_crds_helper(shape, level + 1, result) + +# RandomPydataSparseTensorLoader should be used to generate +# random pydata.sparse tensors. It caches the loaded tensors +# in the file system so that TACO benchmarks using tensors +# with the same parameters can use the exact same tensors. +class MinMaxPydataSparseTensorLoader: + def __init__(self): + self.loader = PydataSparseTensorLoader() + + def tensor(self, shape, variant=None): + key = construct_minmax_tensor_key(shape) + # If a tensor with these properties exists already, then load it. + if os.path.exists(key): + return self.loader.load(key) + else: + # Otherwise, we must create a random tensor with the desired properties, + # dump it to the output file, then return it. + crds = self.generate_crds(shape) + values = dict() + for c in crds: + ind_list = numpy.random.rand(2)*shape[-1] + ind_list = ind_list.astype('int') + start = numpy.min(ind_list) + stop = numpy.max(ind_list) + for i in range(start, stop): + temp = tuple(c[1:] + [i]) + values[temp] = int(20*numpy.random.rand() - 10) + + dok = sparse.DOK(shape, values) + ind = tuple([e - 1 for e in shape]) + write_shape = False if ind in values.keys() else True + TnsFileDumper().dump_dict_to_file(shape, dok.data, key, write_shape) + result = dok.asformat('coo') + return result + + + def generate_crds(self, shape): + return generate_crds_helper(shape, 0, [[0]]) + +class MinMaxScipySparseTensorLoader: + def __init__(self): + self.loader = ScipySparseTensorLoader("csr") + + def tensor(self, shape, variant=None): + key = construct_minmax_tensor_key(shape) + # If a tensor with these properties exists already, then load it. + if os.path.exists(key): + return self.loader.load(key) + else: + # Otherwise, we must create a random tensor with the desired properties, + # dump it to the output file, then return it. + raise NotImplementedError + diff --git a/scripts/taco_bench_aggregator.py b/scripts/bench_csv_aggregator.py similarity index 84% rename from scripts/taco_bench_aggregator.py rename to scripts/bench_csv_aggregator.py index 89b20d9..6010fd6 100644 --- a/scripts/taco_bench_aggregator.py +++ b/scripts/bench_csv_aggregator.py @@ -5,7 +5,7 @@ import tqdm import argparse -def aggregateTacoBenches(folder, outfile, labelSet=None): +def aggregateTacoBenches(folder, outfile, taco=False, labelSet=None): first = True outputFile = open(outfile, 'w+') writer = csv.writer(outputFile, delimiter=',') @@ -20,8 +20,9 @@ def aggregateTacoBenches(folder, outfile, labelSet=None): with open(fname, 'r') as f: # Discard the first 10 lines. This corresponds to the # google-benchmark generated header. 
- # for i in range(0, 10): - # f.readline() + if taco: + for i in range(0, 10): + f.readline() # Open the rest of the file as a CSV. reader = csv.reader(f) # Attempt to read the header from CSV. If this fails, @@ -35,11 +36,13 @@ def aggregateTacoBenches(folder, outfile, labelSet=None): # entries that have a skip marker in the label. # labelIdx = header.index("label", 0) if first: + header.append("original_filename") writer.writerow(header) first = False for row in reader: # if "SKIPPED" not in row[labelIdx]: # validLabels.add(row[labelIdx]) + row.append(fname) writer.writerow(row) # Write out the set of valid labels. if labelSet is not None: @@ -52,5 +55,6 @@ def aggregateTacoBenches(folder, outfile, labelSet=None): parser.add_argument('target_directory', type=str, help="Directory containing CSV's to aggregate") parser.add_argument('output_csv_name', type=str, help="Name of the CSV to generate") parser.add_argument('--label_set_file', type=str, default=None, help='Set to output all valid labels seen to a file') +parser.add_argument('--taco', action='store_true', help='Flag to aggregate TACO csvs, default is numpy') args = parser.parse_args() -aggregateTacoBenches(args.target_directory, args.output_csv_name, labelSet=args.label_set_file) +aggregateTacoBenches(args.target_directory, args.output_csv_name, taco=args.taco, labelSet=args.label_set_file) diff --git a/scripts/image_runner.sh b/scripts/image_runner.sh new file mode 100644 index 0000000..7b304f2 --- /dev/null +++ b/scripts/image_runner.sh @@ -0,0 +1,18 @@ +#!/bin/bash +#SBATCH -N 1 +#SBATCH --mem 120000 +#SBATCH -p lanka-v3 +#SBATCH --exclusive + +set -u + +source /data/scratch/rohany/array-programming-benchmarks/venv/bin/activate + +out=image-bench/ + +mkdir -p "$out" +mkdir -p "data/image/tensors" + +jsonout="$out/image-bench-statistics-all.json" + +LANKA=ON NUMPY_JSON="$jsonout" make python-bench BENCHES="numpy/image.py::bench_edge_detection_statistics" diff --git a/scripts/image_taco_runner.sh b/scripts/image_taco_runner.sh new file mode 100644 index 0000000..4537665 --- /dev/null +++ b/scripts/image_taco_runner.sh @@ -0,0 +1,17 @@ +#!/bin/bash +#SBATCH -N 1 +#SBATCH --mem 120000 +#SBATCH -p lanka-v3 +#SBATCH --exclusive + +set -u + +out=image-bench/taco + +mkdir -p "$out" + +for i in {1..253} +do + csvout="$out/result-taco-img$i.csv" + LANKA=ON IMAGE_NUM="$i" TACO_TENSOR_PATH="data/" TACO_OUT="$csvout" make -j8 taco-bench BENCHES="bench_image" +done diff --git a/scripts/minimax_runner.sh b/scripts/minimax_runner.sh new file mode 100644 index 0000000..3f27f40 --- /dev/null +++ b/scripts/minimax_runner.sh @@ -0,0 +1,18 @@ +#!/bin/bash +#SBATCH -N 1 +#SBATCH --mem 120000 +#SBATCH -p lanka-v3 +#SBATCH --exclusive + +set -u + +source /data/scratch/rohany/array-programming-benchmarks/venv/bin/activate + +out=minmax-bench/numpy + +mkdir -p "$out" +mkdir -p "data/minmax" + +jsonout="$out/results-scipy-only.json" + +LANKA=ON NUMPY_JSON="$jsonout" make python-bench BENCHES="numpy/minmax.py::bench_scipy_minmax" diff --git a/scripts/minimax_taco_runner.sh b/scripts/minimax_taco_runner.sh new file mode 100644 index 0000000..fae59ca --- /dev/null +++ b/scripts/minimax_taco_runner.sh @@ -0,0 +1,17 @@ +#!/bin/bash +#SBATCH -N 1 +#SBATCH --mem 120000 +#SBATCH -p lanka-v3 +#SBATCH --exclusive + +set -u + +out=minmax-bench/taco + +mkdir -p "$out" + +for i in {1..5..2} +do + csvout="$out/result-taco-minmax$i.csv" + LANKA=ON MINMAX_ORDER="$i" TACO_CONCRETIZE_HACK=1 TACO_TENSOR_PATH="data/" TACO_OUT="$csvout" make -j8 taco-bench 
BENCHES="bench_minimax" +done diff --git a/scripts/tensor_plotter.py b/scripts/tensor_plotter.py new file mode 100644 index 0000000..789f9ed --- /dev/null +++ b/scripts/tensor_plotter.py @@ -0,0 +1,18 @@ +import matplotlib.pyplot as plt +import numpy as np +from util import MinMaxPydataSparseTensorLoader +from mpl_toolkits.mplot3d import Axes3D + +dims = 1 +loader = MinMaxPydataSparseTensorLoader() +dims_list = [20] + [20] + [43] +matrix = loader.tensor(dims_list) +print(matrix) +matrix = matrix.todense() +print(matrix.shape) +fig = plt.figure() +ax = fig.add_subplot(111, projection='3d') +x,y,z = np.meshgrid(range(matrix.shape[0]), range(matrix.shape[1]), range(matrix.shape[2])) +ax.scatter(x,y,z, c=matrix.flat) + +plt.show() diff --git a/scripts/util.py b/scripts/util.py new file mode 100644 index 0000000..75f5135 --- /dev/null +++ b/scripts/util.py @@ -0,0 +1,147 @@ +import scipy.sparse +import scipy.io +import sparse +import os +import glob +import numpy +import cv2 + +# NEEDS TO BE COMMENTED OUT FOR LANKA +# import matplotlib.pyplot as plt + +# Get the path to the directory holding random tensors. Error out +# if this isn't set. +TENSOR_PATH = os.environ['TACO_TENSOR_PATH'] + +# TnsFileLoader loads a tensor stored in .tns format. +class TnsFileLoader: + def __init__(self): + pass + + def load(self, path): + coordinates = [] + values = [] + dims = [] + first = True + with open(path, 'r') as f: + for line in f: + data = line.split(' ') + if first: + first = False + dims = [0] * (len(data) - 1) + for i in range(len(data) - 1): + coordinates.append([]) + + for i in range(len(data) - 1): + coordinates[i].append(int(data[i]) - 1) + dims[i] = max(dims[i], coordinates[i][-1] + 1) + # TODO (rohany): What if we want this to be an integer? + values.append(float(data[-1])) + return dims, coordinates, values + +# TnsFileDumper dumps a dictionary of coordinates to values +# into a coordinate list tensor file. +class TnsFileDumper: + def __init__(self): + pass + + def dump_dict_to_file(self, shape, data, path, write_shape = False): + # Sort the data so that the output is deterministic. + sorted_data = sorted([list(coords) + [value] for coords, value in data.items()]) + with open(path, 'w+') as f: + for line in sorted_data: + coords = [str(elem + 1) for elem in line[:len(line) - 1]] + strings = coords + [str(line[-1])] + f.write(" ".join(strings)) + f.write("\n") + if write_shape: + shape_strings = [str(elem) for elem in shape] + ['0'] + f.write(" ".join(shape_strings)) + f.write("\n") + +# PydataSparseTensorLoader loads a sparse tensor from a file into +# a pydata.sparse tensor. +class PydataSparseTensorLoader: + def __init__(self): + self.loader = TnsFileLoader() + + def load(self, path): + dims, coords, values = self.loader.load(path) + return sparse.COO(coords, values, tuple(dims)) + +# construct_minmax_tensor_key constructs a unique key that represents +# an image tensor parameterized by the tensor order +# The key itself is formatted by the string 'minmax', followed by the +# tensor order. For example, a parameter of 3 +# would have a key of minmax-3.tns. 
+def construct_minmax_tensor_key(dims, variant=None): + path = TENSOR_PATH + name = "minmax" + if variant is None: + key = "{}-{}.tns".format(name, len(dims)) + else: + key = "{}-{}-{}.tns".format(name,len(dims), variant) + return os.path.join(path, name, key) + +def generate_crds_helper(shape, level, crds): + sampling = 0.1 + num = 3 + std = 2 + last_layer_sampling = 0.4 + + if level == len(shape) - 1: + return crds + else: + result = [] + d = shape[level] + for c in crds: + # Get number of locations + num_locs = int(sampling*d) + # Get location uniformly of where to sample around + locs = numpy.random.rand(num_locs)*d + + # sample around each location using a normal distribution around that value with a std of 2 + for loc in locs: + points = std * numpy.random.randn(num) + loc + points = points.astype('int') + points = numpy.clip(points, 0, d - 1) + for p in points: + result.append(c+[p]) + + return generate_crds_helper(shape, level + 1, result) + +# RandomPydataSparseTensorLoader should be used to generate +# random pydata.sparse tensors. It caches the loaded tensors +# in the file system so that TACO benchmarks using tensors +# with the same parameters can use the exact same tensors. +class MinMaxPydataSparseTensorLoader: + def __init__(self): + self.loader = PydataSparseTensorLoader() + + def tensor(self, shape, variant=None): + key = construct_minmax_tensor_key(shape) + # If a tensor with these properties exists already, then load it. + if os.path.exists(key): + return self.loader.load(key) + else: + # Otherwise, we must create a random tensor with the desired properties, + # dump it to the output file, then return it. + crds = self.generate_crds(shape) + values = dict() + for c in crds: + ind_list = numpy.random.rand(2)*shape[-1] + ind_list = ind_list.astype('int') + start = numpy.min(ind_list) + stop = numpy.max(ind_list) + for i in range(start, stop): + temp = tuple(c[1:] + [i]) + values[temp] = int(20*numpy.random.rand()) + + dok = sparse.DOK(shape, values) + TnsFileDumper().dump_dict_to_file(shape, dok.data, key) + result = dok.asformat('coo') + return result + + + def generate_crds(self, shape): + return generate_crds_helper(shape, 0, [[0]]) diff --git a/taco/bench.cpp b/taco/bench.cpp index 0da035e..d630d3f 100644 --- a/taco/bench.cpp +++ b/taco/bench.cpp @@ -86,4 +86,28 @@ taco::TensorBase loadImageTensor(std::string name, int num, taco::Format format, auto tensor = taco::read(constructImageTensorKey(num, variant, threshold), format, true); tensor.setName(name); return tensor; +} + +std::string constructMinMaxTensorKey(int order, int variant) { + auto path = getTacoTensorPath(); + std::stringstream result; + result << path; + if (path[path.size() - 1] != '/') { + result << "/"; + } + result << "minmax/"; + if (variant == 0) { + result << "minmax-" << order << ".tns"; + } else { + result << "minmax-" << order << "-" << variant << ".tns"; + } + return result.str(); +} + +taco::TensorBase loadMinMaxTensor(std::string name, int order, taco::Format format, int variant) { + // For now, just say that the python code must generate the random + // tensor before use. 
+ auto tensor = taco::read(constructMinMaxTensorKey(order, variant), format, true); + tensor.setName(name); + return tensor; } \ No newline at end of file diff --git a/taco/bench.h b/taco/bench.h index 0dd57a1..7a99ba6 100644 --- a/taco/bench.h +++ b/taco/bench.h @@ -51,7 +51,7 @@ std::string getValidationOutputPath(); std::string cleanPath(std::string path); taco::TensorBase loadRandomTensor(std::string name, std::vector dims, float sparsity, taco::Format format, int variant=0); taco::TensorBase loadImageTensor(std::string name, int num, taco::Format format, float threshold, int variant=0); - +taco::TensorBase loadMinMaxTensor(std::string name, int order, taco::Format format, int variant=0); template taco::Tensor castToType(std::string name, taco::Tensor tensor) { taco::Tensor result(name, tensor.getDimensions(), tensor.getFormat()); @@ -74,6 +74,24 @@ taco::Tensor castToType(std::string name, taco::Tensor tensor) { return result; } +template +taco::Tensor castToTypeZero(std::string name, taco::Tensor tensor) { + taco::Tensor result(name, tensor.getDimensions(), tensor.getFormat()); + std::vector coords(tensor.getOrder()); + for (auto& value : taco::iterate(tensor)) { + for (int i = 0; i < tensor.getOrder(); i++) { + coords[i] = value.first[i]; + } + // Attempt to cast the value to an integer. However, if the cast causes + // the value to equal 0, then this will ruin the sparsity pattern of the + // tensor, as the 0 values will get compressed out. So, if a cast would + // equal 0, insert 1 instead to preserve the sparsity pattern of the tensor. + result.insert(coords, static_cast(value.second)); + } + result.pack(); + return result; +} + template taco::Tensor shiftLastMode(std::string name, taco::Tensor original) { taco::Tensor result(name, original.getDimensions(), original.getFormat()); diff --git a/taco/graphblas.cpp b/taco/graphblas.cpp index da49934..7815373 100644 --- a/taco/graphblas.cpp +++ b/taco/graphblas.cpp @@ -12,117 +12,16 @@ extern "C" { #include "GraphBLAS.h" } +#include #include #include #include +#include #include using namespace taco; -ir::Expr addImpl(const std::vector& v) { - return ir::Add::make(v[0], v[1]); -} -Func AddOp("add", addImpl, {Annihilator(std::numeric_limits::infinity()), Identity(0), Commutative(), Associative()}); - -ir::Expr minImpl(const std::vector& v) { - return ir::Min::make(v[0], v[1]); -} -Func MinOp("min", minImpl, {Identity(std::numeric_limits::infinity()), Commutative(), Associative()}); - -ir::Expr maskImpl(const std::vector& v) { - return v[0]; -} -struct MaskAlgebra { - IterationAlgebra operator()(const std::vector& r) { - return Intersect(r[0], Complement(r[1])); - } -}; -Func MaskOp("mask", maskImpl, MaskAlgebra()); - -//static void bench_mxv_taco(benchmark::State& state) { -// Format dv({Dense}); -// -// Tensor T = read("/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx", CSR); -// Tensor A(T.getDimensions(), CSR, std::numeric_limits::infinity()); -// for (const auto& c : T) { -// A.insert(c.first.toVector(), c.second); -// } -// A.pack(); -// -// // TODO: Only run for square matrices -// -// Tensor x({A.getDimension(1)}, dv, std::numeric_limits::infinity()); -// x.insert({0}, 0.0); -// x.pack(); -// -// IndexVar i, j; -// -// taco_set_num_threads(12); -// for (auto _ : state) { -// state.PauseTiming(); -// -// Tensor y({A.getDimension(0)}, dv, std::numeric_limits::infinity()); -// y(i) = Reduction(MinOp(), j, AddOp(A(i,j), x(j))); -// //y(i) = MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)); -// //y(i) = 
MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)); -// //y(i) = MinOp(MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i)); -// //y(i) = MaskOp(MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i)); -// //y(i) = MinOp(FilterOp(x(i)) * Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)); -// -// y.compile(); -// y.assemble(); -// -// state.ResumeTiming(); -// -// y.compute(); -// } -// taco_set_num_threads(1); -//} -//TACO_BENCH(bench_mxv_taco); - -//static void bench_mxv_suitesparse(benchmark::State& state) { -// GrB_init(GrB_BLOCKING); -// GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER); -// GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW); -// -// int nthreads_max = 12; -// GxB_Global_Option_set(GxB_NTHREADS, nthreads_max); -// -// Tensor T = read("/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx", CSR); -// GrB_Index M = T.getDimension(0); -// GrB_Index N = T.getDimension(1); -// GrB_Matrix A; -// GrB_Matrix_new(&A, GrB_FP64, M, N); -// std::vector I, J; -// std::vector V; -// for (const auto& c : T) { -// I.push_back(c.first[0]); -// J.push_back(c.first[1]); -// V.push_back(c.second); -// } -// GrB_Matrix_build_FP64(A, I.data(), J.data(), V.data(), V.size(), GrB_PLUS_FP64); -// //GrB_Index nnz; -// //GrB_Matrix_nvals(&nnz, A); -// -// GrB_Vector x; -// GrB_Vector_new(&x, GrB_FP64, N); -// GrB_Vector_assign_FP64(x, NULL, NULL, 1, GrB_ALL, N, NULL); -// //GrB_Vector_setElement_FP64( -// -// GrB_Vector y; -// GrB_Vector_new(&y, GrB_FP64, M); -// //GrB_Vector_assign_FP64(y, NULL, NULL, 0, GrB_ALL, M, NULL); -// -// GrB_Descriptor desc; -// GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE); -// -// for (auto _ : state) { -// GrB_mxv(y, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, A, x, desc); -// //GrB_vxm(x, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, x, A, desc); -// } -//} - -taco_tensor_t* to_taco_tensor(GrB_Matrix* mat) { +taco_tensor_t* to_csr_taco_tensor(GrB_Matrix* mat) { GrB_Type type; GrB_Index M, N, posSize, crdSize, valsSize; GrB_Index* pos; @@ -145,7 +44,26 @@ taco_tensor_t* to_taco_tensor(GrB_Matrix* mat) { return csrt; } -taco_tensor_t* to_taco_tensor(GrB_Vector* vec) { +taco_tensor_t* to_bitmap_taco_tensor(GrB_Vector* vec) { + GrB_Type type; + GrB_Index N, valsSize, validSize, nvals; + void* vals; + int8_t* valid; + GxB_Vector_export_Bitmap(vec, &type, &N, &valid, &vals, &validSize, &valsSize, &nvals, NULL); + + auto vect = new taco_tensor_t; + vect->dimensions = new int32_t[1]; + vect->dimensions[0] = N; + vect->indices = new uint8_t**[1]; + vect->indices[0] = new uint8_t*[1]; + + vect->indices[0][0] = (uint8_t*)valid; + vect->vals = (uint8_t*)vals; + + return vect; +} + +taco_tensor_t* to_dense_taco_tensor(GrB_Vector* vec) { GrB_Type type; GrB_Index N, valsSize; void* vals; @@ -154,11 +72,13 @@ taco_tensor_t* to_taco_tensor(GrB_Vector* vec) { auto vect = new taco_tensor_t; vect->dimensions = new int32_t[1]; vect->dimensions[0] = N; + vect->vals = (uint8_t*)vals; return vect; } +#if 0 taco_tensor_t indices_to_taco_tensor(GrB_Index* indices, GrB_Index size) { GrB_Index* pos = new GrB_Index[2]; pos[0] = 0; @@ -172,23 +92,29 @@ taco_tensor_t indices_to_taco_tensor(GrB_Index* indices, GrB_Index size) { return ind; } +#endif -taco_tensor_t new_vec_taco_tensor(GrB_Index N) { +taco_tensor_t new_bitmap_taco_tensor(GrB_Index N) { taco_tensor_t vec; vec.dimensions = new int32_t[1]; vec.dimensions[0] = N; + vec.indices = new uint8_t**[1]; + vec.indices[0] = new uint8_t*[1]; + + vec.indices[0][0] = nullptr; vec.vals = nullptr; return vec; } -taco_tensor_t 
new_mat_taco_tensor(GrB_Index M, GrB_Index N) { +taco_tensor_t new_csr_taco_tensor(GrB_Index M, GrB_Index N) { taco_tensor_t mat; mat.dimensions = new int32_t[2]; mat.dimensions[0] = M; mat.dimensions[1] = N; mat.indices = new uint8_t**[2]; mat.indices[1] = new uint8_t*[2]; + mat.indices[1][0] = nullptr; mat.indices[1][1] = nullptr; mat.vals = nullptr; @@ -196,128 +122,472 @@ taco_tensor_t new_mat_taco_tensor(GrB_Index M, GrB_Index N) { return mat; } -void free_mat_taco_tensor(taco_tensor_t mat) { +void free_bitmap_taco_tensor(taco_tensor_t vec) { + free(vec.indices[0][0]); + free(vec.vals); +} + +void free_csr_taco_tensor(taco_tensor_t mat) { free(mat.indices[1][0]); free(mat.indices[1][1]); free(mat.vals); } -Format dv({Dense}); -int nthreads = 12; +bool is_bitmap_vector(GrB_Vector* vec) { + int sparsity; + GxB_Vector_Option_get(*vec, GxB_SPARSITY_STATUS, &sparsity); + return (sparsity == GxB_BITMAP); +} + +bool is_dense_vector(GrB_Vector* vec) { + int sparsity; + GxB_Vector_Option_get(*vec, GxB_SPARSITY_STATUS, &sparsity); + return (sparsity == GxB_FULL); +} + +bool is_csr_matrix(GrB_Matrix* mat) { + int sparsity; + GxB_Format_Value fmt; + GxB_Matrix_Option_get(*mat, GxB_SPARSITY_STATUS, &sparsity); + GxB_Matrix_Option_get(*mat, GxB_FORMAT, &fmt); + return (sparsity == GxB_SPARSE && fmt == GxB_BY_ROW); +} + +struct BitmapArrays { + GrB_Index m; + int8_t* valid = nullptr; + void* vals = nullptr; +}; + +BitmapArrays get_bitmap_arrays(const taco_tensor_t vector) { + BitmapArrays vec; + + vec.m = vector.dimensions[0]; + vec.valid = (int8_t*)vector.indices[0][0]; + vec.vals = vector.vals; + + return vec; +} + +double compare_double_bitmap(BitmapArrays a, BitmapArrays b) { + if (a.m != b.m) { + return std::numeric_limits::infinity(); + } + + double* avals = (double*)a.vals; + double* bvals = (double*)b.vals; + + double ret = 0.0; + for (int i = 0 ; i < a.m; ++i) { + bool avalid = a.valid[i] && !std::isinf(avals[i]); + bool bvalid = b.valid[i] && !std::isinf(bvals[i]); + if (avalid != bvalid) { + return std::numeric_limits::infinity(); + } + if (avalid) { + if (avals[i] != 0.0) { + const double diff = std::abs(bvals[i] / avals[i] - 1.0); + if (diff > ret) { + ret = diff; + } + } else if (bvals[i] != 0.0) { + return std::numeric_limits::infinity(); + } + } + } + + return ret; +} + +double compare_bool_bitmap(BitmapArrays a, BitmapArrays b) { + if (a.m != b.m) { + return 1.0; + } + + bool* avals = (bool*)a.vals; + bool* bvals = (bool*)b.vals; + + for (int i = 0 ; i < a.m; ++i) { + bool avalid = a.valid[i] && avals[i]; + bool bvalid = b.valid[i] && bvals[i]; + if (avalid != bvalid) { + return 1.0; + } + } + + return 0.0; +} + +struct CSRArrays { + GrB_Index m, n; + GrB_Index* pos = nullptr; + GrB_Index* crd = nullptr; + void* vals = nullptr; +}; + +CSRArrays get_csr_arrays(const taco_tensor_t matrix) { + CSRArrays csr; + + csr.m = matrix.dimensions[0]; + csr.n = matrix.dimensions[1]; + csr.pos = (GrB_Index*)matrix.indices[1][0]; + csr.crd = (GrB_Index*)matrix.indices[1][1]; + csr.vals = matrix.vals; + + return csr; +} + +double compare_double_csr(CSRArrays a, CSRArrays b) { + //std::cout << a.m << " " << b.m << " " << a.n << " " << b.n << std::endl; + if (a.m != b.m || a.n != b.n) { + return std::numeric_limits::infinity(); + } + + double* avals = (double*)a.vals; + double* bvals = (double*)b.vals; + + double ret = 0.0; + for (int i = 0; i < a.m; ++i) { + int pA = a.pos[i]; + int pB = b.pos[i]; + while (pA < a.pos[i + 1] && pB < b.pos[i + 1]) { + while (pA < a.pos[i + 1] && std::isinf(avals[pA])) 
pA++; + while (pB < b.pos[i + 1] && std::isinf(bvals[pB])) pB++; + if (pA < a.pos[i + 1] && pB < b.pos[i + 1]) { + //std::cout << a.crd[pA] << " " << b.crd[pB] << " " << avals[pA] << " " << bvals[pB] << std::endl; + if (a.crd[pA] != b.crd[pB]) { + return std::numeric_limits::infinity(); + } else if (avals[pA] != 0.0) { + const double diff = std::abs(bvals[pB] / avals[pA] - 1.0); + if (diff > ret) { + ret = diff; + //std::cout << i << " " << a.crd[pA] << " " << b.crd[pB] << " " << avals[pA] << " " << bvals[pB] << std::endl; + } + } else if (bvals[pB] != 0.0) { + return std::numeric_limits::infinity(); + } + pA++; + pB++; + } + } + while (pA < a.pos[i + 1] && std::isinf(avals[pA])) pA++; + while (pB < b.pos[i + 1] && std::isinf(bvals[pB])) pB++; + if (pA != a.pos[i + 1] || pB != b.pos[i + 1]) { + return std::numeric_limits::infinity(); + } + } + + return ret; +} + +double compare_bool_csr(CSRArrays a, CSRArrays b) { + //std::cout << a.m << " " << b.m << " " << a.n << " " << b.n << std::endl; + if (a.m != b.m || a.n != b.n) { + return 1.0; + } + + bool* avals = (bool*)a.vals; + bool* bvals = (bool*)b.vals; + + for (int i = 0; i < a.m; ++i) { + int pA = a.pos[i]; + int pB = b.pos[i]; + while (pA < a.pos[i + 1] && pB < b.pos[i + 1]) { + while (pA < a.pos[i + 1] && !avals[pA]) pA++; + while (pB < b.pos[i + 1] && !bvals[pB]) pB++; + if (pA < a.pos[i + 1] && pB < b.pos[i + 1]) { + //std::cout << a.crd[pA] << " " << b.crd[pB] << " " << avals[pA] << " " << bvals[pB] << std::endl; + if (a.crd[pA] != b.crd[pB]) { + return 1.0; + } + pA++; + pB++; + } + } + while (pA < a.pos[i + 1] && !avals[pA]) pA++; + while (pB < b.pos[i + 1] && !bvals[pB]) pB++; + if (pA != a.pos[i + 1] || pB != b.pos[i + 1]) { + return 1.0; + } + } + + return 0.0; +} +const int nthreads = 12; struct GraphBLASFixture { GraphBLASFixture() { - const auto path = "/data/scratch/s3chou/formats-bench/data/pwtk.mtx"; - //const auto path = "/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx"; + //const auto path = "/data/scratch/s3chou/formats-bench/data/pwtk.mtx"; + const auto path = "/data/scratch/s3chou/formats-bench/data/webbase_1M.mtx"; //const auto path = "/data/scratch/s3chou/formats-bench/data/coPapersDBLP/coPapersDBLP.mtx"; - Tensor T = read(path, CSR); + //const auto path = "/data/scratch/changwan/florida_all/soc-LiveJournal1/soc-LiveJournal1.mtx"; + //const auto path = "/data/scratch/changwan/florida_all/com-LiveJournal/com-LiveJournal.mtx"; + //const auto path = "/data/scratch/changwan/florida_all/indochina-2004/indochina-2004.mtx"; // TODO: Only run for square matrices - A_trop_taco = Tensor(T.getDimensions(), CSR, std::numeric_limits::infinity()); - - GrB_init(GrB_BLOCKING); - GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER); + //double bsw[GxB_NBITMAP_SWITCH] = {0}; + //GrB_init(GrB_BLOCKING); + GrB_init(GrB_NONBLOCKING); + //GxB_Global_Option_set(GxB_HYPER_SWITCH, 1.0); + //GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER); GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW); GxB_Global_Option_set(GxB_NTHREADS, nthreads); + //GxB_Global_Option_set(GxB_BITMAP_SWITCH, bsw); + //GxB_Global_Option_set(GxB_BURBLE, 1); + + read_matrix(path); - GrB_Index M = T.getDimension(0); - GrB_Index N = T.getDimension(1); - GrB_Matrix_new(&A_trop_gb, GrB_FP64, M, N); - - std::vector I, J; - std::vector V; - for (const auto& c : T) { - I.push_back(c.first[0]); - J.push_back(c.first[1]); - V.push_back(c.second); - //A_trop_taco.insert(c.first.toVector(), c.second); - } - GrB_Matrix_build_FP64(A_trop_gb, I.data(), J.data(), 
V.data(), V.size(), GrB_PLUS_FP64); - //A_trop_taco.pack(); + if (is_bool) { + GrB_Vector_new(&x_gb, GrB_BOOL, N); + } else { + GrB_Vector_new(&x_gb, GrB_FP64, N); + } + GxB_Vector_Option_set(x_gb, GxB_SPARSITY_CONTROL, GxB_BITMAP); + //GrB_Vector_assign_FP64(x_gb, NULL, NULL, 1.0, GrB_ALL, N, NULL); + for (GrB_Index i = 0; i < M; i += 4) { + if (is_bool) { + GrB_Vector_setElement_BOOL(x_gb, 1, i); + } else { + GrB_Vector_setElement_FP64(x_gb, (double)i, i); + } + } + GrB_Vector_wait(&x_gb); + taco_uassert(is_bitmap_vector(&x_gb)) << "x is not bitmap"; - GrB_Vector_new(&x_trop_gb, GrB_FP64, N); - GrB_Vector_assign_FP64(x_trop_gb, NULL, NULL, 0, GrB_ALL, N, NULL); + GrB_Vector_new(&m_gb, GrB_BOOL, M); + GxB_Vector_Option_set(m_gb, GxB_SPARSITY_CONTROL, GxB_FULL); + GrB_Vector_assign_BOOL(m_gb, NULL, NULL, true, GrB_ALL, N, NULL); + for (GrB_Index i = 0; i < M; i += 4) { + GrB_Vector_setElement_BOOL(m_gb, false, i); + } + GrB_Vector_wait(&m_gb); + taco_uassert(is_dense_vector(&m_gb)) << "m is not dense"; - //x_trop_taco = Tensor({T.getDimension(1)}, dv, std::numeric_limits::infinity()); - //for (int i = 0; i < T.getDimension(1); ++i) { - // x_trop_taco.insert({i}, 0.0); + //GrB_Index stride = (GrB_Index)std::sqrt(M); + //for (GrB_Index i = 0; i < M; i += stride) { + // indices.push_back(i); //} - //x_trop_taco.pack(); + //indices_taco = indices_to_taco_tensor(indices.data(), indices.size()); + } - GrB_Index stride = (GrB_Index)std::sqrt(T.getDimension(0)); - for (GrB_Index i = 0; i < T.getDimension(0); i += stride) { - indices.push_back(i); + void read_matrix(const std::string& matrix_path) { + std::fstream stream; + stream.open(matrix_path, std::fstream::in); + if (!stream) { + stream.close(); + return; + } + + std::string line; + std::getline(stream, line); + + // Read Header + std::stringstream lineStream(line); + std::string head, type, formats, field, symmetry; + lineStream >> head >> type >> formats >> field >> symmetry; + assert(head=="%%MatrixMarket"); + // type = [matrix tensor] + // formats = [coordinate array] + assert((type == "matrix") || (type == "tensor")); + + // field = [real integer complex pattern] + bool isreal = false; + bool isint = false; + if (field == "complex") { + stream.close(); + return; + } else if (field == "real") { + isreal = true; + } else if (field == "integer") { + isint = true; + } + + // symmetry = [general symmetric skew-symmetric Hermitian] + if ((symmetry != "general") && (symmetry != "symmetric") && + (symmetry != "skew-symmetric")) { + stream.close(); + return; + } + + const bool symm = ((symmetry == "symmetric") || + (symmetry == "skew-symmetric")); + const bool skew = (symmetry == "skew-symmetric"); + + std::getline(stream, line); + + // Skip comments at the top of the file + std::string token; + do { + std::stringstream lineStream(line); + lineStream >> token; + if (token[0] != '%') { + break; + } + } while (std::getline(stream, line)); + + // The first non-comment line is the header with dimensions + std::vector dimensions; + char* linePtr = (char*)line.data(); + while (auto dimension = std::strtoull(linePtr, &linePtr, 10)) { + dimensions.push_back(dimension); } - indices_taco = indices_to_taco_tensor(indices.data(), indices.size()); + + assert(dimensions.size() == 3); + nnz = dimensions[2]; + + GrB_Index* rows = (GrB_Index*)malloc(sizeof(GrB_Index) * nnz * (1 + symm)); + GrB_Index* cols = (GrB_Index*)malloc(sizeof(GrB_Index) * nnz * (1 + symm)); + double* fvals = !is_bool ? 
(double*)malloc(sizeof(double) * nnz * (1 + symm)) : nullptr; + bool* bvals = is_bool ? (bool*)malloc(sizeof(bool) * nnz * (1 + symm)) : nullptr; + + for (nnz = 0; std::getline(stream, line); nnz++) { + //if (nnz % 10000000 == 0) std::cout << nnz << std::endl; + linePtr = (char*)line.data(); + + const GrB_Index i = strtoull(linePtr, &linePtr, 10) - 1; + const GrB_Index j = strtoull(linePtr, &linePtr, 10) - 1; + + double fval = 1.0; + bool bval = true;; + if (isreal) { + fval = strtod(linePtr, &linePtr); + bval = (fval != 0.0); + } else if (isint) { + fval = strtoll(linePtr, &linePtr, 10); + bval = (fval != 0.0); + } + + rows[nnz] = i; + cols[nnz] = j; + if (fvals) { + fvals[nnz] = fval; + } + if (bvals) { + bvals[nnz] = bval; + } + + if (symm && i != j) { + nnz++; + + if (skew) { + fval = -1.0 * fval; + } + + rows[nnz] = j; + cols[nnz] = i; + if (fvals) { + fvals[nnz] = fval; + } + if (bvals) { + bvals[nnz] = bval; + } + } + } + + stream.close(); + + GrB_Matrix_new(&A_gb, is_bool ? GrB_BOOL : GrB_FP64, dimensions[0], dimensions[1]); + GxB_Matrix_Option_set(A_gb, GxB_SPARSITY_CONTROL, GxB_SPARSE); + if (is_bool) { + GrB_Matrix_build_BOOL(A_gb, rows, cols, bvals, nnz, GrB_LOR); + } else { + GrB_Matrix_build_FP64(A_gb, rows, cols, fvals, nnz, GrB_PLUS_FP64); + } + GrB_Matrix_wait(&A_gb); + taco_uassert(is_csr_matrix(&A_gb)) << "A is not CSR"; + + free(rows); + free(cols); + free(fvals); + free(bvals); + + M = dimensions[0]; + N = dimensions[1]; } - GrB_Matrix A_trop_gb = nullptr; - GrB_Vector x_trop_gb = nullptr; - Tensor A_trop_taco; - Tensor x_trop_taco; - taco_tensor_t* A_trop_taco_t = nullptr; - taco_tensor_t* x_trop_taco_t = nullptr; - std::vector indices; - taco_tensor_t indices_taco; + bool is_bool = true; + bool validate = false; + GrB_Index M, N, nnz; + GrB_Matrix A_gb = nullptr; + GrB_Matrix C_gb = nullptr; + GrB_Vector x_gb = nullptr; + GrB_Vector m_gb = nullptr; + GrB_Vector y_gb = nullptr; + taco_tensor_t* A_taco_t = nullptr; + taco_tensor_t* x_taco_t = nullptr; + taco_tensor_t* m_taco_t = nullptr; + //std::vector indices; + //taco_tensor_t indices_taco; }; GraphBLASFixture fixture; static void bench_mxv_suitesparse(benchmark::State& state) { - GrB_init(GrB_BLOCKING); - GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER); - GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW); - GxB_Global_Option_set(GxB_NTHREADS, nthreads); - GrB_Descriptor desc; - //GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE); + GrB_Descriptor_new(&desc); + GrB_Descriptor_set(desc, GrB_MASK, GrB_COMP); + GrB_Descriptor_set(desc, GrB_OUTP, GrB_REPLACE); - GrB_Vector y = NULL;; for (auto _ : state) { state.PauseTiming(); - GrB_Vector_free(&y); + GrB_Vector_free(&fixture.y_gb); + + GrB_Vector_new(&fixture.y_gb, fixture.is_bool ? 
GrB_BOOL : GrB_FP64, fixture.M); + GxB_Vector_Option_set(fixture.y_gb, GxB_SPARSITY_CONTROL, GxB_BITMAP); state.ResumeTiming(); - GrB_Vector_new(&y, GrB_FP64, fixture.A_trop_taco.getDimension(0)); - GrB_mxv(y, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, fixture.A_trop_gb, fixture.x_trop_gb, desc); - //GrB_vxm(x, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, x, A, desc); + if (fixture.is_bool) { + GrB_mxv(fixture.y_gb, fixture.m_gb, NULL, GrB_LOR_LAND_SEMIRING_BOOL, fixture.A_gb, fixture.x_gb, desc); + } else { + GrB_mxv(fixture.y_gb, fixture.m_gb, NULL, GrB_MIN_PLUS_SEMIRING_FP64, fixture.A_gb, fixture.x_gb, desc); + } + } + taco_uassert(is_bitmap_vector(&fixture.y_gb)) << "y is not bitmap"; + if (!fixture.validate) { + GrB_Vector_free(&fixture.y_gb); } - GrB_Vector_free(&y); } static void bench_mxm_suitesparse(benchmark::State& state) { - GrB_init(GrB_BLOCKING); - GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER); - GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW); - GxB_Global_Option_set(GxB_NTHREADS, nthreads); - GrB_Descriptor desc; - GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE); + GrB_Descriptor_new(&desc); + GrB_Descriptor_set(desc, GrB_OUTP, GrB_REPLACE); + GrB_Descriptor_set(desc, GxB_AxB_METHOD, GxB_AxB_GUSTAVSON); - GrB_Matrix C = NULL; for (auto _ : state) { state.PauseTiming(); - GrB_Matrix_free(&C); + GrB_Matrix_free(&fixture.C_gb); + GrB_Matrix_new(&fixture.C_gb, fixture.is_bool ? GrB_BOOL : GrB_FP64, fixture.M, fixture.N); + GxB_Matrix_Option_set(fixture.C_gb, GxB_SPARSITY_CONTROL, GxB_SPARSE); + state.ResumeTiming(); - GrB_Matrix_new(&C, GrB_FP64, fixture.A_trop_taco.getDimension(0), fixture.A_trop_taco.getDimension(1)); - GrB_mxm(C, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, fixture.A_trop_gb, fixture.A_trop_gb, desc); + if (fixture.is_bool) { + GrB_mxm(fixture.C_gb, NULL, NULL, GrB_LOR_LAND_SEMIRING_BOOL, fixture.A_gb, fixture.A_gb, desc); + } else { + GrB_mxm(fixture.C_gb, NULL, NULL, GrB_MIN_PLUS_SEMIRING_FP64, fixture.A_gb, fixture.A_gb, desc); + } + //GrB_Matrix_wait(&fixture.C_gb); + } + taco_uassert(is_csr_matrix(&fixture.C_gb)) << "C is not CSR"; + if (!fixture.validate) { + GrB_Matrix_free(&fixture.C_gb); } - GrB_Matrix_free(&C); } +#if 0 static void bench_extract_suitesparse(benchmark::State& state) { - GrB_init(GrB_BLOCKING); - GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER); - GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW); - GxB_Global_Option_set(GxB_NTHREADS, nthreads); - + //GrB_init(GrB_BLOCKING); + //GxB_Global_Option_set(GxB_HYPER_SWITCH, GxB_NEVER_HYPER); + //GxB_Global_Option_set(GxB_FORMAT, GxB_BY_ROW); + //GxB_Global_Option_set(GxB_NTHREADS, nthreads); GrB_Descriptor desc; - GrB_Descriptor_set (desc, GrB_OUTP, GrB_REPLACE); + GrB_Descriptor_new(&desc); + GrB_Descriptor_set(desc, GrB_OUTP, GrB_REPLACE); GrB_Index* indices = fixture.indices.data(); GrB_Index size = fixture.indices.size(); @@ -330,19 +600,21 @@ static void bench_extract_suitesparse(benchmark::State& state) { state.ResumeTiming(); - //GrB_Matrix_new(&C, GrB_FP64, fixture.A_trop_taco.getDimension(0), fixture.A_trop_taco.getDimension(1)); + //GrB_Matrix_new(&C, GrB_FP64, fixture.A_taco.getDimension(0), fixture.A_taco.getDimension(1)); GrB_Matrix_new(&C, GrB_FP64, fixture.indices.size(), fixture.indices.size()); - GrB_Matrix_extract(C, NULL, NULL, fixture.A_trop_gb, indices, size, indices, size, desc); + GrB_Matrix_extract(C, NULL, NULL, fixture.A_gb, indices, size, indices, size, desc); } //GrB_Index nnz; //GrB_Matrix_nvals(&nnz, C); //std::cout << "nnz: " << nnz << std::endl; 
GrB_Matrix_free(&C); } +#endif #define restrict __restrict__ -int taco_mxv_trop(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x) { +int taco_mxv_trop(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x, taco_tensor_t *m) { +#if 0 GrB_Index y1_dimension = (GrB_Index)(y->dimensions[0]); double* restrict y_vals = (double*)(y->vals); GrB_Index A1_dimension = (GrB_Index)(A->dimensions[0]); @@ -351,6 +623,8 @@ int taco_mxv_trop(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x) { double* restrict A_vals = (double*)(A->vals); GrB_Index x1_dimension = (GrB_Index)(x->dimensions[0]); double* restrict x_vals = (double*)(x->vals); + GrB_Index m1_dimension = (GrB_Index)(m->dimensions[0]); + bool* restrict m_vals = (bool*)(m->vals); //y_vals = (double*)calloc(y1_dimension, sizeof(double)); y_vals = (double*)malloc(sizeof(double) * y1_dimension); @@ -358,7 +632,7 @@ int taco_mxv_trop(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x) { //#pragma omp parallel for schedule(static) num_threads(nthreads) #pragma omp parallel for schedule(dynamic, 256) num_threads(nthreads) for (GrB_Index i = 0; i < x1_dimension; i++) { - //if (!(x_vals[i] != INFINITY)) { + if (!(m_vals[i] != 0)) { double tj_val = INFINITY; //double tj_val = 0.0; for (GrB_Index jA = A2_pos[i]; jA < A2_pos[(i + 1)]; jA++) { @@ -366,13 +640,94 @@ int taco_mxv_trop(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x) { tj_val = fmin(tj_val,A_vals[jA] + x_vals[j]); } y_vals[i] = tj_val; - //} + } + else { + y_vals[i] = INFINITY; + } + } + + y->vals = (uint8_t*)y_vals; + return 0; +#else + GrB_Index y1_dimension = (GrB_Index)(y->dimensions[0]); + int8_t* restrict y1_valid = (int8_t*)(y->indices[0][0]); + double* restrict y_vals = (double*)(y->vals); + GrB_Index A1_dimension = (GrB_Index)(A->dimensions[0]); + GrB_Index* restrict A2_pos = (GrB_Index*)(A->indices[1][0]); + GrB_Index* restrict A2_crd = (GrB_Index*)(A->indices[1][1]); + double* restrict A_vals = (double*)(A->vals); + GrB_Index x1_dimension = (GrB_Index)(x->dimensions[0]); + int8_t* restrict x1_valid = (int8_t*)(x->indices[0][0]); + double* restrict x_vals = (double*)(x->vals); + GrB_Index m1_dimension = (GrB_Index)(m->dimensions[0]); + bool* restrict m_vals = (bool*)(m->vals); + + y1_valid = (int8_t*)calloc(1, sizeof(int8_t) * y1_dimension); + int32_t y_capacity = y1_dimension; + y_vals = (double*)malloc(sizeof(double) * y_capacity); + + #pragma omp parallel for schedule(dynamic, 256) num_threads(nthreads) + for (GrB_Index i = 0; i < m1_dimension; i++) { + if (!(m_vals[i] != 0)) { + //double tj_val = 0.0; + double tj_val = INFINITY; + for (GrB_Index jA = A2_pos[i]; jA < A2_pos[(i + 1)]; jA++) { + GrB_Index j = A2_crd[jA]; + if (x1_valid[j] == 1) { + tj_val = fmin(tj_val,A_vals[jA] + x_vals[j]); + } + } + y_vals[i] = tj_val; + y1_valid[i] = 1; + } } + y->indices[0][0] = (uint8_t*)(y1_valid); y->vals = (uint8_t*)y_vals; return 0; +#endif } +int taco_mxv_bool(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x, taco_tensor_t *m) { + GrB_Index y1_dimension = (GrB_Index)(y->dimensions[0]); + int8_t* restrict y1_valid = (int8_t*)(y->indices[0][0]); + bool* restrict y_vals = (bool*)(y->vals); + GrB_Index A1_dimension = (GrB_Index)(A->dimensions[0]); + GrB_Index* restrict A2_pos = (GrB_Index*)(A->indices[1][0]); + GrB_Index* restrict A2_crd = (GrB_Index*)(A->indices[1][1]); + bool* restrict A_vals = (bool*)(A->vals); + GrB_Index x1_dimension = (GrB_Index)(x->dimensions[0]); + int8_t* restrict x1_valid = (int8_t*)(x->indices[0][0]); + bool* restrict x_vals = (bool*)(x->vals); + 
GrB_Index m1_dimension = (GrB_Index)(m->dimensions[0]); + bool* restrict m_vals = (bool*)(m->vals); + + y1_valid = (int8_t*)calloc(1, sizeof(int8_t) * y1_dimension); + int32_t y_capacity = y1_dimension; + y_vals = (bool*)malloc(sizeof(bool) * y_capacity); + + #pragma omp parallel for schedule(dynamic, 256) num_threads(nthreads) + for (GrB_Index i = 0; i < m1_dimension; i++) { + if (!(m_vals[i] != 0)) { + bool tj_val = 0; + for (GrB_Index jA = A2_pos[i]; jA < A2_pos[(i + 1)]; jA++) { + GrB_Index j = A2_crd[jA]; + if (x1_valid[j] == 1) { + tj_val = tj_val || A_vals[jA]; + if (tj_val == 1) { + break; + } + } + } + y_vals[i] = tj_val; + y1_valid[i] = 1; + } + } + + y->indices[0][0] = (uint8_t*)(y1_valid); + y->vals = (uint8_t*)y_vals; + return 0; +} int cmp(const void *a, const void *b) { return *((const int*)a) - *((const int*)b); } @@ -392,13 +747,14 @@ int taco_mxm_trop(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) { GrB_Index* restrict C2_crd = (GrB_Index*)(C->indices[1][1]); double* restrict C_vals = (double*)(C->vals); +#if 0 GrB_Index* restrict A2_nnz = 0; A2_nnz = (GrB_Index*)malloc(sizeof(GrB_Index) * B1_dimension); GrB_Index* restrict qw_index_list_all = 0; qw_index_list_all = (GrB_Index*)malloc(sizeof(GrB_Index) * C2_dimension * omp_get_max_threads()); bool* restrict qw_already_set_all = (bool*)calloc(C2_dimension * omp_get_max_threads(), sizeof(bool)); - #pragma omp parallel for schedule(dynamic, 256) num_threads(nthreads) + #pragma omp parallel for schedule(dynamic, 128) num_threads(nthreads) for (GrB_Index qi = 0; qi < B1_dimension; qi++) { GrB_Index qw_index_list_size = 0; GrB_Index* qw_index_list = qw_index_list_all + (C2_dimension * omp_get_thread_num()); @@ -430,6 +786,7 @@ int taco_mxm_trop(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) { for (GrB_Index i = 0; i < A1_dimension; i++) { A2_pos[i + 1] = A2_pos[i] + A2_nnz[i]; } + std::cout << A2_pos[A1_dimension] << std::endl; A2_crd = (GrB_Index*)malloc(sizeof(GrB_Index) * A2_pos[A1_dimension]); A_vals = (double*)malloc(sizeof(double) * A2_pos[A1_dimension]); @@ -438,7 +795,7 @@ int taco_mxm_trop(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) { w_index_list_all = (GrB_Index*)malloc(sizeof(GrB_Index) * C2_dimension * omp_get_max_threads()); bool* restrict w_already_set_all = (bool*)calloc(C2_dimension * omp_get_max_threads(), sizeof(bool)); w_all = (double*)malloc(sizeof(double) * C2_dimension * omp_get_max_threads()); - #pragma omp parallel for schedule(dynamic, 256) num_threads(nthreads) + #pragma omp parallel for schedule(dynamic, 128) num_threads(nthreads) for (GrB_Index i = 0; i < B1_dimension; i++) { GrB_Index w_index_list_size = 0; GrB_Index* w_index_list = w_index_list_all + (C2_dimension * omp_get_thread_num()); @@ -455,7 +812,8 @@ int taco_mxm_trop(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) { w_index_list_size++; } else { - w[j] = fmin(w[j], B_vals[kB] * C_vals[jC]); + w[j] = fmin(w[j], B_vals[kB] + C_vals[jC]); + //w[j] = w[j] + B_vals[kB] * C_vals[jC]); } } } @@ -480,6 +838,107 @@ int taco_mxm_trop(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) { A2_pos[0] = 0; free(A2_nnz); +#else + GrB_Index* restrict A2_nnz = 0; + A2_nnz = (GrB_Index*)malloc(sizeof(GrB_Index) * B1_dimension); + + GrB_Index* restrict qw_index_list_all = 0; + qw_index_list_all = (GrB_Index*)malloc(sizeof(GrB_Index) * (C2_dimension * omp_get_max_threads())); + bool* restrict qw_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool)); + + //#pragma omp parallel for schedule(runtime) + 
#pragma omp parallel for schedule(dynamic, 128) num_threads(nthreads) + for (GrB_Index qi = 0; qi < B1_dimension; qi++) { + GrB_Index qw_index_list_all_size = 0; + GrB_Index* restrict qw_index_list = qw_index_list_all + C2_dimension * omp_get_thread_num(); + //GrB_Index* restrict qw_index_list = qw_index_list_all + qw_index_list_all_size * omp_get_thread_num(); + bool* restrict qw_already_set = qw_already_set_all + C2_dimension * omp_get_thread_num(); + for (GrB_Index qkB = B2_pos[qi]; qkB < B2_pos[(qi + 1)]; qkB++) { + GrB_Index qk = B2_crd[qkB]; + for (GrB_Index qjC = C2_pos[qk]; qjC < C2_pos[(qk + 1)]; qjC++) { + GrB_Index qj = C2_crd[qjC]; + if (!qw_already_set[qj]) { + qw_index_list[qw_index_list_all_size] = qj; + qw_already_set[qj] = 1; + qw_index_list_all_size++; + } + } + } + GrB_Index tqjA2_nnz_val = 0; + for (GrB_Index qw_index_locator = 0; qw_index_locator < qw_index_list_all_size; qw_index_locator++) { + GrB_Index qj = qw_index_list[qw_index_locator]; + tqjA2_nnz_val += (GrB_Index)1; + qw_already_set[qj] = 0; + } + A2_nnz[qi] = tqjA2_nnz_val; + } + + free(qw_index_list_all); + free(qw_already_set_all); + + A2_pos = (GrB_Index*)malloc(sizeof(GrB_Index) * (A1_dimension + 1)); + A2_pos[0] = 0; + for (GrB_Index i = 0; i < A1_dimension; i++) { + A2_pos[i + 1] = A2_pos[i] + A2_nnz[i]; + } + //std::cout << A2_pos[A1_dimension] << std::endl; + A2_crd = (GrB_Index*)malloc(sizeof(GrB_Index) * A2_pos[A1_dimension]); + A_vals = (double*)malloc(sizeof(double) * A2_pos[A1_dimension]); + + double* restrict w_all = 0; + GrB_Index* restrict w_index_list_all = 0; + w_index_list_all = (GrB_Index*)malloc(sizeof(GrB_Index) * (C2_dimension * omp_get_max_threads())); + bool* restrict w_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool)); + w_all = (double*)malloc(sizeof(double) * (C2_dimension * omp_get_max_threads())); + + //#pragma omp parallel for schedule(runtime) + #pragma omp parallel for schedule(dynamic, 128) num_threads(nthreads) + for (GrB_Index i = 0; i < B1_dimension; i++) { + GrB_Index w_index_list_all_size = 0; + double* restrict w = w_all + C2_dimension * omp_get_thread_num(); + GrB_Index* restrict w_index_list = w_index_list_all + C2_dimension * omp_get_thread_num(); + //GrB_Index* restrict w_index_list = w_index_list_all + w_index_list_all_size * omp_get_thread_num(); + bool* restrict w_already_set = w_already_set_all + C2_dimension * omp_get_thread_num(); + for (GrB_Index kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) { + GrB_Index k = B2_crd[kB]; + for (GrB_Index jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) { + GrB_Index j = C2_crd[jC]; + if (!w_already_set[j]) { + w[j] = B_vals[kB] + C_vals[jC]; + //w[j] = B_vals[kB] * C_vals[jC]; + w_index_list[w_index_list_all_size] = j; + w_already_set[j] = 1; + w_index_list_all_size++; + } + else { + w[j] = fmin(w[j], B_vals[kB] + C_vals[jC]); + //w[j] = w[j] + B_vals[kB] * C_vals[jC]; + } + } + } + //qsort(w_index_list, w_index_list_all_size, sizeof(GrB_Index), cmp); + + for (GrB_Index w_index_locator = 0; w_index_locator < w_index_list_all_size; w_index_locator++) { + GrB_Index j = w_index_list[w_index_locator]; + GrB_Index pA2 = A2_pos[i]; + A2_pos[i] = A2_pos[i] + 1; + A2_crd[pA2] = j; + A_vals[pA2] = w[j]; + w_already_set[j] = 0; + } + } + + free(w_index_list_all); + free(w_already_set_all); + free(w_all); + + for (GrB_Index p = 0; p < A1_dimension; p++) { + A2_pos[A1_dimension - p] = A2_pos[((A1_dimension - p) - 1)]; + } + A2_pos[0] = 0; + + free(A2_nnz); +#endif A->indices[1][0] = (uint8_t*)(A2_pos); 
A->indices[1][1] = (uint8_t*)(A2_crd); @@ -487,9 +946,131 @@ int taco_mxm_trop(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) { return 0; } +int taco_mxm_bool(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) { + GrB_Index A1_dimension = (GrB_Index)(A->dimensions[0]); + GrB_Index* restrict A2_pos = (GrB_Index*)(A->indices[1][0]); + GrB_Index* restrict A2_crd = (GrB_Index*)(A->indices[1][1]); + bool* restrict A_vals = (bool*)(A->vals); + GrB_Index B1_dimension = (GrB_Index)(B->dimensions[0]); + GrB_Index* restrict B2_pos = (GrB_Index*)(B->indices[1][0]); + GrB_Index* restrict B2_crd = (GrB_Index*)(B->indices[1][1]); + bool* restrict B_vals = (bool*)(B->vals); + GrB_Index C1_dimension = (GrB_Index)(C->dimensions[0]); + GrB_Index C2_dimension = (GrB_Index)(C->dimensions[1]); + GrB_Index* restrict C2_pos = (GrB_Index*)(C->indices[1][0]); + GrB_Index* restrict C2_crd = (GrB_Index*)(C->indices[1][1]); + bool* restrict C_vals = (bool*)(C->vals); + + GrB_Index* restrict A2_nnz = 0; + A2_nnz = (GrB_Index*)malloc(sizeof(GrB_Index) * B1_dimension); + + GrB_Index* restrict qw_index_list_all = 0; + qw_index_list_all = (GrB_Index*)malloc(sizeof(GrB_Index) * (C2_dimension * omp_get_max_threads())); + bool* restrict qw_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool)); + + //#pragma omp parallel for schedule(runtime) + #pragma omp parallel for schedule(dynamic, 128) num_threads(nthreads) + for (GrB_Index qi = 0; qi < B1_dimension; qi++) { + GrB_Index qw_index_list_all_size = 0; + GrB_Index* restrict qw_index_list = qw_index_list_all + C2_dimension * omp_get_thread_num(); + //GrB_Index* restrict qw_index_list = qw_index_list_all + qw_index_list_all_size * omp_get_thread_num(); + bool* restrict qw_already_set = qw_already_set_all + C2_dimension * omp_get_thread_num(); + for (GrB_Index qkB = B2_pos[qi]; qkB < B2_pos[(qi + 1)]; qkB++) { + GrB_Index qk = B2_crd[qkB]; + for (GrB_Index qjC = C2_pos[qk]; qjC < C2_pos[(qk + 1)]; qjC++) { + GrB_Index qj = C2_crd[qjC]; + if (!qw_already_set[qj]) { + qw_index_list[qw_index_list_all_size] = qj; + qw_already_set[qj] = 1; + qw_index_list_all_size++; + } + } + } + GrB_Index tqjA2_nnz_val = 0; + for (GrB_Index qw_index_locator = 0; qw_index_locator < qw_index_list_all_size; qw_index_locator++) { + GrB_Index qj = qw_index_list[qw_index_locator]; + tqjA2_nnz_val += (GrB_Index)1; + qw_already_set[qj] = 0; + } + A2_nnz[qi] = tqjA2_nnz_val; + } + + free(qw_index_list_all); + free(qw_already_set_all); + + A2_pos = (GrB_Index*)malloc(sizeof(GrB_Index) * (A1_dimension + 1)); + A2_pos[0] = 0; + for (GrB_Index i = 0; i < A1_dimension; i++) { + A2_pos[i + 1] = A2_pos[i] + A2_nnz[i]; + } + //std::cout << A2_pos[A1_dimension] << std::endl; + A2_crd = (GrB_Index*)malloc(sizeof(GrB_Index) * A2_pos[A1_dimension]); + A_vals = (bool*)malloc(sizeof(bool) * A2_pos[A1_dimension]); + + bool* restrict w_all = 0; + GrB_Index* restrict w_index_list_all = 0; + w_index_list_all = (GrB_Index*)malloc(sizeof(GrB_Index) * (C2_dimension * omp_get_max_threads())); + bool* restrict w_already_set_all = calloc((C2_dimension * omp_get_max_threads()), sizeof(bool)); + w_all = (bool*)malloc(sizeof(bool) * (C2_dimension * omp_get_max_threads())); + + //#pragma omp parallel for schedule(runtime) + #pragma omp parallel for schedule(dynamic, 128) num_threads(nthreads) + for (GrB_Index i = 0; i < B1_dimension; i++) { + GrB_Index w_index_list_all_size = 0; + bool* restrict w = w_all + C2_dimension * omp_get_thread_num(); + GrB_Index* restrict w_index_list = w_index_list_all + 
C2_dimension * omp_get_thread_num(); + //GrB_Index* restrict w_index_list = w_index_list_all + w_index_list_all_size * omp_get_thread_num(); + bool* restrict w_already_set = w_already_set_all + C2_dimension * omp_get_thread_num(); + for (GrB_Index kB = B2_pos[i]; kB < B2_pos[(i + 1)]; kB++) { + GrB_Index k = B2_crd[kB]; + for (GrB_Index jC = C2_pos[k]; jC < C2_pos[(k + 1)]; jC++) { + GrB_Index j = C2_crd[jC]; + if (!w_already_set[j]) { + w[j] = B_vals[kB] && C_vals[jC]; + //w[j] = B_vals[kB] * C_vals[jC]; + w_index_list[w_index_list_all_size] = j; + w_already_set[j] = 1; + w_index_list_all_size++; + } + else { + w[j] = w[j] || B_vals[kB] && C_vals[jC]; + //w[j] = w[j] + B_vals[kB] * C_vals[jC]; + } + } + } + //qsort(w_index_list, w_index_list_all_size, sizeof(GrB_Index), cmp); + + for (GrB_Index w_index_locator = 0; w_index_locator < w_index_list_all_size; w_index_locator++) { + GrB_Index j = w_index_list[w_index_locator]; + GrB_Index pA2 = A2_pos[i]; + A2_pos[i] = A2_pos[i] + 1; + A2_crd[pA2] = j; + A_vals[pA2] = w[j]; + w_already_set[j] = 0; + } + } + + free(w_index_list_all); + free(w_already_set_all); + free(w_all); + + for (GrB_Index p = 0; p < A1_dimension; p++) { + A2_pos[A1_dimension - p] = A2_pos[((A1_dimension - p) - 1)]; + } + A2_pos[0] = 0; + + free(A2_nnz); + + A->indices[1][0] = (uint8_t*)(A2_pos); + A->indices[1][1] = (uint8_t*)(A2_crd); + A->vals = (uint8_t*)A_vals; + return 0; +} + +#if 0 #define TACO_MIN(_a,_b) ((_a) < (_b) ? (_a) : (_b)) -int taco_extract_trop(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *I, taco_tensor_t *J) { +int taco_extract(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *I, taco_tensor_t *J) { GrB_Index A1_dimension = (GrB_Index)(A->dimensions[0]); GrB_Index* restrict A2_pos = (GrB_Index*)(A->indices[1][0]); GrB_Index* restrict A2_crd = (GrB_Index*)(A->indices[1][1]); @@ -599,24 +1180,143 @@ int taco_extract_trop(taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *I, taco A->vals = (uint8_t*)A_vals; return 0; } +#endif + +ir::Expr orImpl(const std::vector& v) { + return ir::Or::make(v[0], v[1]); +} +Func OrOp("or", orImpl, {Annihilator(true), Identity(false), Commutative(), Associative()}); + +ir::Expr andImpl(const std::vector& v) { + return ir::And::make(v[0], v[1]); +} +Func AndOp("and", andImpl, {Annihilator(false), Identity(true), Commutative(), Associative()}); + +ir::Expr addImpl(const std::vector& v) { + return ir::Add::make(v[0], v[1]); +} +Func AddOp("add", addImpl, {Annihilator(std::numeric_limits::infinity()), Identity(0), Commutative(), Associative()}); + +ir::Expr minImpl(const std::vector& v) { + return ir::Min::make(v[0], v[1]); +} +Func MinOp("min", minImpl, {Identity(std::numeric_limits::infinity()), Commutative(), Associative()}); + +ir::Expr maskImpl(const std::vector& v) { + return v[0]; +} +struct MaskAlgebra { + IterationAlgebra operator()(const std::vector& r) { + return Intersect(r[0], Complement(r[1])); + } +}; +Func MaskOp("mask", maskImpl, MaskAlgebra()); + +#if 0 +ir::Expr selectImpl(const std::vector& v) { + return v[1]; +} +ir::Expr defaultImpl(const std::vector& v) { + return v[2]; +} +struct SelectAlgebra { + IterationAlgebra operator()(const std::vector& r) { + return Union(Intersect(Complement(r[0]), r[1]), Intersect(r[0], r[2])); + return Union(Intersect(Complement(r[0]), r[1]), r[2]); + return Intersect(Complement(r[0]), r[1]); + } +}; +#endif + +class BitmapModeFormat : public ModeFormatImpl { +public: + BitmapModeFormat() : + ModeFormatImpl("dense", false, true, true, false, false, true, false, + false, 
true, true, false) {} + + ~BitmapModeFormat() override {} + + ModeFormat copy(std::vector properties) const override { + return ModeFormat(std::make_shared()); + } + + ModeFunction locate(ir::Expr parentPos, std::vector coords, + Mode mode) const override { + ir::Expr pos = ir::Add::make(ir::Mul::make(parentPos, getWidth(mode)), coords.back()); + return ModeFunction(ir::Stmt(), {pos, ir::Eq::make(ir::Load::make(getValidArray(mode.getModePack()), pos), 1)}); + } + + ir::Stmt getInsertCoord(ir::Expr p, const std::vector& i, + Mode mode) const override { + return ir::Store::make(getValidArray(mode.getModePack()), p, 1); + } + + ir::Expr getWidth(Mode mode) const override { + return (mode.getSize().isFixed() && mode.getSize().getSize() < 16) ? + (int)mode.getSize().getSize() : + getSizeArray(mode.getModePack()); + } + + //ir::Stmt getInsertInitCoords(ir::Expr pBegin, ir::Expr pEnd, + // Mode mode) const override; + + ir::Stmt getInsertInitLevel(ir::Expr szPrev, ir::Expr sz, + Mode mode) const override { + return ir::Allocate::make(getValidArray(mode.getModePack()), sz, false, ir::Expr(), true); + } + + std::vector getArrays(ir::Expr tensor, int mode, + int level) const override { + return {ir::GetProperty::make(tensor, ir::TensorProperty::Dimension, mode), + ir::GetProperty::make(tensor, ir::TensorProperty::Indices, + level - 1, 0, util::toString(tensor) + + std::to_string(level) + "_valid")}; + } + + ir::Expr getSizeArray(ModePack pack) const { + return pack.getArray(0); + } + + ir::Expr getValidArray(ModePack pack) const { + return pack.getArray(1); + } +}; +ModeFormat Bitmap(std::make_shared()); static void bench_mxv_taco(benchmark::State& state) { #if 0 + //std::map, FuncBodyGenerator> def; + //def[{1,0,0}] = selectImpl; + //Func SelectOp("select", selectImpl, SelectAlgebra(), {{{0, 2}, defaultImpl}}); + //Func SelectOp("select", selectImpl); + taco_set_num_threads(nthreads); for (auto _ : state) { state.PauseTiming(); IndexVar i, j; - Tensor y({fixture.A_trop_taco.getDimension(0)}, dv, std::numeric_limits::infinity()); - y(i) = Reduction(MinOp(), j, AddOp(fixture.A_trop_taco(i,j), fixture.x_trop_taco(j))); - //y(i) = MaskOp(Reduction(MinOp(), j, AddOp(fixture.A_trop_taco(i,j), fixture.x_trop_taco(j))), fixture.x_trop_taco(i)); - //y(i) = MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)); + Format dv({Dense}); + Format bmv({Bitmap}); + //Tensor A("A", {fixture.M, fixture.N}, CSR, std::numeric_limits::infinity()); + //Tensor y("y", {fixture.M}, bmv, std::numeric_limits::infinity()); + //Tensor x("x", {fixture.N}, bmv, std::numeric_limits::infinity()); + Tensor A("A", {fixture.M, fixture.N}, CSR); + Tensor y("y", {fixture.M}, bmv); + Tensor x("x", {fixture.N}, bmv); + Tensor m("m", {fixture.M}, dv); + y(i) = MaskOp(Reduction(OrOp(), j, AndOp(A(i,j), x(j))), m(i)); + //y(i) = MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), m(i)); + //y(i) = x(i); + //y(i) = SelectOp(m(i), Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)); //y(i) = MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)); + //y(i) = Reduction(MinOp(), j, AddOp(A(i,j), x(j))); + //y(i) = MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)); //y(i) = MinOp(MaskOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i)); //y(i) = MaskOp(MinOp(Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)), x(i)); //y(i) = MinOp(FilterOp(x(i)) * Reduction(MinOp(), j, AddOp(A(i,j), x(j))), x(i)); - y.compile(); + auto stmt = y.getAssignment().concretize().parallelize(i, ParallelUnit::CPUThread, OutputRaceStrategy::NoRaces); + 
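+ // Each entry y(i) depends only on row i of A, on x, and on the mask entry m(i),
+ // so the i loop is parallelized across CPU threads with OutputRaceStrategy::NoRaces.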
y.compile(stmt, true); state.ResumeTiming(); @@ -625,66 +1325,101 @@ static void bench_mxv_taco(benchmark::State& state) { } taco_set_num_threads(1); #else - if (!fixture.A_trop_taco_t) { - fixture.A_trop_taco_t = to_taco_tensor(&fixture.A_trop_gb); + if (!fixture.A_taco_t) { + fixture.A_taco_t = to_csr_taco_tensor(&fixture.A_gb); } - if (!fixture.x_trop_taco_t) { - fixture.x_trop_taco_t = to_taco_tensor(&fixture.x_trop_gb); + if (!fixture.x_taco_t) { + fixture.x_taco_t = to_bitmap_taco_tensor(&fixture.x_gb); } - taco_tensor_t y = new_vec_taco_tensor(fixture.A_trop_taco.getDimension(0)); + if (!fixture.m_taco_t) { + fixture.m_taco_t = to_dense_taco_tensor(&fixture.m_gb); + } + taco_tensor_t y = new_bitmap_taco_tensor(fixture.M); for (auto _ : state) { state.PauseTiming(); - free(y.vals); + free_bitmap_taco_tensor(y); state.ResumeTiming(); - taco_mxv_trop(&y, fixture.A_trop_taco_t, fixture.x_trop_taco_t); + if (fixture.is_bool) { + taco_mxv_bool(&y, fixture.A_taco_t, fixture.x_taco_t, fixture.m_taco_t); + } else { + taco_mxv_trop(&y, fixture.A_taco_t, fixture.x_taco_t, fixture.m_taco_t); + } } - free(y.vals); + if (fixture.validate && fixture.y_gb) { + auto y_gb = to_bitmap_taco_tensor(&fixture.y_gb); + std::cout << "comparing mxv: "; + if (fixture.is_bool) { + std::cout << compare_bool_bitmap(get_bitmap_arrays(y), get_bitmap_arrays(*y_gb)); + } else { + std::cout << compare_double_bitmap(get_bitmap_arrays(y), get_bitmap_arrays(*y_gb)); + } + std::cout << std::endl; + fixture.y_gb = nullptr; + } + free_bitmap_taco_tensor(y); #endif } static void bench_mxm_taco(benchmark::State& state) { - if (!fixture.A_trop_taco_t) { - fixture.A_trop_taco_t = to_taco_tensor(&fixture.A_trop_gb); + if (!fixture.A_taco_t) { + fixture.A_taco_t = to_csr_taco_tensor(&fixture.A_gb); } - taco_tensor_t C = new_mat_taco_tensor(fixture.A_trop_taco.getDimension(0), fixture.A_trop_taco.getDimension(1)); + taco_tensor_t C = new_csr_taco_tensor(fixture.M, fixture.N); for (auto _ : state) { state.PauseTiming(); - free_mat_taco_tensor(C); + free_csr_taco_tensor(C); state.ResumeTiming(); - taco_mxm_trop(&C, fixture.A_trop_taco_t, fixture.A_trop_taco_t); + if (fixture.is_bool) { + taco_mxm_bool(&C, fixture.A_taco_t, fixture.A_taco_t); + } else { + taco_mxm_trop(&C, fixture.A_taco_t, fixture.A_taco_t); + } + } + if (fixture.validate && fixture.C_gb) { + auto C_gb = to_csr_taco_tensor(&fixture.C_gb); + std::cout << "comparing mxm: "; + if (fixture.is_bool) { + std::cout << compare_bool_csr(get_csr_arrays(C), get_csr_arrays(*C_gb)); + } else { + std::cout << compare_double_csr(get_csr_arrays(C), get_csr_arrays(*C_gb)); + } + std::cout << std::endl; + fixture.C_gb = nullptr; } - free_mat_taco_tensor(C); + free_csr_taco_tensor(C); } +#if 0 static void bench_extract_taco(benchmark::State& state) { - if (!fixture.A_trop_taco_t) { - fixture.A_trop_taco_t = to_taco_tensor(&fixture.A_trop_gb); + if (!fixture.A_taco_t) { + fixture.A_taco_t = to_csr_taco_tensor(&fixture.A_gb); } - taco_tensor_t B = new_mat_taco_tensor(fixture.indices.size(), fixture.indices.size()); + taco_tensor_t B = new_csr_taco_tensor(fixture.indices.size(), fixture.indices.size()); for (auto _ : state) { state.PauseTiming(); - free_mat_taco_tensor(B); + free_csr_taco_tensor(B); state.ResumeTiming(); - taco_extract_trop(&B, fixture.A_trop_taco_t, &fixture.indices_taco, &fixture.indices_taco); + taco_extract(&B, fixture.A_taco_t, &fixture.indices_taco, &fixture.indices_taco); } //std::cout << ((GrB_Index*)(B.indices[1][0]))[B.dimensions[0]] << std::endl; - 
free_mat_taco_tensor(B); + free_csr_taco_tensor(B); } +#endif GRAPHBLAS_BENCH(bench_mxv_suitesparse, 1000); GRAPHBLAS_BENCH(bench_mxm_suitesparse, 25); -GRAPHBLAS_BENCH(bench_extract_suitesparse, 1000); +//GRAPHBLAS_BENCH(bench_extract_suitesparse, 10); GRAPHBLAS_BENCH(bench_mxv_taco, 1000); GRAPHBLAS_BENCH(bench_mxm_taco, 25); -GRAPHBLAS_BENCH(bench_extract_taco, 1000); +//GRAPHBLAS_BENCH(bench_extract_taco, 10); #endif diff --git a/taco/image.cpp b/taco/image.cpp index b9bcb93..71cdffc 100644 --- a/taco/image.cpp +++ b/taco/image.cpp @@ -50,20 +50,46 @@ struct xorAndAlgebra { IterationAlgebra operator()(const std::vector& regions) { auto m1 = Intersect(regions[0], regions[2]); auto m2 = Intersect(regions[1], regions[2]); - auto noIntersect = Complement(Intersect(m1, m2)); + auto noIntersect = Complement(Intersect(Intersect(regions[0], regions[1]), regions[2])); return Intersect(noIntersect, Union(m1, m2)); } }; +struct testConstructionAlgebra { + IterationAlgebra operator()(const std::vector& regions) { + auto m1 = Union(Complement(regions[0]), Complement(regions[2])); + auto m2 = Union(Complement(regions[1]), Complement(regions[2])); + return Intersect(m1, m2); + } +}; + Func xorOp1("logical_xor", Boolean(), xorAlgebra()); Func andOp1("logical_and", Boolean(), andAlgebra()); Func xorAndOp("fused_xor_and", Boolean(), xorAndAlgebra()); +Func testOp("test", Boolean(), testConstructionAlgebra()); static void bench_image_xor(benchmark::State& state, const Format& f) { - int num = state.range(0); - auto t1 = 0.5; - auto t2 = 0.55; - Tensor matrix1 = castToType("A", loadImageTensor("A", num, f, t1, 1 /* variant */)); - Tensor matrix2 = castToType("B", loadImageTensor("B", num, f, t2, 2 /* variant */)); + auto t1 = 0.75; + auto t2 = 0.80; + + auto num_str = getEnvVar("IMAGE_NUM"); + if (num_str == "") { + state.error_occurred(); + return; + } + + int num = std::stoi(num_str); +// int num = state.range(0); + taco::Tensor matrix1, matrix2; + try { + matrix1 = castToTypeZero("A", loadImageTensor("A", num, f, t1, 1 /* variant */)); + matrix2 = castToTypeZero("B", loadImageTensor("B", num, f, t2, 2 /* variant */)); + } catch (TacoException& e) { + // Counters don't show up in the generated CSV if we used SkipWithError, so + // just add in the label that this run is skipped. 
+ state.SetLabel(num_str+"/SKIPPED-FAILED-READ"); + return; + } + auto dims = matrix1.getDimensions(); for (auto _ : state) { @@ -75,75 +101,148 @@ static void bench_image_xor(benchmark::State& state, const Format& f) { result.compile(); state.ResumeTiming(); result.compute(); - result = result.removeExplicitZeros(result.getFormat()); - - int nnz = 0; - for (auto& it : iterate(result)) { - nnz++; - } - std::cout << "Result NNZ = " << nnz << std::endl; - std::cout << result << std::endl; +// std::cout << "Result NNZ = " << nnz << std::endl; +// std::cout << result << std::endl; } } static void CustomArguments(benchmark::internal::Benchmark* b) { - for (int i = 1; i <= 1; ++i) + for (int i = 1; i <= 98; ++i) b->Args({i}); } -TACO_BENCH_ARGS(bench_image_xor, csr, CSR)->Apply(CustomArguments); +TACO_BENCH_ARGS(bench_image_xor, csr, CSR);//->Apply(CustomArguments); static void bench_image_fused(benchmark::State& state, const Format& f) { - int num = state.range(0); - auto t1 = 0.5; - auto t2 = 0.55; - Tensor matrix1 = castToType("A", loadImageTensor("A", num, f, t1, 1 /* variant */)); - Tensor matrix2 = castToType("B", loadImageTensor("B", num, f, t2, 2 /* variant */)); - Tensor matrix3 = castToType("C", loadImageTensor("C", num, f, 3 /* variant */)); - auto dims = matrix1.getDimensions(); +// int num = state.range(0); + auto t1 = 0.75; + auto t2 = 0.80; - int nnz = 0; - for (auto& it : iterate(matrix1)) { - nnz++; - } - std::cout << "Matrix1 NNZ = " << nnz << std::endl; - nnz = 0; - for (auto& it : iterate(matrix2)) { - nnz++; + auto num_str = getEnvVar("IMAGE_NUM"); + if (num_str == "") { + state.error_occurred(); + return; } - std::cout << "Matrix2 NNZ = " << nnz << std::endl; - nnz = 0; - for (auto& it : iterate(matrix3)) { - nnz++; + + int num = std::stoi(num_str); + + taco::Tensor matrix1, matrix2, matrix3; + try { + matrix1 = castToTypeZero("A", loadImageTensor("A", num, f, t1, 1 /* variant */)); + matrix2 = castToTypeZero("B", loadImageTensor("B", num, f, t2, 2 /* variant */)); + matrix3 = castToTypeZero("C", loadImageTensor("C", num, f, 3 /* variant */)); + } catch (TacoException& e) { + // Counters don't show up in the generated CSV if we used SkipWithError, so + // just add in the label that this run is skipped. 
+ state.SetLabel(num_str+"/SKIPPED-FAILED-READ"); + return; } - std::cout << "Matrix3 NNZ = " << nnz << std::endl; + + auto dims = matrix1.getDimensions(); + +// write("temp/taco-mat1-" + std::to_string(num) + ".tns", matrix1); +// write("temp/taco-mat2-" + std::to_string(num) + ".tns", matrix2); +// write("temp/taco-mat3-" + std::to_string(num) + ".tns", matrix3); +// int nnz = 0; +// for (auto& it : iterate(matrix1)) { +// nnz++; +// } +// std::cout << "Matrix1 NNZ = " << nnz << std::endl; +// nnz = 0; +// for (auto& it : iterate(matrix2)) { +// nnz++; +// } +// std::cout << "Matrix2 NNZ = " << nnz << std::endl; +// nnz = 0; +// for (auto& it : iterate(matrix3)) { +// nnz++; +// } +// std::cout << "Matrix3 NNZ = " << nnz << std::endl; + for (auto _ : state) { state.PauseTiming(); Tensor result("result", dims, f, 0); - Tensor temp1("t1", dims, f, 0); - Tensor temp2("t2", dims, f, 0); + IndexVar i("i"), j("j"); -// temp1(i,j) = andOp1(matrix1(i, j), matrix3(i, j)); -// temp2(i,j) = andOp1(matrix2(i, j), matrix3(i, j)); -// result(i, j) = xorOp1(temp1(i,j), temp2(i,j)); -// result(i, j) = xorOp1(andOp1(matrix1(i, j), matrix3(i, j)), andOp1(matrix2(i, j), matrix3(i, j))); result(i, j) = xorAndOp(matrix1(i, j), matrix2(i, j), matrix3(i, j)); IndexStmt stmt = result.getAssignment().concretize(); result.setAssembleWhileCompute(true); result.compile(); state.ResumeTiming(); result.compute(); - temp1 = temp1.removeExplicitZeros(temp1.getFormat()); - temp2 = temp2.removeExplicitZeros(temp2.getFormat()); - result = result.removeExplicitZeros(result.getFormat()); - int nnz = 0; - for (auto& it : iterate(result)) { - nnz++; - } +// result = result.removeExplicitZeros(result.getFormat()); - std::cout << "Result NNZ = " << nnz << std::endl; - std::shared_ptr codegen = ir::CodeGen::init_default(std::cout, ir::CodeGen::ImplementationGen); - ir::Stmt compute = lower(stmt, "compute", false, true); - codegen->compile(compute, true); -// std::cout << result << std::endl; +// int nnz = 0; +// for (auto& it : iterate(result)) { +// nnz++; +// } +// std::cout << "Result NNZ = " << nnz << std::endl; +// write("temp/taco-result" + std::to_string(num) + ".tns", result); + // Used to print out generated TACO code +// std::shared_ptr codegen = ir::CodeGen::init_default(std::cout, ir::CodeGen::ImplementationGen); +// ir::Stmt compute = lower(stmt, "compute", false, true); +// codegen->compile(compute, true); + } +} +TACO_BENCH_ARGS(bench_image_fused, csr, CSR); + +static void bench_image_window(benchmark::State& state, const Format& f, double window_size) { +// int num = state.range(0); + auto t1 = 0.75; + auto t2 = 0.80; + + auto num_str = getEnvVar("IMAGE_NUM"); + if (num_str == "") { + state.error_occurred(); + return; + } + + int num = std::stoi(num_str); + + taco::Tensor matrix1, matrix2, matrix3; + try { + matrix1 = castToTypeZero("A", loadImageTensor("A", num, f, t1, 1 /* variant */)); + matrix2 = castToTypeZero("B", loadImageTensor("B", num, f, t2, 2 /* variant */)); + } catch (TacoException& e) { + // Counters don't show up in the generated CSV if we used SkipWithError, so + // just add in the label that this run is skipped. 
+ state.SetLabel(num_str+"/SKIPPED-FAILED-READ"); + return; + } + + auto dims = matrix1.getDimensions(); + + int mid0 = (dims[0]/2.0); + int mid1 = (dims[1]/2.0); + int win_len0 = int(window_size * dims[0]); + int win_len1 = int(window_size * dims[1]); + + for (auto _ : state) { + state.PauseTiming(); + Tensor result("result", {2*win_len0, 2*win_len1}, f, 0); + + IndexVar i("i"), j("j"); + result(i, j) = xorOp1(matrix1(i(mid0-win_len0, mid0+win_len0), j(mid1-win_len1, mid1+win_len1)), + matrix2(i(mid0-win_len0, mid0+win_len0), j(mid1-win_len1, mid1+win_len1))); + IndexStmt stmt = result.getAssignment().concretize(); + result.setAssembleWhileCompute(true); + result.compile(); + state.ResumeTiming(); + result.compute(); +// result = result.removeExplicitZeros(result.getFormat()); + +// int nnz = 0; +// for (auto& it : iterate(result)) { +// nnz++; +// } +// std::cout << "Result NNZ = " << nnz << std::endl; + +// write("temp/taco-result" + std::to_string(num) + ".tns", result); + // Used to print out generated TACO code +// std::shared_ptr codegen = ir::CodeGen::init_default(std::cout, ir::CodeGen::ImplementationGen); +// ir::Stmt compute = lower(stmt, "compute", false, true); +// codegen->compile(compute, true); } } -TACO_BENCH_ARGS(bench_image_fused, csr, CSR)->Apply(CustomArguments); \ No newline at end of file +TACO_BENCH_ARGS(bench_image_window, csr/0.45, CSR, 0.45); +TACO_BENCH_ARGS(bench_image_window, csr/0.4, CSR, 0.4); +TACO_BENCH_ARGS(bench_image_window, csr/0.35, CSR, 0.35); +TACO_BENCH_ARGS(bench_image_window, csr/0.3, CSR, 0.3); diff --git a/taco/minimax.cpp b/taco/minimax.cpp new file mode 100644 index 0000000..069d8bb --- /dev/null +++ b/taco/minimax.cpp @@ -0,0 +1,97 @@ +#include "bench.h" +#include "benchmark/benchmark.h" + +#include "taco/tensor.h" +#include "taco/format.h" +#include "taco/index_notation/index_notation.h" +#include "taco/index_notation/tensor_operator.h" + +#include "taco/util/env.h" + +using namespace taco; + +struct Min { + ir::Expr operator()(const std::vector &v) { + if (v.size() == 1) { + return v[0]; + } + return ir::Min::make(v); + } +}; + +struct Max { + ir::Expr operator()(const std::vector &v) { + if (v.size() == 1) { + return v[0]; + } + return ir::Max::make(v); + } +}; + +// We don't need to provide algebras since we're only iterating over one tensor. +Func minOp("min", Min()); +Func maxOp("max", Max()); + +IndexExpr genMinMaxExpr(Tensor& game, std::vector& indexVars, int index) { + Func op = (index % 2 == 0) ? maxOp : minOp; + if (index == (game.getOrder() - 1)) { + std::vector slice; + for (int i = 0; i <= index; i++) { + slice.push_back(indexVars[i]); + } + return Reduction(op(), indexVars[index], game(slice)); + } + return Reduction(op(), indexVars[index], genMinMaxExpr(game, indexVars, index + 1)); +} + +static void bench_minimax(benchmark::State& state) { + auto order_str = getEnvVar("MINMAX_ORDER"); + if (order_str == "") { + state.error_occurred(); + return; + } + int order = std::stoi(order_str) + 2; + + state.counters["order"] = order - 2; + + std::vector modes(order, Sparse); + Format f(modes); + taco::Tensor game = loadMinMaxTensor("A", order, f); + + // This benchmark needs this hack activated to generate correct code. 
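+ // If the variable is not set, the guard below skips the run instead of timing a
+ // miscompiled kernel.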
+ if(util::getFromEnv("TACO_CONCRETIZE_HACK", "0") == "0") { + state.SkipWithError("must set TACO_CONCRETIZE_HACK=1"); + return; + } + + std::vector ivars = { + IndexVar("i"), + IndexVar("j"), + IndexVar("k"), + IndexVar("l"), + IndexVar("m"), + IndexVar("n"), + IndexVar("o"), + IndexVar("p"), + IndexVar("q"), + IndexVar("r"), + IndexVar("s"), + IndexVar("t"), + }; + + std::vector dims = {20, 20, 43, 43, 43, 43, 43}; + dims.resize(order); + double return_value; + // TODO (rohany, owhsu): We need to actually generate the input game state. + for (auto _ : state) { + state.PauseTiming(); + Tensor result("C"); + result = genMinMaxExpr(game, ivars, 0); + result.setAssembleWhileCompute(true); + result.compile(); +// std::cout << result.getSource() << std::endl; + state.ResumeTiming(); + result.compute(); + } +} +TACO_BENCH(bench_minimax); diff --git a/taco/taco b/taco/taco index e2c4c10..bc765ba 160000 --- a/taco/taco +++ b/taco/taco @@ -1 +1 @@ -Subproject commit e2c4c105920a7df5b2b1d8e767807af3f440370a +Subproject commit bc765ba63b5bd20af178d685b917029e64bfad7c diff --git a/taco/ufuncs.cpp b/taco/ufuncs.cpp index cc485e2..fc1ce83 100644 --- a/taco/ufuncs.cpp +++ b/taco/ufuncs.cpp @@ -494,7 +494,7 @@ struct SuiteSparseTensors { }; SuiteSparseTensors ssTensors; -static void bench_suitesparse_ufunc(benchmark::State& state, Func op) { +static void bench_suitesparse_ufunc(benchmark::State& state, Func op, int fill_value = 0) { // Counters must be present in every run to get reported to the CSV. state.counters["dimx"] = 0; state.counters["dimy"] = 0; @@ -527,7 +527,7 @@ static void bench_suitesparse_ufunc(benchmark::State& state, Func op) { for (auto _ : state) { state.PauseTiming(); - Tensor result("result", ssTensor.getDimensions(), ssTensor.getFormat()); + Tensor result("result", ssTensor.getDimensions(), ssTensor.getFormat(), fill_value); result.setAssembleWhileCompute(true); IndexVar i, j; result(i, j) = op(ssTensor(i, j), other(i, j)); @@ -548,3 +548,4 @@ static void bench_suitesparse_ufunc(benchmark::State& state, Func op) { TACO_BENCH_ARGS(bench_suitesparse_ufunc, xor, xorOp); TACO_BENCH_ARGS(bench_suitesparse_ufunc, ldExp, ldExp); TACO_BENCH_ARGS(bench_suitesparse_ufunc, rightShift, rightShift); +TACO_BENCH_ARGS(bench_suitesparse_ufunc, pow1Comp, pow1Comp, 1); \ No newline at end of file
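The hand-written taco_mxv_* and taco_mxm_* kernels in the graphblas diff above all rely on the same per-thread dense-workspace pattern: a dense accumulator (w), an already_set marker array, and an index list recording which columns were touched so the marker can be reset cheaply. The sketch below isolates that pattern for the (min, +) tropical matrix product on plain CSR arrays; the Csr struct and the function name are illustrative assumptions, the version is single-threaded, and unlike taco_mxm_trop it grows the output with push_back instead of running the separate symbolic counting pass that sizes A2_crd and A_vals up front.

#include <cmath>
#include <cstdint>
#include <vector>

// Plain CSR container used only for this sketch (assumed layout: pos has rows + 1 entries).
struct Csr {
  int64_t rows = 0, cols = 0;
  std::vector<int64_t> pos;   // row offsets
  std::vector<int64_t> crd;   // column indices
  std::vector<double> vals;   // values
};

// A = B "times" C over the (min, +) tropical semiring, one dense workspace row at a time.
Csr mxm_tropical(const Csr& B, const Csr& C) {
  Csr A;
  A.rows = B.rows;
  A.cols = C.cols;
  A.pos.assign(A.rows + 1, 0);

  std::vector<double> w(C.cols, 0.0);        // dense accumulator for the current row
  std::vector<char> already_set(C.cols, 0);  // marker: has w[j] been written this row?
  std::vector<int64_t> index_list;           // columns touched in the current row
  index_list.reserve(C.cols);

  for (int64_t i = 0; i < B.rows; ++i) {
    index_list.clear();
    for (int64_t kB = B.pos[i]; kB < B.pos[i + 1]; ++kB) {
      int64_t k = B.crd[kB];
      for (int64_t jC = C.pos[k]; jC < C.pos[k + 1]; ++jC) {
        int64_t j = C.crd[jC];
        double v = B.vals[kB] + C.vals[jC];  // "multiply" is +
        if (!already_set[j]) {
          w[j] = v;
          already_set[j] = 1;
          index_list.push_back(j);
        } else {
          w[j] = std::fmin(w[j], v);         // "add" is min
        }
      }
    }
    for (int64_t j : index_list) {           // scatter row i and reset the marker
      A.crd.push_back(j);
      A.vals.push_back(w[j]);
      already_set[j] = 0;
    }
    A.pos[i + 1] = static_cast<int64_t>(A.crd.size());
  }
  return A;
}

In the parallel kernels in the diff, the workspace, marker, and index list are instead allocated once with size C2_dimension * omp_get_max_threads() and each thread indexes its own slice via omp_get_thread_num(); that is also why the symbolic pass is needed, so that A2_pos is fixed before the threads write their rows into A2_crd and A_vals.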