55
66using namespace symspellcpppy ;
77
8- TEST_CASE (" Testing English" , " [english]" ) {
8+ TEST_CASE (" Testing English" , " [english]" )
9+ {
910 const int maxEditDistance = 2 ;
1011 const int prefixLength = 3 ;
1112
12- SECTION (" Do Word Segmentation" ) {
13+ SECTION (" Do Word Segmentation" )
14+ {
1315 SymSpell symSpell (maxEditDistance, prefixLength);
1416 symSpell.LoadDictionary (" ../resources/frequency_dictionary_en_82_765.txt" , 0 , 1 , XL (' ' ));
1517 std::unordered_map<xstring, xstring> sentences = {
16- {XL (" thequickbrownfoxjumpsoverthelazydog" ), XL (" the quick brown fox jumps over the lazy dog" )},
17- {XL (" itwasabrightcolddayinaprilandtheclockswerestrikingthirteen" ), XL (" it was bright holiday in april and the clocks were striking thirteen" )},
18- {XL (" itwasthebestoftimesitwastheworstoftimesitwastheageofwisdomitwastheageoffoolishness" ), XL (" iowa the best of times it waste worst of times it was thereof wisdom it was thereof foolishness" )}
19- };
18+ {XL (" thequickbrownfoxjumpsoverthelazydog" ), XL (" the quick brown fox jumps over the lazy dog" )},
19+ {XL (" itwasabrightcolddayinaprilandtheclockswerestrikingthirteen" ), XL (" it was bright holiday in april and the clocks were striking thirteen" )},
20+ {XL (" itwasthebestoftimesitwastheworstoftimesitwastheageofwisdomitwastheageoffoolishness" ), XL (" iowa the best of times it waste worst of times it was thereof wisdom it was thereof foolishness" )}};
2021
21- for (auto &sentence : sentences) {
22+ for (auto &sentence : sentences)
23+ {
2224 auto results = symSpell.WordSegmentation (sentence.first );
2325 REQUIRE (results.getCorrected () == sentence.second );
2426 }
2527 }
2628
27- // SECTION("Test 6.7 Changes") {
28- // xstring input =
29- // XL("AbstractHowdoesauser’spriorexperiencewithdeeplearningimpactaccuracy?Wepresentaninitialstudybased"
30- // "on31participantswithdifferentlevelsofexperience.Theirtaskistoperformhyperparameteroptimizationfor"
31- // "agivendeeplearningarchitecture.There-sultsshowastrongpositivecorrelationbetweentheparticipant’sexperience"
32- // "andthefinalperformance.Theyadditionallyindicatethatanexperiencedparticipantfindsbettersolu-tions"
33- // "usingfewerresourcesonaverage.Thedatasuggestsfurthermorethatparticipantswithnopriorexperiencefollow"
34- // "randomstrategiesintheirpursuitofoptimalhyperpa-rameters.Ourstudyinvestigatesthesubjectivehumanfactor"
35- // "incomparisonsofstateoftheartresultsandscientificreproducibilityindeeplearning.1IntroductionThepopularity"
36- // "ofdeeplearninginvariousfieldssuchasimagerecognition[9,19],speech[11,30],bioinformatics[21,24],"
37- // "questionanswering[3]etc.stemsfromtheseeminglyfavorabletrade-offbetweentherecognitionaccuracy"
38- // "andtheiroptimizationburdenlecunetal20attributetheirsuccess");
39- // XL("AbstractHowdoesauser’spriorexperience?");
40-
41- // xstring output =
42- // XL("Abstract How does a user’s prior experience with deep learning impact accuracy? We present an initial "
43- // "study based on 31 participants with different levels of experience. Their task is to perform hyper "
44- // "parameter optimization for a given deep learning architecture. The results show a strong positive "
45- // "correlation between the participant’s experience and the final performance. They additionally indicate "
46- // "that an experienced participant finds better solutions using fewer resources on average. The data "
47- // "suggests furthermore that participants with no prior experience follow random strategies in their "
48- // "pursuit of optimal hyper parameters. Our study investigates the subjective human factor in comparisons "
49- // "of state of the art results and scientific reproducibility in deep learning. 1 Introduction The "
50- // "popularity of deep learning in various fields such as image recognition [9,19], speech [11,30], bio "
51- // "informatics [21,24], question answering [3] etc. stems from the seemingly favorable trade off between "
52- // "the recognition accuracy and their optimization burden l ecu net al 20 attribute their success");
53- // XL("Abstract How does a user’s prior experience?");
54- //
55- // SymSpell symSpell(maxEditDistance, prefixLength);
56- // symSpell.LoadDictionary("../resources/frequency_dictionary_en_82_765.txt", 0, 1, XL(' '));
57- //
58- // auto results = symSpell.WordSegmentation(input, 2, 28);
59- // REQUIRE(results.getSegmented() == output);
60- // }
61-
62- SECTION (" Do Spell Correction" ) {
29+ // SECTION("Test 6.7 Changes") {
30+ // xstring input =
31+ // XL("AbstractHowdoesauser’spriorexperiencewithdeeplearningimpactaccuracy?Wepresentaninitialstudybased"
32+ // "on31participantswithdifferentlevelsofexperience.Theirtaskistoperformhyperparameteroptimizationfor"
33+ // "agivendeeplearningarchitecture.There-sultsshowastrongpositivecorrelationbetweentheparticipant’sexperience"
34+ // "andthefinalperformance.Theyadditionallyindicatethatanexperiencedparticipantfindsbettersolu-tions"
35+ // "usingfewerresourcesonaverage.Thedatasuggestsfurthermorethatparticipantswithnopriorexperiencefollow"
36+ // "randomstrategiesintheirpursuitofoptimalhyperpa-rameters.Ourstudyinvestigatesthesubjectivehumanfactor"
37+ // "incomparisonsofstateoftheartresultsandscientificreproducibilityindeeplearning.1IntroductionThepopularity"
38+ // "ofdeeplearninginvariousfieldssuchasimagerecognition[9,19],speech[11,30],bioinformatics[21,24],"
39+ // "questionanswering[3]etc.stemsfromtheseeminglyfavorabletrade-offbetweentherecognitionaccuracy"
40+ // "andtheiroptimizationburdenlecunetal20attributetheirsuccess");
41+ // XL("AbstractHowdoesauser’spriorexperience?");
42+
43+ // xstring output =
44+ // XL("Abstract How does a user’s prior experience with deep learning impact accuracy? We present an initial "
45+ // "study based on 31 participants with different levels of experience. Their task is to perform hyper "
46+ // "parameter optimization for a given deep learning architecture. The results show a strong positive "
47+ // "correlation between the participant’s experience and the final performance. They additionally indicate "
48+ // "that an experienced participant finds better solutions using fewer resources on average. The data "
49+ // "suggests furthermore that participants with no prior experience follow random strategies in their "
50+ // "pursuit of optimal hyper parameters. Our study investigates the subjective human factor in comparisons "
51+ // "of state of the art results and scientific reproducibility in deep learning. 1 Introduction The "
52+ // "popularity of deep learning in various fields such as image recognition [9,19], speech [11,30], bio "
53+ // "informatics [21,24], question answering [3] etc. stems from the seemingly favorable trade off between "
54+ // "the recognition accuracy and their optimization burden l ecu net al 20 attribute their success");
55+ // XL("Abstract How does a user’s prior experience?");
56+ //
57+ // SymSpell symSpell(maxEditDistance, prefixLength);
58+ // symSpell.LoadDictionary("../resources/frequency_dictionary_en_82_765.txt", 0, 1, XL(' '));
59+ //
60+ // auto results = symSpell.WordSegmentation(input, 2, 28);
61+ // REQUIRE(results.getSegmented() == output);
62+ // }
63+
64+ SECTION (" Do Spell Correction" )
65+ {
6366 SymSpell symSpell (maxEditDistance, prefixLength);
6467 symSpell.LoadDictionary (" ../resources/frequency_dictionary_en_82_765.txt" , 0 , 1 , XL (' ' ));
6568 std::unordered_map<xstring, xstring> words = {
66- {XL (" tke" ), XL (" the" )},
67- {XL (" abolution" ), XL (" abolition" )},
68- {XL (" intermedaite" ), XL (" intermediate" )}
69- };
69+ {XL (" tke" ), XL (" the" )},
70+ {XL (" abolution" ), XL (" abolition" )},
71+ {XL (" intermedaite" ), XL (" intermediate" )}};
7072
71- for (auto &word : words) {
73+ for (auto &word : words)
74+ {
7275 auto results = symSpell.Lookup (word.first , Verbosity::Closest);
7376 REQUIRE (results[0 ].term == word.second );
7477 }
7578 }
7679
77- SECTION (" Do Spell Correction With MaxEditDistance=2" ) {
80+ SECTION (" Do Spell Correction With MaxEditDistance=2" )
81+ {
7882 SymSpell symSpell (maxEditDistance, prefixLength);
7983 symSpell.LoadDictionary (" ../resources/frequency_dictionary_en_82_765.txt" , 0 , 1 , XL (' ' ));
8084 std::unordered_map<xstring, xstring> words_within_distance = {
81- {XL (" tke" ), XL (" the" )},
82- {XL (" extrine" ), XL (" extreme" )}
83- };
85+ {XL (" tke" ), XL (" the" )},
86+ {XL (" extrine" ), XL (" extreme" )}};
8487
8588 std::unordered_map<xstring, xstring> words_far_distance = {
86- {XL (" elipnaht" ), XL (" elephant" )},
87- {XL (" aotocrasie" ), XL (" autocracy" )}
88- };
89+ {XL (" elipnaht" ), XL (" elephant" )},
90+ {XL (" aotocrasie" ), XL (" autocracy" )}};
8991
90- for (auto &word : words_within_distance) {
92+ for (auto &word : words_within_distance)
93+ {
9194 auto results = symSpell.Lookup (word.first , Verbosity::Closest, 2 );
9295 REQUIRE (results[0 ].term == word.second );
9396 }
9497
95- for (auto &word : words_far_distance) {
98+ for (auto &word : words_far_distance)
99+ {
96100 auto results = symSpell.Lookup (word.first , Verbosity::Closest, 2 );
97101 REQUIRE (results.empty ());
98102 }
99103 }
100104
101- SECTION (" Correct Compound Mistakes" ) {
105+ SECTION (" Correct Compound Mistakes" )
106+ {
102107 SymSpell symSpell (maxEditDistance, prefixLength);
103108 symSpell.LoadDictionary (" ../resources/frequency_dictionary_en_82_765.txt" , 0 , 1 , XL (' ' ));
104109 symSpell.LoadBigramDictionary (" ../resources/frequency_bigramdictionary_en_243_342.txt" , 0 , 2 , XL (' ' ));
105110 std::unordered_map<xstring, xstring> compunded_sentences = {
106- {XL (" whereis th elove" ), XL (" where is the love" )},
107- {XL (" whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him" ), XL (" where is the love he had dated for much of the past who couldn't read in sixth grade and inspired him" )},
108- {XL (" in te dhird qarter oflast jear he hadlearned ofca sekretplan" ), XL (" in the third quarter of last year he had learned of a secret plan" )},
109- {XL (" the bigjest playrs in te strogsommer film slatew ith plety of funn" ), XL (" the biggest players in the strong summer film slate with plenty of fun" )}
110- };
111+ {XL (" whereis th elove" ), XL (" where is the love" )},
112+ {XL (" whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him" ), XL (" where is the love he had dated for much of the past who couldn't read in sixth grade and inspired him" )},
113+ {XL (" in te dhird qarter oflast jear he hadlearned ofca sekretplan" ), XL (" in the third quarter of last year he had learned of a secret plan" )},
114+ {XL (" the bigjest playrs in te strogsommer film slatew ith plety of funn" ), XL (" the biggest players in the strong summer film slate with plenty of fun" )}};
111115
112- for (auto &sentence : compunded_sentences) {
116+ for (auto &sentence : compunded_sentences)
117+ {
113118 auto results = symSpell.LookupCompound (sentence.first );
114119 REQUIRE (results[0 ].term == sentence.second );
115120 }
116121 }
117122
118- SECTION (" Check top verbosity" ) {
123+ SECTION (" Check top verbosity" )
124+ {
119125 SymSpell symSpellcustom (maxEditDistance, prefixLength);
120126 symSpellcustom.LoadDictionary (" ../resources/frequency_dictionary_en_test_verbosity.txt" , 0 , 1 , XL (' ' ));
121127 auto results = symSpellcustom.Lookup (XL (" stream" ), Verbosity::Top, 2 );
122128 REQUIRE (1 == results.size ());
123129 REQUIRE (XL (" streamc" ) == results[0 ].term );
124130 }
125131
126- SECTION (" Check all verbosity" ) {
132+ SECTION (" Check all verbosity" )
133+ {
127134 SymSpell symSpellcustom (maxEditDistance, prefixLength);
128135 symSpellcustom.LoadDictionary (" ../resources/frequency_dictionary_en_test_verbosity.txt" , 0 , 1 , XL (' ' ));
129136 auto results = symSpellcustom.Lookup (XL (" stream" ), Verbosity::All, 2 );
130137 REQUIRE (2 == results.size ());
131138 }
132139
133- SECTION (" check custom entry of dictionary" ) {
140+ SECTION (" check custom entry of dictionary" )
141+ {
134142 SymSpell symSpellcustom (maxEditDistance, prefixLength, DEFAULT_COUNT_THRESHOLD, DEFAULT_INITIAL_CAPACITY,
135143 DEFAULT_COMPACT_LEVEL);
136144 auto staging = std::make_shared<SuggestionStage>(100 );
@@ -140,22 +148,27 @@ TEST_CASE("Testing English", "[english]") {
140148 REQUIRE (XL (" take" ) == results[0 ].term );
141149 }
142150
143- SECTION (" check save works fine." ) {
151+ SECTION (" check save works fine." )
152+ {
144153 SymSpell symSpellcustom (maxEditDistance, prefixLength, DEFAULT_COUNT_THRESHOLD, DEFAULT_INITIAL_CAPACITY,
145154 DEFAULT_COMPACT_LEVEL);
146155 symSpellcustom.LoadDictionary (" ../resources/frequency_dictionary_en_test_verbosity.txt" , 0 , 1 , XL (' ' ));
147156 auto filepath = " ../resources/model.bin" ;
148157 std::ofstream binary_path (filepath, std::ios::out | std::ios::app | std::ios::binary);
149- if (binary_path.is_open ()) {
158+ if (binary_path.is_open ())
159+ {
150160 cereal::BinaryOutputArchive oarchive (binary_path);
151161 oarchive (symSpellcustom);
152- } else {
162+ }
163+ else
164+ {
153165 throw std::invalid_argument (" Cannot save to file" );
154166 }
155167 std::remove (filepath);
156168 }
157169
158- SECTION (" Compund mistakes distance" ) {
170+ SECTION (" Compund mistakes distance" )
171+ {
159172 SymSpell symSpell (maxEditDistance, prefixLength);
160173 symSpell.LoadDictionary (" ../resources/frequency_dictionary_en_82_765.txt" , 0 , 1 , XL (' ' ));
161174 symSpell.LoadBigramDictionary (" ../resources/frequency_bigramdictionary_en_243_342.txt" , 0 , 2 , XL (' ' ));
@@ -165,7 +178,8 @@ TEST_CASE("Testing English", "[english]") {
165178 REQUIRE (results[0 ].distance == 2 );
166179 }
167180
168- SECTION (" Compund mistakes capitals" ) {
181+ SECTION (" Compund mistakes capitals" )
182+ {
169183 SymSpell symSpell (maxEditDistance, prefixLength);
170184 symSpell.LoadDictionary (" ../resources/frequency_dictionary_en_82_765.txt" , 0 , 1 , XL (' ' ));
171185 xstring typo = XL (" can yu readthis" );
@@ -174,7 +188,8 @@ TEST_CASE("Testing English", "[english]") {
174188 REQUIRE (results[0 ].term == correction);
175189 }
176190
177- SECTION (" Lookup transfer casing" ) {
191+ SECTION (" Lookup transfer casing" )
192+ {
178193 SymSpell symSpell (maxEditDistance, prefixLength);
179194 symSpell.LoadDictionary (" ../resources/frequency_dictionary_en_82_765.txt" , 0 , 1 , XL (' ' ));
180195 xstring typo = XL (" meMberSa" );
0 commit comments