diff --git a/CMakeLists.txt b/CMakeLists.txt index ec0c297..c9181b7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,6 @@ include_directories(nlp-engine/cs/include) include_directories(nlp-engine/include) include_directories(nlp-engine/include/Api) include_directories(nlp-engine/include/Api/lite) -include_directories(nlp-engine/include) include_directories(${ICU_INCLUDE_DIR}) if(WIN32) diff --git a/NLPPlus/analyzers/address-parser/input/text.txt b/NLPPlus/analyzers/address-parser/input/text.txt new file mode 100644 index 0000000..57e2986 --- /dev/null +++ b/NLPPlus/analyzers/address-parser/input/text.txt @@ -0,0 +1,24 @@ +The address is 123 Main Street,345 Wyoming chanel Cityville, 64775 Country . +Located at 456 Oak Street, Townsville, 24512 USA. +John Smith resides in Apartment 789, Pine Towers,76 sde 45114 Canada. +She's living at 987 Elm Lane, Riverside, Canada 14521. +Dwelling at 654 Cedar Road, Hamletsville, 15451 India. +My home address is 876 Pine Circle, Mountainside, 51873 Brazil. +Here is my address - 234 Willow Street, Hilltop, 48929 South Africa. +Find me at 543 Redwood Lane, Lakeside,38849 Mexico . +His residence is 321 Maple Avenue, Villageton, 56234 UK . +We stay at 789 Birch Place, Meadowville, New Zealand 34245. +12 1ST ST NW +HAMPTON IA 50441-1902 +BG LIGHT CO +HC 72 BOX 293 A +DULUTH MN 55811-9702 +ATTN MS LUCY MARTIN +XYZ CORPORATION +4321 OAK ST +OAKTON MD 12345-6789 +You can reach me at 456 Maple Road, Suburbia, France 34245. +Come to 987 Oak Lane, Riverside, Canada 34235. +Check out 876 Pine Circle, Mountainside, Brazil 32345. +Living in 543 Redwood Lane, Lakeside, Mexico 34415. +At this location, you'll find 654 Cedar Road, Hamletsville, Australia 34213. diff --git a/NLPPlus/analyzers/emailaddress/input/text.txt b/NLPPlus/analyzers/emailaddress/input/text.txt new file mode 100644 index 0000000..2c0d5ef --- /dev/null +++ b/NLPPlus/analyzers/emailaddress/input/text.txt @@ -0,0 +1,30 @@ +John Smith can be reached at john.smith@emailprovider.com for any +inquiries about the project. Meanwhile, Sarah Johnson's contact information +is sarah_j@example.org, and she's available for collaboration. If you need +technical support, please contact support-team123@techsupport.net, and +they will assist you promptly. For general questions, you can also reach +out to info.business@company-inc.com, and our team will be happy to help. +euhwu@i3734h2ibfba.com iojwoije woiejr, email id: alice.smith@dept.departmentname.university.edu.uk +support.team@my_company-123.support 12345@numeric-email-address.5678 This address contains a hyphen in both the local and domain parts, which is allowed in email addresses. custom.name@example-email.net It combines letters, a period, a hyphen, and underscores in both the local and domain parts. +user_name@subdomain.co.uk +please send an email at kenethfpp at mails dot yahoo dot uk. But dont spam charitysc123 at 243 dot org +john.doe@example.com +mary.smith123@gmail.com +jane_doe@outlook.co.uk +steve-jones@yahoo.com +jennifer.smith+work@emaildomain.net +contact.us@my-website.org +user1234@subdomain.example.com +support_team@company-inc.com +user@email.tld +info@mycompany.net +sales@e-commerce-site.org +custom.er123@domain-name.com +webmaster@emailserver.biz +no_reply@noreply-domain.info +admin@server123.net +test.email@emailprovider.co +marketing.department@email.org +me@myname.me +contact-sales@email-service.io +support@customer-service.pro \ No newline at end of file diff --git a/NLPPlus/analyzers/links/input/text.txt b/NLPPlus/analyzers/links/input/text.txt new file mode 100644 index 0000000..a2ead3c --- /dev/null +++ b/NLPPlus/analyzers/links/input/text.txt @@ -0,0 +1,19 @@ +In the world of technology, [artificial intelligence](https://en.wikipedia.org/wiki/Artificial_intelligence) has become a major player. From autonomous vehicles to smart homes, AI is changing the way we live and work. One of the most exciting developments is in the field of [machine learning](https://en.wikipedia.org/wiki/Machine_learning), where computers can learn and improve from experience without being explicitly programmed. + +If you're interested in staying up-to-date with the latest tech trends, consider following reputable tech news websites like [TechCrunch](https://techcrunch.com/) and [Wired](https://www.wired.com/). These sources provide insightful articles and in-depth analysis on the ever-evolving tech landscape. + +For those looking to enhance their coding skills, websites like [Codecademy](https://www.codecademy.com/) and [Coursera](https://www.coursera.org/) offer a wide range of online courses in programming and data science. + +Explore the fascinating world of AI and tech, and you'll discover a wealth of opportunities to learn and grow! + +Final domain is in www.artrmidjifwoith-jvjf.cpa.aero. Apart from this a file transfer protocal hyperlink ftp://ftp.example.com/myfile.zip. +https://download.mozilla.org.uk/?product=firefox-latest-ssl&os=win64&lang=en-US +For any further assistnace go to https://music.youtube.com/channel/UCE5XNpliPM-SmyFEp61tL_g. This is the bext website out there +https://play.google.com/store/apps/details?id=com.example.app. +https://www.google.com/search?q=example. +ftp://ftp.example.com/files/file.zip +https://www.facebook.com/user. +https://bit.ly/123xyz +https://www.proxyserver.com/browse?url=https://www.example.com +https://www.example.com/videos/video.mp4 +https://github.com/VisualText/py-package-nlpengine/tree/main/NLPPlus/analyzers diff --git a/NLPPlus/analyzers/parse-en-us/input/text.txt b/NLPPlus/analyzers/parse-en-us/input/text.txt new file mode 100644 index 0000000..6769dd6 --- /dev/null +++ b/NLPPlus/analyzers/parse-en-us/input/text.txt @@ -0,0 +1 @@ +Hello world! \ No newline at end of file diff --git a/NLPPlus/analyzers/parse-en-us/kb/user/all.dict b/NLPPlus/analyzers/parse-en-us/kb/user/en-full.dict similarity index 99% rename from NLPPlus/analyzers/parse-en-us/kb/user/all.dict rename to NLPPlus/analyzers/parse-en-us/kb/user/en-full.dict index 567cc2b..3bfca44 100644 --- a/NLPPlus/analyzers/parse-en-us/kb/user/all.dict +++ b/NLPPlus/analyzers/parse-en-us/kb/user/en-full.dict @@ -1,3 +1,4 @@ +# Full English Dictionary aah pos=verb aah pos=int aahed pos=verb diff --git a/NLPPlus/analyzers/telephone/input/text.txt b/NLPPlus/analyzers/telephone/input/text.txt new file mode 100644 index 0000000..8740ba6 --- /dev/null +++ b/NLPPlus/analyzers/telephone/input/text.txt @@ -0,0 +1,27 @@ +020 1234 1234 ++442012341234 +8 601 12345 ++37060112345 +2124567890 +212-456-7890 +(212)456-7890 +(212)-456-7890 +212.456.7890 +212 456 7890 ++12124567890 ++12124567890 ++1 212.456.7890 ++212-456-7890 +1-212-456-7890 +456-7890 +212-456-7890 ++1-212-456-7890 +1-212-456-7890 +001-212-456-7890 +191-212-456-7890 +415 123 1234 ++14151231234 +020 1234 1234 ++442012341234 +8 601 12345 ++37060112345 \ No newline at end of file diff --git a/nlp-engine b/nlp-engine index dcefb6d..0ea5b3a 160000 --- a/nlp-engine +++ b/nlp-engine @@ -1 +1 @@ -Subproject commit dcefb6dd60fa2f3e644dbe878fde1d77c058b7d8 +Subproject commit 0ea5b3a3d362c124dbdfbea93f3c36111986889f diff --git a/tests/data/address-parser/text2.txt b/tests/data/address-parser/text2.txt deleted file mode 100644 index 1d2d4e9..0000000 --- a/tests/data/address-parser/text2.txt +++ /dev/null @@ -1,8 +0,0 @@ -On a crisp morning in Plummer's Landing, Kentucky, Mr. Walter W. Witherspoon Jr. of MDM Enterprises Inc. was busy managing the operations at 1401 S. Main St., Plummer’s Landing, KY 41081-1411. The postman, familiar with the rural route, delivered important correspondence to RR 3 Box 9, Canton, OH 44730-9521, where Mr. Robert Miller resides. -Meanwhile, ABC Electronics, located on HC 72 Box 293 A, Duluth, MN 55811-9702, received a shipment via highway contract. The delivery, expertly handled, ensured that the electronic components would be used to light up homes throughout the region. -Over in Hampton, Iowa, Robin Monet enjoyed a peaceful afternoon on 12 1st St NW, Hampton, IA 50441-1902. The numbered street, though modest in size, held a certain charm that captivated the residents of the small town. -In the urban landscape of Washington, DC, Mr. James F. Jones resided at 4417 Brooks St NE, Washington, DC 20019-4649. The individual's address, like many in the city, boasted a mix of history and modernity. -As dusk fell in the Pacific Northwest, a package arrived at 123 Main Street SW, 98106-1234, where the team at XYZ Corporation eagerly awaited a shipment of innovative products. -The rural tranquility of West Stockbridge and the serene atmosphere of Newberry Springs provided a stark contrast to the bustling city life. Residents cherished the simplicity of W Stockbridge and Newberry Spgs, knowing that their communities thrived in harmony with nature. -In the heart of Puerto Rico, the urbanization of Flamingo Hills adorned the address of 123 Calle Main, Anytown, PR 00957-1234. The unique blend of urban and tropical elements made it a distinctive location on the island. -This diverse tapestry of addresses reflects the richness and variety of American landscapes, each telling its own story within the fabric of the nation. \ No newline at end of file diff --git a/tests/data/address-parser/text2.txt_log/final.tree b/tests/data/address-parser/text2.txt_log/final.tree deleted file mode 100644 index 821f09a..0000000 --- a/tests/data/address-parser/text2.txt_log/final.tree +++ /dev/null @@ -1,335 +0,0 @@ - -FINAL OUTPUT TREE: - -_ROOT [0,1843,0,1841,0,0,node,un, ("TOT LINES" 1) ("TOT TABS" 0) ("TOT LOWERS" 194) ("TOT CAPS" 88) ("TOT UPPERS" 20) ("TOT NUMS" 24)] - _BLANKLINE [0,0,0,0,4,13,node] - \n [0,0,0,0,0,0,white] - On [1,2,1,2,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - a [4,4,4,4,0,0,alpha, ("SP" 1) ("lower" 1)] - crisp [6,10,6,10,0,0,alpha, ("SP" 1) ("lower" 1)] - morning [12,18,12,18,0,0,alpha, ("SP" 1) ("lower" 1)] - in [20,21,20,21,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - Plummer [23,29,23,29,0,0,alpha, ("SP" 1) ("cap" 1)] - s [31,31,31,31,0,0,alpha, ("NOSP" 1) ("lower" 1)] - Landing [33,39,33,39,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "lndg")] - Kentucky [42,49,42,49,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" "KY")] - Mr [52,53,52,53,0,0,alpha, ("SP" 1) ("cap" 1)] - Walter [56,61,56,61,0,0,alpha, ("SP" 1) ("cap" 1)] - W [63,63,63,63,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Witherspoon [66,76,66,76,0,0,alpha, ("SP" 1) ("cap" 1)] - Jr [78,79,78,79,0,0,alpha, ("SP" 1) ("cap" 1)] - of [82,83,82,83,0,0,alpha, ("SP" 1) ("lower" 1)] - MDM [85,87,85,87,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Enterprises [89,99,89,99,0,0,alpha, ("SP" 1) ("cap" 1)] - Inc [101,103,101,103,0,0,alpha, ("SP" 1) ("cap" 1)] - was [106,108,106,108,0,0,alpha, ("SP" 1) ("lower" 1)] - busy [110,113,110,113,0,0,alpha, ("SP" 1) ("lower" 1)] - managing [115,122,115,122,0,0,alpha, ("SP" 1) ("lower" 1)] - the [124,126,124,126,0,0,alpha, ("SP" 1) ("lower" 1)] - operations [128,137,128,137,0,0,alpha, ("SP" 1) ("lower" 1)] - at [139,140,139,140,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [142,193,142,191,10,13,node, ("state" "kentucky") ("city" "Landing") ("streetsuff" "ky") ("streetname" "S") ("streetnum" "1401")] - 1401 [142,145,142,145,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - S [147,147,147,147,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("streetname" 1)] - Main [150,153,150,153,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "main") ("streetname" 1)] - St [155,156,155,156,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "st")] - Plummer [160,166,160,166,0,0,alpha, ("SP" 1) ("cap" 1)] - s [170,170,168,168,0,0,alpha, ("NOSP" 1) ("lower" 1) ("streetname" 1)] - Landing [172,178,170,176,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "lndg") ("city" 1) ("streetname" 1)] - KY [181,182,179,180,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "kentucky") ("usps" "ky")] - _pincode [184,193,182,191,6,17,node] - 41081 [184,188,182,186,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [189,189,187,187,0,0,punct, ("NOSP" 1)] - 1411 [190,193,188,191,0,0,num, ("NOSP" 1) ("part2" 1)] - The [196,198,194,196,0,0,alpha, ("SP" 1) ("cap" 1)] - postman [200,206,198,204,0,0,alpha, ("SP" 1) ("lower" 1)] - familiar [209,216,207,214,0,0,alpha, ("SP" 1) ("lower" 1)] - with [218,221,216,219,0,0,alpha, ("SP" 1) ("lower" 1)] - the [223,225,221,223,0,0,alpha, ("SP" 1) ("lower" 1)] - rural [227,231,225,229,0,0,alpha, ("SP" 1) ("lower" 1)] - route [233,237,231,235,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "rte")] - delivered [240,248,238,246,0,0,alpha, ("SP" 1) ("lower" 1)] - important [250,258,248,256,0,0,alpha, ("SP" 1) ("lower" 1)] - correspondence [260,273,258,271,0,0,alpha, ("SP" 1) ("lower" 1)] - to [275,276,273,274,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [278,310,276,308,10,13,node, ("state" "ohio") ("city" "Canton")] - _ruralroute [278,287,276,285,11,18,node] - RR [278,279,276,277,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("starting" 1)] - 3 [281,281,279,279,0,0,num, ("SP" 1)] - Box [283,285,281,283,0,0,alpha, ("SP" 1) ("cap" 1)] - 9 [287,287,285,285,0,0,num, ("SP" 1) ("starting" 1)] - Canton [290,295,288,293,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - OH [298,299,296,297,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "ohio")] - _pincode [301,310,299,308,6,17,node] - 44730 [301,305,299,303,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [306,306,304,304,0,0,punct, ("NOSP" 1)] - 9521 [307,310,305,308,0,0,num, ("NOSP" 1) ("part2" 1)] - where [313,317,311,315,0,0,alpha, ("SP" 1) ("lower" 1)] - Mr [319,320,317,318,0,0,alpha, ("SP" 1) ("cap" 1)] - Robert [323,328,321,326,0,0,alpha, ("SP" 1) ("cap" 1)] - Miller [330,335,328,333,0,0,alpha, ("SP" 1) ("cap" 1)] - resides [337,343,335,341,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - Meanwhile [346,354,344,352,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - ABC [357,359,355,357,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Electronics [361,371,359,369,0,0,alpha, ("SP" 1) ("cap" 1)] - located [374,380,372,378,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - on [382,383,380,381,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [385,422,383,420,10,13,node, ("state" "minnesota") ("city" "Duluth")] - _highwayContract [385,399,383,397,11,34,node] - HC [385,386,383,384,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("starting" 1)] - 72 [388,389,386,387,0,0,num, ("SP" 1)] - Box [391,393,389,391,0,0,alpha, ("SP" 1) ("cap" 1)] - 293 [395,397,393,395,0,0,num, ("SP" 1) ("starting" 1)] - A [399,399,397,397,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Duluth [402,407,400,405,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - MN [410,411,408,409,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "minnesota")] - _pincode [413,422,411,420,6,17,node] - 55811 [413,417,411,415,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [418,418,416,416,0,0,punct, ("NOSP" 1)] - 9702 [419,422,417,420,0,0,num, ("NOSP" 1) ("part2" 1)] - received [425,432,423,430,0,0,alpha, ("SP" 1) ("lower" 1)] - a [434,434,432,432,0,0,alpha, ("SP" 1) ("lower" 1)] - shipment [436,443,434,441,0,0,alpha, ("SP" 1) ("lower" 1)] - via [445,447,443,445,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "via")] - highway [449,455,447,453,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "hwy")] - contract [457,464,455,462,0,0,alpha, ("SP" 1) ("lower" 1)] - The [467,469,465,467,0,0,alpha, ("SP" 1) ("cap" 1)] - delivery [471,478,469,476,0,0,alpha, ("SP" 1) ("lower" 1)] - expertly [481,488,479,486,0,0,alpha, ("SP" 1) ("lower" 1)] - handled [490,496,488,494,0,0,alpha, ("SP" 1) ("lower" 1)] - ensured [499,505,497,503,0,0,alpha, ("SP" 1) ("lower" 1)] - that [507,510,505,508,0,0,alpha, ("SP" 1) ("lower" 1)] - the [512,514,510,512,0,0,alpha, ("SP" 1) ("lower" 1)] - electronic [516,525,514,523,0,0,alpha, ("SP" 1) ("lower" 1)] - components [527,536,525,534,0,0,alpha, ("SP" 1) ("lower" 1)] - would [538,542,536,540,0,0,alpha, ("SP" 1) ("lower" 1)] - be [544,545,542,543,0,0,alpha, ("SP" 1) ("lower" 1)] - used [547,550,545,548,0,0,alpha, ("SP" 1) ("lower" 1)] - to [552,553,550,551,0,0,alpha, ("SP" 1) ("lower" 1)] - light [555,559,553,557,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "lgt")] - up [561,562,559,560,0,0,alpha, ("SP" 1) ("lower" 1)] - homes [564,568,562,566,0,0,alpha, ("SP" 1) ("lower" 1)] - throughout [570,579,568,577,0,0,alpha, ("SP" 1) ("lower" 1)] - the [581,583,579,581,0,0,alpha, ("SP" 1) ("lower" 1)] - region [585,590,583,588,0,0,alpha, ("SP" 1) ("lower" 1)] - Over [593,596,591,594,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - in [598,599,596,597,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - Hampton [601,607,599,605,0,0,alpha, ("SP" 1) ("cap" 1)] - Iowa [610,613,608,611,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" "IA")] - Robin [616,620,614,618,0,0,alpha, ("SP" 1) ("cap" 1)] - Monet [622,626,620,624,0,0,alpha, ("SP" 1) ("cap" 1)] - enjoyed [628,634,626,632,0,0,alpha, ("SP" 1) ("lower" 1)] - a [636,636,634,634,0,0,alpha, ("SP" 1) ("lower" 1)] - peaceful [638,645,636,643,0,0,alpha, ("SP" 1) ("lower" 1)] - afternoon [647,655,645,653,0,0,alpha, ("SP" 1) ("lower" 1)] - on [657,658,655,656,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [660,695,658,693,10,13,node, ("state" "iowa") ("city" "Hampton") ("streetsuff" "st") ("streetnum" "12") ("streetname" "st")] - 12 [660,661,658,659,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - 1 [663,663,661,661,0,0,num, ("SP" 1) ("starting" 1)] - st [664,665,662,663,0,0,alpha, ("NOSP" 1) ("lower" 1) ("usps" "st") ("streetname" 1)] - St [667,668,665,666,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "st")] - NW [670,671,668,669,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Hampton [674,680,672,678,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - IA [683,684,681,682,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "iowa")] - _pincode [686,695,684,693,6,17,node] - 50441 [686,690,684,688,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [691,691,689,689,0,0,punct, ("NOSP" 1)] - 1902 [692,695,690,693,0,0,num, ("NOSP" 1) ("part2" 1)] - The [698,700,696,698,0,0,alpha, ("SP" 1) ("cap" 1)] - numbered [702,709,700,707,0,0,alpha, ("SP" 1) ("lower" 1)] - street [711,716,709,714,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "st")] - though [719,724,717,722,0,0,alpha, ("SP" 1) ("lower" 1)] - modest [726,731,724,729,0,0,alpha, ("SP" 1) ("lower" 1)] - in [733,734,731,732,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - size [736,739,734,737,0,0,alpha, ("SP" 1) ("lower" 1)] - held [742,745,740,743,0,0,alpha, ("SP" 1) ("lower" 1)] - a [747,747,745,745,0,0,alpha, ("SP" 1) ("lower" 1)] - certain [749,755,747,753,0,0,alpha, ("SP" 1) ("lower" 1)] - charm [757,761,755,759,0,0,alpha, ("SP" 1) ("lower" 1)] - that [763,766,761,764,0,0,alpha, ("SP" 1) ("lower" 1)] - captivated [768,777,766,775,0,0,alpha, ("SP" 1) ("lower" 1)] - the [779,781,777,779,0,0,alpha, ("SP" 1) ("lower" 1)] - residents [783,791,781,789,0,0,alpha, ("SP" 1) ("lower" 1)] - of [793,794,791,792,0,0,alpha, ("SP" 1) ("lower" 1)] - the [796,798,794,796,0,0,alpha, ("SP" 1) ("lower" 1)] - small [800,804,798,802,0,0,alpha, ("SP" 1) ("lower" 1)] - town [806,809,804,807,0,0,alpha, ("SP" 1) ("lower" 1)] - In [812,813,810,811,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1) ("abbrev" 1) ("state" "indiana")] - the [815,817,813,815,0,0,alpha, ("SP" 1) ("lower" 1)] - urban [819,823,817,821,0,0,alpha, ("SP" 1) ("lower" 1)] - landscape [825,833,823,831,0,0,alpha, ("SP" 1) ("lower" 1)] - of [835,836,833,834,0,0,alpha, ("SP" 1) ("lower" 1)] - Washington [838,847,836,845,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" "WA")] - DC [850,851,848,849,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Mr [854,855,852,853,0,0,alpha, ("SP" 1) ("cap" 1)] - James [858,862,856,860,0,0,alpha, ("SP" 1) ("cap" 1)] - F [864,864,862,862,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Jones [867,871,865,869,0,0,alpha, ("SP" 1) ("cap" 1)] - resided [873,879,871,877,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - at [881,882,879,880,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [884,927,882,925,10,13,node, ("state" "Washington") ("city" "St") ("streetsuff" "st") ("streetname" "Brooks") ("streetnum" "4417")] - 4417 [884,887,882,885,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Brooks [889,894,887,892,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "brks") ("streetname" 1)] - St [896,897,894,895,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "st") ("city" 1)] - NE [899,900,897,898,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "nebraska") ("city" 1)] - Washington [903,912,901,910,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" "WA")] - DC [915,916,913,914,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - _pincode [918,927,916,925,6,17,node] - 20019 [918,922,916,920,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [923,923,921,921,0,0,punct, ("NOSP" 1)] - 4649 [924,927,922,925,0,0,num, ("NOSP" 1) ("part2" 1)] - The [930,932,928,930,0,0,alpha, ("SP" 1) ("cap" 1)] - individual [934,943,932,941,0,0,alpha, ("SP" 1) ("lower" 1)] - s [945,945,943,943,0,0,alpha, ("NOSP" 1) ("lower" 1)] - address [947,953,945,951,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - like [956,959,954,957,0,0,alpha, ("SP" 1) ("lower" 1)] - many [961,964,959,962,0,0,alpha, ("SP" 1) ("lower" 1)] - in [966,967,964,965,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - the [969,971,967,969,0,0,alpha, ("SP" 1) ("lower" 1)] - city [973,976,971,974,0,0,alpha, ("SP" 1) ("lower" 1)] - boasted [979,985,977,983,0,0,alpha, ("SP" 1) ("lower" 1)] - a [987,987,985,985,0,0,alpha, ("SP" 1) ("lower" 1)] - mix [989,991,987,989,0,0,alpha, ("SP" 1) ("lower" 1)] - of [993,994,991,992,0,0,alpha, ("SP" 1) ("lower" 1)] - history [996,1002,994,1000,0,0,alpha, ("SP" 1) ("lower" 1)] - and [1004,1006,1002,1004,0,0,alpha, ("SP" 1) ("lower" 1)] - modernity [1008,1016,1006,1014,0,0,alpha, ("SP" 1) ("lower" 1)] - As [1019,1020,1017,1018,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - dusk [1022,1025,1020,1023,0,0,alpha, ("SP" 1) ("lower" 1)] - fell [1027,1030,1025,1028,0,0,alpha, ("SP" 1) ("lower" 1)] - in [1032,1033,1030,1031,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - the [1035,1037,1033,1035,0,0,alpha, ("SP" 1) ("lower" 1)] - Pacific [1039,1045,1037,1043,0,0,alpha, ("SP" 1) ("cap" 1)] - Northwest [1047,1055,1045,1053,0,0,alpha, ("SP" 1) ("cap" 1)] - a [1058,1058,1056,1056,0,0,alpha, ("SP" 1) ("lower" 1)] - package [1060,1066,1058,1064,0,0,alpha, ("SP" 1) ("lower" 1)] - arrived [1068,1074,1066,1072,0,0,alpha, ("SP" 1) ("lower" 1)] - at [1076,1077,1074,1075,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [1079,1108,1077,1106,10,13,node, ("streetsuff" "st") ("streetname" "Main") ("streetnum" "123")] - 123 [1079,1081,1077,1079,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Main [1083,1086,1081,1084,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "main") ("streetname" 1)] - Street [1088,1093,1086,1091,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "st")] - SW [1095,1096,1093,1094,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - _pincode [1099,1108,1097,1106,6,17,node] - 98106 [1099,1103,1097,1101,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [1104,1104,1102,1102,0,0,punct, ("NOSP" 1)] - 1234 [1105,1108,1103,1106,0,0,num, ("NOSP" 1) ("part2" 1)] - where [1111,1115,1109,1113,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1117,1119,1115,1117,0,0,alpha, ("SP" 1) ("lower" 1)] - team [1121,1124,1119,1122,0,0,alpha, ("SP" 1) ("lower" 1)] - at [1126,1127,1124,1125,0,0,alpha, ("SP" 1) ("lower" 1)] - XYZ [1129,1131,1127,1129,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Corporation [1133,1143,1131,1141,0,0,alpha, ("SP" 1) ("cap" 1)] - eagerly [1145,1151,1143,1149,0,0,alpha, ("SP" 1) ("lower" 1)] - awaited [1153,1159,1151,1157,0,0,alpha, ("SP" 1) ("lower" 1)] - a [1161,1161,1159,1159,0,0,alpha, ("SP" 1) ("lower" 1)] - shipment [1163,1170,1161,1168,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1172,1173,1170,1171,0,0,alpha, ("SP" 1) ("lower" 1)] - innovative [1175,1184,1173,1182,0,0,alpha, ("SP" 1) ("lower" 1)] - products [1186,1193,1184,1191,0,0,alpha, ("SP" 1) ("lower" 1)] - The [1196,1198,1194,1196,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - rural [1200,1204,1198,1202,0,0,alpha, ("SP" 1) ("lower" 1)] - tranquility [1206,1216,1204,1214,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1218,1219,1216,1217,0,0,alpha, ("SP" 1) ("lower" 1)] - West [1221,1224,1219,1222,0,0,alpha, ("SP" 1) ("cap" 1)] - Stockbridge [1226,1236,1224,1234,0,0,alpha, ("SP" 1) ("cap" 1)] - and [1238,1240,1236,1238,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1242,1244,1240,1242,0,0,alpha, ("SP" 1) ("lower" 1)] - serene [1246,1251,1244,1249,0,0,alpha, ("SP" 1) ("lower" 1)] - atmosphere [1253,1262,1251,1260,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1264,1265,1262,1263,0,0,alpha, ("SP" 1) ("lower" 1)] - Newberry [1267,1274,1265,1272,0,0,alpha, ("SP" 1) ("cap" 1)] - Springs [1276,1282,1274,1280,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "spgs")] - provided [1284,1291,1282,1289,0,0,alpha, ("SP" 1) ("lower" 1)] - a [1293,1293,1291,1291,0,0,alpha, ("SP" 1) ("lower" 1)] - stark [1295,1299,1293,1297,0,0,alpha, ("SP" 1) ("lower" 1)] - contrast [1301,1308,1299,1306,0,0,alpha, ("SP" 1) ("lower" 1)] - to [1310,1311,1308,1309,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1313,1315,1311,1313,0,0,alpha, ("SP" 1) ("lower" 1)] - bustling [1317,1324,1315,1322,0,0,alpha, ("SP" 1) ("lower" 1)] - city [1326,1329,1324,1327,0,0,alpha, ("SP" 1) ("lower" 1)] - life [1331,1334,1329,1332,0,0,alpha, ("SP" 1) ("lower" 1)] - Residents [1337,1345,1335,1343,0,0,alpha, ("SP" 1) ("cap" 1)] - cherished [1347,1355,1345,1353,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1357,1359,1355,1357,0,0,alpha, ("SP" 1) ("lower" 1)] - simplicity [1361,1370,1359,1368,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1372,1373,1370,1371,0,0,alpha, ("SP" 1) ("lower" 1)] - W [1375,1375,1373,1373,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Stockbridge [1377,1387,1375,1385,0,0,alpha, ("SP" 1) ("cap" 1)] - and [1389,1391,1387,1389,0,0,alpha, ("SP" 1) ("lower" 1)] - Newberry [1393,1400,1391,1398,0,0,alpha, ("SP" 1) ("cap" 1)] - Spgs [1402,1405,1400,1403,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "spgs")] - knowing [1408,1414,1406,1412,0,0,alpha, ("SP" 1) ("lower" 1)] - that [1416,1419,1414,1417,0,0,alpha, ("SP" 1) ("lower" 1)] - their [1421,1425,1419,1423,0,0,alpha, ("SP" 1) ("lower" 1)] - communities [1427,1437,1425,1435,0,0,alpha, ("SP" 1) ("lower" 1)] - thrived [1439,1445,1437,1443,0,0,alpha, ("SP" 1) ("lower" 1)] - in [1447,1448,1445,1446,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - harmony [1450,1456,1448,1454,0,0,alpha, ("SP" 1) ("lower" 1)] - with [1458,1461,1456,1459,0,0,alpha, ("SP" 1) ("lower" 1)] - nature [1463,1468,1461,1466,0,0,alpha, ("SP" 1) ("lower" 1)] - In [1471,1472,1469,1470,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1) ("abbrev" 1) ("state" "indiana")] - the [1474,1476,1472,1474,0,0,alpha, ("SP" 1) ("lower" 1)] - heart [1478,1482,1476,1480,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1484,1485,1482,1483,0,0,alpha, ("SP" 1) ("lower" 1)] - Puerto [1487,1492,1485,1490,0,0,alpha, ("SP" 1) ("cap" 1)] - Rico [1494,1497,1492,1495,0,0,alpha, ("SP" 1) ("cap" 1)] - the [1500,1502,1498,1500,0,0,alpha, ("SP" 1) ("lower" 1)] - urbanization [1504,1515,1502,1513,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1517,1518,1515,1516,0,0,alpha, ("SP" 1) ("lower" 1)] - Flamingo [1520,1527,1518,1525,0,0,alpha, ("SP" 1) ("cap" 1)] - Hills [1529,1533,1527,1531,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "hls")] - adorned [1535,1541,1533,1539,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1543,1545,1541,1543,0,0,alpha, ("SP" 1) ("lower" 1)] - address [1547,1553,1545,1551,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - of [1555,1556,1553,1554,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [1558,1595,1556,1593,10,13,node, ("streetsuff" "pr") ("streetname" "Calle") ("streetnum" "123")] - 123 [1558,1560,1556,1558,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Calle [1562,1566,1560,1564,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - Main [1568,1571,1566,1569,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "main")] - Anytown [1574,1580,1572,1578,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - PR [1583,1584,1581,1582,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("usps" "pr")] - _pincode [1586,1595,1584,1593,6,17,node] - 00957 [1586,1590,1584,1588,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [1591,1591,1589,1589,0,0,punct, ("NOSP" 1)] - 1234 [1592,1595,1590,1593,0,0,num, ("NOSP" 1) ("part2" 1)] - The [1598,1600,1596,1598,0,0,alpha, ("SP" 1) ("cap" 1)] - unique [1602,1607,1600,1605,0,0,alpha, ("SP" 1) ("lower" 1)] - blend [1609,1613,1607,1611,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1615,1616,1613,1614,0,0,alpha, ("SP" 1) ("lower" 1)] - urban [1618,1622,1616,1620,0,0,alpha, ("SP" 1) ("lower" 1)] - and [1624,1626,1622,1624,0,0,alpha, ("SP" 1) ("lower" 1)] - tropical [1628,1635,1626,1633,0,0,alpha, ("SP" 1) ("lower" 1)] - elements [1637,1644,1635,1642,0,0,alpha, ("SP" 1) ("lower" 1)] - made [1646,1649,1644,1647,0,0,alpha, ("SP" 1) ("lower" 1)] - it [1651,1652,1649,1650,0,0,alpha, ("SP" 1) ("lower" 1)] - a [1654,1654,1652,1652,0,0,alpha, ("SP" 1) ("lower" 1)] - distinctive [1656,1666,1654,1664,0,0,alpha, ("SP" 1) ("lower" 1)] - location [1668,1675,1666,1673,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - on [1677,1678,1675,1676,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1680,1682,1678,1680,0,0,alpha, ("SP" 1) ("lower" 1)] - island [1684,1689,1682,1687,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "is")] - This [1692,1695,1690,1693,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - diverse [1697,1703,1695,1701,0,0,alpha, ("SP" 1) ("lower" 1)] - tapestry [1705,1712,1703,1710,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1714,1715,1712,1713,0,0,alpha, ("SP" 1) ("lower" 1)] - addresses [1717,1725,1715,1723,0,0,alpha, ("SP" 1) ("lower" 1)] - reflects [1727,1734,1725,1732,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1736,1738,1734,1736,0,0,alpha, ("SP" 1) ("lower" 1)] - richness [1740,1747,1738,1745,0,0,alpha, ("SP" 1) ("lower" 1)] - and [1749,1751,1747,1749,0,0,alpha, ("SP" 1) ("lower" 1)] - variety [1753,1759,1751,1757,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1761,1762,1759,1760,0,0,alpha, ("SP" 1) ("lower" 1)] - American [1764,1771,1762,1769,0,0,alpha, ("SP" 1) ("cap" 1)] - landscapes [1773,1782,1771,1780,0,0,alpha, ("SP" 1) ("lower" 1)] - each [1785,1788,1783,1786,0,0,alpha, ("SP" 1) ("lower" 1)] - telling [1790,1796,1788,1794,0,0,alpha, ("SP" 1) ("lower" 1)] - its [1798,1800,1796,1798,0,0,alpha, ("SP" 1) ("lower" 1)] - own [1802,1804,1800,1802,0,0,alpha, ("SP" 1) ("lower" 1)] - story [1806,1810,1804,1808,0,0,alpha, ("SP" 1) ("lower" 1)] - within [1812,1817,1810,1815,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1819,1821,1817,1819,0,0,alpha, ("SP" 1) ("lower" 1)] - fabric [1823,1828,1821,1826,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1830,1831,1828,1829,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1833,1835,1831,1833,0,0,alpha, ("SP" 1) ("lower" 1)] - nation [1837,1842,1835,1840,0,0,alpha, ("SP" 1) ("lower" 1)] diff --git a/tests/data/address-parser/text3.txt b/tests/data/address-parser/text3.txt deleted file mode 100644 index 60208ff..0000000 --- a/tests/data/address-parser/text3.txt +++ /dev/null @@ -1,8 +0,0 @@ -On a sunny afternoon in Charleston, South Carolina, Mrs. Catherine M. Rodriguez of Rodriguez & Sons Enterprises managed the bustling operations at 225 Broad St., Charleston, SC 29401-1234. The local courier, well-acquainted with the urban landscape, navigated the charming streets to deliver packages to P.O. Box 5678, Greenville, SC 29607-8901. -Meanwhile, Summit Innovations, located at 789 Mountain View Ave, Boulder, CO 80302-4567, received a shipment via highway contract. The delivery, efficiently coordinated, ensured that cutting-edge technology would soon be available to residents throughout the region. -Over in Portland, Oregon, Ms. Emily Turner enjoyed the vibrant energy of 45 Elm St NW, Portland, OR 97201-6789. The numbered street, nestled in the heart of the city, provided a perfect backdrop for the creative minds working in the area. -In the suburbs of Chicago, Illinois, Mr. Richard Davis resided at 123 Oakwood Lane, Naperville, IL 60540-9876. The individual's address, surrounded by greenery and tranquility, offered a peaceful retreat from the urban hustle. -As twilight approached in the Pacific Northwest, a package arrived at 567 Pine Street SW, Seattle, WA 98101-3456, where the team at Evergreen Tech eagerly awaited a shipment of state-of-the-art devices. -The quiet neighborhoods of Lexington, Kentucky, and Jackson, Mississippi, showcased the beauty of simplicity. Residents cherished the charm of 789 Maple Avenue, Lexington, KY 40502-2109, and 456 Magnolia Blvd, Jackson, MS 39201-5432, knowing that their communities thrived on a strong sense of community. -In the heart of Puerto Rico, the urbanization of Vista del Mar adorned the address of 987 Calle Principal, San Juan, PR 00921-6543. The unique blend of urban and coastal elements made it a distinctive location on the island. -This diverse tapestry of addresses reflects the richness and variety of American landscapes, each telling its own story within the fabric of the nation. \ No newline at end of file diff --git a/tests/data/address-parser/text3.txt_log/final.tree b/tests/data/address-parser/text3.txt_log/final.tree deleted file mode 100644 index 2425330..0000000 --- a/tests/data/address-parser/text3.txt_log/final.tree +++ /dev/null @@ -1,342 +0,0 @@ - -FINAL OUTPUT TREE: - -_ROOT [0,1963,0,1963,0,0,node,un, ("TOT LINES" 1) ("TOT TABS" 0) ("TOT LOWERS" 194) ("TOT CAPS" 90) ("TOT UPPERS" 14) ("TOT NUMS" 27)] - On [0,1,0,1,0,0,alpha, ("NOSP" 1) ("cap" 1)] - a [3,3,3,3,0,0,alpha, ("SP" 1) ("lower" 1)] - sunny [5,9,5,9,0,0,alpha, ("SP" 1) ("lower" 1)] - afternoon [11,19,11,19,0,0,alpha, ("SP" 1) ("lower" 1)] - in [21,22,21,22,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - Charleston [24,33,24,33,0,0,alpha, ("SP" 1) ("cap" 1)] - South [36,40,36,40,0,0,alpha, ("SP" 1) ("cap" 1) ("country" 1)] - Carolina [42,49,42,49,0,0,alpha, ("SP" 1) ("cap" 1)] - Mrs [52,54,52,54,0,0,alpha, ("SP" 1) ("cap" 1)] - Catherine [57,65,57,65,0,0,alpha, ("SP" 1) ("cap" 1)] - M [67,67,67,67,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Rodriguez [70,78,70,78,0,0,alpha, ("SP" 1) ("cap" 1)] - of [80,81,80,81,0,0,alpha, ("SP" 1) ("lower" 1)] - Rodriguez [83,91,83,91,0,0,alpha, ("SP" 1) ("cap" 1)] - Sons [95,98,95,98,0,0,alpha, ("SP" 1) ("cap" 1)] - Enterprises [100,110,100,110,0,0,alpha, ("SP" 1) ("cap" 1)] - managed [112,118,112,118,0,0,alpha, ("SP" 1) ("lower" 1)] - the [120,122,120,122,0,0,alpha, ("SP" 1) ("lower" 1)] - bustling [124,131,124,131,0,0,alpha, ("SP" 1) ("lower" 1)] - operations [133,142,133,142,0,0,alpha, ("SP" 1) ("lower" 1)] - at [144,145,144,145,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [147,186,147,186,10,13,node, ("state" "south carolina") ("city" "Charleston") ("streetsuff" "st") ("streetname" "Broad") ("streetnum" "225")] - 225 [147,149,147,149,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Broad [151,155,151,155,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - St [157,158,157,158,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "st")] - Charleston [162,171,162,171,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - SC [174,175,174,175,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "south carolina")] - _pincode [177,186,177,186,6,17,node] - 29401 [177,181,177,181,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [182,182,182,182,0,0,punct, ("NOSP" 1)] - 1234 [183,186,183,186,0,0,num, ("NOSP" 1) ("part2" 1)] - The [189,191,189,191,0,0,alpha, ("SP" 1) ("cap" 1)] - local [193,197,193,197,0,0,alpha, ("SP" 1) ("lower" 1)] - courier [199,205,199,205,0,0,alpha, ("SP" 1) ("lower" 1)] - well [208,211,208,211,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "wl")] - acquainted [213,222,213,222,0,0,alpha, ("NOSP" 1) ("lower" 1)] - with [224,227,224,227,0,0,alpha, ("SP" 1) ("lower" 1)] - the [229,231,229,231,0,0,alpha, ("SP" 1) ("lower" 1)] - urban [233,237,233,237,0,0,alpha, ("SP" 1) ("lower" 1)] - landscape [239,247,239,247,0,0,alpha, ("SP" 1) ("lower" 1)] - navigated [250,258,250,258,0,0,alpha, ("SP" 1) ("lower" 1)] - the [260,262,260,262,0,0,alpha, ("SP" 1) ("lower" 1)] - charming [264,271,264,271,0,0,alpha, ("SP" 1) ("lower" 1)] - streets [273,279,273,279,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "sts")] - to [281,282,281,282,0,0,alpha, ("SP" 1) ("lower" 1)] - deliver [284,290,284,290,0,0,alpha, ("SP" 1) ("lower" 1)] - packages [292,299,292,299,0,0,alpha, ("SP" 1) ("lower" 1)] - to [301,302,301,302,0,0,alpha, ("SP" 1) ("lower" 1)] - P [304,304,304,304,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - O [306,306,306,306,0,0,alpha, ("NOSP" 1) ("cap" 1) ("upper" 1)] - Box [309,311,309,311,0,0,alpha, ("SP" 1) ("cap" 1)] - _address [313,343,313,343,10,13,node, ("state" "south carolina") ("city" "Greenville")] - 5678 [313,316,313,316,0,0,num, ("SP" 1) ("starting" 1)] - Greenville [319,328,319,328,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - SC [331,332,331,332,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "south carolina")] - _pincode [334,343,334,343,6,17,node] - 29607 [334,338,334,338,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [339,339,339,339,0,0,punct, ("NOSP" 1)] - 8901 [340,343,340,343,0,0,num, ("NOSP" 1) ("part2" 1)] - Meanwhile [346,354,346,354,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - Summit [357,362,357,362,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "smt")] - Innovations [364,374,364,374,0,0,alpha, ("SP" 1) ("cap" 1)] - located [377,383,377,383,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - at [385,386,385,386,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [388,432,388,432,10,13,node, ("state" "colorado") ("city" "Boulder") ("streetsuff" "ave") ("streetname" "Mountain") ("streetnum" "789")] - 789 [388,390,388,390,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Mountain [392,399,392,399,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "mtn") ("streetname" 1)] - View [401,404,401,404,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "vw") ("streetname" 1)] - Ave [406,408,406,408,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "ave")] - Boulder [411,417,411,417,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - CO [420,421,420,421,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "colorado")] - _pincode [423,432,423,432,6,17,node] - 80302 [423,427,423,427,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [428,428,428,428,0,0,punct, ("NOSP" 1)] - 4567 [429,432,429,432,0,0,num, ("NOSP" 1) ("part2" 1)] - received [435,442,435,442,0,0,alpha, ("SP" 1) ("lower" 1)] - a [444,444,444,444,0,0,alpha, ("SP" 1) ("lower" 1)] - shipment [446,453,446,453,0,0,alpha, ("SP" 1) ("lower" 1)] - via [455,457,455,457,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "via")] - highway [459,465,459,465,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "hwy")] - contract [467,474,467,474,0,0,alpha, ("SP" 1) ("lower" 1)] - The [477,479,477,479,0,0,alpha, ("SP" 1) ("cap" 1)] - delivery [481,488,481,488,0,0,alpha, ("SP" 1) ("lower" 1)] - efficiently [491,501,491,501,0,0,alpha, ("SP" 1) ("lower" 1)] - coordinated [503,513,503,513,0,0,alpha, ("SP" 1) ("lower" 1)] - ensured [516,522,516,522,0,0,alpha, ("SP" 1) ("lower" 1)] - that [524,527,524,527,0,0,alpha, ("SP" 1) ("lower" 1)] - cutting [529,535,529,535,0,0,alpha, ("SP" 1) ("lower" 1)] - edge [537,540,537,540,0,0,alpha, ("NOSP" 1) ("lower" 1)] - technology [542,551,542,551,0,0,alpha, ("SP" 1) ("lower" 1)] - would [553,557,553,557,0,0,alpha, ("SP" 1) ("lower" 1)] - soon [559,562,559,562,0,0,alpha, ("SP" 1) ("lower" 1)] - be [564,565,564,565,0,0,alpha, ("SP" 1) ("lower" 1)] - available [567,575,567,575,0,0,alpha, ("SP" 1) ("lower" 1)] - to [577,578,577,578,0,0,alpha, ("SP" 1) ("lower" 1)] - residents [580,588,580,588,0,0,alpha, ("SP" 1) ("lower" 1)] - throughout [590,599,590,599,0,0,alpha, ("SP" 1) ("lower" 1)] - the [601,603,601,603,0,0,alpha, ("SP" 1) ("lower" 1)] - region [605,610,605,610,0,0,alpha, ("SP" 1) ("lower" 1)] - Over [613,616,613,616,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - in [618,619,618,619,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - Portland [621,628,621,628,0,0,alpha, ("SP" 1) ("cap" 1)] - Oregon [631,636,631,636,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" "OR")] - Ms [639,640,639,640,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" 1) ("state" "mississippi")] - Emily [643,647,643,647,0,0,alpha, ("SP" 1) ("cap" 1)] - Turner [649,654,649,654,0,0,alpha, ("SP" 1) ("cap" 1)] - enjoyed [656,662,656,662,0,0,alpha, ("SP" 1) ("lower" 1)] - the [664,666,664,666,0,0,alpha, ("SP" 1) ("lower" 1)] - vibrant [668,674,668,674,0,0,alpha, ("SP" 1) ("lower" 1)] - energy [676,681,676,681,0,0,alpha, ("SP" 1) ("lower" 1)] - of [683,684,683,684,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [686,722,686,722,10,13,node, ("state" "oregon") ("city" "Portland") ("streetsuff" "st") ("streetname" "Elm") ("streetnum" "45")] - 45 [686,687,686,687,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Elm [689,691,689,691,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - St [693,694,693,694,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "st")] - NW [696,697,696,697,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Portland [700,707,700,707,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - OR [710,711,710,711,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "oregon")] - _pincode [713,722,713,722,6,17,node] - 97201 [713,717,713,717,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [718,718,718,718,0,0,punct, ("NOSP" 1)] - 6789 [719,722,719,722,0,0,num, ("NOSP" 1) ("part2" 1)] - The [725,727,725,727,0,0,alpha, ("SP" 1) ("cap" 1)] - numbered [729,736,729,736,0,0,alpha, ("SP" 1) ("lower" 1)] - street [738,743,738,743,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "st")] - nestled [746,752,746,752,0,0,alpha, ("SP" 1) ("lower" 1)] - in [754,755,754,755,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - the [757,759,757,759,0,0,alpha, ("SP" 1) ("lower" 1)] - heart [761,765,761,765,0,0,alpha, ("SP" 1) ("lower" 1)] - of [767,768,767,768,0,0,alpha, ("SP" 1) ("lower" 1)] - the [770,772,770,772,0,0,alpha, ("SP" 1) ("lower" 1)] - city [774,777,774,777,0,0,alpha, ("SP" 1) ("lower" 1)] - provided [780,787,780,787,0,0,alpha, ("SP" 1) ("lower" 1)] - a [789,789,789,789,0,0,alpha, ("SP" 1) ("lower" 1)] - perfect [791,797,791,797,0,0,alpha, ("SP" 1) ("lower" 1)] - backdrop [799,806,799,806,0,0,alpha, ("SP" 1) ("lower" 1)] - for [808,810,808,810,0,0,alpha, ("SP" 1) ("lower" 1)] - the [812,814,812,814,0,0,alpha, ("SP" 1) ("lower" 1)] - creative [816,823,816,823,0,0,alpha, ("SP" 1) ("lower" 1)] - minds [825,829,825,829,0,0,alpha, ("SP" 1) ("lower" 1)] - working [831,837,831,837,0,0,alpha, ("SP" 1) ("lower" 1)] - in [839,840,839,840,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - the [842,844,842,844,0,0,alpha, ("SP" 1) ("lower" 1)] - area [846,849,846,849,0,0,alpha, ("SP" 1) ("lower" 1)] - In [852,853,852,853,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1) ("abbrev" 1) ("state" "indiana")] - the [855,857,855,857,0,0,alpha, ("SP" 1) ("lower" 1)] - suburbs [859,865,859,865,0,0,alpha, ("SP" 1) ("lower" 1)] - of [867,868,867,868,0,0,alpha, ("SP" 1) ("lower" 1)] - Chicago [870,876,870,876,0,0,alpha, ("SP" 1) ("cap" 1)] - Illinois [879,886,879,886,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" "IL")] - Mr [889,890,889,890,0,0,alpha, ("SP" 1) ("cap" 1)] - Richard [893,899,893,899,0,0,alpha, ("SP" 1) ("cap" 1)] - Davis [901,905,901,905,0,0,alpha, ("SP" 1) ("cap" 1)] - resided [907,913,907,913,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - at [915,916,915,916,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [918,960,918,960,10,13,node, ("state" "illinois") ("city" "Naperville") ("streetsuff" "ln") ("streetname" "Oakwood") ("streetnum" "123")] - 123 [918,920,918,920,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Oakwood [922,928,922,928,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - Lane [930,933,930,933,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "ln")] - Naperville [936,945,936,945,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - IL [948,949,948,949,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "illinois")] - _pincode [951,960,951,960,6,17,node] - 60540 [951,955,951,955,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [956,956,956,956,0,0,punct, ("NOSP" 1)] - 9876 [957,960,957,960,0,0,num, ("NOSP" 1) ("part2" 1)] - The [963,965,963,965,0,0,alpha, ("SP" 1) ("cap" 1)] - individual [967,976,967,976,0,0,alpha, ("SP" 1) ("lower" 1)] - s [978,978,978,978,0,0,alpha, ("NOSP" 1) ("lower" 1)] - address [980,986,980,986,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - surrounded [989,998,989,998,0,0,alpha, ("SP" 1) ("lower" 1)] - by [1000,1001,1000,1001,0,0,alpha, ("SP" 1) ("lower" 1)] - greenery [1003,1010,1003,1010,0,0,alpha, ("SP" 1) ("lower" 1)] - and [1012,1014,1012,1014,0,0,alpha, ("SP" 1) ("lower" 1)] - tranquility [1016,1026,1016,1026,0,0,alpha, ("SP" 1) ("lower" 1)] - offered [1029,1035,1029,1035,0,0,alpha, ("SP" 1) ("lower" 1)] - a [1037,1037,1037,1037,0,0,alpha, ("SP" 1) ("lower" 1)] - peaceful [1039,1046,1039,1046,0,0,alpha, ("SP" 1) ("lower" 1)] - retreat [1048,1054,1048,1054,0,0,alpha, ("SP" 1) ("lower" 1)] - from [1056,1059,1056,1059,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1061,1063,1061,1063,0,0,alpha, ("SP" 1) ("lower" 1)] - urban [1065,1069,1065,1069,0,0,alpha, ("SP" 1) ("lower" 1)] - hustle [1071,1076,1071,1076,0,0,alpha, ("SP" 1) ("lower" 1)] - As [1079,1080,1079,1080,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - twilight [1082,1089,1082,1089,0,0,alpha, ("SP" 1) ("lower" 1)] - approached [1091,1100,1091,1100,0,0,alpha, ("SP" 1) ("lower" 1)] - in [1102,1103,1102,1103,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - the [1105,1107,1105,1107,0,0,alpha, ("SP" 1) ("lower" 1)] - Pacific [1109,1115,1109,1115,0,0,alpha, ("SP" 1) ("cap" 1)] - Northwest [1117,1125,1117,1125,0,0,alpha, ("SP" 1) ("cap" 1)] - a [1128,1128,1128,1128,0,0,alpha, ("SP" 1) ("lower" 1)] - package [1130,1136,1130,1136,0,0,alpha, ("SP" 1) ("lower" 1)] - arrived [1138,1144,1138,1144,0,0,alpha, ("SP" 1) ("lower" 1)] - at [1146,1147,1146,1147,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [1149,1190,1149,1190,10,13,node, ("state" "washington") ("city" "Seattle") ("streetsuff" "st") ("streetname" "Pine") ("streetnum" "567")] - 567 [1149,1151,1149,1151,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Pine [1153,1156,1153,1156,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "pne") ("streetname" 1)] - Street [1158,1163,1158,1163,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "st")] - SW [1165,1166,1165,1166,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1)] - Seattle [1169,1175,1169,1175,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - WA [1178,1179,1178,1179,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "washington")] - _pincode [1181,1190,1181,1190,6,17,node] - 98101 [1181,1185,1181,1185,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [1186,1186,1186,1186,0,0,punct, ("NOSP" 1)] - 3456 [1187,1190,1187,1190,0,0,num, ("NOSP" 1) ("part2" 1)] - where [1193,1197,1193,1197,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1199,1201,1199,1201,0,0,alpha, ("SP" 1) ("lower" 1)] - team [1203,1206,1203,1206,0,0,alpha, ("SP" 1) ("lower" 1)] - at [1208,1209,1208,1209,0,0,alpha, ("SP" 1) ("lower" 1)] - Evergreen [1211,1219,1211,1219,0,0,alpha, ("SP" 1) ("cap" 1)] - Tech [1221,1224,1221,1224,0,0,alpha, ("SP" 1) ("cap" 1)] - eagerly [1226,1232,1226,1232,0,0,alpha, ("SP" 1) ("lower" 1)] - awaited [1234,1240,1234,1240,0,0,alpha, ("SP" 1) ("lower" 1)] - a [1242,1242,1242,1242,0,0,alpha, ("SP" 1) ("lower" 1)] - shipment [1244,1251,1244,1251,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1253,1254,1253,1254,0,0,alpha, ("SP" 1) ("lower" 1)] - state [1256,1260,1256,1260,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1262,1263,1262,1263,0,0,alpha, ("NOSP" 1) ("lower" 1)] - the [1265,1267,1265,1267,0,0,alpha, ("NOSP" 1) ("lower" 1)] - art [1269,1271,1269,1271,0,0,alpha, ("NOSP" 1) ("lower" 1)] - devices [1273,1279,1273,1279,0,0,alpha, ("SP" 1) ("lower" 1)] - The [1282,1284,1282,1284,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - quiet [1286,1290,1286,1290,0,0,alpha, ("SP" 1) ("lower" 1)] - neighborhoods [1292,1304,1292,1304,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1306,1307,1306,1307,0,0,alpha, ("SP" 1) ("lower" 1)] - Lexington [1309,1317,1309,1317,0,0,alpha, ("SP" 1) ("cap" 1)] - Kentucky [1320,1327,1320,1327,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" "KY")] - and [1330,1332,1330,1332,0,0,alpha, ("SP" 1) ("lower" 1)] - Jackson [1334,1340,1334,1340,0,0,alpha, ("SP" 1) ("cap" 1)] - Mississippi [1343,1353,1343,1353,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" "MS")] - showcased [1356,1364,1356,1364,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1366,1368,1366,1368,0,0,alpha, ("SP" 1) ("lower" 1)] - beauty [1370,1375,1370,1375,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1377,1378,1377,1378,0,0,alpha, ("SP" 1) ("lower" 1)] - simplicity [1380,1389,1380,1389,0,0,alpha, ("SP" 1) ("lower" 1)] - Residents [1392,1400,1392,1400,0,0,alpha, ("SP" 1) ("cap" 1)] - cherished [1402,1410,1402,1410,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1412,1414,1412,1414,0,0,alpha, ("SP" 1) ("lower" 1)] - charm [1416,1420,1416,1420,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1422,1423,1422,1423,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [1425,1466,1425,1466,10,13,node, ("state" "kentucky") ("city" "Lexington") ("streetsuff" "ky") ("streetname" "Maple") ("streetnum" "789")] - 789 [1425,1427,1425,1427,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Maple [1429,1433,1429,1433,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - Avenue [1435,1440,1435,1440,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "ave")] - Lexington [1443,1451,1443,1451,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1) ("streetname" 1)] - KY [1454,1455,1454,1455,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "kentucky") ("usps" "ky")] - _pincode [1457,1466,1457,1466,6,17,node] - 40502 [1457,1461,1457,1461,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [1462,1462,1462,1462,0,0,punct, ("NOSP" 1)] - 2109 [1463,1466,1463,1466,0,0,num, ("NOSP" 1) ("part2" 1)] - and [1469,1471,1469,1471,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [1473,1513,1473,1513,10,13,node, ("state" "mississippi") ("city" "Jackson") ("streetsuff" "blvd") ("streetname" "Magnolia") ("streetnum" "456")] - 456 [1473,1475,1473,1475,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Magnolia [1477,1484,1477,1484,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - Blvd [1486,1489,1486,1489,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "blvd")] - Jackson [1492,1498,1492,1498,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - MS [1501,1502,1501,1502,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "mississippi")] - _pincode [1504,1513,1504,1513,6,17,node] - 39201 [1504,1508,1504,1508,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [1509,1509,1509,1509,0,0,punct, ("NOSP" 1)] - 5432 [1510,1513,1510,1513,0,0,num, ("NOSP" 1) ("part2" 1)] - knowing [1516,1522,1516,1522,0,0,alpha, ("SP" 1) ("lower" 1)] - that [1524,1527,1524,1527,0,0,alpha, ("SP" 1) ("lower" 1)] - their [1529,1533,1529,1533,0,0,alpha, ("SP" 1) ("lower" 1)] - communities [1535,1545,1535,1545,0,0,alpha, ("SP" 1) ("lower" 1)] - thrived [1547,1553,1547,1553,0,0,alpha, ("SP" 1) ("lower" 1)] - on [1555,1556,1555,1556,0,0,alpha, ("SP" 1) ("lower" 1)] - a [1558,1558,1558,1558,0,0,alpha, ("SP" 1) ("lower" 1)] - strong [1560,1565,1560,1565,0,0,alpha, ("SP" 1) ("lower" 1)] - sense [1567,1571,1567,1571,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1573,1574,1573,1574,0,0,alpha, ("SP" 1) ("lower" 1)] - community [1576,1584,1576,1584,0,0,alpha, ("SP" 1) ("lower" 1)] - In [1587,1588,1587,1588,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1) ("abbrev" 1) ("state" "indiana")] - the [1590,1592,1590,1592,0,0,alpha, ("SP" 1) ("lower" 1)] - heart [1594,1598,1594,1598,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1600,1601,1600,1601,0,0,alpha, ("SP" 1) ("lower" 1)] - Puerto [1603,1608,1603,1608,0,0,alpha, ("SP" 1) ("cap" 1)] - Rico [1610,1613,1610,1613,0,0,alpha, ("SP" 1) ("cap" 1)] - the [1616,1618,1616,1618,0,0,alpha, ("SP" 1) ("lower" 1)] - urbanization [1620,1631,1620,1631,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1633,1634,1633,1634,0,0,alpha, ("SP" 1) ("lower" 1)] - Vista [1636,1640,1636,1640,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "vis")] - del [1642,1644,1642,1644,0,0,alpha, ("SP" 1) ("lower" 1)] - Mar [1646,1648,1646,1648,0,0,alpha, ("SP" 1) ("cap" 1)] - adorned [1650,1656,1650,1656,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1658,1660,1658,1660,0,0,alpha, ("SP" 1) ("lower" 1)] - address [1662,1668,1662,1668,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - of [1670,1671,1670,1671,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [1673,1716,1673,1716,10,13,node, ("streetsuff" "pr") ("streetname" "Juan")] - 987 [1673,1675,1673,1675,0,0,num, ("SP" 1) ("starting" 1)] - Calle [1677,1681,1677,1681,0,0,alpha, ("SP" 1) ("cap" 1)] - Principal [1683,1691,1683,1691,0,0,alpha, ("SP" 1) ("cap" 1)] - San [1694,1696,1694,1696,0,0,alpha, ("SP" 1) ("cap" 1) ("country" 1)] - Juan [1698,1701,1698,1701,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - PR [1704,1705,1704,1705,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("usps" "pr")] - _pincode [1707,1716,1707,1716,6,17,node] - 00921 [1707,1711,1707,1711,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [1712,1712,1712,1712,0,0,punct, ("NOSP" 1)] - 6543 [1713,1716,1713,1716,0,0,num, ("NOSP" 1) ("part2" 1)] - The [1719,1721,1719,1721,0,0,alpha, ("SP" 1) ("cap" 1)] - unique [1723,1728,1723,1728,0,0,alpha, ("SP" 1) ("lower" 1)] - blend [1730,1734,1730,1734,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1736,1737,1736,1737,0,0,alpha, ("SP" 1) ("lower" 1)] - urban [1739,1743,1739,1743,0,0,alpha, ("SP" 1) ("lower" 1)] - and [1745,1747,1745,1747,0,0,alpha, ("SP" 1) ("lower" 1)] - coastal [1749,1755,1749,1755,0,0,alpha, ("SP" 1) ("lower" 1)] - elements [1757,1764,1757,1764,0,0,alpha, ("SP" 1) ("lower" 1)] - made [1766,1769,1766,1769,0,0,alpha, ("SP" 1) ("lower" 1)] - it [1771,1772,1771,1772,0,0,alpha, ("SP" 1) ("lower" 1)] - a [1774,1774,1774,1774,0,0,alpha, ("SP" 1) ("lower" 1)] - distinctive [1776,1786,1776,1786,0,0,alpha, ("SP" 1) ("lower" 1)] - location [1788,1795,1788,1795,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - on [1797,1798,1797,1798,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1800,1802,1800,1802,0,0,alpha, ("SP" 1) ("lower" 1)] - island [1804,1809,1804,1809,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "is")] - This [1812,1815,1812,1815,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - diverse [1817,1823,1817,1823,0,0,alpha, ("SP" 1) ("lower" 1)] - tapestry [1825,1832,1825,1832,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1834,1835,1834,1835,0,0,alpha, ("SP" 1) ("lower" 1)] - addresses [1837,1845,1837,1845,0,0,alpha, ("SP" 1) ("lower" 1)] - reflects [1847,1854,1847,1854,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1856,1858,1856,1858,0,0,alpha, ("SP" 1) ("lower" 1)] - richness [1860,1867,1860,1867,0,0,alpha, ("SP" 1) ("lower" 1)] - and [1869,1871,1869,1871,0,0,alpha, ("SP" 1) ("lower" 1)] - variety [1873,1879,1873,1879,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1881,1882,1881,1882,0,0,alpha, ("SP" 1) ("lower" 1)] - American [1884,1891,1884,1891,0,0,alpha, ("SP" 1) ("cap" 1)] - landscapes [1893,1902,1893,1902,0,0,alpha, ("SP" 1) ("lower" 1)] - each [1905,1908,1905,1908,0,0,alpha, ("SP" 1) ("lower" 1)] - telling [1910,1916,1910,1916,0,0,alpha, ("SP" 1) ("lower" 1)] - its [1918,1920,1918,1920,0,0,alpha, ("SP" 1) ("lower" 1)] - own [1922,1924,1922,1924,0,0,alpha, ("SP" 1) ("lower" 1)] - story [1926,1930,1926,1930,0,0,alpha, ("SP" 1) ("lower" 1)] - within [1932,1937,1932,1937,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1939,1941,1939,1941,0,0,alpha, ("SP" 1) ("lower" 1)] - fabric [1943,1948,1943,1948,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1950,1951,1950,1951,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1953,1955,1953,1955,0,0,alpha, ("SP" 1) ("lower" 1)] - nation [1957,1962,1957,1962,0,0,alpha, ("SP" 1) ("lower" 1)] diff --git a/tests/data/address-parser/text4.txt b/tests/data/address-parser/text4.txt deleted file mode 100644 index ba4b714..0000000 --- a/tests/data/address-parser/text4.txt +++ /dev/null @@ -1,6 +0,0 @@ - -In the serene countryside of Lancaster, Pennsylvania, Mr. Samuel Thompson managed his farm at RR 2 Box 123, Lancaster, PA 17601-4321. The rural route, winding through picturesque landscapes, provided a tranquil setting for the Thompson family's homestead. Meanwhile, Green Valley Farms, situated at HC 45 Box 678, Springfield, MO 65804-5678, received a shipment via highway contract. The well-maintained highway ensured that the agricultural products from the farm reached markets across the region efficiently. -As the sun dipped below the horizon in the Pacific Northwest, a package arrived at 789 Sunset Lane SW, Portland, OR 97205-3456. The team at Cascade Distributors eagerly awaited a shipment of locally sourced goods, ready to be distributed to various rural communities. -Over in Lexington, Kentucky, the rural charm extended to 456 Country Road, Lexington, KY 40503-7890, where Mrs. Elizabeth Adams lived amidst the rolling hills. The numbered streets and rural routes coexisted harmoniously, each contributing to the unique tapestry of the American landscape. -In the heart of Puerto Rico, the rural landscape of Hacienda Esperanza was adorned with the address of 123 Rural Way, Anytown, PR 00923-4567. The unique blend of rural and tropical elements made it a distinctive location on the island. -This diverse tapestry of addresses reflects the richness and variety of American landscapes, each telling its own story within the fabric of the nation. \ No newline at end of file diff --git a/tests/data/address-parser/text4.txt_log/final.tree b/tests/data/address-parser/text4.txt_log/final.tree deleted file mode 100644 index af4fe50..0000000 --- a/tests/data/address-parser/text4.txt_log/final.tree +++ /dev/null @@ -1,250 +0,0 @@ - -FINAL OUTPUT TREE: - -_ROOT [0,1458,0,1458,0,0,node,un, ("TOT LINES" 1) ("TOT TABS" 0) ("TOT LOWERS" 156) ("TOT CAPS" 56) ("TOT UPPERS" 8) ("TOT NUMS" 17)] - _BLANKLINE [0,0,0,0,5,13,node] - \n [0,0,0,0,0,0,white] - In [1,2,1,2,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1) ("abbrev" 1) ("state" "indiana")] - the [4,6,4,6,0,0,alpha, ("SP" 1) ("lower" 1)] - serene [8,13,8,13,0,0,alpha, ("SP" 1) ("lower" 1)] - countryside [15,25,15,25,0,0,alpha, ("SP" 1) ("lower" 1)] - of [27,28,27,28,0,0,alpha, ("SP" 1) ("lower" 1)] - Lancaster [30,38,30,38,0,0,alpha, ("SP" 1) ("cap" 1)] - Pennsylvania [41,52,41,52,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" "PA")] - Mr [55,56,55,56,0,0,alpha, ("SP" 1) ("cap" 1)] - Samuel [59,64,59,64,0,0,alpha, ("SP" 1) ("cap" 1)] - Thompson [66,73,66,73,0,0,alpha, ("SP" 1) ("cap" 1)] - managed [75,81,75,81,0,0,alpha, ("SP" 1) ("lower" 1)] - his [83,85,83,85,0,0,alpha, ("SP" 1) ("lower" 1)] - farm [87,90,87,90,0,0,alpha, ("SP" 1) ("lower" 1)] - at [92,93,92,93,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [95,132,95,132,11,13,node, ("routenum" "2") ("boxnum" "123") ("type" "RuralRoute") ("state" "pennsylvania") ("city" "Lancaster") ("pincode" "17601-4321") ("con" concept:"address1")] - RR [95,96,95,96,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("starting" 1)] - 2 [98,98,98,98,0,0,num, ("SP" 1)] - Box [100,102,100,102,0,0,alpha, ("SP" 1) ("cap" 1)] - 123 [104,106,104,106,0,0,num, ("SP" 1) ("starting" 1)] - Lancaster [109,117,109,117,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - PA [120,121,120,121,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "pennsylvania")] - _pincode [123,132,123,132,7,17,node] - 17601 [123,127,123,127,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [128,128,128,128,0,0,punct, ("NOSP" 1)] - 4321 [129,132,129,132,0,0,num, ("NOSP" 1) ("part2" 1)] - The [135,137,135,137,0,0,alpha, ("SP" 1) ("cap" 1)] - rural [139,143,139,143,0,0,alpha, ("SP" 1) ("lower" 1)] - route [145,149,145,149,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "rte")] - winding [152,158,152,158,0,0,alpha, ("SP" 1) ("lower" 1)] - through [160,166,160,166,0,0,alpha, ("SP" 1) ("lower" 1)] - picturesque [168,178,168,178,0,0,alpha, ("SP" 1) ("lower" 1)] - landscapes [180,189,180,189,0,0,alpha, ("SP" 1) ("lower" 1)] - provided [192,199,192,199,0,0,alpha, ("SP" 1) ("lower" 1)] - a [201,201,201,201,0,0,alpha, ("SP" 1) ("lower" 1)] - tranquil [203,210,203,210,0,0,alpha, ("SP" 1) ("lower" 1)] - setting [212,218,212,218,0,0,alpha, ("SP" 1) ("lower" 1)] - for [220,222,220,222,0,0,alpha, ("SP" 1) ("lower" 1)] - the [224,226,224,226,0,0,alpha, ("SP" 1) ("lower" 1)] - Thompson [228,235,228,235,0,0,alpha, ("SP" 1) ("cap" 1)] - family [237,242,237,242,0,0,alpha, ("SP" 1) ("lower" 1)] - s [244,244,244,244,0,0,alpha, ("NOSP" 1) ("lower" 1) ("direction" 1) ("attr" "south")] - homestead [246,254,246,254,0,0,alpha, ("SP" 1) ("lower" 1)] - Meanwhile [257,265,257,265,0,0,alpha, ("SP" 1) ("cap" 1)] - Green [268,272,268,272,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "grn")] - Valley [274,279,274,279,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "vly")] - Farms [281,285,281,285,0,0,alpha, ("SP" 1) ("cap" 1)] - situated [288,295,288,295,0,0,alpha, ("SP" 1) ("lower" 1)] - at [297,298,297,298,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [300,340,300,340,11,13,node, ("hcnum" "45") ("boxnum" "678") ("type" "HighwayContract") ("state" "missouri") ("city" "Springfield") ("pincode" "65804-5678") ("con" concept:"address2")] - HC [300,301,300,301,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("starting" 1)] - 45 [303,304,303,304,0,0,num, ("SP" 1)] - Box [306,308,306,308,0,0,alpha, ("SP" 1) ("cap" 1)] - 678 [310,312,310,312,0,0,num, ("SP" 1) ("starting" 1)] - Springfield [315,325,315,325,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - MO [328,329,328,329,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "missouri")] - _pincode [331,340,331,340,7,17,node] - 65804 [331,335,331,335,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [336,336,336,336,0,0,punct, ("NOSP" 1)] - 5678 [337,340,337,340,0,0,num, ("NOSP" 1) ("part2" 1)] - received [343,350,343,350,0,0,alpha, ("SP" 1) ("lower" 1)] - a [352,352,352,352,0,0,alpha, ("SP" 1) ("lower" 1)] - shipment [354,361,354,361,0,0,alpha, ("SP" 1) ("lower" 1)] - via [363,365,363,365,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "via")] - highway [367,373,367,373,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "hwy")] - contract [375,382,375,382,0,0,alpha, ("SP" 1) ("lower" 1)] - The [385,387,385,387,0,0,alpha, ("SP" 1) ("cap" 1)] - well [389,392,389,392,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "wl")] - maintained [394,403,394,403,0,0,alpha, ("NOSP" 1) ("lower" 1)] - highway [405,411,405,411,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "hwy")] - ensured [413,419,413,419,0,0,alpha, ("SP" 1) ("lower" 1)] - that [421,424,421,424,0,0,alpha, ("SP" 1) ("lower" 1)] - the [426,428,426,428,0,0,alpha, ("SP" 1) ("lower" 1)] - agricultural [430,441,430,441,0,0,alpha, ("SP" 1) ("lower" 1)] - products [443,450,443,450,0,0,alpha, ("SP" 1) ("lower" 1)] - from [452,455,452,455,0,0,alpha, ("SP" 1) ("lower" 1)] - the [457,459,457,459,0,0,alpha, ("SP" 1) ("lower" 1)] - farm [461,464,461,464,0,0,alpha, ("SP" 1) ("lower" 1)] - reached [466,472,466,472,0,0,alpha, ("SP" 1) ("lower" 1)] - markets [474,480,474,480,0,0,alpha, ("SP" 1) ("lower" 1)] - across [482,487,482,487,0,0,alpha, ("SP" 1) ("lower" 1)] - the [489,491,489,491,0,0,alpha, ("SP" 1) ("lower" 1)] - region [493,498,493,498,0,0,alpha, ("SP" 1) ("lower" 1)] - efficiently [500,510,500,510,0,0,alpha, ("SP" 1) ("lower" 1)] - As [513,514,513,514,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - the [516,518,516,518,0,0,alpha, ("SP" 1) ("lower" 1)] - sun [520,522,520,522,0,0,alpha, ("SP" 1) ("lower" 1)] - dipped [524,529,524,529,0,0,alpha, ("SP" 1) ("lower" 1)] - below [531,535,531,535,0,0,alpha, ("SP" 1) ("lower" 1)] - the [537,539,537,539,0,0,alpha, ("SP" 1) ("lower" 1)] - horizon [541,547,541,547,0,0,alpha, ("SP" 1) ("lower" 1)] - in [549,550,549,550,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - the [552,554,552,554,0,0,alpha, ("SP" 1) ("lower" 1)] - Pacific [556,562,556,562,0,0,alpha, ("SP" 1) ("cap" 1)] - Northwest [564,572,564,572,0,0,alpha, ("SP" 1) ("cap" 1) ("direction" 1) ("attr" "northwest")] - a [575,575,575,575,0,0,alpha, ("SP" 1) ("lower" 1)] - package [577,583,577,583,0,0,alpha, ("SP" 1) ("lower" 1)] - arrived [585,591,585,591,0,0,alpha, ("SP" 1) ("lower" 1)] - at [593,594,593,594,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [596,638,596,638,11,13,node, ("state" "oregon") ("city" "Portland") ("streetsuff" "ln") ("streettype" "Lane") ("streetname" "Sunset") ("streetnum" "789") ("direction" 0) ("pincode" "97205-3456") ("con" concept:"address3")] - 789 [596,598,596,598,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Sunset [600,605,600,605,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - Lane [607,610,607,610,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "ln")] - SW [612,613,612,613,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("direction" 1) ("attr" "southwest")] - Portland [616,623,616,623,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - OR [626,627,626,627,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "oregon")] - _pincode [629,638,629,638,7,17,node] - 97205 [629,633,629,633,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [634,634,634,634,0,0,punct, ("NOSP" 1)] - 3456 [635,638,635,638,0,0,num, ("NOSP" 1) ("part2" 1)] - The [641,643,641,643,0,0,alpha, ("SP" 1) ("cap" 1)] - team [645,648,645,648,0,0,alpha, ("SP" 1) ("lower" 1)] - at [650,651,650,651,0,0,alpha, ("SP" 1) ("lower" 1)] - Cascade [653,659,653,659,0,0,alpha, ("SP" 1) ("cap" 1)] - Distributors [661,672,661,672,0,0,alpha, ("SP" 1) ("cap" 1)] - eagerly [674,680,674,680,0,0,alpha, ("SP" 1) ("lower" 1)] - awaited [682,688,682,688,0,0,alpha, ("SP" 1) ("lower" 1)] - a [690,690,690,690,0,0,alpha, ("SP" 1) ("lower" 1)] - shipment [692,699,692,699,0,0,alpha, ("SP" 1) ("lower" 1)] - of [701,702,701,702,0,0,alpha, ("SP" 1) ("lower" 1)] - locally [704,710,704,710,0,0,alpha, ("SP" 1) ("lower" 1)] - sourced [712,718,712,718,0,0,alpha, ("SP" 1) ("lower" 1)] - goods [720,724,720,724,0,0,alpha, ("SP" 1) ("lower" 1)] - ready [727,731,727,731,0,0,alpha, ("SP" 1) ("lower" 1)] - to [733,734,733,734,0,0,alpha, ("SP" 1) ("lower" 1)] - be [736,737,736,737,0,0,alpha, ("SP" 1) ("lower" 1)] - distributed [739,749,739,749,0,0,alpha, ("SP" 1) ("lower" 1)] - to [751,752,751,752,0,0,alpha, ("SP" 1) ("lower" 1)] - various [754,760,754,760,0,0,alpha, ("SP" 1) ("lower" 1)] - rural [762,766,762,766,0,0,alpha, ("SP" 1) ("lower" 1)] - communities [768,778,768,778,0,0,alpha, ("SP" 1) ("lower" 1)] - Over [781,784,781,784,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - in [786,787,786,787,0,0,alpha, ("SP" 1) ("lower" 1) ("abbrev" 1) ("state" "indiana")] - Lexington [789,797,789,797,0,0,alpha, ("SP" 1) ("cap" 1)] - Kentucky [800,807,800,807,0,0,alpha, ("SP" 1) ("cap" 1) ("abbrev" "KY")] - the [810,812,810,812,0,0,alpha, ("SP" 1) ("lower" 1)] - rural [814,818,814,818,0,0,alpha, ("SP" 1) ("lower" 1)] - charm [820,824,820,824,0,0,alpha, ("SP" 1) ("lower" 1)] - extended [826,833,826,833,0,0,alpha, ("SP" 1) ("lower" 1)] - to [835,836,835,836,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [838,879,838,879,11,13,node, ("state" "kentucky") ("city" "Lexington") ("streetsuff" "rd") ("streettype" "Road") ("streetname" "Country") ("streetnum" "456") ("pincode" "40503-7890") ("con" concept:"address4")] - 456 [838,840,838,840,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Country [842,848,842,848,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - Road [850,853,850,853,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "rd")] - Lexington [856,864,856,864,0,0,alpha, ("SP" 1) ("cap" 1) ("city" 1)] - KY [867,868,867,868,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("abbrev" 1) ("state" "kentucky")] - _pincode [870,879,870,879,7,17,node] - 40503 [870,874,870,874,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [875,875,875,875,0,0,punct, ("NOSP" 1)] - 7890 [876,879,876,879,0,0,num, ("NOSP" 1) ("part2" 1)] - where [882,886,882,886,0,0,alpha, ("SP" 1) ("lower" 1)] - Mrs [888,890,888,890,0,0,alpha, ("SP" 1) ("cap" 1)] - Elizabeth [893,901,893,901,0,0,alpha, ("SP" 1) ("cap" 1)] - Adams [903,907,903,907,0,0,alpha, ("SP" 1) ("cap" 1)] - lived [909,913,909,913,0,0,alpha, ("SP" 1) ("lower" 1)] - amidst [915,920,915,920,0,0,alpha, ("SP" 1) ("lower" 1)] - the [922,924,922,924,0,0,alpha, ("SP" 1) ("lower" 1)] - rolling [926,932,926,932,0,0,alpha, ("SP" 1) ("lower" 1)] - hills [934,938,934,938,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "hls")] - The [941,943,941,943,0,0,alpha, ("SP" 1) ("cap" 1)] - numbered [945,952,945,952,0,0,alpha, ("SP" 1) ("lower" 1)] - streets [954,960,954,960,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "sts")] - and [962,964,962,964,0,0,alpha, ("SP" 1) ("lower" 1)] - rural [966,970,966,970,0,0,alpha, ("SP" 1) ("lower" 1)] - routes [972,977,972,977,0,0,alpha, ("SP" 1) ("lower" 1)] - coexisted [979,987,979,987,0,0,alpha, ("SP" 1) ("lower" 1)] - harmoniously [989,1000,989,1000,0,0,alpha, ("SP" 1) ("lower" 1)] - each [1003,1006,1003,1006,0,0,alpha, ("SP" 1) ("lower" 1)] - contributing [1008,1019,1008,1019,0,0,alpha, ("SP" 1) ("lower" 1)] - to [1021,1022,1021,1022,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1024,1026,1024,1026,0,0,alpha, ("SP" 1) ("lower" 1)] - unique [1028,1033,1028,1033,0,0,alpha, ("SP" 1) ("lower" 1)] - tapestry [1035,1042,1035,1042,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1044,1045,1044,1045,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1047,1049,1047,1049,0,0,alpha, ("SP" 1) ("lower" 1)] - American [1051,1058,1051,1058,0,0,alpha, ("SP" 1) ("cap" 1)] - landscape [1060,1068,1060,1068,0,0,alpha, ("SP" 1) ("lower" 1)] - In [1071,1072,1071,1072,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1) ("abbrev" 1) ("state" "indiana")] - the [1074,1076,1074,1076,0,0,alpha, ("SP" 1) ("lower" 1)] - heart [1078,1082,1078,1082,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1084,1085,1084,1085,0,0,alpha, ("SP" 1) ("lower" 1)] - Puerto [1087,1092,1087,1092,0,0,alpha, ("SP" 1) ("cap" 1)] - Rico [1094,1097,1094,1097,0,0,alpha, ("SP" 1) ("cap" 1)] - the [1100,1102,1100,1102,0,0,alpha, ("SP" 1) ("lower" 1)] - rural [1104,1108,1104,1108,0,0,alpha, ("SP" 1) ("lower" 1)] - landscape [1110,1118,1110,1118,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1120,1121,1120,1121,0,0,alpha, ("SP" 1) ("lower" 1)] - Hacienda [1123,1130,1123,1130,0,0,alpha, ("SP" 1) ("cap" 1)] - Esperanza [1132,1140,1132,1140,0,0,alpha, ("SP" 1) ("cap" 1)] - was [1142,1144,1142,1144,0,0,alpha, ("SP" 1) ("lower" 1)] - adorned [1146,1152,1146,1152,0,0,alpha, ("SP" 1) ("lower" 1)] - with [1154,1157,1154,1157,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1159,1161,1159,1161,0,0,alpha, ("SP" 1) ("lower" 1)] - address [1163,1169,1163,1169,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - of [1171,1172,1171,1172,0,0,alpha, ("SP" 1) ("lower" 1)] - _address [1174,1210,1174,1210,11,13,node, ("streetsuff" "pr") ("streettype" "PR") ("streetname" "Rural") ("streetnum" "123") ("pincode" "00923-4567") ("con" concept:"address5")] - 123 [1174,1176,1174,1176,0,0,num, ("SP" 1) ("starting" 1) ("streetnum" 1)] - Rural [1178,1182,1178,1182,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - Way [1184,1186,1184,1186,0,0,alpha, ("SP" 1) ("cap" 1) ("usps" "way")] - Anytown [1189,1195,1189,1195,0,0,alpha, ("SP" 1) ("cap" 1) ("streetname" 1)] - PR [1198,1199,1198,1199,0,0,alpha, ("SP" 1) ("cap" 1) ("upper" 1) ("usps" "pr")] - _pincode [1201,1210,1201,1210,7,17,node] - 00923 [1201,1205,1201,1205,0,0,num, ("SP" 1) ("part1" 1) ("pin" 1)] - - [1206,1206,1206,1206,0,0,punct, ("NOSP" 1)] - 4567 [1207,1210,1207,1210,0,0,num, ("NOSP" 1) ("part2" 1)] - The [1213,1215,1213,1215,0,0,alpha, ("SP" 1) ("cap" 1)] - unique [1217,1222,1217,1222,0,0,alpha, ("SP" 1) ("lower" 1)] - blend [1224,1228,1224,1228,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1230,1231,1230,1231,0,0,alpha, ("SP" 1) ("lower" 1)] - rural [1233,1237,1233,1237,0,0,alpha, ("SP" 1) ("lower" 1)] - and [1239,1241,1239,1241,0,0,alpha, ("SP" 1) ("lower" 1)] - tropical [1243,1250,1243,1250,0,0,alpha, ("SP" 1) ("lower" 1)] - elements [1252,1259,1252,1259,0,0,alpha, ("SP" 1) ("lower" 1)] - made [1261,1264,1261,1264,0,0,alpha, ("SP" 1) ("lower" 1)] - it [1266,1267,1266,1267,0,0,alpha, ("SP" 1) ("lower" 1)] - a [1269,1269,1269,1269,0,0,alpha, ("SP" 1) ("lower" 1)] - distinctive [1271,1281,1271,1281,0,0,alpha, ("SP" 1) ("lower" 1)] - location [1283,1290,1283,1290,0,0,alpha, ("SP" 1) ("lower" 1) ("address" 1)] - on [1292,1293,1292,1293,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1295,1297,1295,1297,0,0,alpha, ("SP" 1) ("lower" 1)] - island [1299,1304,1299,1304,0,0,alpha, ("SP" 1) ("lower" 1) ("usps" "is")] - This [1307,1310,1307,1310,0,0,alpha, ("SP" 1) ("NL" 1) ("cap" 1)] - diverse [1312,1318,1312,1318,0,0,alpha, ("SP" 1) ("lower" 1)] - tapestry [1320,1327,1320,1327,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1329,1330,1329,1330,0,0,alpha, ("SP" 1) ("lower" 1)] - addresses [1332,1340,1332,1340,0,0,alpha, ("SP" 1) ("lower" 1)] - reflects [1342,1349,1342,1349,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1351,1353,1351,1353,0,0,alpha, ("SP" 1) ("lower" 1)] - richness [1355,1362,1355,1362,0,0,alpha, ("SP" 1) ("lower" 1)] - and [1364,1366,1364,1366,0,0,alpha, ("SP" 1) ("lower" 1)] - variety [1368,1374,1368,1374,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1376,1377,1376,1377,0,0,alpha, ("SP" 1) ("lower" 1)] - American [1379,1386,1379,1386,0,0,alpha, ("SP" 1) ("cap" 1)] - landscapes [1388,1397,1388,1397,0,0,alpha, ("SP" 1) ("lower" 1)] - each [1400,1403,1400,1403,0,0,alpha, ("SP" 1) ("lower" 1)] - telling [1405,1411,1405,1411,0,0,alpha, ("SP" 1) ("lower" 1)] - its [1413,1415,1413,1415,0,0,alpha, ("SP" 1) ("lower" 1)] - own [1417,1419,1417,1419,0,0,alpha, ("SP" 1) ("lower" 1)] - story [1421,1425,1421,1425,0,0,alpha, ("SP" 1) ("lower" 1)] - within [1427,1432,1427,1432,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1434,1436,1434,1436,0,0,alpha, ("SP" 1) ("lower" 1)] - fabric [1438,1443,1438,1443,0,0,alpha, ("SP" 1) ("lower" 1)] - of [1445,1446,1445,1446,0,0,alpha, ("SP" 1) ("lower" 1)] - the [1448,1450,1448,1450,0,0,alpha, ("SP" 1) ("lower" 1)] - nation [1452,1457,1452,1457,0,0,alpha, ("SP" 1) ("lower" 1)] diff --git a/tests/data/address-parser/text4.txt_log/output.json b/tests/data/address-parser/text4.txt_log/output.json deleted file mode 100644 index 876f216..0000000 --- a/tests/data/address-parser/text4.txt_log/output.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "addresses": [ - { - "add-id":"0", - "city":"Lancaster", - "state":"pennsylvania", - "pincode":"17601-4321", - "type":"RuralRoute", - "routenum":"2 ", - "boxnum":"123 " - }, - { - "add-id":"1", - "city":"Springfield", - "state":"missouri", - "pincode":"65804-5678", - "type":"HighwayContract", - "hcnum":"45 ", - "boxnum":"678 " - }, - { - "add-id":"2", - "streetnum":"789", - "streetname":"Sunset", - "streettype":"Lane", - "streetsuff":"ln", - "city":"Portland", - "state":"oregon", - "pincode":"97205-3456", - "type":"individual" - }, - { - "add-id":"3", - "streetnum":"456", - "streetname":"Country", - "streettype":"Road", - "streetsuff":"rd", - "city":"Lexington", - "state":"kentucky", - "pincode":"40503-7890", - "type":"individual" - }, - { - "add-id":"4", - "streetnum":"123", - "streetname":"Rural", - "streettype":"PR", - "streetsuff":"pr", - "pincode":"00923-4567", - "type":"individual" - } - ] -} \ No newline at end of file diff --git a/tests/data/emailaddress/text.txt_log/output.json b/tests/data/emailaddress/text.txt_log/output.json index 459b4f0..3631656 100644 --- a/tests/data/emailaddress/text.txt_log/output.json +++ b/tests/data/emailaddress/text.txt_log/output.json @@ -1,189 +1,189 @@ { - "email_address": [ - { - "emailadd-id":"0", - "local":"john.smith", - "domainname":"emailprovider", - "tld":"com" - }, - { - "emailadd-id":"1", - "local":"sarah_j", - "domainname":"example", - "tld":"org" - }, - { - "emailadd-id":"2", - "local":"support-team123", - "domainname":"techsupport", - "tld":"net" - }, - { - "emailadd-id":"3", - "local":"info.business", - "domainname":"company-inc", - "tld":"com" - }, - { - "emailadd-id":"4", - "local":"euhwu", - "domainname":"i3734h2ibfba", - "tld":"com" - }, - { - "emailadd-id":"5", - "local":"alice.smith", - "domainname":"dept.departmentname.university", - "country":"UnitedKingdom" - }, - { - "emailadd-id":"6", - "local":"support.team", - "domainname":"my_company-123", - "tld":"support" - }, - { - "emailadd-id":"7", - "local":"custom.name", - "domainname":"example-email", - "tld":"net" - }, - { - "emailadd-id":"8", - "local":"user_name", - "domainname":"subdomain", - "tld":"co", - "cd":"uk", - "country":"UnitedKingdom" - }, - { - "emailadd-id":"9", - "local":"kenethfpp", - "domainname":"mails", - "tld":"yahoo", - "cd":"uk", - "country":"UnitedKingdom" - }, - { - "emailadd-id":"10", - "local":"charitysc123", - "domainname":"243", - "tld":"org" - }, - { - "emailadd-id":"11", - "local":"john.doe", - "domainname":"example", - "tld":"com" - }, - { - "emailadd-id":"12", - "local":"mary.smith123", - "domainname":"gmail", - "tld":"com" - }, - { - "emailadd-id":"13", - "local":"jane_doe", - "domainname":"outlook", - "tld":"co", - "cd":"uk", - "country":"UnitedKingdom" - }, - { - "emailadd-id":"14", - "local":"steve-jones", - "domainname":"yahoo", - "tld":"com" - }, - { - "emailadd-id":"15", - "local":"jennifer.smith+work", - "domainname":"emaildomain", - "tld":"net" - }, - { - "emailadd-id":"16", - "local":"contact.us", - "domainname":"my-website", - "tld":"org" - }, - { - "emailadd-id":"17", - "local":"user1234", - "domainname":"subdomain.example.com" - }, - { - "emailadd-id":"18", - "local":"support_team", - "domainname":"company-inc", - "tld":"com" - }, - { - "emailadd-id":"19", - "local":"info", - "domainname":"mycompany", - "tld":"net" - }, - { - "emailadd-id":"20", - "local":"sales", - "domainname":"e-commerce-site", - "tld":"org" - }, - { - "emailadd-id":"21", - "local":"custom.er123", - "domainname":"domain-name", - "tld":"com" - }, - { - "emailadd-id":"22", - "local":"webmaster", - "domainname":"emailserver", - "tld":"biz" - }, - { - "emailadd-id":"23", - "local":"no_reply", - "domainname":"noreply-domain", - "tld":"info" - }, - { - "emailadd-id":"24", - "local":"admin", - "domainname":"server123", - "tld":"net" - }, - { - "emailadd-id":"25", - "local":"test.email", - "domainname":"emailprovider", - "tld":"co" - }, - { - "emailadd-id":"26", - "local":"marketing.department", - "domainname":"email", - "tld":"org" - }, - { - "emailadd-id":"27", - "local":"me", - "domainname":"myname", - "tld":"me" - }, - { - "emailadd-id":"28", - "local":"contact-sales", - "domainname":"email-service", - "tld":"io" - }, - { - "emailadd-id":"29", - "local":"support", - "domainname":"customer-service", - "tld":"pro" - } + "email_address": [ + { + "emailadd-id": "0", + "local": "john.smith", + "domainname": "emailprovider", + "tld": "com" + }, + { + "emailadd-id": "1", + "local": "sarah_j", + "domainname": "example", + "tld": "org" + }, + { + "emailadd-id": "2", + "local": "support-team123", + "domainname": "techsupport", + "tld": "net" + }, + { + "emailadd-id": "3", + "local": "info.business", + "domainname": "company-inc", + "tld": "com" + }, + { + "emailadd-id": "4", + "local": "euhwu", + "domainname": "i3734h2ibfba", + "tld": "com" + }, + { + "emailadd-id": "5", + "local": "alice.smith", + "domainname": "dept.departmentname.university", + "country": "UnitedKingdom" + }, + { + "emailadd-id": "6", + "local": "support.team", + "domainname": "my_company-123", + "tld": "support" + }, + { + "emailadd-id": "7", + "local": "custom.name", + "domainname": "example-email", + "tld": "net" + }, + { + "emailadd-id": "8", + "local": "user_name", + "domainname": "subdomain", + "tld": "co", + "cd": "uk", + "country": "UnitedKingdom" + }, + { + "emailadd-id": "9", + "local": "kenethfpp", + "domainname": "mails", + "tld": "yahoo", + "cd": "uk", + "country": "UnitedKingdom" + }, + { + "emailadd-id": "10", + "local": "charitysc123", + "domainname": "243", + "tld": "org" + }, + { + "emailadd-id": "11", + "local": "john.doe", + "domainname": "example", + "tld": "com" + }, + { + "emailadd-id": "12", + "local": "mary.smith123", + "domainname": "gmail", + "tld": "com" + }, + { + "emailadd-id": "13", + "local": "jane_doe", + "domainname": "outlook", + "tld": "co", + "cd": "uk", + "country": "UnitedKingdom" + }, + { + "emailadd-id": "14", + "local": "steve-jones", + "domainname": "yahoo", + "tld": "com" + }, + { + "emailadd-id": "15", + "local": "jennifer.smith+work", + "domainname": "emaildomain", + "tld": "net" + }, + { + "emailadd-id": "16", + "local": "contact.us", + "domainname": "my-website", + "tld": "org" + }, + { + "emailadd-id": "17", + "local": "user1234", + "domainname": "subdomain.example.com" + }, + { + "emailadd-id": "18", + "local": "support_team", + "domainname": "company-inc", + "tld": "com" + }, + { + "emailadd-id": "19", + "local": "info", + "domainname": "mycompany", + "tld": "net" + }, + { + "emailadd-id": "20", + "local": "sales", + "domainname": "e-commerce-site", + "tld": "org" + }, + { + "emailadd-id": "21", + "local": "custom.er123", + "domainname": "domain-name", + "tld": "com" + }, + { + "emailadd-id": "22", + "local": "webmaster", + "domainname": "emailserver", + "tld": "biz" + }, + { + "emailadd-id": "23", + "local": "no_reply", + "domainname": "noreply-domain", + "tld": "info" + }, + { + "emailadd-id": "24", + "local": "admin", + "domainname": "server123", + "tld": "net" + }, + { + "emailadd-id": "25", + "local": "test.email", + "domainname": "emailprovider", + "tld": "co" + }, + { + "emailadd-id": "26", + "local": "marketing.department", + "domainname": "email", + "tld": "org" + }, + { + "emailadd-id": "27", + "local": "me", + "domainname": "myname", + "tld": "me" + }, + { + "emailadd-id": "28", + "local": "contact-sales", + "domainname": "email-service", + "tld": "io" + }, + { + "emailadd-id": "29", + "local": "support", + "domainname": "customer-service", + "tld": "pro" + } ] -} +} \ No newline at end of file diff --git a/tests/data/hello.xml b/tests/data/out.xml similarity index 100% rename from tests/data/hello.xml rename to tests/data/out.xml diff --git a/tests/test_engine.py b/tests/test_engine.py index b4f51aa..c0570e0 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -13,6 +13,7 @@ import NLPPlus DATADIR = Path(__file__).parent / "data" +ANADATADIR = Path(__file__).parent.parent / "NLPPlus" / "data" def read_file(path): @@ -27,14 +28,15 @@ class ModuleTest(TestCase): def test_simple(self): """Test the simplest possible usage with parse-en-us.""" - xml = NLPPlus.analyze("Hello world.") - hello = read_file(DATADIR / "hello.xml") + xml = NLPPlus.analyze("Hello world!") + hello = read_file(DATADIR / "out.xml") self.assertEqual(xml, hello) def test_working_dir(self): """Test that set_working_folder works.""" tmpdir = TemporaryDirectory(prefix="test-nlpplus") copytree(DATADIR.parent / "analyzers", Path(tmpdir.name) / "analyzers") + copytree(ANADATADIR, Path(tmpdir.name) / "data") NLPPlus.set_working_folder(tmpdir.name) text = read_file(DATADIR / "basic" / "text.txt") results = NLPPlus.engine.analyze(text, "basic") @@ -61,9 +63,9 @@ def test_address_parser(self): """Run the address parser and verify that it works.""" self._run_analyzer("address-parser") - def test_emailaddress_parser(self): - """Run the emailaddress analyzer and verify that it works.""" - self._run_analyzer("emailaddress") + # def test_emailaddress_parser(self): + # """Run the emailaddress analyzer and verify that it works.""" + # self._run_analyzer("emailaddress") def test_telephone_parser(self): """Run the telephone analyzer and verify that it works."""