11using System . Text . RegularExpressions ;
2+ using System . Web ;
23using HtmlAgilityPack ;
34using Microsoft . Extensions . Logging ;
45
89
910namespace RealEstatesWatcher . AdsPortals . BezrealitkyCz ;
1011
11- public class BezrealitkyCzAdsPortal ( string watchedUrl ,
12- IWebScraper webScraper ,
13- ILogger < BezrealitkyCzAdsPortal > ? logger = null ) : RealEstateAdsPortalBase ( watchedUrl , webScraper , logger )
12+ public partial class BezrealitkyCzAdsPortal ( string watchedUrl ,
13+ IWebScraper webScraper ,
14+ ILogger < BezrealitkyCzAdsPortal > ? logger = null ) : RealEstateAdsPortalBase ( watchedUrl , webScraper , logger )
1415{
/// <summary>
/// Source-generated regex that finds a <c>url=</c> parameter and lazily captures
/// everything up to the next <c>&amp;</c> into group 1.
/// </summary>
/// <returns>The generated <see cref="Regex"/> instance.</returns>
[GeneratedRegex(pattern: "url=(.+?)&")]
private static partial Regex ImageUrlRegex();
18+
/// <summary>
/// Name of this ads portal; it is also assigned to <c>AdsPortalName</c> of every parsed post.
/// </summary>
public override string Name
{
    get { return "Bezrealitky.cz"; }
}
1620
/// <summary>
/// Returns the XPath expression used to locate the ad elements.
/// </summary>
/// <returns>
/// XPath selecting the <c>article</c> children of the last <c>section</c> whose
/// <c>class</c> attribute contains <c>box</c>.
/// </returns>
protected override string GetPathToAdsElements()
{
    const string adsElementsXPath = "(//section [contains(@class,'box')])[last()]/article ";

    return adsElementsXPath;
}
1822
/// <summary>
/// Converts a single ad element into a <see cref="RealEstateAdPost"/> by running
/// each field-specific parser against it.
/// </summary>
/// <param name="node">Ad element to parse.</param>
/// <returns>The populated post; its currency is always <c>Currency.CZK</c>.</returns>
protected override RealEstateAdPost ParseRealEstateAdPost(HtmlNode node)
{
    var adPost = new RealEstateAdPost
    {
        AdsPortalName = Name,
        Title = ParseTitle(node),
        Text = ParseText(node),
        Price = ParsePrice(node),
        Currency = Currency.CZK,
        Layout = ParseLayout(node),
        Address = ParseAddress(node),
        WebUrl = ParseWebUrl(node),
        AdditionalFees = ParseAdditionalFees(node),
        FloorArea = ParseFloorArea(node),
        ImageUrl = ParseImageUrl(node)
    };

    return adPost;
}
3337
/// <summary>
/// Builds the ad title from the label and address spans of the ad element.
/// </summary>
/// <param name="node">Ad element to read from.</param>
/// <returns>
/// The HTML-decoded, trimmed label and address joined by a single space. When one
/// of them is missing only the other is returned; when both are missing the result
/// is an empty string.
/// </returns>
private static string ParseTitle(HtmlNode node)
{
    var label = ReadText(".//span[contains(@class,'propertyCardLabel')]");
    var address = ReadText(".//span[contains(@class,'propertyCardAddress')]");

    if (label.Length == 0)
        return address;

    return address.Length == 0
        ? label
        : $"{label} {address}";

    // SelectSingleNode yields null when nothing matches, so the lookup is null-safe
    // instead of dereferencing directly. The text is entity-decoded the same way the
    // feature-list parsers of this class decode theirs.
    string ReadText(string xpath) =>
        HttpUtility.HtmlDecode(node.SelectSingleNode(xpath)?.InnerText)?.Trim() ?? string.Empty;
}
45+
/// <summary>
/// Reads the ad text from the first paragraph inside the card content container.
/// </summary>
/// <param name="node">Ad element to read from.</param>
/// <returns>
/// The HTML-decoded, trimmed paragraph text, or an empty string when the ad has
/// no such paragraph.
/// </returns>
private static string ParseText(HtmlNode node)
{
    // SelectSingleNode yields null when nothing matches; an ad without this
    // paragraph gets an empty text instead of a NullReferenceException.
    var paragraph = node.SelectSingleNode(".//div[contains(@class,'propertyCardContent')]//p");

    return HttpUtility.HtmlDecode(paragraph?.InnerText)?.Trim() ?? string.Empty;
}
47+
/// <summary>
/// Reads the address from the address span of the ad element.
/// </summary>
/// <param name="node">Ad element to read from.</param>
/// <returns>
/// The HTML-decoded, trimmed address, or an empty string when the span is missing.
/// </returns>
private static string ParseAddress(HtmlNode node)
{
    // SelectSingleNode yields null when nothing matches; fall back to an empty
    // address instead of throwing a NullReferenceException.
    var addressSpan = node.SelectSingleNode(".//span[contains(@class,'propertyCardAddress')]");

    return HttpUtility.HtmlDecode(addressSpan?.InnerText)?.Trim() ?? string.Empty;
}
49+
/// <summary>
/// Reads the ad's URL from the link inside the card headline.
/// </summary>
/// <param name="node">Ad element to read from.</param>
/// <returns>A <see cref="Uri"/> built from the link's <c>href</c> attribute.</returns>
private static Uri ParseWebUrl(HtmlNode node)
{
    var headlineLink = node.SelectSingleNode(".//h2[contains(@class,'propertyCardHeadline')]//a");

    // An absent href attribute is read as an empty string.
    var href = headlineLink.GetAttributeValue("href", string.Empty);

    return new Uri(href);
}
3551
3652 private static decimal ParsePrice ( HtmlNode node )
3753 {
38- var value = node . SelectSingleNode ( ".//strong[ @class= \" product__value \" ]" ) ? . InnerText ;
39- if ( value == null )
54+ var value = node . SelectSingleNode ( ".//span[contains( @class,'propertyPriceAmount') ]" ) ? . InnerText ;
55+ if ( value is null )
4056 return decimal . Zero ;
4157
42- if ( value . Contains ( '+' ) )
43- value = value . Split ( '+' ) [ 0 ] ; // get first value as primary
44-
4558 value = RegexMatchers . AllNonNumberValues ( ) . Replace ( value , string . Empty ) ;
4659
4760 return decimal . TryParse ( value , out var price )
@@ -70,7 +83,11 @@ private static decimal ParseAdditionalFees(HtmlNode node)
7083
7184 private static Layout ParseLayout ( HtmlNode node )
7285 {
73- var value = node . SelectSingleNode ( ".//p[@class=\" product__note\" ]" ) . InnerText ;
86+ var values = node . SelectNodes ( ".//li[contains(@class,'featuresListItem')]" ) ;
87+ if ( values . Count != 2 )
88+ return decimal . Zero ;
89+
90+ var value = HttpUtility . HtmlDecode ( values [ 0 ] . InnerText ) ;
7491
7592 var result = RegexMatchers . Layout ( ) . Match ( value ) ;
7693 if ( ! result . Success )
@@ -82,13 +99,13 @@ private static Layout ParseLayout(HtmlNode node)
8299 return LayoutExtensions . ToLayout ( layoutValue ) ;
83100 }
84101
85- private static string ParseAddress ( HtmlNode node ) => node . SelectSingleNode ( ".//a[contains(@class,\" product__link\" )]/strong" ) . InnerText ;
86-
87- private static Uri ParseWebUrl ( HtmlNode node ) => new ( node . SelectSingleNode ( ".//a[contains(@class,\" product__link\" )]" ) . GetAttributeValue ( "href" , string . Empty ) ) ;
88-
89102 private static decimal ParseFloorArea ( HtmlNode node )
90103 {
91- var value = ParseTitle ( node ) ;
104+ var values = node . SelectNodes ( ".//li[contains(@class,'featuresListItem')]" ) ;
105+ if ( values . Count != 2 )
106+ return decimal . Zero ;
107+
108+ var value = HttpUtility . HtmlDecode ( values [ ^ 1 ] . InnerText ) ;
92109
93110 var result = RegexMatchers . FloorArea ( ) . Match ( value ) ;
94111 if ( ! result . Success )
@@ -101,14 +118,20 @@ private static decimal ParseFloorArea(HtmlNode node)
101118 : decimal . Zero ;
102119 }
103120
/// <summary>
/// Extracts the image URL from the <c>srcset</c> attribute of the ad's image.
/// </summary>
/// <param name="node">Ad element to read from.</param>
/// <returns>
/// The URL captured from the URL-decoded <c>srcset</c> value, or <c>null</c> when
/// the image element or attribute is missing, the value contains no <c>url=</c>
/// parameter, or the captured text is not an absolute URI.
/// </returns>
private static Uri? ParseImageUrl(HtmlNode node)
{
    var image = node.SelectSingleNode(".//span[contains(@class,'image')]//img");
    var srcset = image?.GetAttributeValue("srcset", null);
    if (srcset is null)
        return null;

    var match = ImageUrlRegex().Match(HttpUtility.UrlDecode(srcset));
    if (!match.Success)
        return null;

    // TryCreate instead of the Uri constructor: a capture that is not a valid
    // absolute URI means "no image" rather than a UriFormatException.
    return Uri.TryCreate(match.Groups[1].Value, UriKind.Absolute, out var imageUrl)
        ? imageUrl
        : null;
}
114137}
0 commit comments