AnsweredAssumed Answered

Parsing a web page

Question asked by michaelkirwan on Apr 14, 2018
Latest reply on Apr 15, 2018 by fmpdude

Hi All

I hope you can help me. I have spent hours on this and can't seem to get it to work consistently, so I'm turning to you experts!

Below is a webpage I'm trying to scrape. I am only interested in scraping the information in RED below. Each piece of information can vary in length. Can anyone help me parse it? I was trying to use regular expressions, along with the position and middle functions, but failed miserably. Please help!

 

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="http://www.w3.org/1999/xhtml" >

<head><title>

An Post - Track &amp; Trace

</title><link href="include/css/pci_recaptcha.css" rel="stylesheet" type="text/css" media="all" /><link href="include/css/MasterPage.css" rel="stylesheet" type="text/css" media="all" /><link href="include/css/Controls.css" rel="stylesheet" type="text/css" media="all" />

 

<script type="text/javascript" src="include/scripts/jquery-1.7.2.min.js"></script>

<script src="include/scripts/jquery-ui-1.8.20.min.js" type="text/javascript"></script>

<link href="include/themes/anPost/jquery-ui-1.8.21.custom.css" rel="stylesheet" type="text/css" />

<script src="include/scripts/TTWebsite.js" type="text/javascript"></script>

<!-- Google Tag Manager: the inline script below makes sure window.dataLayer exists, pushes a 'gtm.js' event carrying a 'gtm.start' timestamp onto it, then creates an async script element pointing at https://www.googletagmanager.com/gtm.js?id=GTM-5XKMBJ and inserts it before the first script element in the document. -->

<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':

new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],

j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=

'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);

})(window,document,'script','dataLayer','GTM-5XKMBJ');</script>

<!-- End Google Tag Manager -->

 

 

<!-- Google Tag Manager (noscript) -->

<noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5XKMBJ"

height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>

<!-- End Google Tag Manager (noscript) -->

<noscript>

  <meta http-equiv="refresh" content="0;url=NoScript.aspx" />

</noscript>

<script type="text/javascript">

  function OnInit() {

   InitForm("ResultList", "MainContent_RequestDelivery", null);

 

   if ($("input[type='checkbox']").length == 0) {

    var btn = $("#MainContent_RequestDelivery");

    btn.css("display", "none");

    btn.after("<br/>");

   }

  }

  function OnValidate() {

   var nSel = $("#ResultList input:checked").length;

   if (nSel > 10) {

    $("#errorMsg").html('<p>A maximum of 10 items can be selected for multiple Factsback.</p><p>Please uncheck a few items and try again...</p>');

    return false;

   }

   if (nSel < 1) {

    $("#errorMsg").html('<p>Please check delivery box item(s) and try again...</p>');

    return false;

   }

   return true;

  }

 

</script>

 

<script type="text/javascript">

  var scgHost = (("https:" == document.location.protocol) ? "https://cookiepolicy.anpost.ie" : "http://cookiepolicy.anpost.ie");

  document.write(unescape("%3Cscript src='" + scgHost + "/include/script' type='text/javascript'%3E%3C/script%3E"));

</script>

</head>

<body>

 

<div id="topBanner"></div>

<div id="topwrapper">

  <div id="top">

   <a href="#"><img class="logo" title="An Post" alt="An Post" src="include/images/logo.gif" /></a>

  </div>

</div>

<div id="navwrapper">

  <div id="nav"></div>

</div>

<div id="wrapper">

  <div id="body">

   <div id="breadcrum">

    <p>

     You are here: <a href="http://www.anpost.ie/AnPost/">Home</a> / Track Your Item

    </p>

      </div>

    

<div id="nonav">

  <div class="box-top-nonav">

   <img width="750" height="39" alt="Sitemap" src="include/images/box-header-trackyouritemwid.gif" />

   <div class="box-top-nonav-content">

    <form method="post" action="./TrackingResults.aspx?rtt=0&amp;site=website&amp;trackcode=rl694835714ie" id="ResultList">

<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKMTIzOTEwNzA0NA9kFgJmD2QWBGYPZBYCAgcPFgIeBFRleHQFoQU8IS0tIEdvb2dsZSBUYWcgTWFuYWdlciAtLT4NCjxzY3JpcHQ+KGZ1bmN0aW9uKHcsZCxzLGwsaSl7d1tsXT13W2xdfHxbXTt3W2xdLnB1c2goeydndG0uc3RhcnQnOg0KbmV3IERhdGUoKS5nZXRUaW1lKCksZXZlbnQ6J2d0bS5qcyd9KTt2YXIgZj1kLmdldEVsZW1lbnRzQnlUYWdOYW1lKHMpWzBdLA0Kaj1kLmNyZWF0ZUVsZW1lbnQocyksZGw9bCE9J2RhdGFMYXllcic/JyZsPScrbDonJztqLmFzeW5jPXRydWU7ai5zcmM9DQonaHR0cHM6Ly93d3cuZ29vZ2xldGFnbWFuYWdlci5jb20vZ3RtLmpzP2lkPScraStkbDtmLnBhcmVudE5vZGUuaW5zZXJ0QmVmb3JlKGosZik7DQp9KSh3aW5kb3csZG9jdW1lbnQsJ3NjcmlwdCcsJ2RhdGFMYXllcicsJ0dUTS01WEtNQkonKTs8L3NjcmlwdD4NCjwhLS0gRW5kIEdvb2dsZSBUYWcgTWFuYWdlciAtLT4NCg0KDQo8IS0tIEdvb2dsZSBUYWcgTWFuYWdlciAobm9zY3JpcHQpIC0tPg0KPG5vc2NyaXB0PjxpZnJhbWUgc3JjPSJodHRwczovL3d3dy5nb29nbGV0YWdtYW5hZ2VyLmNvbS9ucy5odG1sP2lkPUdUTS01WEtNQkoiDQpoZWlnaHQ9IjAiIHdpZHRoPSIwIiBzdHlsZT0iZGlzcGxheTpub25lO3Zpc2liaWxpdHk6aGlkZGVuIj48L2lmcmFtZT48L25vc2NyaXB0Pg0KPCEtLSBFbmQgR29vZ2xlIFRhZyBNYW5hZ2VyIChub3NjcmlwdCkgLS0+ZAIBD2QWAgIBD2QWBAIDDzwrABECAA8WBB4LXyFEYXRhQm91bmRnHgtfIUl0ZW1Db3VudAIBZAEQFgAWABYAFgJmD2QWBAIBD2QWDmYPZBYCZg8PFgQfAAUlUkwmbmJzcDs2OTQmbmJzcDs4MzUmbmJzcDs3MTQmbmJzcDtJRR4LTmF2aWdhdGVVcmwFK1RyYWNrU3RhdHVzLmFzcHg/aXRlbT1STDY5NDgzNTcxNElFJnNlbmRlcj1kZAIBDw8WAh8ABQYmbmJzcDtkZAICDw8WAh8ABQlERUxJVkVSRURkZAIDD2QWAmYPFQEhTElNRVJJQ0sgRC5TLlUuPGJyIC8+IENPIExJTUVSSUNLZAIEDw8WAh8ABRMxMS1BcHItMTg8YnIvPjExOjAwZGQCBQ8PFgIfAAUDTUJSZGQCBg8PFgIfAAUHSVJFTEFORGRkAgIPDxYCHgdWaXNpYmxlaGRkAgcPDxYCHwRoZBYCAgEPDxYCHwMFE34vRGVsaXZlcnlDb25mLmFzcHhkZBgCBR5fX0NvbnRyb2xzUmVxdWlyZVBvc3RCYWNrS2V5X18WAQUiY3RsMDAkTWFpbkNvbnRlbnQkR3JpZCRjdGwwMiRjdGwwMQUWY3RsMDAkTWFpbkNvbnRlbnQkR3JpZA88KwAMAQgCAWTflefKf1oSGYdiiB5RpAyB3+i+Og==" />

 

<input type="hidden" name="__VIEWSTATEGENERATOR" id="__VIEWSTATEGENERATOR" value="DF5F4837" />

<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="/wEWAwKQ19PkCwKptZXABQLOpsPGCkImBfMOa1og2Jbix+rpEaM2p4tH" /> 

                <h2>Tracking Results</h2>

  

    <p>

     The following are the results(s) of the item(s) for which we presently have an electronic file.

     For a full history of the item please click on the item number.

    </p>

    <br/>

    <p>

     If you require a delivery record, please select the item(s), placing a tick in the Delivery Record Check Box,

     and click on Request Delivery Record.

    </p>

    <br/>

    <p>

   

     <div>

<table class="resultView" cellspacing="0" rules="all" border="1" id="MainContent_Grid">

  <tr class="resultHeader">

   <th scope="col">An Post No.</th><th scope="col">Sender No.</th><th scope="col">Status</th><th scope="col">From</th><th scope="col">Date</th><th scope="col">Receiver Name</th><th scope="col">Country of Origin</th><th scope="col">Delivery Record?</th>

  </tr><tr class="resultRow">

   <td width="16%"><a href="TrackStatus.aspx?item=RL694835714IE&amp;sender=">RL 694 835 714 IE</a></td><td align="center" width="16%"> </td><td width="18%">DELIVERED</td><td align="center" width="18%">

                            LIMERICK D.S.U.<br /> CO LIMERICK

                            </td><td align="center" width="11%">11-Apr-18<br/>11:00</td><td align="center" width="14%">MBR</td><td align="center" width="12%">IRELAND</td><td align="center">

         <span class="rdCheck" ItemStyle-Width="7%"><input id="MainContent_Grid_ctl01_0" type="checkbox" name="ctl00$MainContent$Grid$ctl02$ctl01" /></span>                                

        </td>

  </tr>

</table>

</div>

    </p>

    <div style="text-align:right;margin:10px 10px 10px 0;">

 

  

     <a id="MainContent_RequestDelivery" class="button" NavigateUrl="~/" href="javascript:__doPostBack(&#39;ctl00$MainContent$RequestDelivery&#39;,&#39;&#39;)">

      Request Delivery Record

     </a>

 

    </div>

 

  

              

 

              

    <br/>

    <p>

    Please note that Track &amp; Trace records are available on our website for 16 weeks from the date of despatch.

    </p>

    <br/>

    <p>

            If the item history is incomplete or is not presently displayed, please contact our Customer Service section

            at <a href="mailto:customer.services@anpost.ie">customer.services@anpost.ie</a> or on 01-705 7600 between

      9 a.m. and 5:30 p.m. Monday to Friday. Customers calling from outside the Republic of Ireland should dial

      +353-1-705 7600. Delivery records are only available for items delivered within the Republic of Ireland.        

    </p>

    <br/>

    <p>

     For <a href="http://www.anpost.ie/AnPost/MainContent/Business+Customers/Sending+Mail/Speed/Same+day+delivery">Same Day delivery</a> items only,

     please call 01-8550000 between 8am and 6pm. The Same Day service relates to deliveries in specific Dublin areas only.

    </p>

                <br />

                <p>

                    Information provided by An Post through the An Post track and trace system is solely provided and may only be used to confirm delivery of a particular postal packet

                    which has been conveyed to An Post for delivery. It is an offence under Section 53 of the Communications Regulation (Postal Services) Act 2011 for any person to use for

                    any purpose any other information obtained from a postal packet.

                </p>

    <br/>

    <p>

     <a class="button" href="TrackList.aspx">Back to Track &amp; Trace</a>

    </p>

    </form>

    </div>

  </div>

</div>

 

      <div id="rightcol">

      

<div id="ToolsPageLinks_ControlPanel">

 

</div>

 

      </div> 

 

   <div class="clear"> </div>

 

  </div>

</div>

  <div id="bottomnavwrapper">

    <div id="bottomnav">

    </div>

  </div>

  <div id="footer">

    <img class="right" title="An Post" height="23" alt="An Post" src="include/images/logo-small.gif" width="57" />

      <ul>

        <li>All content &copy; <b>An Post 2018</b></li>

        <li><a href="http://www.anpost.ie/AnPost/Web+Policy.htm" title="Web Policy">Web Policy</a></li>

    <li><a href="http://www.anpost.ie/AnPost/Web+Policy.htm#cookie" title="Cookies">Cookies</a></li>

        <li><a href="http://www.anpost.ie/AnPost/Postal+Terms+and+Conditions.htm" title="Postal Terms &amp; Conditions">Postal Terms &amp; Conditions</a></li>

    <br/>

        <li>Site by: <a href="http://www.anpost.ie" target="_blank">An Post, Group Technology Solutions</a></li>

     </ul>

  </div>

 

<div class="validationError">

  <br/>

  <p id="errorMsg"></p>

</div>

 

</body>

</html>

Outcomes