Skip to content

Content of file DefaultDownloader.java.html

<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html xmlns="http://www.w3.org/1999/xhtml" lang="en"><head><meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/><link rel="stylesheet" href="../../jacoco-resources/report.css" type="text/css"/><link rel="shortcut icon" href="../../jacoco-resources/report.gif" type="image/gif"/><title>DefaultDownloader.java</title><link rel="stylesheet" href="../../jacoco-resources/prettify.css" type="text/css"/><script type="text/javascript" src="../../jacoco-resources/prettify.js"></script></head><body onload="window['PR_TAB_WIDTH']=4;prettyPrint()"><div class="breadcrumb" id="breadcrumb"><span class="info"><a href="../../jacoco-sessions.html" class="el_session">Sessions</a></span><a href="../../index.html" class="el_report">blueMarine II :: Headless Service</a> &gt; <a href="../index.html" class="el_bundle">it-tidalwave-bluemarine2-downloader</a> &gt; <a href="index.source.html" class="el_package">it.tidalwave.bluemarine2.downloader.impl</a> &gt; <span class="el_source">DefaultDownloader.java</span></div><h1>DefaultDownloader.java</h1><pre class="source lang-java linenums"><span class="nc" id="L1">/*</span>
 * *********************************************************************************************************************
 *
 * blueMarine II: Semantic Media Centre
 * http://tidalwave.it/projects/bluemarine2
 *
 * Copyright (C) 2015 - 2021 by Tidalwave s.a.s. (http://tidalwave.it)
 *
 * *********************************************************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the &quot;License&quot;); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations under the License.
 *
 * *********************************************************************************************************************
 *
 * git clone https://bitbucket.org/tidalwave/bluemarine2-src
 * git clone https://github.com/tidalwave-it/bluemarine2-src
 *
 * *********************************************************************************************************************
 */
package it.tidalwave.bluemarine2.downloader.impl;

import javax.annotation.Nonnull;
import javax.annotation.PostConstruct;
import javax.inject.Inject;
import java.util.Date;
import java.util.List;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.ProtocolException;
import org.apache.http.client.RedirectStrategy;
import org.apache.http.client.cache.CacheResponseStatus;
import org.apache.http.client.cache.HttpCacheContext;
import org.apache.http.client.cache.HttpCacheEntry;
import org.apache.http.client.cache.Resource;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.cache.CacheConfig;
import org.apache.http.impl.client.cache.CachingHttpClients;
import org.apache.http.impl.client.cache.HeapResource;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.message.BasicHeader;
import org.apache.http.protocol.HttpContext;
import it.tidalwave.util.NotFoundException;
import it.tidalwave.util.annotation.VisibleForTesting;
import it.tidalwave.messagebus.MessageBus;
import it.tidalwave.messagebus.annotation.ListensTo;
import it.tidalwave.messagebus.annotation.SimpleMessageSubscriber;
import it.tidalwave.bluemarine2.message.PowerOnNotification;
import it.tidalwave.bluemarine2.downloader.DownloadComplete;
import it.tidalwave.bluemarine2.downloader.DownloadComplete.Origin;
import it.tidalwave.bluemarine2.downloader.DownloadRequest;
import lombok.Cleanup;
import lombok.extern.slf4j.Slf4j;
import static it.tidalwave.bluemarine2.downloader.DownloaderPropertyNames.CACHE_FOLDER_PATH;

/***********************************************************************************************************************
 *
 * @author  Fabrizio Giudici
 *
 **********************************************************************************************************************/
<span class="nc" id="L75">@SimpleMessageSubscriber @Slf4j</span>
<span class="nc" id="L76">public class DefaultDownloader</span>
  {
    // Message bus on which onDownloadRequest() publishes DownloadComplete messages.
    @Inject
    private MessageBus messageBus;

    // Cache storage handed to the caching HTTP client in initialize(); its folder path is set by
    // onPowerOnNotification() and entries are also stored explicitly by onDownloadRequest().
    @Inject
    private SimpleHttpCacheStorage cacheStorage;

    // Connection pool, created and sized in initialize().
    private PoolingHttpClientConnectionManager connectionManager;

    // Cache settings, built in initialize().
    private CacheConfig cacheConfig;

    // Caching HTTP client, built in initialize() and used by onDownloadRequest() to run the GET requests.
    private CloseableHttpClient httpClient;

/*
 * Response interceptor that removes the Expires, Pragma and Cache-Control headers from a response and then adds a
 * single Expires header dated Mon, 31 Dec 2099. It is installed on the HTTP client in initialize().
 * NOTE(review): presumably the intent is to make every response cacheable regardless of the server's own caching
 * directives - confirm.
 */
<span class="nc" id="L90">private final HttpResponseInterceptor killCacheHeaders = (HttpResponse</span>
 response, HttpContext context) -&gt;
 {
<span class="nc" id="L93"> response.removeHeaders(&quot;Expires&quot;);</span>
<span class="nc" id="L94"> response.removeHeaders(&quot;Pragma&quot;);</span>
<span class="nc" id="L95"> response.removeHeaders(&quot;Cache-Control&quot;);</span>
<span class="nc" id="L96"> response.addHeader(&quot;Expires&quot;, &quot;Mon, 31 Dec 2099 00:00:00 GMT&quot;);</span>
<span class="nc" id="L97"> };</span>

    /*******************************************************************************************************************
     *
     * Redirect strategy that never follows a redirect: isRedirected() always answers false and getRedirect() always
     * returns null. It is installed on the HTTP client in initialize(); status 303 responses are instead handled by
     * hand in onDownloadRequest().
     * NOTE(review): presumably the client never calls getRedirect() when isRedirected() is false - confirm.
     *
     ******************************************************************************************************************/
    // FIXME: this is because there's a fix, and we explicitly save stuff in the cache - see below
<span class="nc" id="L105">    private final RedirectStrategy dontFollowRedirect = new RedirectStrategy()</span>
<span class="nc" id="L106">      {</span>
        @Override
        public boolean isRedirected (HttpRequest request, HttpResponse response, HttpContext context)
          throws ProtocolException
          {
<span class="nc" id="L111">            return false;</span>
          }

        @Override
        public HttpUriRequest getRedirect (HttpRequest request, HttpResponse response, HttpContext context)
          throws ProtocolException
          {
<span class="nc" id="L118">            return null;</span>
          }
      };

    /*******************************************************************************************************************
     *
     * Builds the HTTP infrastructure used by this downloader: a pooled connection manager (at most 200 connections in
     * total and 20 per route), the cache configuration and the caching HTTP client bound to the injected cache
     * storage. Annotated with @PostConstruct; marked @VisibleForTesting so that tests can call it directly.
     *
     ******************************************************************************************************************/
    @PostConstruct
    @VisibleForTesting void initialize()
      {
<span class="nc" id="L130">        connectionManager = new PoolingHttpClientConnectionManager();</span>
<span class="nc" id="L131">        connectionManager.setMaxTotal(200);</span>
<span class="nc" id="L132">        connectionManager.setDefaultMaxPerRoute(20);</span>

        // Cache policy: 303 responses may be cached, the entry count and object size limits are set to
        // Integer.MAX_VALUE, the cache is declared as non-shared and heuristic caching is enabled.
<span class="nc" id="L134">        cacheConfig = CacheConfig.custom()</span>
<span class="nc" id="L135">                .setAllow303Caching(true)</span>
<span class="nc" id="L136">                .setMaxCacheEntries(Integer.MAX_VALUE)</span>
<span class="nc" id="L137">                .setMaxObjectSize(Integer.MAX_VALUE)</span>
<span class="nc" id="L138">                .setSharedCache(false)</span>
<span class="nc" id="L139">                .setHeuristicCachingEnabled(true)</span>
<span class="nc" id="L140">                .build();</span>
        // Client: uses the injected cache storage, does not follow redirects by itself (dontFollowRedirect), sends
        // an Accept: application/n3 default header and runs killCacheHeaders as a response interceptor.
<span class="nc" id="L141">        httpClient = CachingHttpClients.custom()</span>
<span class="nc" id="L142">                .setHttpCacheStorage(cacheStorage)</span>
<span class="nc" id="L143">                .setCacheConfig(cacheConfig)</span>
<span class="nc" id="L144">                .setRedirectStrategy(dontFollowRedirect)</span>
<span class="nc" id="L145">                .setUserAgent(&quot;blueMarine (fabrizio.giudici@tidalwave.it)&quot;)</span>
<span class="nc" id="L146">                .setDefaultHeaders(List.of(new BasicHeader(&quot;Accept&quot;, &quot;application/n3&quot;)))</span>
<span class="nc" id="L147">                .setConnectionManager(connectionManager)</span>
<span class="nc" id="L148">                .addInterceptorFirst(killCacheHeaders) // FIXME: only if  explicitly configured</span>
<span class="nc" id="L149">         .build();</span>
<span class="nc" id="L150">      }</span>

    /*******************************************************************************************************************
     *
     * Reacts to the power-on notification by logging it and pointing the cache storage at the folder given by the
     * CACHE_FOLDER_PATH property of the notification.
     *
     * @param   notification        the power-on notification carrying the properties
     * @throws  NotFoundException   presumably when the CACHE_FOLDER_PATH property is missing - confirm against the
     *                              properties API
     *
     ******************************************************************************************************************/
    @VisibleForTesting void onPowerOnNotification (@ListensTo @Nonnull final PowerOnNotification notification)
      throws NotFoundException
      {
<span class="nc" id="L160">        log.info(&quot;onPowerOnNotification({})&quot;, notification);</span>
<span class="nc" id="L161">        cacheStorage.setFolderPath(notification.getProperties().get(CACHE_FOLDER_PATH));</span>
<span class="nc" id="L162">      }</span>

    /*******************************************************************************************************************
     *
     *
     *
     ******************************************************************************************************************/
    @VisibleForTesting void onDownloadRequest (@ListensTo @Nonnull final DownloadRequest request)
      throws URISyntaxException
      {
        try
          {
<span class="nc" id="L174">            log.info(&quot;onDownloadRequest({})&quot;, request);</span>

<span class="nc" id="L176">            URL url = request.getUrl();</span>

            for (;;)
              {
<span class="nc" id="L180">                final HttpCacheContext context = HttpCacheContext.create();</span>
<span class="nc bnc" id="L181" title="All 2 branches missed.">                @Cleanup final CloseableHttpResponse response = httpClient.execute(new HttpGet(url.toURI()), context);</span>
<span class="nc" id="L182">                final byte[] bytes = bytesFrom(response);</span>
<span class="nc" id="L183">                final CacheResponseStatus cacheResponseStatus = context.getCacheResponseStatus();</span>
<span class="nc" id="L184">                log.debug(&quot;&gt;&gt;&gt;&gt; cacheResponseStatus: {}&quot;, cacheResponseStatus);</span>

<span class="nc bnc" id="L186" title="All 2 branches missed.">                final Origin origin = cacheResponseStatus.equals(CacheResponseStatus.CACHE_HIT) ? Origin.CACHE</span>
<span class="nc" id="L187">                                                                                                : Origin.NETWORK;</span>

                // FIXME: shouldn't do this by myself
                // FIXME: upon configuration, everything should be cached (needed for supporting integration tests)
<span class="nc bnc" id="L191" title="All 4 branches missed.">                if (!origin.equals(Origin.CACHE) &amp;&amp; List.of(200, 303).contains(response.getStatusLine().getStatusCode()))</span>
                  {
<span class="nc" id="L193">                    final Date date = new Date();</span>
<span class="nc" id="L194">                    final Resource resource = new HeapResource(bytes);</span>
<span class="nc" id="L195">                    cacheStorage.putEntry(url.toExternalForm(),</span>
<span class="nc" id="L196">                            new HttpCacheEntry(date, date, response.getStatusLine(), response.getAllHeaders(), resource));</span>
                  }

                // FIXME: if the redirect were enabled, we could drop this check
if the redirect were enabled, we could drop this check
<span class="nc bnc" id="L200" title="All 2 branches missed."> if (request.isOptionPresent(DownloadRequest.Option.FOLLOW_REDIRECT)</span> <span class="nc bnc" id="L201" title="All 2 branches missed."> &amp;&amp; response.getStatusLine().getStatusCode() == 303) // SEE_OTHER FIXME</span> { <span class="nc" id="L203"> url = new URL(response.getFirstHeader(&quot;Location&quot;).getValue());</span> <span class="nc" id="L204"> log.info(&quot;&gt;&gt;&gt;&gt; following 'see also' to {} ...&quot;, url);</span> } else { <span class="nc" id="L208"> messageBus.publish(new DownloadComplete(request.getUrl(),</span> <span class="nc" id="L209"> response.getStatusLine().getStatusCode(),</span> bytes, origin)); <span class="nc" id="L212"> return;</span> } <span class="nc" id="L214"> }</span> } <span class="nc" id="L216"> catch (IOException e)</span> { <span class="nc" id="L218"> log.error(&quot;{}: {}&quot;, request.getUrl(), e.toString());</span> <span class="nc" id="L219"> messageBus.publish(new DownloadComplete(request.getUrl(), -1, new byte[0], Origin.NETWORK));</span> } <span class="nc" id="L221"> }</span> /******************************************************************************************************************* * * * ******************************************************************************************************************/ @Nonnull private byte[] bytesFrom (@Nonnull final HttpResponse response) throws IOException { <span class="nc" id="L232"> final ByteArrayOutputStream baos = new ByteArrayOutputStream();</span> <span class="nc bnc" id="L234" title="All 2 branches missed."> if (response.getEntity() != null)</span> { <span class="nc" id="L236"> response.getEntity().writeTo(baos);</span> } <span class="nc" id="L239"> return baos.toByteArray();</span> } } </pre><div class="footer"><span class="right">Created with <a href="http://www.jacoco.org/jacoco">JaCoCo</a> 0.8.7.202105040129</span></div></body></html>