Skip to content

Method: withLoaderConfig(Map)

1: /*
2: * *********************************************************************************************************************
3: *
4: * blueMarine II: Semantic Media Centre
5: * http://tidalwave.it/projects/bluemarine2
6: *
7: * Copyright (C) 2015 - 2021 by Tidalwave s.a.s. (http://tidalwave.it)
8: *
9: * *********************************************************************************************************************
10: *
11: * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
12: * the License. You may obtain a copy of the License at
13: *
14: * http://www.apache.org/licenses/LICENSE-2.0
15: *
16: * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
17: * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
18: * specific language governing permissions and limitations under the License.
19: *
20: * *********************************************************************************************************************
21: *
22: * git clone https://bitbucket.org/tidalwave/bluemarine2-src
23: * git clone https://github.com/tidalwave-it/bluemarine2-src
24: *
25: * *********************************************************************************************************************
26: */
27: package it.tidalwave.bluemarine2.mediascanner.impl.tika;
28:
import javax.annotation.Nonnegative;
import javax.annotation.Nonnull;
import javax.annotation.concurrent.Immutable;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.stream.Stream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.tika.exception.TikaException;
import org.xml.sax.SAXException;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.ToString;
import lombok.With;
import lombok.extern.slf4j.Slf4j;
import static java.nio.file.FileVisitOption.FOLLOW_LINKS;
48:
49: /***********************************************************************************************************************
50: *
51: * A temporary class to work with Tika, for performing tests. Should be merged to DefaultMediaScanner.
52: * TODO: This class could be kept (without references to Tika) as a SimpleMediaScanner that would allow to write simpler
53: * tests.
54: *
55: * @author Fabrizio Giudici
56: *
57: **********************************************************************************************************************/
58: @Slf4j @RequiredArgsConstructor
59: class Scanner
60: {
61: @Nonnull
62: private final TikaMetadataLoader metadataLoader;
63:
64: /*******************************************************************************************************************
65: *
66: * Parameters for the scanner.
67: *
68: ******************************************************************************************************************/
69: @Getter @ToString @RequiredArgsConstructor @Immutable
70: public static class Params
71: {
72: protected Params()
73: {
74: this((__ -> true), Integer.MAX_VALUE, TikaMetadataLoader.Config.DEFAULT_ASSOCIATIONS);
75: }
76:
77: /** A predicate to filter files to scan. */
78: @Nonnull @With
79: private final Predicate<Path> filter;
80:
81: /** The maximum number of files to scan. */
82: @Nonnegative @With
83: private final int limit;
84:
85: /** A map that associates TikaMetadataLoader.Config to a specific kind of file (described by file extension
86: * and/or MIME type). Default associations can be overridden by calling
87: * {@link #withMetadataLoaderConfig(ExtensionAndMimeType, TikaMetadataLoader.Config)} */
88:• @Nonnull @With
89: private final Map<ExtensionAndMimeType, TikaMetadataLoader.Config> loaderConfig;
90:
91: /***************************************************************************************************************
92: *
93: * Creates/overrides the metadata loader configuration for the given kind of file.
94: *
95: * @param extensionAndMimeType the kind of file
96: * @param config the loader configuration
97: * @return the updated parameters
98: *
99: **************************************************************************************************************/
100: @Nonnull
101: public Params withMetadataLoaderConfig (@Nonnull final ExtensionAndMimeType extensionAndMimeType,
102: @Nonnull final TikaMetadataLoader.Config config)
103: {
104: final Map<ExtensionAndMimeType, TikaMetadataLoader.Config> clone = new HashMap<>(this.loaderConfig);
105: clone.put(extensionAndMimeType, config);
106: return new Params(filter, limit, clone);
107: }
108:
109: /***************************************************************************************************************
110: *
111: * Retrieves the metadata loader configuration for the given file.
112: *
113: * @param path the file to analyze
114: * @return the metadata loader configuration
115: *
116: **************************************************************************************************************/
117: @Nonnull
118: public TikaMetadataLoader.Config loaderConfigFor (@Nonnull final Path path)
119: {
120: return loaderConfig.entrySet()
121: .stream()
122: .filter(e -> e.getKey().matches(path))
123: .findFirst()
124: .map(Map.Entry::getValue)
125: .orElse(TikaMetadataLoader.Config.DEFAULT);
126: }
127:
128: /***************************************************************************************************************
129: *
130: * Returns a predicate that matches the given extension.
131: *
132: * @param extension the extension
133: * @return the predicate
134: *
135: **************************************************************************************************************/
136: @Nonnull
137: public static Predicate<Path> extensionFilter (@Nonnull final String extension)
138: {
139: return p -> p.getFileName().toString().toLowerCase().endsWith("." + extension);
140: }
141: }
142:
143: /*******************************************************************************************************************
144: *
145: * Creates default parameters.
146: *
147: * @return default parameters
148: *
149: ******************************************************************************************************************/
150: @Nonnull
151: public static Params params()
152: {
153: return new Params();
154: }
155:
156: /*******************************************************************************************************************
157: *
158: * Scans a folder and applies the given metadata processor.
159: *
160: * @param basePath the path of the base folder to scan
161: * @param params the parameters of the scan
162: * @param processor the metadata processor
163: *
164: ******************************************************************************************************************/
165: public void scan (@Nonnull final Path basePath,
166: @Nonnull final Params params,
167: @Nonnull final Consumer<MetadataWithPath> processor)
168: throws IOException
169: {
170: log.info("Scanning {}", basePath);
171:
172: try (final Stream<Path> dirStream = Files.walk(basePath, FOLLOW_LINKS))
173: {
174: dirStream.parallel().filter(params.getFilter()).limit(params.getLimit()).forEach(path ->
175: {
176: try
177: {
178: final TikaMetadataLoader.Config loaderConfig = params.loaderConfigFor(path);
179: final MetadataWithPath metadata = metadataLoader.loadMetadata(path, loaderConfig);
180: processor.accept(metadata);
181: }
182: catch (TikaException | IOException | SAXException e)
183: {
184: log.error("While processing {}: {}", path, e.toString());
185: }
186: });
187: }
188: }
189: }