1 # Licensed to the Apache Software Foundation (ASF) under one or more
2 # contributor license agreements. See the NOTICE file distributed with
3 # this work for additional information regarding copyright ownership.
4 # The ASF licenses this file to You under the Apache License, Version 2.0
5 # (the "License"); you may not use this file except in compliance with
6 # the License. You may obtain a copy of the License at
8 # http://www.apache.org/licenses/LICENSE-2.0
10 # Unless required by applicable law or agreed to in writing, software
11 # distributed under the License is distributed on an "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 # See the License for the specific language governing permissions and
14 # limitations under the License.
18 from .loader import Loader
19 from .exceptions import LoaderException, DocumentNotFoundException
class FileTextLoader(Loader):
    """
    ARIA file text loader.

    Extracts a text document from a file. The default encoding is UTF-8, but any other supported
    encoding can be specified instead.

    :param context: loading context; stored on the instance as ``context``
    :param path: path of the file to read
    :param encoding: name of the text encoding used to decode the file (defaults to ``utf-8``)
    """

    # NOTE(review): the ``open`` / ``close`` / ``load`` method names follow the usual loader
    # life cycle - confirm them against the ``Loader`` base class.

    def __init__(self, context, path, encoding='utf-8'):
        self.context = context
        self.path = path
        self.encoding = encoding
        # Handle of the opened file; stays ``None`` until ``open`` succeeds.
        self._file = None

    def open(self):
        """
        Opens the file for decoded, line-buffered text reading.

        :raises DocumentNotFoundException: if the file does not exist
        :raises LoaderException: on any other failure to open the file
        """
        import errno  # local import: only needed to recognise "no such file" errors

        try:
            self._file = codecs.open(self.path, mode='r', encoding=self.encoding, buffering=1)
        except IOError as e:
            # A missing file gets its own exception type so callers can tell it apart
            # from other I/O failures.
            if e.errno == errno.ENOENT:
                raise DocumentNotFoundException('file not found: "%s"' % self.path, cause=e)
            raise LoaderException('file I/O error: "%s"' % self.path, cause=e)
        except Exception as e:
            raise LoaderException('file error: "%s"' % self.path, cause=e)

    def close(self):
        """
        Closes the file if it has been opened; does nothing otherwise.

        :raises LoaderException: if closing the file fails
        """
        if self._file is None:
            return
        try:
            self._file.close()
        except IOError as e:
            raise LoaderException('file I/O error: "%s"' % self.path, cause=e)
        except Exception as e:
            raise LoaderException('file error: "%s"' % self.path, cause=e)

    def load(self):
        """
        Reads and returns the entire decoded content of the opened file.

        :returns: the file content, or ``None`` if the file has not been opened
        :raises LoaderException: if reading or decoding the file fails
        """
        if self._file is None:
            return None
        try:
            return self._file.read()
        except IOError as e:
            raise LoaderException('file I/O error: "%s"' % self.path, cause=e)
        except Exception as e:
            # Same message format as the other "file error" handlers in this class.
            raise LoaderException('file error: "%s"' % self.path, cause=e)