* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
public class TextIndex {
private static final int REC_SIZE=8;
-
+
private File file;
private DataFile dataFile=null;
-
+
public TextIndex(File theFile) {
file = theFile;
}
-
+
public void open() throws IOException {
dataFile = new DataFile(file,"r");
dataFile.open();
}
-
+
public void close() throws IOException {
- if(dataFile!=null) {
+ if (dataFile!=null) {
dataFile.close();
dataFile=null;
}
public int find(Object key, AbsData.Reuse reuse, int offset) throws IOException {
return find(key,reuse.tokenData,reuse.getFieldData(),offset);
}
-
+
public int find(Object key, DataFile.Token dtok, Field df, int offset) throws IOException {
- if(dataFile==null) {
+ if (dataFile==null) {
throw new IOException("File not opened");
}
long hash = hashToLong(key.hashCode());
IntBuffer tib = ttok.getIntBuffer();
long lhash;
int curr;
- while((max-min)>100) {
- ttok.pos((curr=(min+(max-min)/2))*REC_SIZE);
+ while ((max-min)>100) {
+ curr=(min+(max-min)/2);
+ ttok.pos(curr*REC_SIZE);
tib.rewind();
lhash = hashToLong(tib.get());
- if(lhash<hash) {
+ if (lhash<hash) {
min=curr+1;
- } else if(lhash>hash) {
+ } else if (lhash>hash) {
max=curr-1;
} else {
min=curr-40;
break;
}
}
-
+
List<Integer> entries = new ArrayList<>();
- for(int i=min;i<=max;++i) {
+ for (int i=min;i<=max;++i) {
ttok.pos(i*REC_SIZE);
tib.rewind();
lhash = hashToLong(tib.get());
- if(lhash==hash) {
+ if (lhash==hash) {
entries.add(tib.get());
- } else if(lhash>hash) {
+ } else if (lhash>hash) {
break;
}
}
-
- for(Integer i : entries) {
+
+ for (Integer i : entries) {
dtok.pos(i);
- if(df.at(offset).equals(key)) {
+ if (df.at(offset).equals(key)) {
return i;
}
}
return -1;
}
-
+
/*
* Have to change Bytes into a Long, to avoid the inevitable signs in the Hash
*/
private static long hashToLong(int hash) {
long rv;
- if(hash<0) {
+ if (hash<0) {
rv = 0xFFFFFFFFL & hash;
} else {
rv = hash;
}
return rv;
}
-
+
public void create(final Trans trans,final DataFile data, int maxLine, char delim, int fieldOffset, int skipLines) throws IOException {
FileChannel fos;
-
+
List<Idx> list = new LinkedList<>(); // Some hashcodes will double... DO NOT make a set
TimeTaken tt2 = trans.start("Open Files", Env.SUB);
RandomAccessFile raf=null;
} finally {
tt2.done();
}
-
+
try {
-
- Token t = data.new Token(maxLine);
+
+ Token t = data.new Token(maxLine);
Field f = t.new Field(delim);
-
+
int count = 0;
- if(skipLines>0) {
+ if (skipLines>0) {
trans.info().log("Skipping",skipLines,"line"+(skipLines==1?" in":"s in"),data.file().getName());
}
- for(int i=0;i<skipLines;++i) {
+ for (int i=0;i<skipLines;++i) {
t.nextLine();
}
tt2 = trans.start("Read", Env.SUB);
try {
- while(t.nextLine()) {
+ while (t.nextLine()) {
list.add(new Idx(f.at(fieldOffset),t.pos()));
++count;
}
try {
ByteBuffer bb = ByteBuffer.allocate(8*1024);
IntBuffer ib = bb.asIntBuffer();
- for(Idx idx : list) {
- if(!ib.hasRemaining()) {
+ for (Idx idx : list) {
+ if (!ib.hasRemaining()) {
fos.write(bb);
ib.clear();
bb.rewind();
}
} finally {
fos.close();
- }
+ }
} finally {
- if(raf!=null) {
+ if (raf!=null) {
raf.close(); // closed by fos
}
}
}
-
+
public class Iter {
private int idx;
private Token t;
end = -1L;
}
}
-
+
public int next() {
t.pos(idx);
ib.clear();
return idx<end;
}
}
-
+
private static class Idx implements Comparable<Idx> {
public int hash, pos;
public Idx(Object obj, int pos) {
hash = obj.hashCode();
this.pos = pos;
}
-
+
@Override
public int compareTo(Idx ib) {
long a = hashToLong(hash);
*/
@Override
public boolean equals(Object o) {
- if(o!=null && o instanceof Idx) {
+ if (o!=null && o instanceof Idx) {
return hash == ((Idx)o).hash;
}
return false;