// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "brightray/browser/devtools_file_system_indexer.h"

#include <stddef.h>

#include <algorithm>
#include <iterator>
#include <set>

#include "base/bind.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_util.h"
#include "base/files/file_util_proxy.h"
#include "base/lazy_instance.h"
#include "base/logging.h"
#include "base/stl_util.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
#include "content/public/browser/browser_thread.h"

using base::Bind;
using base::Callback;
using base::FileEnumerator;
using base::FilePath;
using base::Time;
using base::TimeDelta;
using base::TimeTicks;
using content::BrowserThread;
using std::map;
using std::set;
using std::string;
using std::vector;

namespace brightray {

namespace {

// A trigram is identified by a single integer id derived from its three
// trigram characters (see TrigramAtIndex).
using Trigram = int32_t;
// One symbol of the reduced trigram alphabet (or one of the sentinels below).
using TrigramChar = char;
// NOTE(review): file ids are 16 bits wide and are handed out by incrementing a
// counter, so they wrap after 65535 distinct paths - confirm this is acceptable.
using FileId = uint16_t;

// Minimum interval, in milliseconds, between two "worked" progress
// notifications.
const int kMinTimeoutBetweenWorkedNotification = 200;
// The trigram alphabet consists of the printable ASCII characters (32-126)
// minus the capital letters, because the index is case insensitive.
const size_t kTrigramCharacterCount = ('~' - ' ' + 1) - ('Z' - 'A' + 1);
// Number of distinct trigram ids: every ordered triple of alphabet symbols.
const size_t kTrigramCount =
    kTrigramCharacterCount * kTrigramCharacterCount * kTrigramCharacterCount;
// Number of bytes requested per file read.
const int kMaxReadLength = 10 * 1024;
// Sentinel for a character that cannot be part of any trigram.
const TrigramChar kUndefinedTrigramChar = -1;
// Sentinel for a character that marks the containing file as binary.
const TrigramChar kBinaryTrigramChar = -2;
// Sentinel for "no trigram at this position".
const Trigram kUndefinedTrigram = -1;

// Returns true when |c| lies in the ASCII range 'A'..'Z' (inclusive).
template <typename Char>
bool IsAsciiUpper(Char c) {
  if (c < 'A')
    return false;
  return c <= 'Z';
}

// In-memory trigram index. For every trigram id it keeps the ids of the files
// that were registered with that trigram, plus the modification time recorded
// for each indexed file.
class Index {
 public:
  Index();

  // Returns the modification time stored for |file_path|, or a
  // default-constructed Time when the file has no stored time.
  Time LastModifiedTimeForFile(const FilePath& file_path);

  // Registers |file_path| under every trigram in |index| and stores |time| as
  // the file's modification time.
  void SetTrigramsForFile(const FilePath& file_path,
                          const vector<Trigram>& index,
                          const Time& time);

  // Returns the known files that are listed under every trigram of |query|.
  // A query that yields no trigram returns every known file.
  vector<FilePath> Search(string query);

  // Logs size statistics about the index.
  void PrintStats();

  // Sorts (and compacts) every per-trigram file list modified since the
  // previous call.
  void NormalizeVectors();

 private:
  ~Index();

  // Returns the id assigned to |file_path|, assigning a new one if needed.
  FileId GetFileId(const FilePath& file_path);

  using FileIdsMap = map<FilePath, FileId>;
  using IndexedFilesMap = map<FilePath, Time>;

  // File path -> assigned file id.
  FileIdsMap file_ids_;
  // The most recently assigned file id.
  FileId last_file_id_;
  // The index in this vector is the trigram id.
  vector<vector<FileId>> index_;
  // File path -> modification time recorded when the file was indexed.
  IndexedFilesMap index_times_;
  // Per trigram id: false while the matching list in |index_| has entries
  // appended since it was last normalized.
  vector<bool> is_normalized_;

  DISALLOW_COPY_AND_ASSIGN(Index);
};

// The single Index instance used by every indexing job and search in this
// file; accessed through g_trigram_index.Get().
// NOTE(review): the Leaky trait presumably means the instance is never
// destroyed, which would match Index's private destructor - confirm against
// base/lazy_instance.h.
base::LazyInstance<Index>::Leaky g_trigram_index = LAZY_INSTANCE_INITIALIZER;

// Maps a raw byte to its symbol in the trigram alphabet.
//
// Returns kBinaryTrigramChar for control bytes (0-8, 14-31 and 127),
// kUndefinedTrigramChar for bytes above 127 and for the remaining characters
// below ' ' (10-13), and otherwise a value in [0, kTrigramCharacterCount).
// Tabs are treated as spaces and capital letters as their lowercase form.
TrigramChar TrigramCharForChar(char c) {
  // Lookup table indexed by the unsigned byte value. It is built on the first
  // call and never freed.
  static TrigramChar* table = nullptr;
  if (table == nullptr) {
    table = new TrigramChar[256];
    for (size_t code = 0; code < 256; ++code) {
      if (code > 127) {
        table[code] = kUndefinedTrigramChar;
        continue;
      }

      char normalized = static_cast<char>(code);
      if (normalized == '\t')
        normalized = ' ';
      if (IsAsciiUpper(normalized))
        normalized = normalized - 'A' + 'a';

      const bool is_binary_char = normalized < 9 ||
                                  (normalized >= 14 && normalized < 32) ||
                                  normalized == 127;
      if (is_binary_char) {
        table[code] = kBinaryTrigramChar;
      } else if (normalized < ' ') {
        table[code] = kUndefinedTrigramChar;
      } else {
        // Capitals were folded to lowercase above, so everything past 'Z' is
        // shifted down by 26 to close the gap the capitals left behind.
        if (normalized >= 'Z')
          normalized = normalized - 'Z' - 1 + 'A';
        // Rebase so that ' ' becomes symbol 0.
        normalized -= ' ';
        char signed_trigram_count = static_cast<char>(kTrigramCharacterCount);
        CHECK(normalized >= 0 && normalized < signed_trigram_count);
        table[code] = normalized;
      }
    }
  }
  return table[static_cast<unsigned char>(c)];
}

// Returns the id of the trigram formed by the three consecutive entries of
// |trigram_chars| starting at |index|, or kUndefinedTrigram if any of the
// three is kUndefinedTrigramChar. The caller must ensure that |index + 2| is a
// valid position in |trigram_chars|.
Trigram TrigramAtIndex(const vector<TrigramChar>& trigram_chars, size_t index) {
  static int kTrigramCharacterCountSquared =
      kTrigramCharacterCount * kTrigramCharacterCount;
  for (size_t offset = 0; offset < 3; ++offset) {
    if (trigram_chars[index + offset] == kUndefinedTrigramChar)
      return kUndefinedTrigram;
  }
  // Interpret the three symbols as digits of a base-kTrigramCharacterCount
  // number, most significant first.
  Trigram id = kTrigramCharacterCountSquared * trigram_chars[index] +
               kTrigramCharacterCount * trigram_chars[index + 1] +
               trigram_chars[index + 2];
  return id;
}

// Creates an empty index: one (empty) file list per possible trigram id, all
// of them flagged as normalized.
Index::Index()
    : last_file_id_(0),
      index_(kTrigramCount),
      is_normalized_(kTrigramCount, true) {}

// Nothing to release beyond what the members clean up themselves.
Index::~Index() = default;

// Returns the modification time stored for |file_path| by SetTrigramsForFile,
// or a default-constructed Time if nothing is stored for that path.
Time Index::LastModifiedTimeForFile(const FilePath& file_path) {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  IndexedFilesMap::const_iterator entry = index_times_.find(file_path);
  if (entry == index_times_.end())
    return Time();
  return entry->second;
}

void Index::SetTrigramsForFile(const FilePath& file_path,
                               const vector<Trigram>& index,
                               const Time& time) {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  FileId file_id = GetFileId(file_path);
  auto it = index.begin();
  for (; it != index.end(); ++it) {
    Trigram trigram = *it;
    index_[trigram].push_back(file_id);
    is_normalized_[trigram] = false;
  }
  index_times_[file_path] = time;
}

// Returns the paths of all known files that are listed under every trigram of
// |query|. If |query| produces no trigram at all (e.g. it is shorter than
// three characters), every known file is returned.
vector<FilePath> Index::Search(string query) {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);

  // Translate the query into trigram symbols. Binary characters are simply
  // treated as undefined here, so trigrams containing them are skipped.
  vector<TrigramChar> query_chars;
  query_chars.reserve(query.size());
  for (const char c : query) {
    const TrigramChar mapped = TrigramCharForChar(c);
    query_chars.push_back(mapped == kBinaryTrigramChar ? kUndefinedTrigramChar
                                                       : mapped);
  }

  // Collect every well-defined trigram of the query.
  vector<Trigram> trigrams;
  for (size_t pos = 0; pos + 2 < query_chars.size(); ++pos) {
    const Trigram trigram = TrigramAtIndex(query_chars, pos);
    if (trigram != kUndefinedTrigram)
      trigrams.push_back(trigram);
  }

  // Start from the file list of the first trigram and narrow it down with the
  // list of each following trigram.
  // NOTE(review): the intersection presumably requires sorted lists, which is
  // what NormalizeVectors() produces - confirm against base/stl_util.h.
  set<FileId> matching_ids;
  for (auto it = trigrams.begin(); it != trigrams.end(); ++it) {
    const vector<FileId>& file_list = index_[*it];
    if (it == trigrams.begin()) {
      matching_ids.insert(file_list.begin(), file_list.end());
      continue;
    }
    set<FileId> narrowed =
        base::STLSetIntersection<set<FileId> >(matching_ids, file_list);
    matching_ids.swap(narrowed);
  }

  // Map the surviving ids back to paths.
  const bool match_everything = trigrams.empty();
  vector<FilePath> result;
  for (const auto& path_and_id : file_ids_) {
    if (match_everything || matching_ids.count(path_and_id.second) != 0)
      result.push_back(path_and_id.first);
  }
  return result;
}

// Returns the id associated with |file_path|. A path seen for the first time
// is assigned the next id (the previous id plus one).
//
// NOTE(review): FileId is a 16-bit unsigned type in this file, so the counter
// wraps after 65535 distinct paths and ids would then be shared - confirm
// whether that limit matters for callers.
FileId Index::GetFileId(const FilePath& file_path) {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  // Single lookup; the path is not converted to a string because nothing here
  // needs its textual form.
  FileIdsMap::const_iterator known = file_ids_.find(file_path);
  if (known != file_ids_.end())
    return known->second;
  file_ids_[file_path] = ++last_file_id_;
  return last_file_id_;
}

void Index::NormalizeVectors() {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  for (size_t i = 0; i < kTrigramCount; ++i) {
    if (!is_normalized_[i]) {
      std::sort(index_[i].begin(), index_[i].end());
      if (index_[i].capacity() > index_[i].size())
        vector<FileId>(index_[i]).swap(index_[i]);
      is_normalized_[i] = true;
    }
  }
}

void Index::PrintStats() {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  LOG(ERROR) << "Index stats:";
  size_t size = 0;
  size_t maxSize = 0;
  size_t capacity = 0;
  for (size_t i = 0; i < kTrigramCount; ++i) {
    if (index_[i].size() > maxSize)
      maxSize = index_[i].size();
    size += index_[i].size();
    capacity += index_[i].capacity();
  }
  LOG(ERROR) << "  - total trigram count: " << size;
  LOG(ERROR) << "  - max file count per trigram: " << maxSize;
  LOG(ERROR) << "  - total vectors capacity " << capacity;
  size_t total_index_size =
      capacity * sizeof(FileId) + sizeof(vector<FileId>) * kTrigramCount;
  LOG(ERROR) << "  - estimated total index size " << total_index_size;
}

// Callback taking a bool and a vector of bools.
// NOTE(review): nothing in the visible part of this file refers to this alias;
// confirm whether it is still needed.
typedef Callback<void(bool, const vector<bool>&)> IndexerCallback;

}  // namespace

// Stores the directory to index and the three progress callbacks, and binds
// the file proxy used for reads to the FILE thread's task runner.
DevToolsFileSystemIndexer::FileSystemIndexingJob::FileSystemIndexingJob(
    const FilePath& file_system_path,
    const TotalWorkCallback& total_work_callback,
    const WorkedCallback& worked_callback,
    const DoneCallback& done_callback)
    : file_system_path_(file_system_path),
      total_work_callback_(total_work_callback),
      worked_callback_(worked_callback),
      done_callback_(done_callback),
      current_file_(
          BrowserThread::GetTaskRunnerForThread(BrowserThread::FILE).get()),
      files_indexed_(0),
      stopped_(false) {
  // Room for the worst case: a file containing every possible trigram.
  current_trigrams_.reserve(kTrigramCount);
  // One "already seen in the current file" flag per trigram id.
  current_trigrams_set_.resize(kTrigramCount);
}

// No explicit cleanup; members release their own resources.
DevToolsFileSystemIndexer::FileSystemIndexingJob::~FileSystemIndexingJob() =
    default;

// Starts the job: must be called on the UI thread, and schedules the file
// enumeration (CollectFilesToIndex) on the FILE thread.
void DevToolsFileSystemIndexer::FileSystemIndexingJob::Start() {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  const auto collect_task =
      Bind(&FileSystemIndexingJob::CollectFilesToIndex, this);
  BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE, collect_task);
}

// Requests cancellation: must be called on the UI thread, and schedules
// StopOnFileThread on the FILE thread, where the stop flag is set.
void DevToolsFileSystemIndexer::FileSystemIndexingJob::Stop() {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  const auto stop_task = Bind(&FileSystemIndexingJob::StopOnFileThread, this);
  BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE, stop_task);
}

void DevToolsFileSystemIndexer::FileSystemIndexingJob::StopOnFileThread() {
  stopped_ = true;
}

void DevToolsFileSystemIndexer::FileSystemIndexingJob::CollectFilesToIndex() {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  if (stopped_)
    return;
  if (!file_enumerator_) {
    file_enumerator_.reset(
        new FileEnumerator(file_system_path_, true, FileEnumerator::FILES));
  }
  FilePath file_path = file_enumerator_->Next();
  if (file_path.empty()) {
    BrowserThread::PostTask(
        BrowserThread::UI,
        FROM_HERE,
        Bind(total_work_callback_, file_path_times_.size()));
    indexing_it_ = file_path_times_.begin();
    IndexFiles();
    return;
  }
  Time saved_last_modified_time =
      g_trigram_index.Get().LastModifiedTimeForFile(file_path);
  FileEnumerator::FileInfo file_info = file_enumerator_->GetInfo();
  Time current_last_modified_time = file_info.GetLastModifiedTime();
  if (current_last_modified_time > saved_last_modified_time) {
    file_path_times_[file_path] = current_last_modified_time;
  }
  BrowserThread::PostTask(
      BrowserThread::FILE,
      FROM_HERE,
      Bind(&FileSystemIndexingJob::CollectFilesToIndex, this));
}

// Opens the next queued file for reading, or - when the queue is exhausted -
// normalizes the index and posts |done_callback_| to the UI thread. Does
// nothing once the job has been stopped.
void DevToolsFileSystemIndexer::FileSystemIndexingJob::IndexFiles() {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  if (stopped_)
    return;

  if (indexing_it_ != file_path_times_.end()) {
    // StartFileIndexing runs once the open attempt has completed.
    current_file_.CreateOrOpen(
        indexing_it_->first,
        base::File::FLAG_OPEN | base::File::FLAG_READ,
        Bind(&FileSystemIndexingJob::StartFileIndexing, this));
    return;
  }

  g_trigram_index.Get().NormalizeVectors();
  BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, done_callback_);
}

// Callback for the open request issued by IndexFiles(). If the file was
// opened, resets the per-file state and issues the first read; otherwise the
// file is reported as failed. |error| itself is not inspected - only the
// validity of |current_file_| is.
void DevToolsFileSystemIndexer::FileSystemIndexingJob::StartFileIndexing(
    base::File::Error error) {
  if (current_file_.IsValid()) {
    current_file_offset_ = 0;
    current_trigrams_.clear();
    std::fill(current_trigrams_set_.begin(), current_trigrams_set_.end(),
              false);
    ReadFromFile();
  } else {
    FinishFileIndexing(false);
  }
}

// Requests up to kMaxReadLength bytes at the current offset; OnRead receives
// the result. If the job has been stopped, closes the file instead.
void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReadFromFile() {
  if (!stopped_) {
    current_file_.Read(current_file_offset_, kMaxReadLength,
                       Bind(&FileSystemIndexingJob::OnRead, this));
  } else {
    CloseFile();
  }
}

// Callback for ReadFromFile(). Extracts the trigrams of the chunk in |data|
// (|bytes_read| bytes) into |current_trigrams_|, then requests the next chunk.
//
// - A read error finishes the file as failed.
// - Fewer than three bytes cannot form a trigram, so the file is finished
//   successfully.
// - A binary character discards all trigrams collected for the file and
//   finishes it successfully.
void DevToolsFileSystemIndexer::FileSystemIndexingJob::OnRead(
    base::File::Error error,
    const char* data,
    int bytes_read) {
  if (error != base::File::FILE_OK) {
    FinishFileIndexing(false);
    return;
  }

  if (bytes_read < 3) {
    FinishFileIndexing(true);
    return;
  }

  const size_t length = static_cast<size_t>(bytes_read);
  vector<TrigramChar> chunk_chars;
  chunk_chars.reserve(length);
  for (const char* cursor = data; cursor != data + length; ++cursor) {
    const TrigramChar mapped = TrigramCharForChar(*cursor);
    if (mapped == kBinaryTrigramChar) {
      current_trigrams_.clear();
      FinishFileIndexing(true);
      return;
    }
    chunk_chars.push_back(mapped);
  }

  for (size_t pos = 0; pos + 2 < length; ++pos) {
    const Trigram trigram = TrigramAtIndex(chunk_chars, pos);
    // Skip undefined trigrams and ones already recorded for this file.
    if (trigram == kUndefinedTrigram || current_trigrams_set_[trigram])
      continue;
    current_trigrams_set_[trigram] = true;
    current_trigrams_.push_back(trigram);
  }

  // Re-read the last two bytes with the next chunk so that trigrams spanning
  // the chunk boundary are not missed.
  current_file_offset_ += bytes_read - 2;
  ReadFromFile();
}

// Wraps up the current file: closes it, stores the collected trigrams in the
// index when |success| is true, reports progress, and moves on to the next
// queued file.
void DevToolsFileSystemIndexer::FileSystemIndexingJob::FinishFileIndexing(
    bool success) {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  CloseFile();
  if (success) {
    // |indexing_it_| iterates over |file_path_times_|, so it already carries
    // both the path and the modification time queued for it.
    g_trigram_index.Get().SetTrigramsForFile(
        indexing_it_->first, current_trigrams_, indexing_it_->second);
  }
  ReportWorked();
  ++indexing_it_;
  IndexFiles();
}

// Closes |current_file_| if it is currently open; otherwise does nothing.
void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseFile() {
  if (!current_file_.IsValid())
    return;
  current_file_.Close(Bind(&FileSystemIndexingJob::CloseCallback, this));
}

// Callback passed to current_file_.Close() by CloseFile(). The body is empty:
// the result of closing the file (|error|) is ignored.
void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseCallback(
    base::File::Error error) {}

void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReportWorked() {
  TimeTicks current_time = TimeTicks::Now();
  bool should_send_worked_nitification = true;
  if (!last_worked_notification_time_.is_null()) {
    TimeDelta delta = current_time - last_worked_notification_time_;
    if (delta.InMilliseconds() < kMinTimeoutBetweenWorkedNotification)
      should_send_worked_nitification = false;
  }
  ++files_indexed_;
  if (should_send_worked_nitification) {
    last_worked_notification_time_ = current_time;
    BrowserThread::PostTask(
        BrowserThread::UI, FROM_HERE, Bind(worked_callback_, files_indexed_));
    files_indexed_ = 0;
  }
}

// The indexer itself needs no explicit setup; the trigram index is held in a
// file-level lazy instance rather than in this object.
DevToolsFileSystemIndexer::DevToolsFileSystemIndexer() = default;

// Nothing to tear down explicitly.
DevToolsFileSystemIndexer::~DevToolsFileSystemIndexer() = default;

// Creates an indexing job for the directory |file_system_path| (UTF-8),
// starts it, and returns it. Must be called on the UI thread. The three
// callbacks are handed to the job, which posts them to the UI thread to
// report the amount of work, progress and completion.
scoped_refptr<DevToolsFileSystemIndexer::FileSystemIndexingJob>
DevToolsFileSystemIndexer::IndexPath(
    const string& file_system_path,
    const TotalWorkCallback& total_work_callback,
    const WorkedCallback& worked_callback,
    const DoneCallback& done_callback) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  const FilePath root = FilePath::FromUTF8Unsafe(file_system_path);
  scoped_refptr<FileSystemIndexingJob> job(new FileSystemIndexingJob(
      root, total_work_callback, worked_callback, done_callback));
  job->Start();
  return job;
}

// Schedules a search for |query| among the indexed files below
// |file_system_path|. Must be called on the UI thread; the lookup itself runs
// on the FILE thread in SearchInPathOnFileThread, which delivers the result
// through |callback|.
void DevToolsFileSystemIndexer::SearchInPath(const string& file_system_path,
                                             const string& query,
                                             const SearchCallback& callback) {
  DCHECK_CURRENTLY_ON(BrowserThread::UI);
  const auto search_task =
      Bind(&DevToolsFileSystemIndexer::SearchInPathOnFileThread,
           this,
           file_system_path,
           query,
           callback);
  BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE, search_task);
}

void DevToolsFileSystemIndexer::SearchInPathOnFileThread(
    const string& file_system_path,
    const string& query,
    const SearchCallback& callback) {
  DCHECK_CURRENTLY_ON(BrowserThread::FILE);
  vector<FilePath> file_paths = g_trigram_index.Get().Search(query);
  vector<string> result;
  FilePath path = FilePath::FromUTF8Unsafe(file_system_path);
  vector<FilePath>::const_iterator it = file_paths.begin();
  for (; it != file_paths.end(); ++it) {
    if (path.IsParent(*it))
      result.push_back(it->AsUTF8Unsafe());
  }
  BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, Bind(callback, result));
}

}  // namespace brightray