Fixed file list sorting

Now using a natural sort algorithm that is more consistent between JS
and PHP (although not perfect in some corner cases)

- added OC.Util.naturalSortComparator that uses the same algo that was
  used for the user list
- changed user list and files list to use OC.Util.naturalSortComparator
- removed toLowerCase() and changed the comparator to use
  String.localeCompare()
- added unit tests
- added OC_NaturalSort that is used by OCP\Util::naturalSortCompare()
This commit is contained in:
Vincent Petry 2014-02-18 12:29:05 +01:00
parent 86ae3bd1e7
commit 173059f6d0
9 changed files with 516 additions and 4 deletions

View file

@ -1860,7 +1860,7 @@
if (fileInfo1.type !== 'dir' && fileInfo2.type === 'dir') {
return 1;
}
return fileInfo1.name.localeCompare(fileInfo2.name);
return OC.Util.naturalSortCompare(fileInfo1.name, fileInfo2.name);
},
/**
* Compares two file infos by size.

View file

@ -66,7 +66,7 @@ class Helper
} elseif ($aType !== 'dir' and $bType === 'dir') {
return 1;
} else {
return strnatcasecmp($a->getName(), $b->getName());
return \OCP\Util::naturalSortCompare($a->getName(), $b->getName());
}
}

View file

@ -1365,8 +1365,56 @@ OC.Util = {
// FIXME: likely to break when crossing DST
// would be better to use a library like momentJS
return new Date(date.getFullYear(), date.getMonth(), date.getDate());
},
_chunkify: function(t) {
// Adapted from http://my.opera.com/GreyWyvern/blog/show.dml/1671288
var tz = [], x = 0, y = -1, n = 0, code, c;
while (x < t.length) {
c = t.charAt(x);
var m = (c === '.' || (c >= '0' && c <= '9'));
if (m !== n) {
// next chunk
y++;
tz[y] = '';
n = m;
}
tz[y] += c;
x++;
}
return tz;
},
/**
* Compare two strings to provide a natural sort
* @param a first string to compare
* @param b second string to compare
* @return -1 if b comes before a, 1 if a comes before b
* or 0 if the strings are identical
*/
naturalSortCompare: function(a, b) {
var aa = OC.Util._chunkify(a);
var bb = OC.Util._chunkify(b);
for (x = 0; aa[x] && bb[x]; x++) {
if (aa[x] !== bb[x]) {
var aNum = Number(aa[x]), bNum = Number(bb[x]);
// note: == is correct here
if (aNum == aa[x] && bNum == bb[x]) {
return aNum - bNum;
} else {
// Forcing 'en' locale to match the server-side locale which is
// always 'en'.
//
// Note: This setting isn't supported by all browsers but for the ones
// that do there will be more consistency between client-server sorting
return aa[x].localeCompare(bb[x], 'en');
}
}
}
return aa.length - bb.length;
}
};
}
/**
* Utility class for the history API,

View file

@ -77,6 +77,8 @@ window.Snap.prototype = {
close: function() {}
};
window.isPhantom = /phantom/i.test(navigator.userAgent);
// global setup for all tests
(function setupTests() {
var fakeServer = null,

View file

@ -473,5 +473,160 @@ describe('Core base tests', function() {
});
});
});
describe('naturalSortCompare', function() {
// cit() will skip tests if running on PhantomJS because it has issues with
// localeCompare(). See https://github.com/ariya/phantomjs/issues/11063
//
// Please make sure to run these tests in Chrome/Firefox manually
// to make sure they run.
var cit = window.isPhantom?xit:it;
// must provide the same results as \OC_Util::naturalSortCompare
it('sorts alphabetically', function() {
var a = [
'def',
'xyz',
'abc'
];
a.sort(OC.Util.naturalSortCompare);
expect(a).toEqual([
'abc',
'def',
'xyz'
]);
});
cit('sorts with different casing', function() {
var a = [
'aaa',
'bbb',
'BBB',
'AAA'
];
a.sort(OC.Util.naturalSortCompare);
expect(a).toEqual([
'aaa',
'AAA',
'bbb',
'BBB'
]);
});
it('sorts with numbers', function() {
var a = [
'124.txt',
'abc1',
'123.txt',
'abc',
'abc2',
'def (2).txt',
'ghi 10.txt',
'abc12',
'def.txt',
'def (1).txt',
'ghi 2.txt',
'def (10).txt',
'abc10',
'def (12).txt',
'z',
'ghi.txt',
'za',
'ghi 1.txt',
'ghi 12.txt',
'zz'
];
a.sort(OC.Util.naturalSortCompare);
expect(a).toEqual([
'123.txt',
'124.txt',
'abc',
'abc1',
'abc2',
'abc10',
'abc12',
'def.txt',
'def (1).txt',
'def (2).txt',
'def (10).txt',
'def (12).txt',
'ghi.txt',
'ghi 1.txt',
'ghi 2.txt',
'ghi 10.txt',
'ghi 12.txt',
'z',
'za',
'zz'
]);
});
it('sorts with chinese characters', function() {
var a = [
'十.txt',
'一.txt',
'二.txt',
'十 2.txt',
'三.txt',
'四.txt',
'abc.txt',
'五.txt',
'七.txt',
'八.txt',
'九.txt',
'六.txt',
'十一.txt',
'波.txt',
'破.txt',
'莫.txt',
'啊.txt',
'123.txt'
];
a.sort(OC.Util.naturalSortCompare);
expect(a).toEqual([
'123.txt',
'abc.txt',
'一.txt',
'七.txt',
'三.txt',
'九.txt',
'二.txt',
'五.txt',
'八.txt',
'六.txt',
'十.txt',
'十 2.txt',
'十一.txt',
'啊.txt',
'四.txt',
'波.txt',
'破.txt',
'莫.txt'
]);
});
cit('sorts with umlauts', function() {
var a = [
'öh.txt',
'Äh.txt',
'oh.txt',
'Üh 2.txt',
'Üh.txt',
'ah.txt',
'Öh.txt',
'uh.txt',
'üh.txt',
'äh.txt'
];
a.sort(OC.Util.naturalSortCompare);
expect(a).toEqual([
'ah.txt',
'äh.txt',
'Äh.txt',
'oh.txt',
'öh.txt',
'Öh.txt',
'uh.txt',
'üh.txt',
'Üh.txt',
'Üh 2.txt'
]);
});
});
});

116
lib/private/naturalsort.php Normal file
View file

@ -0,0 +1,116 @@
<?php
/**
* Copyright (c) 2014 Vincent Petry <PVince81@owncloud.com>
* This file is licensed under the Affero General Public License version 3 or
* later.
* See the COPYING-README file.
*
*/
namespace OC;
class NaturalSort_DefaultCollator {
public function compare($a, $b) {
if ($a === $b) {
return 0;
}
return ($a < $b) ? -1 : 1;
}
}
class NaturalSort {
private static $instance;
private $collator;
/**
* Split the given string in chunks of numbers and strings
* @param string $t string
* @return array of strings and number chunks
*/
private function naturalSortChunkify($t) {
// Adapted and ported to PHP from
// http://my.opera.com/GreyWyvern/blog/show.dml/1671288
$tz = array();
$x = 0;
$y = -1;
$n = null;
$length = strlen($t);
while ($x < $length) {
$c = $t[$x];
$m = ($c === '.' || ($c >= '0' && $c <= '9'));
if ($m !== $n) {
// next chunk
$y++;
$tz[$y] = '';
$n = $m;
}
$tz[$y] .= $c;
$x++;
}
return $tz;
}
/**
* Returns the string collator
* @return Collator string collator
*/
private function getCollator() {
if (!isset($this->collator)) {
// looks like the default is en_US_POSIX which yields wrong sorting with
// German umlauts, so using en_US instead
if (class_exists('Collator')) {
$this->collator = new \Collator('en_US');
}
else {
$this->collator = new \OC\NaturalSort_DefaultCollator();
}
}
return $this->collator;
}
/**
* Compare two strings to provide a natural sort
* @param $a first string to compare
* @param $b second string to compare
* @return -1 if $b comes before $a, 1 if $a comes before $b
* or 0 if the strings are identical
*/
public function compare($a, $b) {
// Needed because PHP doesn't sort correctly when numbers are enclosed in
// parenthesis, even with NUMERIC_COLLATION enabled.
// For example it gave ["test (2).txt", "test.txt"]
// instead of ["test.txt", "test (2).txt"]
$aa = self::naturalSortChunkify($a);
$bb = self::naturalSortChunkify($b);
$alen = count($aa);
$blen = count($bb);
for ($x = 0; $x < $alen && $x < $blen; $x++) {
$aChunk = $aa[$x];
$bChunk = $bb[$x];
if ($aChunk !== $bChunk) {
if (is_numeric($aChunk) && is_numeric($bChunk)) {
$aNum = (int)$aChunk;
$bNum = (int)$bChunk;
return $aNum - $bNum;
}
return self::getCollator()->compare($aChunk, $bChunk);
}
}
return $alen - $blen;
}
/**
* Returns a singleton
* @return \OC\NaturalSort instance
*/
public static function getInstance() {
if (!isset(self::$instance)) {
self::$instance = new \OC\NaturalSort();
}
return self::$instance;
}
}

View file

@ -510,6 +510,17 @@ class Util {
return \OC_Util::generateRandomBytes($length);
}
/**
* Compare two strings to provide a natural sort
* @param $a first string to compare
* @param $b second string to compare
* @return -1 if $b comes before $a, 1 if $a comes before $b
* or 0 if the strings are identical
*/
public static function naturalSortCompare($a, $b) {
return \OC\NaturalSort::getInstance()->compare($a, $b);
}
/**
* check if a password is required for each public link
* @return boolean

View file

@ -189,13 +189,15 @@ var UserList = {
var rows = $userListBody.find('tr').get();
rows.sort(function(a, b) {
// FIXME: inefficient way of getting the names,
// better use a data attribute
a = $(a).find('td.name').text();
b = $(b).find('td.name').text();
var firstSort = UserList.preSortSearchString(a, b);
if(typeof firstSort !== 'undefined') {
return firstSort;
}
return UserList.alphanum(a, b);
return OC.Util.naturalSortCompare(a, b);
});
var items = [];

178
tests/lib/naturalsort.php Normal file
View file

@ -0,0 +1,178 @@
<?php
/**
* Copyright (c) 2014 Vincent Petry <PVince81@owncloud.com>
* This file is licensed under the Affero General Public License version 3 or
* later.
* See the COPYING-README file.
*/
class Test_NaturalSort extends PHPUnit_Framework_TestCase {
public function setUp() {
if(!class_exists('Collator')) {
$this->markTestSkipped('The intl module is not available, natural sorting will not work as expected.');
return;
}
}
/**
* @dataProvider naturalSortDataProvider
*/
public function testNaturalSortCompare($array, $sorted)
{
$comparator = \OC\NaturalSort::getInstance();
usort($array, array($comparator, 'compare'));
$this->assertEquals($sorted, $array);
}
/**
* Data provider for natural sort.
* Must provide the same result as in core/js/tests/specs/coreSpec.js
* @return array test cases
*/
public function naturalSortDataProvider()
{
return array(
// different casing
array(
// unsorted
array(
'aaa',
'bbb',
'BBB',
'AAA'
),
// sorted
array(
'aaa',
'AAA',
'bbb',
'BBB'
)
),
// numbers
array(
// unsorted
array(
'124.txt',
'abc1',
'123.txt',
'abc',
'abc2',
'def (2).txt',
'ghi 10.txt',
'abc12',
'def.txt',
'def (1).txt',
'ghi 2.txt',
'def (10).txt',
'abc10',
'def (12).txt',
'z',
'ghi.txt',
'za',
'ghi 1.txt',
'ghi 12.txt',
'zz',
),
// sorted
array(
'123.txt',
'124.txt',
'abc',
'abc1',
'abc2',
'abc10',
'abc12',
'def.txt',
'def (1).txt',
'def (2).txt',
'def (10).txt',
'def (12).txt',
'ghi.txt',
'ghi 1.txt',
'ghi 2.txt',
'ghi 10.txt',
'ghi 12.txt',
'z',
'za',
'zz',
)
),
// chinese characters
array(
// unsorted
array(
'十.txt',
'一.txt',
'二.txt',
'十 2.txt',
'三.txt',
'四.txt',
'abc.txt',
'五.txt',
'七.txt',
'八.txt',
'九.txt',
'六.txt',
'十一.txt',
'波.txt',
'破.txt',
'莫.txt',
'啊.txt',
'123.txt',
),
// sorted
array(
'123.txt',
'abc.txt',
'一.txt',
'七.txt',
'三.txt',
'九.txt',
'二.txt',
'五.txt',
'八.txt',
'六.txt',
'十.txt',
'十 2.txt',
'十一.txt',
'啊.txt',
'四.txt',
'波.txt',
'破.txt',
'莫.txt',
)
),
// with umlauts
array(
// unsorted
array(
'öh.txt',
'Äh.txt',
'oh.txt',
'Üh 2.txt',
'Üh.txt',
'ah.txt',
'Öh.txt',
'uh.txt',
'üh.txt',
'äh.txt',
),
// sorted
array(
'ah.txt',
'äh.txt',
'Äh.txt',
'oh.txt',
'öh.txt',
'Öh.txt',
'uh.txt',
'üh.txt',
'Üh.txt',
'Üh 2.txt',
)
),
);
}
}